Skip to content

Commit

Permalink
Merge pull request #19 from beeker1121/fold-width
Browse files Browse the repository at this point in the history
Conform to RFC 5895: Fold character width
  • Loading branch information
mna authored Nov 15, 2016
2 parents 8a29053 + 90f7a05 commit 57f76c2
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 16 deletions.
36 changes: 20 additions & 16 deletions purell.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ import (

"github.com/PuerkitoBio/urlesc"
"golang.org/x/net/idna"
"golang.org/x/text/secure/precis"
"golang.org/x/text/unicode/norm"
"golang.org/x/text/width"
)

// A set of normalization flags determines how a URL will
Expand Down Expand Up @@ -150,22 +150,26 @@ func MustNormalizeURLString(u string, f NormalizationFlags) string {
// NormalizeURLString returns the normalized string, or an error if it can't be parsed into an URL object.
// It takes an URL string as input, as well as the normalization flags.
func NormalizeURLString(u string, f NormalizationFlags) (string, error) {
if parsed, e := url.Parse(u); e != nil {
return "", e
} else {
options := make([]precis.Option, 1, 3)
options[0] = precis.IgnoreCase
if f&FlagLowercaseHost == FlagLowercaseHost {
options = append(options, precis.FoldCase())
}
options = append(options, precis.Norm(norm.NFC))
profile := precis.NewFreeform(options...)
if parsed.Host, e = idna.ToASCII(profile.NewTransformer().String(parsed.Host)); e != nil {
return "", e
}
return NormalizeURL(parsed, f), nil
parsed, err := url.Parse(u)
if err != nil {
return "", err
}

if f&FlagLowercaseHost == FlagLowercaseHost {
parsed.Host = strings.ToLower(parsed.Host)
}
panic("Unreachable code.")

// The idna package doesn't fully conform to RFC 5895
// (https://tools.ietf.org/html/rfc5895), so we do it here.
// Taken from Go 1.8 cycle source, courtesy of bradfitz.
// TODO: Remove when (if?) idna package conforms to RFC 5895.
parsed.Host = width.Fold.String(parsed.Host)
parsed.Host = norm.NFC.String(parsed.Host)
if parsed.Host, err = idna.ToASCII(parsed.Host); err != nil {
return "", err
}

return NormalizeURL(parsed, f), nil
}

// NormalizeURL returns the normalized string.
Expand Down
1 change: 1 addition & 0 deletions urlnorm_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ func TestUrlnorm(t *testing.T) {
"http://XBLA\u306eXbox.com": "http://xn--xblaxbox-jf4g.com", //test utf8 and unicode
"http://президент.рф": "http://xn--d1abbgf6aiiy.xn--p1ai",
"http://ПРЕЗИДЕНТ.РФ": "http://xn--d1abbgf6aiiy.xn--p1ai",
"http://ab¥ヲ₩○.com": "http://xn--ab-ida8983azmfnvs.com", //test width folding
"http://\u00e9.com": "http://xn--9ca.com",
"http://e\u0301.com": "http://xn--9ca.com",
"http://ja.wikipedia.org/wiki/%E3%82%AD%E3%83%A3%E3%82%BF%E3%83%94%E3%83%A9%E3%83%BC%E3%82%B8%E3%83%A3%E3%83%91%E3%83%B3": "http://ja.wikipedia.org/wiki/%E3%82%AD%E3%83%A3%E3%82%BF%E3%83%94%E3%83%A9%E3%83%BC%E3%82%B8%E3%83%A3%E3%83%91%E3%83%B3",
Expand Down

0 comments on commit 57f76c2

Please sign in to comment.