diff --git a/vendor/golang.org/x/net/publicsuffix/gen.go b/vendor/golang.org/x/net/publicsuffix/gen.go new file mode 100644 index 000000000..a2d499529 --- /dev/null +++ b/vendor/golang.org/x/net/publicsuffix/gen.go @@ -0,0 +1,713 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build ignore + +package main + +// This program generates table.go and table_test.go based on the authoritative +// public suffix list at https://publicsuffix.org/list/effective_tld_names.dat +// +// The version is derived from +// https://api.github.com/repos/publicsuffix/list/commits?path=public_suffix_list.dat +// and a human-readable form is at +// https://github.com/publicsuffix/list/commits/master/public_suffix_list.dat +// +// To fetch a particular git revision, such as 5c70ccd250, pass +// -url "https://raw.githubusercontent.com/publicsuffix/list/5c70ccd250/public_suffix_list.dat" +// and -version "an explicit version string". + +import ( + "bufio" + "bytes" + "flag" + "fmt" + "go/format" + "io" + "io/ioutil" + "net/http" + "os" + "regexp" + "sort" + "strings" + + "golang.org/x/net/idna" +) + +const ( + // These sum of these four values must be no greater than 32. + nodesBitsChildren = 9 + nodesBitsICANN = 1 + nodesBitsTextOffset = 15 + nodesBitsTextLength = 6 + + // These sum of these four values must be no greater than 32. + childrenBitsWildcard = 1 + childrenBitsNodeType = 2 + childrenBitsHi = 14 + childrenBitsLo = 14 +) + +var ( + maxChildren int + maxTextOffset int + maxTextLength int + maxHi uint32 + maxLo uint32 +) + +func max(a, b int) int { + if a < b { + return b + } + return a +} + +func u32max(a, b uint32) uint32 { + if a < b { + return b + } + return a +} + +const ( + nodeTypeNormal = 0 + nodeTypeException = 1 + nodeTypeParentOnly = 2 + numNodeType = 3 +) + +func nodeTypeStr(n int) string { + switch n { + case nodeTypeNormal: + return "+" + case nodeTypeException: + return "!" + case nodeTypeParentOnly: + return "o" + } + panic("unreachable") +} + +const ( + defaultURL = "https://publicsuffix.org/list/effective_tld_names.dat" + gitCommitURL = "https://api.github.com/repos/publicsuffix/list/commits?path=public_suffix_list.dat" +) + +var ( + labelEncoding = map[string]uint32{} + labelsList = []string{} + labelsMap = map[string]bool{} + rules = []string{} + + // validSuffixRE is used to check that the entries in the public suffix + // list are in canonical form (after Punycode encoding). Specifically, + // capital letters are not allowed. + validSuffixRE = regexp.MustCompile(`^[a-z0-9_\!\*\-\.]+$`) + + shaRE = regexp.MustCompile(`"sha":"([^"]+)"`) + dateRE = regexp.MustCompile(`"committer":{[^{]+"date":"([^"]+)"`) + + comments = flag.Bool("comments", false, "generate table.go comments, for debugging") + subset = flag.Bool("subset", false, "generate only a subset of the full table, for debugging") + url = flag.String("url", defaultURL, "URL of the publicsuffix.org list. If empty, stdin is read instead") + v = flag.Bool("v", false, "verbose output (to stderr)") + version = flag.String("version", "", "the effective_tld_names.dat version") +) + +func main() { + if err := main1(); err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } +} + +func main1() error { + flag.Parse() + if nodesBitsTextLength+nodesBitsTextOffset+nodesBitsICANN+nodesBitsChildren > 32 { + return fmt.Errorf("not enough bits to encode the nodes table") + } + if childrenBitsLo+childrenBitsHi+childrenBitsNodeType+childrenBitsWildcard > 32 { + return fmt.Errorf("not enough bits to encode the children table") + } + if *version == "" { + if *url != defaultURL { + return fmt.Errorf("-version was not specified, and the -url is not the default one") + } + sha, date, err := gitCommit() + if err != nil { + return err + } + *version = fmt.Sprintf("publicsuffix.org's public_suffix_list.dat, git revision %s (%s)", sha, date) + } + var r io.Reader = os.Stdin + if *url != "" { + res, err := http.Get(*url) + if err != nil { + return err + } + if res.StatusCode != http.StatusOK { + return fmt.Errorf("bad GET status for %s: %d", *url, res.Status) + } + r = res.Body + defer res.Body.Close() + } + + var root node + icann := false + br := bufio.NewReader(r) + for { + s, err := br.ReadString('\n') + if err != nil { + if err == io.EOF { + break + } + return err + } + s = strings.TrimSpace(s) + if strings.Contains(s, "BEGIN ICANN DOMAINS") { + icann = true + continue + } + if strings.Contains(s, "END ICANN DOMAINS") { + icann = false + continue + } + if s == "" || strings.HasPrefix(s, "//") { + continue + } + s, err = idna.ToASCII(s) + if err != nil { + return err + } + if !validSuffixRE.MatchString(s) { + return fmt.Errorf("bad publicsuffix.org list data: %q", s) + } + + if *subset { + switch { + case s == "ac.jp" || strings.HasSuffix(s, ".ac.jp"): + case s == "ak.us" || strings.HasSuffix(s, ".ak.us"): + case s == "ao" || strings.HasSuffix(s, ".ao"): + case s == "ar" || strings.HasSuffix(s, ".ar"): + case s == "arpa" || strings.HasSuffix(s, ".arpa"): + case s == "cy" || strings.HasSuffix(s, ".cy"): + case s == "dyndns.org" || strings.HasSuffix(s, ".dyndns.org"): + case s == "jp": + case s == "kobe.jp" || strings.HasSuffix(s, ".kobe.jp"): + case s == "kyoto.jp" || strings.HasSuffix(s, ".kyoto.jp"): + case s == "om" || strings.HasSuffix(s, ".om"): + case s == "uk" || strings.HasSuffix(s, ".uk"): + case s == "uk.com" || strings.HasSuffix(s, ".uk.com"): + case s == "tw" || strings.HasSuffix(s, ".tw"): + case s == "zw" || strings.HasSuffix(s, ".zw"): + case s == "xn--p1ai" || strings.HasSuffix(s, ".xn--p1ai"): + // xn--p1ai is Russian-Cyrillic "рф". + default: + continue + } + } + + rules = append(rules, s) + + nt, wildcard := nodeTypeNormal, false + switch { + case strings.HasPrefix(s, "*."): + s, nt = s[2:], nodeTypeParentOnly + wildcard = true + case strings.HasPrefix(s, "!"): + s, nt = s[1:], nodeTypeException + } + labels := strings.Split(s, ".") + for n, i := &root, len(labels)-1; i >= 0; i-- { + label := labels[i] + n = n.child(label) + if i == 0 { + if nt != nodeTypeParentOnly && n.nodeType == nodeTypeParentOnly { + n.nodeType = nt + } + n.icann = n.icann && icann + n.wildcard = n.wildcard || wildcard + } + labelsMap[label] = true + } + } + labelsList = make([]string, 0, len(labelsMap)) + for label := range labelsMap { + labelsList = append(labelsList, label) + } + sort.Strings(labelsList) + + if err := generate(printReal, &root, "table.go"); err != nil { + return err + } + if err := generate(printTest, &root, "table_test.go"); err != nil { + return err + } + return nil +} + +func generate(p func(io.Writer, *node) error, root *node, filename string) error { + buf := new(bytes.Buffer) + if err := p(buf, root); err != nil { + return err + } + b, err := format.Source(buf.Bytes()) + if err != nil { + return err + } + return ioutil.WriteFile(filename, b, 0644) +} + +func gitCommit() (sha, date string, retErr error) { + res, err := http.Get(gitCommitURL) + if err != nil { + return "", "", err + } + if res.StatusCode != http.StatusOK { + return "", "", fmt.Errorf("bad GET status for %s: %d", gitCommitURL, res.Status) + } + defer res.Body.Close() + b, err := ioutil.ReadAll(res.Body) + if err != nil { + return "", "", err + } + if m := shaRE.FindSubmatch(b); m != nil { + sha = string(m[1]) + } + if m := dateRE.FindSubmatch(b); m != nil { + date = string(m[1]) + } + if sha == "" || date == "" { + retErr = fmt.Errorf("could not find commit SHA and date in %s", gitCommitURL) + } + return sha, date, retErr +} + +func printTest(w io.Writer, n *node) error { + fmt.Fprintf(w, "// generated by go run gen.go; DO NOT EDIT\n\n") + fmt.Fprintf(w, "package publicsuffix\n\nvar rules = [...]string{\n") + for _, rule := range rules { + fmt.Fprintf(w, "%q,\n", rule) + } + fmt.Fprintf(w, "}\n\nvar nodeLabels = [...]string{\n") + if err := n.walk(w, printNodeLabel); err != nil { + return err + } + fmt.Fprintf(w, "}\n") + return nil +} + +func printReal(w io.Writer, n *node) error { + const header = `// generated by go run gen.go; DO NOT EDIT + +package publicsuffix + +const version = %q + +const ( + nodesBitsChildren = %d + nodesBitsICANN = %d + nodesBitsTextOffset = %d + nodesBitsTextLength = %d + + childrenBitsWildcard = %d + childrenBitsNodeType = %d + childrenBitsHi = %d + childrenBitsLo = %d +) + +const ( + nodeTypeNormal = %d + nodeTypeException = %d + nodeTypeParentOnly = %d +) + +// numTLD is the number of top level domains. +const numTLD = %d + +` + fmt.Fprintf(w, header, *version, + nodesBitsChildren, nodesBitsICANN, nodesBitsTextOffset, nodesBitsTextLength, + childrenBitsWildcard, childrenBitsNodeType, childrenBitsHi, childrenBitsLo, + nodeTypeNormal, nodeTypeException, nodeTypeParentOnly, len(n.children)) + + text := combineText(labelsList) + if text == "" { + return fmt.Errorf("internal error: makeText returned no text") + } + for _, label := range labelsList { + offset, length := strings.Index(text, label), len(label) + if offset < 0 { + return fmt.Errorf("internal error: could not find %q in text %q", label, text) + } + maxTextOffset, maxTextLength = max(maxTextOffset, offset), max(maxTextLength, length) + if offset >= 1<= 1< 64 { + n, plus = 64, " +" + } + fmt.Fprintf(w, "%q%s\n", text[:n], plus) + text = text[n:] + } + + if err := n.walk(w, assignIndexes); err != nil { + return err + } + + fmt.Fprintf(w, ` + +// nodes is the list of nodes. Each node is represented as a uint32, which +// encodes the node's children, wildcard bit and node type (as an index into +// the children array), ICANN bit and text. +// +// If the table was generated with the -comments flag, there is a //-comment +// after each node's data. In it is the nodes-array indexes of the children, +// formatted as (n0x1234-n0x1256), with * denoting the wildcard bit. The +// nodeType is printed as + for normal, ! for exception, and o for parent-only +// nodes that have children but don't match a domain label in their own right. +// An I denotes an ICANN domain. +// +// The layout within the uint32, from MSB to LSB, is: +// [%2d bits] unused +// [%2d bits] children index +// [%2d bits] ICANN bit +// [%2d bits] text index +// [%2d bits] text length +var nodes = [...]uint32{ +`, + 32-nodesBitsChildren-nodesBitsICANN-nodesBitsTextOffset-nodesBitsTextLength, + nodesBitsChildren, nodesBitsICANN, nodesBitsTextOffset, nodesBitsTextLength) + if err := n.walk(w, printNode); err != nil { + return err + } + fmt.Fprintf(w, `} + +// children is the list of nodes' children, the parent's wildcard bit and the +// parent's node type. If a node has no children then their children index +// will be in the range [0, 6), depending on the wildcard bit and node type. +// +// The layout within the uint32, from MSB to LSB, is: +// [%2d bits] unused +// [%2d bits] wildcard bit +// [%2d bits] node type +// [%2d bits] high nodes index (exclusive) of children +// [%2d bits] low nodes index (inclusive) of children +var children=[...]uint32{ +`, + 32-childrenBitsWildcard-childrenBitsNodeType-childrenBitsHi-childrenBitsLo, + childrenBitsWildcard, childrenBitsNodeType, childrenBitsHi, childrenBitsLo) + for i, c := range childrenEncoding { + s := "---------------" + lo := c & (1<> childrenBitsLo) & (1<>(childrenBitsLo+childrenBitsHi)) & (1<>(childrenBitsLo+childrenBitsHi+childrenBitsNodeType) != 0 + if *comments { + fmt.Fprintf(w, "0x%08x, // c0x%04x (%s)%s %s\n", + c, i, s, wildcardStr(wildcard), nodeTypeStr(nodeType)) + } else { + fmt.Fprintf(w, "0x%x,\n", c) + } + } + fmt.Fprintf(w, "}\n\n") + fmt.Fprintf(w, "// max children %d (capacity %d)\n", maxChildren, 1<= 1<= 1<= 1< 0 && ss[0] == "" { + ss = ss[1:] + } + return ss +} + +// crush combines a list of strings, taking advantage of overlaps. It returns a +// single string that contains each input string as a substring. +func crush(ss []string) string { + maxLabelLen := 0 + for _, s := range ss { + if maxLabelLen < len(s) { + maxLabelLen = len(s) + } + } + + for prefixLen := maxLabelLen; prefixLen > 0; prefixLen-- { + prefixes := makePrefixMap(ss, prefixLen) + for i, s := range ss { + if len(s) <= prefixLen { + continue + } + mergeLabel(ss, i, prefixLen, prefixes) + } + } + + return strings.Join(ss, "") +} + +// mergeLabel merges the label at ss[i] with the first available matching label +// in prefixMap, where the last "prefixLen" characters in ss[i] match the first +// "prefixLen" characters in the matching label. +// It will merge ss[i] repeatedly until no more matches are available. +// All matching labels merged into ss[i] are replaced by "". +func mergeLabel(ss []string, i, prefixLen int, prefixes prefixMap) { + s := ss[i] + suffix := s[len(s)-prefixLen:] + for _, j := range prefixes[suffix] { + // Empty strings mean "already used." Also avoid merging with self. + if ss[j] == "" || i == j { + continue + } + if *v { + fmt.Fprintf(os.Stderr, "%d-length overlap at (%4d,%4d): %q and %q share %q\n", + prefixLen, i, j, ss[i], ss[j], suffix) + } + ss[i] += ss[j][prefixLen:] + ss[j] = "" + // ss[i] has a new suffix, so merge again if possible. + // Note: we only have to merge again at the same prefix length. Shorter + // prefix lengths will be handled in the next iteration of crush's for loop. + // Can there be matches for longer prefix lengths, introduced by the merge? + // I believe that any such matches would by necessity have been eliminated + // during substring removal or merged at a higher prefix length. For + // instance, in crush("abc", "cde", "bcdef"), combining "abc" and "cde" + // would yield "abcde", which could be merged with "bcdef." However, in + // practice "cde" would already have been elimintated by removeSubstrings. + mergeLabel(ss, i, prefixLen, prefixes) + return + } +} + +// prefixMap maps from a prefix to a list of strings containing that prefix. The +// list of strings is represented as indexes into a slice of strings stored +// elsewhere. +type prefixMap map[string][]int + +// makePrefixMap constructs a prefixMap from a slice of strings. +func makePrefixMap(ss []string, prefixLen int) prefixMap { + prefixes := make(prefixMap) + for i, s := range ss { + // We use < rather than <= because if a label matches on a prefix equal to + // its full length, that's actually a substring match handled by + // removeSubstrings. + if prefixLen < len(s) { + prefix := s[:prefixLen] + prefixes[prefix] = append(prefixes[prefix], i) + } + } + + return prefixes +} diff --git a/vendor/golang.org/x/net/publicsuffix/list.go b/vendor/golang.org/x/net/publicsuffix/list.go new file mode 100644 index 000000000..8bbf3bcd7 --- /dev/null +++ b/vendor/golang.org/x/net/publicsuffix/list.go @@ -0,0 +1,135 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:generate go run gen.go + +// Package publicsuffix provides a public suffix list based on data from +// http://publicsuffix.org/. A public suffix is one under which Internet users +// can directly register names. +package publicsuffix // import "golang.org/x/net/publicsuffix" + +// TODO: specify case sensitivity and leading/trailing dot behavior for +// func PublicSuffix and func EffectiveTLDPlusOne. + +import ( + "fmt" + "net/http/cookiejar" + "strings" +) + +// List implements the cookiejar.PublicSuffixList interface by calling the +// PublicSuffix function. +var List cookiejar.PublicSuffixList = list{} + +type list struct{} + +func (list) PublicSuffix(domain string) string { + ps, _ := PublicSuffix(domain) + return ps +} + +func (list) String() string { + return version +} + +// PublicSuffix returns the public suffix of the domain using a copy of the +// publicsuffix.org database compiled into the library. +// +// icann is whether the public suffix is managed by the Internet Corporation +// for Assigned Names and Numbers. If not, the public suffix is privately +// managed. For example, foo.org and foo.co.uk are ICANN domains, +// foo.dyndns.org and foo.blogspot.co.uk are private domains. +// +// Use cases for distinguishing ICANN domains like foo.com from private +// domains like foo.appspot.com can be found at +// https://wiki.mozilla.org/Public_Suffix_List/Use_Cases +func PublicSuffix(domain string) (publicSuffix string, icann bool) { + lo, hi := uint32(0), uint32(numTLD) + s, suffix, wildcard := domain, len(domain), false +loop: + for { + dot := strings.LastIndex(s, ".") + if wildcard { + suffix = 1 + dot + } + if lo == hi { + break + } + f := find(s[1+dot:], lo, hi) + if f == notFound { + break + } + + u := nodes[f] >> (nodesBitsTextOffset + nodesBitsTextLength) + icann = u&(1<>= nodesBitsICANN + u = children[u&(1<>= childrenBitsLo + hi = u & (1<>= childrenBitsHi + switch u & (1<>= childrenBitsNodeType + wildcard = u&(1<>= nodesBitsTextLength + offset := x & (1<