dnsseeder/vendor/github.com/miekg/dns/idn/punycode.go

// Package idn implements encoding from and to punycode as specified by RFC 3492.
package idn

import (
	"bytes"
	"strings"
	"unicode"

	"github.com/miekg/dns"
)

// Implementation idea from the RFC itself and from IDNA::Punycode created by
// Tatsuhiko Miyagawa <miyagawa@bulknews.net> and released under Perl Artistic
// License in 2002.

// Punycode parameters. The numeric values correspond to the parameter set
// given in RFC 3492 section 5 (tmin, tmax, skew, base, initial_bias,
// initial_n and damp respectively).
const (
	_MIN  rune = 1   // lower clamp for the per-digit threshold (see tfunc)
	_MAX  rune = 26  // upper clamp for the per-digit threshold (see tfunc)
	_SKEW rune = 38  // skew term used by adapt when computing the new bias
	_BASE rune = 36  // number of distinct digits (a-z, 0-9)
	_BIAS rune = 72  // bias that encode and decode start with
	_N    rune = 128 // first code point considered by encode and decode
	_DAMP rune = 700 // divisor applied to the very first delta in adapt

	_DELIMITER = '-'    // separates the literal basic code points from the encoded part
	_PREFIX    = "xn--" // marks a label as punycode encoded
)

// ToPunycode converts a unicode domain name into its DNS-appropriate punycode
// form, label by label. An empty input yields an empty string and the root
// name yields ".". A trailing dot on the input is kept on the output. If any
// label contains a character that encode rejects, the whole result is the
// empty string. This function expects domain names in lowercase.
func ToPunycode(s string) string {
	labels := dns.SplitDomainName(s)
	if s == "" {
		return ""
	}
	if labels == nil { // s == .
		return "."
	}
	if strings.HasSuffix(s, ".") {
		// An empty final label makes Join put the trailing dot back.
		labels = append(labels, "")
	}

	for idx, label := range labels {
		encoded := encode([]byte(label))
		if encoded == nil {
			return ""
		}
		labels[idx] = string(encoded)
	}
	return strings.Join(labels, ".")
}

// FromPunycode returns the unicode domain name for the provided punycode
// string, decoding it label by label. An empty input yields an empty string
// and the root name yields ".". A trailing dot on the input is kept on the
// output.
func FromPunycode(s string) string {
	labels := dns.SplitDomainName(s)
	if s == "" {
		return ""
	}
	if labels == nil { // s == .
		return "."
	}
	if strings.HasSuffix(s, ".") {
		// An empty final label makes Join put the trailing dot back.
		labels = append(labels, "")
	}
	for idx, label := range labels {
		labels[idx] = string(decode([]byte(label)))
	}
	return strings.Join(labels, ".")
}

// errdigit is returned by digitval for input that is not a punycode digit.
const errdigit = 0xffff

// digitval maps a single punycode digit to its numeric value: letters of
// either case map to 0..25 and the decimal digits map to 26..35. Anything
// else yields errdigit.
func digitval(code rune) rune {
	if 'A' <= code && code <= 'Z' {
		return code - 'A'
	}
	if 'a' <= code && code <= 'z' {
		return code - 'a'
	}
	if '0' <= code && code <= '9' {
		return code - '0' + 26
	}
	return errdigit
}

// lettercode returns the base-36 character (a-z, 0-9) that represents digit.
// Values 0..25 map to 'a'..'z' and values 26..35 map to '0'..'9'.
//
// It panics for any value outside 0..35. Such a value cannot be represented
// as a single base-36 digit, so reaching the panic indicates a bug in the
// caller rather than bad user input.
func lettercode(digit rune) rune {
	switch {
	case digit >= 0 && digit <= 25:
		return digit + 'a'
	case digit >= 26 && digit <= 35:
		// The upper bound is 35, not 36: 36 would map to ':' which is
		// not part of the base-36 alphabet.
		return digit - 26 + '0'
	}
	panic("dns: not reached")
}

// adapt computes the bias to use for the next variable-length integer.
// delta is the value that was just encoded or decoded, numpoints is the
// number of code points handled so far (callers pass a value of at least 1)
// and firsttime reports whether this is the first adaptation for the label.
func adapt(delta rune, numpoints int, firsttime bool) rune {
	// The very first delta is damped much harder than subsequent ones.
	divisor := rune(2)
	if firsttime {
		divisor = _DAMP
	}
	delta /= divisor
	delta += delta / rune(numpoints)

	var k rune
	for delta > (_BASE-_MIN)*_MAX/2 {
		delta /= _BASE - _MIN
		k += _BASE
	}

	return k + ((_BASE-_MIN+1)*delta)/(delta+_SKEW)
}

// next returns the smallest rune in b that is greater than or equal to
// boundary. If no rune in b reaches boundary, the first element of b is
// returned. It panics when b is empty.
func next(b []rune, boundary rune) rune {
	if len(b) == 0 {
		panic("dns: invalid set of runes to determine next one")
	}
	best := b[0]
	for idx := 1; idx < len(b); idx++ {
		candidate := b[idx]
		if candidate < boundary {
			continue
		}
		// Take the candidate when the current pick is still below the
		// boundary or when the candidate is a lower qualifying value.
		if best < boundary || candidate < best {
			best = candidate
		}
	}
	return best
}

// preprune lower-cases r when it is an upper case letter and returns every
// other rune unchanged. At this time it does not implement the further
// preparation steps described in the RFCs.
func preprune(r rune) rune {
	if !unicode.IsUpper(r) {
		return r
	}
	return unicode.ToLower(r)
}

// tfunc returns the threshold for the digit at position k under the given
// bias: k - bias, clamped to the range [_MIN, _MAX].
func tfunc(k, bias rune) rune {
	if k <= bias {
		return _MIN
	}
	if k >= bias+_MAX {
		return _MAX
	}
	return k - bias
}

// encode transforms the unicode bytes of a single DNS label into its
// punycode representation.
//
// It returns nil if the label contains a rune that isValidRune rejects.
// Upper case runes are lowered via preprune. A label that consists solely of
// code points up to 0x7f is returned as those bytes without the "xn--"
// prefix; otherwise the result is the prefix, the basic code points followed
// by the delimiter (only when there is at least one), and the encoded deltas.
func encode(input []byte) []byte {
	n, bias := _N, _BIAS

	b := bytes.Runes(input)
	for i := range b {
		if !isValidRune(b[i]) {
			return nil
		}

		b[i] = preprune(b[i])
	}

	// Collect the basic (ASCII) code points; they are emitted literally.
	basic := make([]byte, 0, len(b))
	for _, ltr := range b {
		if ltr <= 0x7f {
			basic = append(basic, byte(ltr))
		}
	}
	basiclen := len(basic)
	fulllen := len(b)
	if basiclen == fulllen {
		// Nothing to encode.
		return basic
	}

	var out bytes.Buffer

	out.WriteString(_PREFIX)
	if basiclen > 0 {
		out.Write(basic)
		out.WriteByte(_DELIMITER)
	}

	var (
		delta, q rune // delta calculation (see rfc)
		t, k, cp rune // weight and codepoint calculation
	)

	// h counts the code points handled so far. Each pass of the outer loop
	// handles every occurrence of the smallest not yet handled code point.
	for h := basiclen; h < fulllen; n, delta = n+1, delta+1 {
		nextltr := next(b, n)
		delta, n = delta+(nextltr-n)*rune(h+1), nextltr

		for _, ltr := range b {
			if ltr < n {
				delta++
			}
			if ltr == n {
				// Emit delta as a variable-length base-36 integer.
				q = delta
				for k = _BASE; ; k += _BASE {
					t = tfunc(k, bias)
					if q < t {
						break
					}
					cp = t + ((q - t) % (_BASE - t))
					out.WriteRune(lettercode(cp))
					q = (q - t) / (_BASE - t)
				}

				out.WriteRune(lettercode(q))

				bias = adapt(delta, h+1, h == basiclen)
				h, delta = h+1, 0
			}
		}
	}
	return out.Bytes()
}

// decode transforms the punycode bytes of a single DNS label into the UTF-8
// encoding of the unicode label.
//
// A label without the "xn--" prefix is returned unchanged. The original label
// is also returned unchanged when it cannot be decoded: when nothing follows
// the last delimiter, when it contains a byte that is not a punycode digit,
// when it ends in the middle of a variable-length integer, or when decoding
// it would overflow the integer arithmetic.
func decode(b []byte) []byte {
	src := b // b would move and we need to keep it

	// maxInt is the largest value a rune (int32) can hold. The guards
	// below keep i, w and n from wrapping around; a wrapped, negative i
	// would otherwise be used as a slice index further down.
	const maxInt = rune(1<<31 - 1)

	n, bias := _N, _BIAS
	if !bytes.HasPrefix(b, []byte(_PREFIX)) {
		return b
	}
	out := make([]rune, 0, len(b))
	b = b[len(_PREFIX):]
	for pos := len(b) - 1; pos >= 0; pos-- {
		// only last delimiter is our interest
		if b[pos] == _DELIMITER {
			out = append(out, bytes.Runes(b[:pos])...)
			b = b[pos+1:] // trim source string
			break
		}
	}
	if len(b) == 0 {
		return src
	}
	var (
		i, oldi, w rune
		ch         byte
		t, digit   rune
		ln         int
	)

	for i = 0; len(b) > 0; i++ {
		oldi, w = i, 1
		// Read one variable-length integer and add it to i.
		for k := _BASE; ; k += _BASE {
			if len(b) == 0 {
				// The integer is not terminated: truncated input.
				return src
			}
			ch, b = b[0], b[1:]
			digit = digitval(rune(ch))
			if digit == errdigit {
				return src
			}
			if digit > (maxInt-i)/w {
				return src // i would overflow
			}
			i += digit * w

			t = tfunc(k, bias)
			if digit < t {
				break
			}

			if w > maxInt/(_BASE-t) {
				return src // w would overflow
			}
			w *= _BASE - t
		}
		ln = len(out) + 1
		bias = adapt(i-oldi, ln, oldi == 0)
		if i/rune(ln) > maxInt-n {
			return src // n would overflow
		}
		n += i / rune(ln)
		i = i % rune(ln)
		// insert n at position i, shifting the tail one slot to the right
		out = append(out, 0)
		copy(out[i+1:], out[i:])
		out[i] = n
	}

	var ret bytes.Buffer
	for _, r := range out {
		ret.WriteRune(r)
	}
	return ret.Bytes()
}

// isValidRune reports whether r may appear in a label, which is the case
// when findProperty classifies it as propertyPVALID. Special rules for
// code points with a contextual property are not checked for now.
func isValidRune(r rune) bool {
	switch findProperty(r) {
	case propertyPVALID:
		return true
	default:
		return false
	}
}

// findProperty returns the code point property of r by binary searching
// codePoints, a slice of ordered ranges (average case performance O(log n)).
// An entry whose end is 0 describes the single code point start; otherwise
// it describes the inclusive range start..end. propertyUnknown is returned
// when no entry covers r.
func findProperty(r rune) property {
	// imax is the index of the last entry. Starting at len(codePoints)
	// would let imid index one past the end of the slice when the table
	// is empty or r lies above every range.
	imin, imax := 0, len(codePoints)-1

	for imax >= imin {
		imid := imin + (imax-imin)/2

		codePoint := codePoints[imid]
		if (codePoint.start == r && codePoint.end == 0) || (codePoint.start <= r && codePoint.end >= r) {
			return codePoint.state
		}

		if (codePoint.end > 0 && codePoint.end < r) || (codePoint.end == 0 && codePoint.start < r) {
			// The entry lies entirely below r: continue in the upper half.
			imin = imid + 1
		} else {
			imax = imid - 1
		}
	}

	return propertyUnknown
}
Initial commit of code 9 years ago			`// Package idn implements encoding from and to punycode as speficied by RFC 3492.`
			`package idn`

			`import (`
			`"bytes"`
			`"strings"`
			`"unicode"`

			`"github.com/miekg/dns"`
			`)`

			`// Implementation idea from RFC itself and from from IDNA::Punycode created by`
			`// Tatsuhiko Miyagawa <miyagawa@bulknews.net> and released under Perl Artistic`
			`// License in 2002.`

			`const (`
			`_MIN rune = 1`
			`_MAX rune = 26`
			`_SKEW rune = 38`
			`_BASE rune = 36`
			`_BIAS rune = 72`
			`_N rune = 128`
			`_DAMP rune = 700`

			`_DELIMITER = '-'`
			`_PREFIX = "xn--"`
			`)`

			`// ToPunycode converts unicode domain names to DNS-appropriate punycode names.`
			`// This function would return an empty string result for domain names with`
			`// invalid unicode strings. This function expects domain names in lowercase.`
			`func ToPunycode(s string) string {`
			`tokens := dns.SplitDomainName(s)`
			`switch {`
			`case s == "":`
			`return ""`
			`case tokens == nil: // s == .`
			`return "."`
			`case s[len(s)-1] == '.':`
			`tokens = append(tokens, "")`
			`}`

			`for i := range tokens {`
			`t := encode([]byte(tokens[i]))`
			`if t == nil {`
			`return ""`
			`}`
			`tokens[i] = string(t)`
			`}`
			`return strings.Join(tokens, ".")`
			`}`

			`// FromPunycode returns unicode domain name from provided punycode string.`
			`func FromPunycode(s string) string {`
			`tokens := dns.SplitDomainName(s)`
			`switch {`
			`case s == "":`
			`return ""`
			`case tokens == nil: // s == .`
			`return "."`
			`case s[len(s)-1] == '.':`
			`tokens = append(tokens, "")`
			`}`
			`for i := range tokens {`
			`tokens[i] = string(decode([]byte(tokens[i])))`
			`}`
			`return strings.Join(tokens, ".")`
			`}`

			`// digitval converts single byte into meaningful value that's used to calculate decoded unicode character.`
			`const errdigit = 0xffff`

			`func digitval(code rune) rune {`
			`switch {`
			`case code >= 'A' && code <= 'Z':`
			`return code - 'A'`
			`case code >= 'a' && code <= 'z':`
			`return code - 'a'`
			`case code >= '0' && code <= '9':`
			`return code - '0' + 26`
			`}`
			`return errdigit`
			`}`

			`// lettercode finds BASE36 byte (a-z0-9) based on calculated number.`
			`func lettercode(digit rune) rune {`
			`switch {`
			`case digit >= 0 && digit <= 25:`
			`return digit + 'a'`
			`case digit >= 26 && digit <= 36:`
			`return digit - 26 + '0'`
			`}`
			`panic("dns: not reached")`
			`}`

			`// adapt calculates next bias to be used for next iteration delta.`
			`func adapt(delta rune, numpoints int, firsttime bool) rune {`
			`if firsttime {`
			`delta /= _DAMP`
			`} else {`
			`delta /= 2`
			`}`

			`var k rune`
			`for delta = delta + delta/rune(numpoints); delta > (_BASE-_MIN)*_MAX/2; k += _BASE {`
			`delta /= _BASE - _MIN`
			`}`

			`return k + ((_BASE-_MIN+1)*delta)/(delta+_SKEW)`
			`}`

			`// next finds minimal rune (one with lowest codepoint value) that should be equal or above boundary.`
			`func next(b []rune, boundary rune) rune {`
			`if len(b) == 0 {`
			`panic("dns: invalid set of runes to determine next one")`
			`}`
			`m := b[0]`
			`for _, x := range b[1:] {`
			`if x >= boundary && (m < boundary \|\| x < m) {`
			`m = x`
			`}`
			`}`
			`return m`
			`}`

			`// preprune converts unicode rune to lower case. At this time it's not`
			`// supporting all things described in RFCs`
			`func preprune(r rune) rune {`
			`if unicode.IsUpper(r) {`
			`r = unicode.ToLower(r)`
			`}`
			`return r`
			`}`

			`// tfunc is a function that helps calculate each character weight`
			`func tfunc(k, bias rune) rune {`
			`switch {`
			`case k <= bias:`
			`return _MIN`
			`case k >= bias+_MAX:`
			`return _MAX`
			`}`
			`return k - bias`
			`}`

			`// encode transforms Unicode input bytes (that represent DNS label) into`
			`// punycode bytestream. This function would return nil if there's an invalid`
			`// character in the label.`
			`func encode(input []byte) []byte {`
			`n, bias := _N, _BIAS`

			`b := bytes.Runes(input)`
			`for i := range b {`
			`if !isValidRune(b[i]) {`
			`return nil`
			`}`

			`b[i] = preprune(b[i])`
			`}`

			`basic := make([]byte, 0, len(b))`
			`for _, ltr := range b {`
			`if ltr <= 0x7f {`
			`basic = append(basic, byte(ltr))`
			`}`
			`}`
			`basiclen := len(basic)`
			`fulllen := len(b)`
			`if basiclen == fulllen {`
			`return basic`
			`}`

			`var out bytes.Buffer`

			`out.WriteString(_PREFIX)`
			`if basiclen > 0 {`
			`out.Write(basic)`
			`out.WriteByte(_DELIMITER)`
			`}`

			`var (`
			`ltr, nextltr rune`
			`delta, q rune // delta calculation (see rfc)`
			`t, k, cp rune // weight and codepoint calculation`
			`)`

			`s := &bytes.Buffer{}`
			`for h := basiclen; h < fulllen; n, delta = n+1, delta+1 {`
			`nextltr = next(b, n)`
			`s.Truncate(0)`
			`s.WriteRune(nextltr)`
			`delta, n = delta+(nextltr-n)*rune(h+1), nextltr`

			`for _, ltr = range b {`
			`if ltr < n {`
			`delta++`
			`}`
			`if ltr == n {`
			`q = delta`
			`for k = _BASE; ; k += _BASE {`
			`t = tfunc(k, bias)`
			`if q < t {`
			`break`
			`}`
			`cp = t + ((q - t) % (_BASE - t))`
			`out.WriteRune(lettercode(cp))`
			`q = (q - t) / (_BASE - t)`
			`}`

			`out.WriteRune(lettercode(q))`

			`bias = adapt(delta, h+1, h == basiclen)`
			`h, delta = h+1, 0`
			`}`
			`}`
			`}`
			`return out.Bytes()`
			`}`

			`// decode transforms punycode input bytes (that represent DNS label) into Unicode bytestream`
			`func decode(b []byte) []byte {`
			`src := b // b would move and we need to keep it`

			`n, bias := _N, _BIAS`
			`if !bytes.HasPrefix(b, []byte(_PREFIX)) {`
			`return b`
			`}`
			`out := make([]rune, 0, len(b))`
			`b = b[len(_PREFIX):]`
			`for pos := len(b) - 1; pos >= 0; pos-- {`
			`// only last delimiter is our interest`
			`if b[pos] == _DELIMITER {`
			`out = append(out, bytes.Runes(b[:pos])...)`
			`b = b[pos+1:] // trim source string`
			`break`
			`}`
			`}`
			`if len(b) == 0 {`
			`return src`
			`}`
			`var (`
			`i, oldi, w rune`
			`ch byte`
			`t, digit rune`
			`ln int`
			`)`

			`for i = 0; len(b) > 0; i++ {`
			`oldi, w = i, 1`
			`for k := _BASE; len(b) > 0; k += _BASE {`
			`ch, b = b[0], b[1:]`
			`digit = digitval(rune(ch))`
			`if digit == errdigit {`
			`return src`
			`}`
			`i += digit * w`

			`t = tfunc(k, bias)`
			`if digit < t {`
			`break`
			`}`

			`w *= _BASE - t`
			`}`
			`ln = len(out) + 1`
			`bias = adapt(i-oldi, ln, oldi == 0)`
			`n += i / rune(ln)`
			`i = i % rune(ln)`
			`// insert`
			`out = append(out, 0)`
			`copy(out[i+1:], out[i:])`
			`out[i] = n`
			`}`

			`var ret bytes.Buffer`
			`for _, r := range out {`
			`ret.WriteRune(r)`
			`}`
			`return ret.Bytes()`
			`}`

			`// isValidRune checks if the character is valid. We will look for the`
			`// character property in the code points list. For now we aren't checking special`
			`// rules in case of contextual property`
			`func isValidRune(r rune) bool {`
			`return findProperty(r) == propertyPVALID`
			`}`

			`// findProperty will try to check the code point property of the given`
			`// character. It will use a binary search algorithm as we have a slice of`
			`// ordered ranges (average case performance O(log n))`
			`func findProperty(r rune) property {`
			`imin, imax := 0, len(codePoints)`

			`for imax >= imin {`
			`imid := (imin + imax) / 2`

			`codePoint := codePoints[imid]`
			`if (codePoint.start == r && codePoint.end == 0) \|\| (codePoint.start <= r && codePoint.end >= r) {`
			`return codePoint.state`
			`}`

			`if (codePoint.end > 0 && codePoint.end < r) \|\| (codePoint.end == 0 && codePoint.start < r) {`
			`imin = imid + 1`
			`} else {`
			`imax = imid - 1`
			`}`
			`}`

			`return propertyUnknown`
			`}`