Detect XN-Labels case-insensitively #636

CBonnell · 2021-10-04T20:30:57Z

lint_idn_dnsname_malformed_unicode and lint_idn_dnsname_must_be_nfc detect XN-labels using a case-sensitive comparison of the IDNA ACE prefix ("xn--"). As a result, some "fake A labels" or labels that whose Unicode representation is not NFC-normalized may not be correctly detected.

christopher-henderson

Thank you for your submission @CBonnell !

In order to avoid the bug mentioned in my previous comment, would you mind adding the following code into v3/util/idna.go and v3/util/idna_test.go using them in your changes where appropiate? Or at least something similar?

v3/util.idna.go

/*
 * ZLint Copyright 2021 Regents of the University of Michigan
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy
 * of the License at http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */

package util

import (
	"regexp"

	"golang.org/x/net/idna"
)

var acePrefix = regexp.MustCompile(`^[xX][nN]--.*`)

// IsACEPrefixed checks whether-or-not the given string is prefixed
// with the case-insensitive string "xn--".
//
// This check is useful given the bug following bug report for IDNA wherein
// the ACE prefix incorrectly taken to be case-sensitive.
//
// https://github.com/golang/go/issues/48778
func IsACEPrefixed(s string) bool {
	return acePrefix.MatchString(s)
}

// IdnaToUnicode is a wrapper around idna.ToUnicode.
//
// If the provided string is ACE prefixed, then that ACE prefix
// is coerced to a lowercase "xn--" before processing by
// then idna package.
//
// This is only necessary due to the bug at https://github.com/golang/go/issues/48778
func IdnaToUnicode(s string) (string, error) {
	if IsACEPrefixed(s) {
		s = "xn--" + s[4:]
	}
	return idna.ToUnicode(s)
}

v3/util/idna_test.go

/*
 * ZLint Copyright 2021 Regents of the University of Michigan
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy
 * of the License at http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */

package util

import (
	"testing"

	"golang.org/x/net/idna"
)

func TestIsACEPrefixed(t *testing.T) {
	input := map[string]bool{
		"xn--zlint.org": true,
		"Xn--zlint.org": true,
		"xN--zlint.org": true,
		"XN--zlint.org": true,
		"xn--":          true,
		"Xn--":          true,
		"xN--":          true,
		"XN--":          true,
		"-xn--":         false,
		"-Xn--":         false,
		"-xN--":         false,
		"-XN--":         false,
		"":              false,
	}
	for input, want := range input {
		got := IsACEPrefixed(input)
		if got != want {
			t.Errorf("got %v want %v for input '%s'", got, want, input)
		}
	}
}

func TestIdnaToUnicode(t *testing.T) {
	type testData struct {
		input   string
		want    string
		wantErr bool
	}
	input := []testData{
		{"xn--Mnchen-Ost-9db", "München-Ost", false},
		{"xn--Mnchen-ost-9db", "München-ost", false},
		{"xn--", "", false},
		{"xN--12311613412431243.com", "xn--12311613412431243.com", true},

		{"Xn--Mnchen-Ost-9db", "München-Ost", false},
		{"Xn--Mnchen-ost-9db", "München-ost", false},
		{"Xn--", "", false},
		{"xN--12311613412431243.com", "xn--12311613412431243.com", true},

		{"xN--Mnchen-Ost-9db", "München-Ost", false},
		{"xN--Mnchen-ost-9db", "München-ost", false},
		{"xN--", "", false},
		{"xN--12311613412431243.com", "xn--12311613412431243.com", true},

		{"XN--Mnchen-Ost-9db", "München-Ost", false},
		{"XN--Mnchen-ost-9db", "München-ost", false},
		{"XN--", "", false},
		{"xN--12311613412431243.com", "xn--12311613412431243.com", true},
	}
	for _, data := range input {
		got, err := IdnaToUnicode(data.input)
		gotErr := err != nil
		if gotErr != data.wantErr || data.want != got {
			t.Errorf("got string '%s' error '%v' for test data %v", got, err, data)
		}
	}
}

// This test checks whether or not https://github.com/golang/go/issues/48778 ever got fixed.
// If it did then we can likely delete some code from utils since we don't have to handle it
// with kid gloves anymore.
func TestIdnaToUnicodeBugIsStillThere(t *testing.T) {
	s, _ := idna.ToUnicode("Xn--Mnchen-Ost-9db")
	if s == "München-Ost" {
		t.Fatal("https://github.com/golang/go/issues/48778 appears to have been fixed")
	}
}

This should result in...

v3/lints/rfc/lint_idn_dnsname_malformed_unicode.go

func (l *IDNMalformedUnicode) Execute(c *x509.Certificate) *lint.LintResult {
	for _, dns := range c.DNSNames {
		labels := strings.Split(dns, ".")
		for _, label := range labels {
			if util.IsACEPrefixed(label) {
				_, err := util.IdnaToUnicode(label)
				if err != nil {
					return &lint.LintResult{Status: lint.Error}
				}
			}
		}
	}
	return &lint.LintResult{Status: lint.Pass}
}

...and...

v3/lints/rfc/lint_idn_dnsname_must_be_nfc.go

func (l *IDNNotNFC) Execute(c *x509.Certificate) *lint.LintResult {
	for _, dns := range c.DNSNames {
		labels := strings.Split(dns, ".")
		for _, label := range labels {
			if util.IsACEPrefixed(label) {
				unicodeLabel, err := util.IdnaToUnicode(label)
				if err != nil {
					return &lint.LintResult{Status: lint.NA}
				}
				if !norm.NFC.IsNormalString(unicodeLabel) {
					return &lint.LintResult{Status: lint.Error}
				}
			}
		}
	}
	return &lint.LintResult{Status: lint.Pass}
}

christopher-henderson · 2021-10-10T18:47:27Z

v3/lints/rfc/lint_idn_dnsname_must_be_nfc.go

+			if strings.HasPrefix(labelLower, "xn--") {
+				// We need to use the lowercase label due to a bug in idna
+				// See: https://github.com/golang/go/issues/48778
+				unicodeLabel, err := idna.ToUnicode(labelLower)


Punycode is relatively complex and I do believe that coercing the entire label into lowercase does have implications on the output string. Take the following (playground link) which borrows from Wikipedia's Punycode::Examples table

package main import ( "golang.org/x/net/idna" "fmt" ) func main() { upper, _ := idna.ToUnicode("xn--Mnchen-Ost-9db") lower, _ := idna.ToUnicode("xn--Mnchen-ost-9db") fmt.Println(lower == upper) fmt.Println(upper) fmt.Println(lower) }

Here we have a label which has an ASCII literal embedded in it (the O and o) which ends up coming out of the other end raw.

Thanks @christopher-henderson. Your suggestion of creating reusable IDNA functions is definitely cleaner than my original proposal. I went ahead and incorporated your suggested implementation with some small changes.

In regard to case-sensitivity, the two lints that currently use ToUnicode won't have different results if the label is downcased (https://datatracker.ietf.org/doc/html/rfc3492#appendix-A), but the reusable functions definitely should preserve casing as you pointed out as returning a downcased U-label would be surprising API behavior.

That appendix, however, is not mandatory.

Note, however, that mixed-case annotation is not used by the ToASCII and ToUnicode operations specified in [IDNA], and therefore implementors of IDNA can disregard this appendix.

And besides, the link to the Go playground code snippet clearly shows that this particular implementation does in fact get different answers in some cases depending on the casing.

@christopher-henderson Regarding

Punycode is relatively complex and I do believe that coercing the entire label into lowercase does have implications on the output string.

and

That appendix, however, is not mandatory.

The point of that is that the mixed case preservation is the optional aspect, and purely a presentational decision. The appendix describes how one can preserve cases in the encoded string, in order to have it reflected in the display, but the underlying functions are insensitive.

With respect to host names (i.e. those for A and AAAA), we unambiguously know that they are case insensitive.

I may be misreading the comments here, but it should be unambiguous that

Punycode encoders and decoders need not support these annotations, and higher layers need not use them.

v3/lints/rfc/lint_idn_dnsname_must_be_nfc.go

v3/lints/rfc/lint_idn_dnsname_malformed_unicode.go

…/zlint into bugfix/case-insensitive-xnlabel

christopher-henderson

Looks good to me now, thank you!

Adding some more people to the review to get a second pair of eyes.

sleevi

Looks good to me as well. My caveat about just lowercasing being perfectly acceptable, but just because it’s permissible doesn’t mean we have to do it :)

CBonnell and others added 2 commits October 4, 2021 16:27

Detect XN-Labels case-insensitively

9e30a01

Merge branch 'master' into bugfix/case-insensitive-xnlabel

34f0fae

christopher-henderson added a commit that referenced this pull request Oct 10, 2021

counter proposal for #636

97a2ebc

christopher-henderson requested changes Oct 10, 2021

View reviewed changes

CBonnell added 2 commits October 12, 2021 16:04

Incorporate Chris's refactoring suggestion to create idna functions

2e093ec

Merge branch 'bugfix/case-insensitive-xnlabel' of github.com:CBonnell…

e4d26ca

…/zlint into bugfix/case-insensitive-xnlabel

CBonnell requested a review from christopher-henderson October 14, 2021 17:26

CBonnell mentioned this pull request Oct 15, 2021

Lint for Non-XN Reserved Labels #635

Merged

christopher-henderson requested review from cpu and sleevi October 16, 2021 13:42

christopher-henderson approved these changes Oct 16, 2021

View reviewed changes

sleevi approved these changes Oct 16, 2021

View reviewed changes

christopher-henderson merged commit 0508b86 into zmap:master Oct 16, 2021

CBonnell deleted the bugfix/case-insensitive-xnlabel branch October 19, 2021 18:51

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Detect XN-Labels case-insensitively #636

Detect XN-Labels case-insensitively #636

CBonnell commented Oct 4, 2021

christopher-henderson left a comment

christopher-henderson Oct 10, 2021

CBonnell Oct 12, 2021

christopher-henderson Oct 16, 2021

sleevi Oct 16, 2021

christopher-henderson left a comment

sleevi left a comment

Detect XN-Labels case-insensitively #636

Detect XN-Labels case-insensitively #636

Conversation

CBonnell commented Oct 4, 2021

christopher-henderson left a comment

Choose a reason for hiding this comment

christopher-henderson Oct 10, 2021

Choose a reason for hiding this comment

CBonnell Oct 12, 2021

Choose a reason for hiding this comment

christopher-henderson Oct 16, 2021

Choose a reason for hiding this comment

sleevi Oct 16, 2021

Choose a reason for hiding this comment

christopher-henderson left a comment

Choose a reason for hiding this comment

sleevi left a comment

Choose a reason for hiding this comment