Skip to content

Commit

Permalink
Fix counting of escape sequences when splitting TXT strings (#1540)
Browse files Browse the repository at this point in the history
`endingToTxtSlice`, used by TXT, SPF and a few other types, parses a
string such as `"hello world"` from an RR's content in a zone file.
These strings are limited to 255 characters, and `endingToTxtSlice`
automatically splits them if they're longer than that. However, it
didn't count the length correctly: escape sequences such as `\\` or
`\123` were counted as multiple characters (2 and 4 respectively in
these examples), but they should only count as one character because
they represent a single byte in wire format (which is where this 255
character limit comes from). This commit fixes that.
  • Loading branch information
janik-cloudflare authored Apr 17, 2024
1 parent ba039c8 commit e4ef594
Show file tree
Hide file tree
Showing 3 changed files with 101 additions and 25 deletions.
39 changes: 31 additions & 8 deletions parse_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1098,18 +1098,41 @@ func TestTXT(t *testing.T) {
}
}

// Test TXT record with chunk larger than 255 bytes, they should be split up, by the parser
s := ""
for i := 0; i < 255; i++ {
s += "a"
// Test TXT record with string larger than 255 bytes that should be split
// up by the parser. Add some escape sequences too to ensure their length
// is counted correctly.
s := `"\;\\\120` + strings.Repeat("a", 255) + `b"`
rr, err = NewRR(`test.local. 60 IN TXT ` + s)
if err != nil {
t.Error("failed to parse empty-string TXT record", err)
}
if rr.(*TXT).Txt[1] != "aaab" {
t.Errorf("Txt should have two strings, last one must be 'aaab', but is %s", rr.(*TXT).Txt[1])
}
s += "b"
rr, err = NewRR(`test.local. 60 IN TXT "` + s + `"`)
rrContent := strings.Replace(rr.String(), rr.Header().String(), "", 1)
expectedRRContent := `";\\x` + strings.Repeat("a", 252) + `" "aaab"`
if expectedRRContent != rrContent {
t.Errorf("Expected TXT RR content to be %#q but got %#q", expectedRRContent, rrContent)
}

// Test TXT record that is already split up into strings of len <= 255.
s = fmt.Sprintf(
"%q %q %q %q %q %q",
strings.Repeat(`a`, 255),
strings.Repeat("b", 255),
strings.Repeat("c", 255),
strings.Repeat("d", 0),
strings.Repeat("e", 1),
strings.Repeat("f", 123),
)
rr, err = NewRR(`test.local. 60 IN TXT ` + s)
if err != nil {
t.Error("failed to parse empty-string TXT record", err)
}
if rr.(*TXT).Txt[1] != "b" {
t.Errorf("Txt should have two chunk, last one my be 'b', but is %s", rr.(*TXT).Txt[1])
rrContent = strings.Replace(rr.String(), rr.Header().String(), "", 1)
expectedRRContent = s // same as input
if expectedRRContent != rrContent {
t.Errorf("Expected TXT RR content to be %#q but got %#q", expectedRRContent, rrContent)
}
}

Expand Down
59 changes: 42 additions & 17 deletions scan_rr.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,25 +51,21 @@ func endingToTxtSlice(c *zlexer, errstr string) ([]string, *ParseError) {
switch l.value {
case zString:
empty = false
if len(l.token) > 255 {
// split up tokens that are larger than 255 into 255-chunks
sx := []string{}
p, i := 0, 255
for {
if i <= len(l.token) {
sx = append(sx, l.token[p:i])
} else {
sx = append(sx, l.token[p:])
break

}
p, i = p+255, i+255
// split up tokens that are larger than 255 into 255-chunks
sx := []string{}
p := 0
for {
i := escapedStringOffset(l.token[p:], 255)
if i != -1 && p+i != len(l.token) {
sx = append(sx, l.token[p:p+i])
} else {
sx = append(sx, l.token[p:])
break

}
s = append(s, sx...)
break
p += i
}

s = append(s, l.token)
s = append(s, sx...)
case zBlank:
if quote {
// zBlank can only be seen in between txt parts.
Expand Down Expand Up @@ -1920,3 +1916,32 @@ func (rr *APL) parse(c *zlexer, o string) *ParseError {
rr.Prefixes = prefixes
return nil
}

// escapedStringOffset finds the offset within a string (which may contain escape
// sequences) that corresponds to a certain byte offset. If the input offset is
// out of bounds, -1 is returned.
func escapedStringOffset(s string, byteOffset int) int {
if byteOffset == 0 {
return 0
}

offset := 0
for i := 0; i < len(s); i++ {
offset += 1

// Skip escape sequences
if s[i] != '\\' {
// Not an escape sequence; nothing to do.
} else if isDDD(s[i+1:]) {
i += 3
} else {
i++
}

if offset >= byteOffset {
return i + 1
}
}

return -1
}
28 changes: 28 additions & 0 deletions scan_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -427,3 +427,31 @@ func BenchmarkZoneParser(b *testing.B) {
}
}
}

func TestEscapedStringOffset(t *testing.T) {
var cases = []struct {
input string
inputOffset int
expectedOffset int
}{
{"simple string with no escape sequences", 20, 20},
{"simple string with no escape sequences", 500, -1},
{`\;\088\\\;\120\\`, 0, 0},
{`\;\088\\\;\120\\`, 1, 2},
{`\;\088\\\;\120\\`, 2, 6},
{`\;\088\\\;\120\\`, 3, 8},
{`\;\088\\\;\120\\`, 4, 10},
{`\;\088\\\;\120\\`, 5, 14},
{`\;\088\\\;\120\\`, 6, 16},
{`\;\088\\\;\120\\`, 7, -1},
}
for i, test := range cases {
outputOffset := escapedStringOffset(test.input, test.inputOffset)
if outputOffset != test.expectedOffset {
t.Errorf(
"Test %d (input %#q offset %d) returned offset %d but expected %d",
i, test.input, test.inputOffset, outputOffset, test.expectedOffset,
)
}
}
}

0 comments on commit e4ef594

Please sign in to comment.