Skip to content

Commit

Permalink
baggage: Fix invalid percent-encoded octet sequences (#5528)
Browse files Browse the repository at this point in the history
# Goal

Replace the percent encoded octet sequence with the replacement code
point (U+FFFD) when it doesn't match the UTF-8 encoding schema.

Issue: #5519

Current behavior: 

```
package main

import (
	"fmt"
	"log"
	"unicode/utf8"

	"go.opentelemetry.io/otel/baggage"
)

func main() {
	kv := "k=aa%ffcc"

	b, err := baggage.Parse(kv)
	if err != nil {
		log.Fatal(err)
	}
	val := b.Members()[0].Value()

	fmt.Println(len(val)) # 5
	fmt.Println(utf8.ValidString(val)) # false
}
```

Expected behavior:
```
package main

import (
	"fmt"
	"log"
	"unicode/utf8"

	"go.opentelemetry.io/otel/baggage"
)

func main() {
	kv := "k=aa%ffcc"

	b, err := baggage.Parse(kv)
	if err != nil {
		log.Fatal(err)
	}
	val := b.Members()[0].Value()

	fmt.Println(len(val)) # 7
	fmt.Println(utf8.ValidString(val)) # true
}
```

## Benchmark

- `go test -bench=BenchmarkParse -count 20 > old.txt`
```
goos: darwin
goarch: arm64
pkg: go.opentelemetry.io/otel/baggage
BenchmarkParse-10    	 1548118	       774.3 ns/op	     864 B/op	       8 allocs/op
BenchmarkParse-10    	 1547653	       786.0 ns/op	     864 B/op	       8 allocs/op
BenchmarkParse-10    	 1544949	       770.5 ns/op	     864 B/op	       8 allocs/op
BenchmarkParse-10    	 1558972	       770.2 ns/op	     864 B/op	       8 allocs/op
BenchmarkParse-10    	 1554973	       774.7 ns/op	     864 B/op	       8 allocs/op
BenchmarkParse-10    	 1550200	       779.6 ns/op	     864 B/op	       8 allocs/op
BenchmarkParse-10    	 1545100	       774.3 ns/op	     864 B/op	       8 allocs/op
BenchmarkParse-10    	 1549634	       777.5 ns/op	     864 B/op	       8 allocs/op
BenchmarkParse-10    	 1552530	       769.6 ns/op	     864 B/op	       8 allocs/op
BenchmarkParse-10    	 1536499	       855.0 ns/op	     864 B/op	       8 allocs/op
BenchmarkParse-10    	 1552244	       770.4 ns/op	     864 B/op	       8 allocs/op
BenchmarkParse-10    	 1560225	       767.4 ns/op	     864 B/op	       8 allocs/op
BenchmarkParse-10    	 1562738	       772.3 ns/op	     864 B/op	       8 allocs/op
BenchmarkParse-10    	 1556679	       838.9 ns/op	     864 B/op	       8 allocs/op
BenchmarkParse-10    	 1562500	       777.1 ns/op	     864 B/op	       8 allocs/op
BenchmarkParse-10    	 1530901	       836.5 ns/op	     864 B/op	       8 allocs/op
BenchmarkParse-10    	 1000000	      1372 ns/op	     864 B/op	       8 allocs/op
BenchmarkParse-10    	 1534678	       780.3 ns/op	     864 B/op	       8 allocs/op
BenchmarkParse-10    	 1366180	       822.4 ns/op	     864 B/op	       8 allocs/op
BenchmarkParse-10    	 1539852	       796.8 ns/op	     864 B/op	       8 allocs/op
PASS
ok  	go.opentelemetry.io/otel/baggage	40.839s

```

- `go test -bench=BenchmarkParse -count 20 > new.txt`
```
goos: darwin
goarch: arm64
pkg: go.opentelemetry.io/otel/baggage
BenchmarkParse-10    	 1355893	       886.6 ns/op	     888 B/op	       9 allocs/op
BenchmarkParse-10    	 1349192	       883.1 ns/op	     888 B/op	       9 allocs/op
BenchmarkParse-10    	 1363053	       880.4 ns/op	     888 B/op	       9 allocs/op
BenchmarkParse-10    	 1372404	       875.7 ns/op	     888 B/op	       9 allocs/op
BenchmarkParse-10    	 1359979	       880.7 ns/op	     888 B/op	       9 allocs/op
BenchmarkParse-10    	 1360497	       874.7 ns/op	     888 B/op	       9 allocs/op
BenchmarkParse-10    	 1375520	       870.2 ns/op	     888 B/op	       9 allocs/op
BenchmarkParse-10    	 1375268	       882.8 ns/op	     888 B/op	       9 allocs/op
BenchmarkParse-10    	 1361998	       964.8 ns/op	     888 B/op	       9 allocs/op
BenchmarkParse-10    	 1373461	       961.5 ns/op	     888 B/op	       9 allocs/op
BenchmarkParse-10    	 1378065	       872.6 ns/op	     888 B/op	       9 allocs/op
BenchmarkParse-10    	 1377290	       879.0 ns/op	     888 B/op	       9 allocs/op
BenchmarkParse-10    	 1362094	       885.6 ns/op	     888 B/op	       9 allocs/op
BenchmarkParse-10    	 1352175	       915.9 ns/op	     888 B/op	       9 allocs/op
BenchmarkParse-10    	 1364914	       887.9 ns/op	     888 B/op	       9 allocs/op
BenchmarkParse-10    	 1355782	       890.5 ns/op	     888 B/op	       9 allocs/op
BenchmarkParse-10    	 1361848	      1245 ns/op	     888 B/op	       9 allocs/op
BenchmarkParse-10    	 1163396	       878.8 ns/op	     888 B/op	       9 allocs/op
BenchmarkParse-10    	 1370886	       916.6 ns/op	     888 B/op	       9 allocs/op
BenchmarkParse-10    	 1340149	      1175 ns/op	     888 B/op	       9 allocs/op
PASS
ok  	go.opentelemetry.io/otel/baggage	44.347s
```

- `benchstat old.txt new.txt`
```
goos: darwin
goarch: arm64
pkg: go.opentelemetry.io/otel/baggage
         │   old.txt   │               new.txt               │
         │   sec/op    │   sec/op     vs base                │
Parse-10   777.3n ± 3%   884.4n ± 4%  +13.77% (p=0.000 n=20)

         │  old.txt   │              new.txt              │
         │    B/op    │    B/op     vs base               │
Parse-10   864.0 ± 0%   888.0 ± 0%  +2.78% (p=0.000 n=20)

         │  old.txt   │              new.txt               │
         │ allocs/op  │ allocs/op   vs base                │
Parse-10   8.000 ± 0%   9.000 ± 0%  +12.50% (p=0.000 n=20)
```

---------

Co-authored-by: Damien Mathieu <42@dmathieu.com>
Co-authored-by: Robert Pająk <pellared@hotmail.com>
  • Loading branch information
3 people authored Jul 8, 2024
1 parent 61ff66c commit 7e0af51
Show file tree
Hide file tree
Showing 3 changed files with 94 additions and 5 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
- Fix stale timestamps reported by the last-value aggregation. (#5517)
- Indicate the `Exporter` in `go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp` must be created by the `New` method. (#5521)
- Improved performance in all `{Bool,Int64,Float64,String}SliceValue` functions of `go.opentelemetry.io/attributes` by reducing the number of allocations. (#5549)
- Replace invalid percent-encoded octet sequences with replacement char in `go.opentelemetry.io/otel/baggage`. (#5528)

## [1.27.0/0.49.0/0.3.0] 2024-05-21

Expand Down
36 changes: 32 additions & 4 deletions baggage/baggage.go
Original file line number Diff line number Diff line change
Expand Up @@ -294,19 +294,45 @@ func parseMember(member string) (Member, error) {
return newInvalidMember(), fmt.Errorf("%w: %q", errInvalidKey, key)
}

val := strings.TrimSpace(v)
if !validateValue(val) {
rawVal := strings.TrimSpace(v)
if !validateValue(rawVal) {
return newInvalidMember(), fmt.Errorf("%w: %q", errInvalidValue, v)
}

// Decode a percent-encoded value.
value, err := url.PathUnescape(val)
unescapeVal, err := url.PathUnescape(rawVal)
if err != nil {
return newInvalidMember(), fmt.Errorf("%w: %w", errInvalidValue, err)
}

value := replaceInvalidUTF8Sequences(len(rawVal), unescapeVal)
return Member{key: key, value: value, properties: props, hasData: true}, nil
}

// replaceInvalidUTF8Sequences replaces invalid UTF-8 sequences with '�'.
func replaceInvalidUTF8Sequences(cap int, unescapeVal string) string {
if utf8.ValidString(unescapeVal) {
return unescapeVal
}
// W3C baggage spec:
// https://github.com/w3c/baggage/blob/8c215efbeebd3fa4b1aceb937a747e56444f22f3/baggage/HTTP_HEADER_FORMAT.md?plain=1#L69

var b strings.Builder
b.Grow(cap)
for i := 0; i < len(unescapeVal); {
r, size := utf8.DecodeRuneInString(unescapeVal[i:])
if r == utf8.RuneError && size == 1 {
// Invalid UTF-8 sequence found, replace it with '�'
_, _ = b.WriteString("�")
} else {
_, _ = b.WriteRune(r)
}
i += size
}

return b.String()
}

// validate ensures m conforms to the W3C Baggage specification.
// A key must be an ASCII string, returning an error otherwise.
func (m Member) validate() error {
Expand Down Expand Up @@ -607,10 +633,12 @@ func parsePropertyInternal(s string) (p Property, ok bool) {
}

// Decode a percent-encoded value.
value, err := url.PathUnescape(s[valueStart:valueEnd])
rawVal := s[valueStart:valueEnd]
unescapeVal, err := url.PathUnescape(rawVal)
if err != nil {
return
}
value := replaceInvalidUTF8Sequences(len(rawVal), unescapeVal)

ok = true
p.key = s[keyStart:keyEnd]
Expand Down
62 changes: 61 additions & 1 deletion baggage/baggage_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"slices"
"strings"
"testing"
"unicode/utf8"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
Expand Down Expand Up @@ -469,6 +470,18 @@ func TestBaggageParse(t *testing.T) {
in: tooManyMembers,
err: errMemberNumber,
},
{
name: "percent-encoded octet sequences do not match the UTF-8 encoding scheme",
in: "k=aa%ffcc;p=d%fff",
want: baggage.List{
"k": {
Value: "aa�cc",
Properties: []baggage.Property{
{Key: "p", Value: "d�f", HasValue: true},
},
},
},
},
}

for _, tc := range testcases {
Expand All @@ -480,6 +493,53 @@ func TestBaggageParse(t *testing.T) {
}
}

func TestBaggageParseValue(t *testing.T) {
testcases := []struct {
name string
in string
valueWant string
valueWantSize int
}{
{
name: "percent encoded octet sequence matches UTF-8 encoding scheme",
in: "k=aa%26cc",
valueWant: "aa&cc",
valueWantSize: 5,
},
{
name: "percent encoded octet sequence doesn't match UTF-8 encoding scheme",
in: "k=aa%ffcc",
valueWant: "aa�cc",
valueWantSize: 7,
},
{
name: "multiple percent encoded octet sequences don't match UTF-8 encoding scheme",
in: "k=aa%ffcc%fedd%fa",
valueWant: "aa�cc�dd�",
valueWantSize: 15,
},
{
name: "raw value",
in: "k=aacc",
valueWant: "aacc",
valueWantSize: 4,
},
}

for _, tc := range testcases {
t.Run(tc.name, func(t *testing.T) {
b, err := Parse(tc.in)
assert.Empty(t, err)

val := b.Members()[0].Value()

assert.EqualValues(t, val, tc.valueWant)
assert.Equal(t, len(val), tc.valueWantSize)
assert.True(t, utf8.ValidString(val))
})
}
}

func TestBaggageString(t *testing.T) {
testcases := []struct {
name string
Expand Down Expand Up @@ -979,7 +1039,7 @@ func BenchmarkParse(b *testing.B) {
b.ReportAllocs()

for i := 0; i < b.N; i++ {
benchBaggage, _ = Parse(`userId=alice,serverNode = DF28 , isProduction = false,hasProp=stuff;propKey;propWValue=value`)
benchBaggage, _ = Parse("userId=alice,serverNode = DF28 , isProduction = false,hasProp=stuff;propKey;propWValue=value, invalidUtf8=pr%ffo%ffp%fcValue")
}
}

Expand Down

0 comments on commit 7e0af51

Please sign in to comment.