Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

proto: reject invalid UTF-8 in strings #499

Merged
merged 1 commit into from
Jan 26, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions proto/all_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2184,6 +2184,21 @@ func TestConcurrentMarshal(t *testing.T) {
}
}

func TestInvalidUTF8(t *testing.T) {
const wire = "\x12\x04\xde\xea\xca\xfe"

var m GoTest
if err := Unmarshal([]byte(wire), &m); err == nil {
t.Errorf("Unmarshal error: got nil, want non-nil")
}

m.Reset()
m.Table = String(wire[2:])
if _, err := Marshal(&m); err == nil {
t.Errorf("Marshal error: got nil, want non-nil")
}
}

// Benchmarks

func testMsg() *GoTest {
Expand Down
3 changes: 3 additions & 0 deletions proto/lib.go
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,7 @@ package proto

import (
"encoding/json"
"errors"
"fmt"
"log"
"reflect"
Expand All @@ -279,6 +280,8 @@ import (
// This variable is temporary and will go away soon. Do not rely on it.
var Proto3UnknownFields = false

var errInvalidUTF8 = errors.New("proto: invalid UTF-8 string")

// Message is implemented by generated protocol buffer messages.
type Message interface {
Reset()
Expand Down
13 changes: 13 additions & 0 deletions proto/table_marshal.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ import (
"strings"
"sync"
"sync/atomic"
"unicode/utf8"
)

// a sizer takes a pointer to a field and the size of its tag, computes the size of
Expand Down Expand Up @@ -1983,6 +1984,9 @@ func appendBoolPackedSlice(b []byte, ptr pointer, wiretag uint64, _ bool) ([]byt
}
func appendStringValue(b []byte, ptr pointer, wiretag uint64, _ bool) ([]byte, error) {
v := *ptr.toString()
if !utf8.ValidString(v) {
return nil, errInvalidUTF8
}
b = appendVarint(b, wiretag)
b = appendVarint(b, uint64(len(v)))
b = append(b, v...)
Expand All @@ -1993,6 +1997,9 @@ func appendStringValueNoZero(b []byte, ptr pointer, wiretag uint64, _ bool) ([]b
if v == "" {
return b, nil
}
if !utf8.ValidString(v) {
return nil, errInvalidUTF8
}
b = appendVarint(b, wiretag)
b = appendVarint(b, uint64(len(v)))
b = append(b, v...)
Expand All @@ -2004,6 +2011,9 @@ func appendStringPtr(b []byte, ptr pointer, wiretag uint64, _ bool) ([]byte, err
return b, nil
}
v := *p
if !utf8.ValidString(v) {
return nil, errInvalidUTF8
}
b = appendVarint(b, wiretag)
b = appendVarint(b, uint64(len(v)))
b = append(b, v...)
Expand All @@ -2012,6 +2022,9 @@ func appendStringPtr(b []byte, ptr pointer, wiretag uint64, _ bool) ([]byte, err
func appendStringSlice(b []byte, ptr pointer, wiretag uint64, _ bool) ([]byte, error) {
s := *ptr.toStringSlice()
for _, v := range s {
if !utf8.ValidString(v) {
return nil, errInvalidUTF8
}
b = appendVarint(b, wiretag)
b = appendVarint(b, uint64(len(v)))
b = append(b, v...)
Expand Down
10 changes: 10 additions & 0 deletions proto/table_unmarshal.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ import (
"strings"
"sync"
"sync/atomic"
"unicode/utf8"
)

// Unmarshal is the entry point from the generated .pb.go files.
Expand Down Expand Up @@ -1514,6 +1515,9 @@ func unmarshalStringValue(b []byte, f pointer, w int) ([]byte, error) {
return nil, io.ErrUnexpectedEOF
}
v := string(b[:x])
if !utf8.ValidString(v) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just a thought, but we could test the values with utf8.Valid before converting them to a string.

I mean, the code is essentially the same, but if we can validate it sooner, why not?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's no performance penalty for utf8.ValidString since it doesn't convert the string to a []byte and call utf8.Valid under the hood.

I'm going to keep it as is since utf8.ValidString(v) is more readable than utf8.Valid(b[:x]), where you don't have to consciously think about slicing b.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Indeed, good call. 👍

return nil, errInvalidUTF8
}
*f.toString() = v
return b[x:], nil
}
Expand All @@ -1531,6 +1535,9 @@ func unmarshalStringPtr(b []byte, f pointer, w int) ([]byte, error) {
return nil, io.ErrUnexpectedEOF
}
v := string(b[:x])
if !utf8.ValidString(v) {
return nil, errInvalidUTF8
}
*f.toStringPtr() = &v
return b[x:], nil
}
Expand All @@ -1548,6 +1555,9 @@ func unmarshalStringSlice(b []byte, f pointer, w int) ([]byte, error) {
return nil, io.ErrUnexpectedEOF
}
v := string(b[:x])
if !utf8.ValidString(v) {
return nil, errInvalidUTF8
}
s := f.toStringSlice()
*s = append(*s, v)
return b[x:], nil
Expand Down