diff --git a/codec.go b/codec.go index e3014c6..135438a 100644 --- a/codec.go +++ b/codec.go @@ -22,8 +22,7 @@ package uuid import ( - "bytes" - "encoding/hex" + "errors" "fmt" ) @@ -45,11 +44,77 @@ func FromBytesOrNil(input []byte) UUID { return uuid } +var errInvalidFormat = errors.New("uuid: invalid UUID format") + +func fromHexChar(c byte) byte { + switch { + case '0' <= c && c <= '9': + return c - '0' + case 'a' <= c && c <= 'f': + return c - 'a' + 10 + case 'A' <= c && c <= 'F': + return c - 'A' + 10 + } + return 255 +} + +// Parse parses the UUID stored in the string text. Parsing and supported +// formats are the same as UnmarshalText. +func (u *UUID) Parse(s string) error { + switch len(s) { + case 32: // hash + case 36: // canonical + case 34, 38: + if s[0] != '{' || s[len(s)-1] != '}' { + return fmt.Errorf("uuid: incorrect UUID format in string %q", s) + } + s = s[1 : len(s)-1] + case 41, 45: + if s[:9] != "urn:uuid:" { + return fmt.Errorf("uuid: incorrect UUID format in string %q", s[:9]) + } + s = s[9:] + default: + return fmt.Errorf("uuid: incorrect UUID length %d in string %q", len(s), s) + } + // canonical + if len(s) == 36 { + if s[8] != '-' || s[13] != '-' || s[18] != '-' || s[23] != '-' { + return fmt.Errorf("uuid: incorrect UUID format in string %q", s) + } + for i, x := range [16]byte{ + 0, 2, 4, 6, + 9, 11, + 14, 16, + 19, 21, + 24, 26, 28, 30, 32, 34, + } { + v1 := fromHexChar(s[x]) + v2 := fromHexChar(s[x+1]) + if v1|v2 == 255 { + return errInvalidFormat + } + u[i] = (v1 << 4) | v2 + } + return nil + } + // hash like + for i := 0; i < 32; i += 2 { + v1 := fromHexChar(s[i]) + v2 := fromHexChar(s[i+1]) + if v1|v2 == 255 { + return errInvalidFormat + } + u[i/2] = (v1 << 4) | v2 + } + return nil +} + // FromString returns a UUID parsed from the input string. // Input is expected in a form accepted by UnmarshalText. -func FromString(input string) (UUID, error) { - u := UUID{} - err := u.UnmarshalText([]byte(input)) +func FromString(text string) (UUID, error) { + var u UUID + err := u.Parse(text) return u, err } @@ -66,7 +131,9 @@ func FromStringOrNil(input string) UUID { // MarshalText implements the encoding.TextMarshaler interface. // The encoding is the same as returned by the String() method. func (u UUID) MarshalText() ([]byte, error) { - return []byte(u.String()), nil + var buf [36]byte + encodeHex(buf[:], u) + return buf[:], nil } // UnmarshalText implements the encoding.TextUnmarshaler interface. @@ -103,96 +170,52 @@ func (u UUID) MarshalText() ([]byte, error) { // braced := '{' plain '}' | '{' hashlike '}' // urn := URN ':' UUID-NID ':' plain // -func (u *UUID) UnmarshalText(text []byte) error { - switch len(text) { - case 32: - return u.decodeHashLike(text) +func (u *UUID) UnmarshalText(b []byte) error { + switch len(b) { + case 32: // hash + case 36: // canonical case 34, 38: - return u.decodeBraced(text) - case 36: - return u.decodeCanonical(text) + if b[0] != '{' || b[len(b)-1] != '}' { + return fmt.Errorf("uuid: incorrect UUID format in string %q", b) + } + b = b[1 : len(b)-1] case 41, 45: - return u.decodeURN(text) + if string(b[:9]) != "urn:uuid:" { + return fmt.Errorf("uuid: incorrect UUID format in string %q", b[:9]) + } + b = b[9:] default: - return fmt.Errorf("uuid: incorrect UUID length %d in string %q", len(text), text) - } -} - -// decodeCanonical decodes UUID strings that are formatted as defined in RFC-4122 (section 3): -// "6ba7b810-9dad-11d1-80b4-00c04fd430c8". -func (u *UUID) decodeCanonical(t []byte) error { - if t[8] != '-' || t[13] != '-' || t[18] != '-' || t[23] != '-' { - return fmt.Errorf("uuid: incorrect UUID format in string %q", t) + return fmt.Errorf("uuid: incorrect UUID length %d in string %q", len(b), b) } - - src := t - dst := u[:] - - for i, byteGroup := range byteGroups { - if i > 0 { - src = src[1:] // skip dash + if len(b) == 36 { + if b[8] != '-' || b[13] != '-' || b[18] != '-' || b[23] != '-' { + return fmt.Errorf("uuid: incorrect UUID format in string %q", b) } - _, err := hex.Decode(dst[:byteGroup/2], src[:byteGroup]) - if err != nil { - return err + for i, x := range [16]byte{ + 0, 2, 4, 6, + 9, 11, + 14, 16, + 19, 21, + 24, 26, 28, 30, 32, 34, + } { + v1 := fromHexChar(b[x]) + v2 := fromHexChar(b[x+1]) + if v1|v2 == 255 { + return errInvalidFormat + } + u[i] = (v1 << 4) | v2 } - src = src[byteGroup:] - dst = dst[byteGroup/2:] - } - - return nil -} - -// decodeHashLike decodes UUID strings that are using the following format: -// "6ba7b8109dad11d180b400c04fd430c8". -func (u *UUID) decodeHashLike(t []byte) error { - src := t[:] - dst := u[:] - - _, err := hex.Decode(dst, src) - return err -} - -// decodeBraced decodes UUID strings that are using the following formats: -// "{6ba7b810-9dad-11d1-80b4-00c04fd430c8}" -// "{6ba7b8109dad11d180b400c04fd430c8}". -func (u *UUID) decodeBraced(t []byte) error { - l := len(t) - - if t[0] != '{' || t[l-1] != '}' { - return fmt.Errorf("uuid: incorrect UUID format in string %q", t) + return nil } - - return u.decodePlain(t[1 : l-1]) -} - -// decodeURN decodes UUID strings that are using the following formats: -// "urn:uuid:6ba7b810-9dad-11d1-80b4-00c04fd430c8" -// "urn:uuid:6ba7b8109dad11d180b400c04fd430c8". -func (u *UUID) decodeURN(t []byte) error { - total := len(t) - - urnUUIDPrefix := t[:9] - - if !bytes.Equal(urnUUIDPrefix, urnPrefix) { - return fmt.Errorf("uuid: incorrect UUID format in string %q", t) - } - - return u.decodePlain(t[9:total]) -} - -// decodePlain decodes UUID strings that are using the following formats: -// "6ba7b810-9dad-11d1-80b4-00c04fd430c8" or in hash-like format -// "6ba7b8109dad11d180b400c04fd430c8". -func (u *UUID) decodePlain(t []byte) error { - switch len(t) { - case 32: - return u.decodeHashLike(t) - case 36: - return u.decodeCanonical(t) - default: - return fmt.Errorf("uuid: incorrect UUID length %d in string %q", len(t), t) + for i := 0; i < 32; i += 2 { + v1 := fromHexChar(b[i]) + v2 := fromHexChar(b[i+1]) + if v1|v2 == 255 { + return errInvalidFormat + } + u[i/2] = (v1 << 4) | v2 } + return nil } // MarshalBinary implements the encoding.BinaryMarshaler interface. diff --git a/codec_test.go b/codec_test.go index f156d1c..d2fcf6e 100644 --- a/codec_test.go +++ b/codec_test.go @@ -28,6 +28,7 @@ import ( "io/ioutil" "os" "path/filepath" + "strings" "testing" ) @@ -90,7 +91,7 @@ type fromStringTest struct { } // Run runs the FromString test in a subtest of t, named by fst.variant. -func (fst fromStringTest) Run(t *testing.T) { +func (fst fromStringTest) TestFromString(t *testing.T) { t.Run(fst.variant, func(t *testing.T) { got, err := FromString(fst.input) if err != nil { @@ -102,6 +103,19 @@ func (fst fromStringTest) Run(t *testing.T) { }) } +func (fst fromStringTest) TestUnmarshalText(t *testing.T) { + t.Run(fst.variant, func(t *testing.T) { + var u UUID + err := u.UnmarshalText([]byte(fst.input)) + if err != nil { + t.Fatalf("FromBytes(%q) (%s): %v", fst.input, fst.variant, err) + } + if want := codecTestData; !bytes.Equal(u[:], want[:]) { + t.Fatalf("FromBytes(%q) (%s) = %v, want %v", fst.input, fst.variant, u, want) + } + }) +} + // fromStringTests contains UUID variants that are expected to be parsed // successfully by UnmarshalText / FromString. // @@ -171,7 +185,7 @@ var invalidFromStringInputs = []string{ func TestFromString(t *testing.T) { t.Run("Valid", func(t *testing.T) { for _, fst := range fromStringTests { - fst.Run(t) + fst.TestFromString(t) } }) t.Run("Invalid", func(t *testing.T) { @@ -201,6 +215,35 @@ func TestFromStringOrNil(t *testing.T) { }) } +func TestUnmarshalText(t *testing.T) { + t.Run("Valid", func(t *testing.T) { + for _, fst := range fromStringTests { + fst.TestUnmarshalText(t) + } + }) + t.Run("Invalid", func(t *testing.T) { + for _, s := range invalidFromStringInputs { + var u UUID + err := u.UnmarshalText([]byte(s)) + if err == nil { + t.Errorf("FromBytes(%q): want err != nil, got %v", s, u) + } + } + }) +} + +// Test that UnmarshalText() and Parse() return identical errors +func TestUnmarshalTextParseErrors(t *testing.T) { + for _, s := range invalidFromStringInputs { + var u UUID + e1 := u.UnmarshalText([]byte(s)) + e2 := u.Parse(s) + if e1 == nil || e1.Error() != e2.Error() { + t.Errorf("%q: errors don't match: UnmarshalText: %v Parse: %v", s, e1, e2) + } + } +} + func TestMarshalBinary(t *testing.T) { got, err := codecTestUUID.MarshalBinary() if err != nil { @@ -227,11 +270,50 @@ func TestDecodePlainWithWrongLength(t *testing.T) { u := UUID{} - if u.decodePlain(arg) == nil { - t.Errorf("%v.decodePlain(%q): should return error, but it did not", u, arg) + if u.UnmarshalText(arg) == nil { + t.Errorf("%v.UnmarshalText(%q): should return error, but it did not", u, arg) } } +func TestFromHexChar(t *testing.T) { + const hextable = "0123456789abcdef" + + t.Run("Valid", func(t *testing.T) { + t.Run("Lower", func(t *testing.T) { + for i, c := range []byte(hextable) { + x := fromHexChar(c) + if int(x) != i { + t.Errorf("fromHexChar(%c): got %d want %d", c, x, i) + } + } + }) + t.Run("Upper", func(t *testing.T) { + for i, c := range []byte(strings.ToUpper(hextable)) { + x := fromHexChar(c) + if int(x) != i { + t.Errorf("fromHexChar(%c): got %d want %d", c, x, i) + } + } + }) + }) + + t.Run("Invalid", func(t *testing.T) { + skip := make(map[byte]bool) + for _, c := range []byte(hextable + strings.ToUpper(hextable)) { + skip[c] = true + } + for i := 0; i < 256; i++ { + c := byte(i) + if !skip[c] { + v := fromHexChar(c) + if v != 255 { + t.Errorf("fromHexChar(%c): got %d want: %d", c, v, 255) + } + } + } + }) +} + var stringBenchmarkSink string func BenchmarkString(b *testing.B) { @@ -264,6 +346,42 @@ func BenchmarkFromString(b *testing.B) { }) } +func BenchmarkUnmarshalText(b *testing.B) { + b.Run("canonical", func(b *testing.B) { + text := []byte(Must(FromString("6ba7b810-9dad-11d1-80b4-00c04fd430c8")).String()) + u := new(UUID) + if err := u.UnmarshalText(text); err != nil { + b.Fatal(err) + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = u.UnmarshalText(text) + } + }) + b.Run("urn", func(b *testing.B) { + text := []byte(Must(FromString("urn:uuid:6ba7b810-9dad-11d1-80b4-00c04fd430c8")).String()) + u := new(UUID) + if err := u.UnmarshalText(text); err != nil { + b.Fatal(err) + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = u.UnmarshalText(text) + } + }) + b.Run("braced", func(b *testing.B) { + text := []byte(Must(FromString("{6ba7b810-9dad-11d1-80b4-00c04fd430c8}")).String()) + u := new(UUID) + if err := u.UnmarshalText(text); err != nil { + b.Fatal(err) + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = u.UnmarshalText(text) + } + }) +} + func BenchmarkMarshalBinary(b *testing.B) { for i := 0; i < b.N; i++ { codecTestUUID.MarshalBinary() @@ -276,6 +394,16 @@ func BenchmarkMarshalText(b *testing.B) { } } +func BenchmarkParseV4(b *testing.B) { + const text = "f52a747a-983f-45f7-90b5-e84d70f470dd" + for i := 0; i < b.N; i++ { + var u UUID + if err := u.Parse(text); err != nil { + b.Fatal(err) + } + } +} + var seedFuzzCorpus = flag.Bool("seed_fuzz_corpus", false, "seed fuzz test corpus") func TestSeedFuzzCorpus(t *testing.T) { diff --git a/sql.go b/sql.go index 6f254a4..cf00871 100644 --- a/sql.go +++ b/sql.go @@ -22,9 +22,7 @@ package uuid import ( - "bytes" "database/sql/driver" - "encoding/json" "fmt" ) @@ -49,7 +47,9 @@ func (u *UUID) Scan(src interface{}) error { return u.UnmarshalText(src) case string: - return u.UnmarshalText([]byte(src)) + uu, err := FromString(src) + *u = uu + return err } return fmt.Errorf("uuid: cannot convert %T to UUID", src) @@ -83,27 +83,30 @@ func (u *NullUUID) Scan(src interface{}) error { return u.UUID.Scan(src) } +var nullJSON = []byte("null") + // MarshalJSON marshals the NullUUID as null or the nested UUID func (u NullUUID) MarshalJSON() ([]byte, error) { if !u.Valid { - return json.Marshal(nil) + return nullJSON, nil } - - return json.Marshal(u.UUID) + var buf [38]byte + buf[0] = '"' + encodeHex(buf[1:37], u.UUID) + buf[37] = '"' + return buf[:], nil } // UnmarshalJSON unmarshals a NullUUID func (u *NullUUID) UnmarshalJSON(b []byte) error { - if bytes.Equal(b, []byte("null")) { + if string(b) == "null" { u.UUID, u.Valid = Nil, false return nil } - - if err := json.Unmarshal(b, &u.UUID); err != nil { - return err + if n := len(b); n >= 2 && b[0] == '"' { + b = b[1 : n-1] } - - u.Valid = true - - return nil + err := u.UUID.UnmarshalText(b) + u.Valid = (err == nil) + return err } diff --git a/sql_test.go b/sql_test.go index 4aa22f7..a2c1b03 100644 --- a/sql_test.go +++ b/sql_test.go @@ -291,6 +291,7 @@ func testNullUUIDUnmarshalJSONNull(t *testing.T) { t.Fatalf("u.UUID = %v, want %v", u.UUID, Nil) } } + func testNullUUIDUnmarshalJSONValid(t *testing.T) { var u NullUUID @@ -318,3 +319,47 @@ func testNullUUIDUnmarshalJSONMalformed(t *testing.T) { t.Fatal("json.Unmarshal err = , want error") } } + +func BenchmarkNullMarshalJSON(b *testing.B) { + b.Run("Valid", func(b *testing.B) { + u, err := FromString("6ba7b810-9dad-11d1-80b4-00c04fd430c8") + if err != nil { + b.Fatal(err) + } + n := NullUUID{UUID: u, Valid: true} + for i := 0; i < b.N; i++ { + n.MarshalJSON() + } + }) + b.Run("Invalid", func(b *testing.B) { + n := NullUUID{Valid: false} + for i := 0; i < b.N; i++ { + n.MarshalJSON() + } + }) +} + +func BenchmarkNullUnmarshalJSON(b *testing.B) { + baseUUID, err := FromString("6ba7b810-9dad-11d1-80b4-00c04fd430c8") + if err != nil { + b.Fatal(err) + } + data, err := json.Marshal(&baseUUID) + if err != nil { + b.Fatal(err) + } + + b.Run("Valid", func(b *testing.B) { + var u NullUUID + for i := 0; i < b.N; i++ { + u.UnmarshalJSON(data) + } + }) + b.Run("Invalid", func(b *testing.B) { + invalid := []byte("null") + var n NullUUID + for i := 0; i < b.N; i++ { + n.UnmarshalJSON(invalid) + } + }) +} diff --git a/uuid.go b/uuid.go index f314b84..b09dcc6 100644 --- a/uuid.go +++ b/uuid.go @@ -44,8 +44,6 @@ import ( "encoding/binary" "encoding/hex" "fmt" - "io" - "strings" "time" ) @@ -133,12 +131,6 @@ func TimestampFromV6(u UUID) (Timestamp, error) { return Timestamp(uint64(low) + (uint64(mid) << 12) + (uint64(hi) << 28)), nil } -// String parse helpers. -var ( - urnPrefix = []byte("urn:uuid:") - byteGroups = []int{8, 4, 4, 4, 12} -) - // Nil is the nil UUID, as specified in RFC-4122, that has all 128 bits set to // zero. var Nil = UUID{} @@ -182,22 +174,26 @@ func (u UUID) Bytes() []byte { return u[:] } +// encodeHex encodes the canonical RFC-4122 form of UUID u into the +// first 36 bytes dst. +func encodeHex(dst []byte, u UUID) { + hex.Encode(dst, u[0:4]) + dst[8] = '-' + hex.Encode(dst[9:13], u[4:6]) + dst[13] = '-' + hex.Encode(dst[14:18], u[6:8]) + dst[18] = '-' + hex.Encode(dst[19:23], u[8:10]) + dst[23] = '-' + hex.Encode(dst[24:], u[10:]) +} + // String returns a canonical RFC-4122 string representation of the UUID: // xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx. func (u UUID) String() string { - buf := make([]byte, 36) - - hex.Encode(buf[0:8], u[0:4]) - buf[8] = '-' - hex.Encode(buf[9:13], u[4:6]) - buf[13] = '-' - hex.Encode(buf[14:18], u[6:8]) - buf[18] = '-' - hex.Encode(buf[19:23], u[8:10]) - buf[23] = '-' - hex.Encode(buf[24:], u[10:]) - - return string(buf) + var buf [36]byte + encodeHex(buf[:], u) + return string(buf[:]) } // Format implements fmt.Formatter for UUID values. @@ -210,52 +206,41 @@ func (u UUID) String() string { // All other verbs not handled directly by the fmt package (like '%p') are unsupported and will return // "%!verb(uuid.UUID=value)" as recommended by the fmt package. func (u UUID) Format(f fmt.State, c rune) { + if c == 'v' && f.Flag('#') { + fmt.Fprintf(f, "%#v", [Size]byte(u)) + return + } switch c { case 'x', 'X': - s := hex.EncodeToString(u.Bytes()) + b := make([]byte, 32) + hex.Encode(b, u[:]) if c == 'X' { - s = strings.Map(toCapitalHexDigits, s) - } - _, _ = io.WriteString(f, s) - case 'v': - var s string - if f.Flag('#') { - s = fmt.Sprintf("%#v", [Size]byte(u)) - } else { - s = u.String() + toUpperHex(b) } - _, _ = io.WriteString(f, s) - case 's', 'S': - s := u.String() + _, _ = f.Write(b) + case 'v', 's', 'S': + b, _ := u.MarshalText() if c == 'S' { - s = strings.Map(toCapitalHexDigits, s) + toUpperHex(b) } - _, _ = io.WriteString(f, s) + _, _ = f.Write(b) case 'q': - _, _ = io.WriteString(f, `"`+u.String()+`"`) + b := make([]byte, 38) + b[0] = '"' + encodeHex(b[1:], u) + b[37] = '"' + _, _ = f.Write(b) default: // invalid/unsupported format verb fmt.Fprintf(f, "%%!%c(uuid.UUID=%s)", c, u.String()) } } -func toCapitalHexDigits(ch rune) rune { - // convert a-f hex digits to A-F - switch ch { - case 'a': - return 'A' - case 'b': - return 'B' - case 'c': - return 'C' - case 'd': - return 'D' - case 'e': - return 'E' - case 'f': - return 'F' - default: - return ch +func toUpperHex(b []byte) { + for i, c := range b { + if 'a' <= c && c <= 'f' { + b[i] = c - ('a' - 'A') + } } } diff --git a/uuid_test.go b/uuid_test.go index 2bbbb9d..3fa5920 100644 --- a/uuid_test.go +++ b/uuid_test.go @@ -24,6 +24,7 @@ package uuid import ( "bytes" "fmt" + "io" "testing" "time" ) @@ -254,3 +255,23 @@ func TestTimestampFromV6(t *testing.T) { } } } + +func BenchmarkFormat(b *testing.B) { + var tests = []string{ + "%s", + "%S", + "%q", + "%x", + "%X", + "%v", + "%+v", + "%#v", + } + for _, x := range tests { + b.Run(x[1:], func(b *testing.B) { + for i := 0; i < b.N; i++ { + fmt.Fprintf(io.Discard, x, &codecTestUUID) + } + }) + } +}