Skip to content

Commit

Permalink
fields: add Match optimizations
Browse files Browse the repository at this point in the history
Do some precomputation to speed up Match: convert the field name to
bytes, and pick an efficient case-folding comparison function.

These are taken from the encoding/json code. The files fold.go and fold_test.go
are copies of the Go 1.7.3 versions in encoding/json (fold.go differs only in its license header and package clause).

Change-Id: I69e748b2e6a65f0c83441a1791cb3d7f71c86094
Reviewed-on: https://code-review.googlesource.com/9551
Reviewed-by: Michael Darakananda <pongad@google.com>
Reviewed-by: Jun Mukai <mukai@google.com>
  • Loading branch information
jba committed Nov 28, 2016
1 parent 73d4824 commit 8ee8376
Show file tree
Hide file tree
Showing 4 changed files with 326 additions and 21 deletions.
40 changes: 24 additions & 16 deletions internal/fields/fields.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,9 @@ type Field struct {
Type reflect.Type // field type
Index []int // index sequence, for reflect.Value.FieldByIndex
ParsedTag interface{} // third return value of the parseTag function

nameBytes []byte
equalFold func(s, t []byte) bool
}

// A Cache records information about the fields of struct types.
Expand Down Expand Up @@ -140,11 +143,10 @@ func (l List) MatchBytes(name []byte) *Field {
var f *Field
for i := range l {
ff := &l[i]
nameBytes := []byte(ff.Name)
if bytes.Equal(nameBytes, name) {
if bytes.Equal(ff.nameBytes, name) {
return ff
}
if f == nil && bytes.EqualFold(nameBytes, name) {
if f == nil && ff.equalFold(ff.nameBytes, name) {
f = ff
}
}
Expand Down Expand Up @@ -288,19 +290,7 @@ func (c *Cache) listFields(t reflect.Type) []Field {
if !exported {
continue
}
name := tagName
if name == "" {
name = f.Name
}
sf := Field{
Name: name,
NameFromTag: tagName != "",
Type: f.Type,
ParsedTag: other,
}
sf.Index = append(sf.Index, scan.index...)
sf.Index = append(sf.Index, i)
fields = append(fields, sf)
fields = append(fields, newField(f, tagName, other, scan.index, i))
if count[t] > 1 {
// If there were multiple instances, add a second,
// so that the annihilation code will see a duplicate.
Expand Down Expand Up @@ -332,6 +322,24 @@ func (c *Cache) listFields(t reflect.Type) []Field {
return fields
}

// newField builds the Field record for struct field f.
//
// tagName is the name taken from the field's tag; when it is empty the Go
// field name f.Name is used instead, and NameFromTag is false. other is
// stored unchanged as ParsedTag. The resulting Index is a fresh slice holding
// the elements of index followed by i.
//
// The name is also stored as bytes, together with the fold function that
// foldFunc selects for those bytes, so that matching does not have to redo
// that work on every lookup.
func newField(f reflect.StructField, tagName string, other interface{}, index []int, i int) Field {
	fromTag := tagName != ""
	name := f.Name
	if fromTag {
		name = tagName
	}
	raw := []byte(name)

	// Copy index rather than aliasing the caller's slice.
	path := make([]int, 0, len(index)+1)
	path = append(path, index...)
	path = append(path, i)

	return Field{
		Name:        name,
		NameFromTag: fromTag,
		Type:        f.Type,
		Index:       path,
		ParsedTag:   other,
		nameBytes:   raw,
		equalFold:   foldFunc(raw),
	}
}

// byName sorts fields by name, breaking ties with depth, then breaking ties
// with index sequence.
type byName []Field
Expand Down
22 changes: 17 additions & 5 deletions internal/fields/fields_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -264,12 +264,12 @@ func TestParsedTag(t *testing.T) {
X int `json:"name,omitempty"`
}
got := NewCache(jsonTagParser).Fields(reflect.TypeOf(S{}))
want := List{
want := []*Field{
{Name: "name", NameFromTag: true, Type: intType,
Index: []int{0}, ParsedTag: []string{"omitempty"}},
}
if !reflect.DeepEqual(got, want) {
t.Errorf("got\n%+v\nwant\n%+v", got, want)
if msg, ok := compareFields(got, want); !ok {
t.Error(msg)
}
}

Expand All @@ -279,13 +279,25 @@ func compareFields(got []Field, want []*Field) (msg string, ok bool) {
}
for i, g := range got {
w := *want[i]
if !reflect.DeepEqual(g, w) {
if !fieldsEqual(&g, &w) {
return fmt.Sprintf("got %+v, want %+v", g, w), false
}
}
return "", true
}

// fieldsEqual reports whether f1 and f2 describe the same field.
//
// It is needed because Field contains a function, which cannot be compared
// even by reflect.DeepEqual. The exported data of the two fields is compared
// instead: Name, NameFromTag, Type, Index and ParsedTag. The unexported
// precomputed members are deliberately ignored, since expected values written
// as struct literals in tests do not populate them.
//
// Two nil pointers are equal; a nil pointer never equals a non-nil one.
func fieldsEqual(f1, f2 *Field) bool {
	if f1 == nil || f2 == nil {
		return f1 == f2
	}
	return f1.Name == f2.Name &&
		f1.NameFromTag == f2.NameFromTag &&
		f1.Type == f2.Type &&
		// Index must be checked too: a field found at the wrong position
		// in the struct is a different field.
		reflect.DeepEqual(f1.Index, f2.Index) &&
		reflect.DeepEqual(f1.ParsedTag, f2.ParsedTag)
}

// Set the fields of dst from those of src.
// dst must be a pointer to a struct value.
// src must be a struct value.
Expand Down Expand Up @@ -346,7 +358,7 @@ func TestMatchingField(t *testing.T) {
// Untagged embedded structs disappear.
{"S4", nil},
} {
if got := fields.Match(test.name); !reflect.DeepEqual(got, test.want) {
if got := fields.Match(test.name); !fieldsEqual(got, test.want) {
t.Errorf("match %q:\ngot %+v\nwant %+v", test.name, got, test.want)
}
}
Expand Down
156 changes: 156 additions & 0 deletions internal/fields/fold.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
// Copyright 2016 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package fields

// This file was copied from https://go.googlesource.com/go/+/go1.7.3/src/encoding/json/fold.go.
// Only the license and package were changed.

import (
"bytes"
"unicode/utf8"
)

// Constants used by the case-folding comparison functions in this file.
const (
// caseMask clears bit 0x20 of a byte, the bit in which an ASCII letter's
// upper- and lower-case forms differ.
caseMask = ^byte(0x20) // Mask to ignore case in ASCII.
// kelvin is U+212A, the Kelvin sign, the third rune that 'k' and 'K' fold to.
kelvin = '\u212a'
// smallLongEss is U+017F, Latin small letter long s, the third rune that
// 's' and 'S' fold to.
smallLongEss = '\u017f'
)

// foldFunc picks the cheapest case-insensitive comparison that is still
// correct for the key s. In decreasing order of generality (and cost):
//
//   - bytes.EqualFold: s contains at least one non-ASCII byte.
//   - equalFoldRight: s is ASCII and contains 'k', 'K', 's' or 'S'.
//   - asciiEqualFold: s is ASCII with none of those letters, but has at
//     least one non-letter (digit, underscore, punctuation, ...).
//   - simpleLetterEqualFold: s consists solely of ASCII letters other than
//     'k', 'K', 's' and 'S'.
//
// K and S get their own case because each of them folds to a third,
// non-ASCII rune in addition to its other-case ASCII form: U+212A (Kelvin
// sign) for K and U+017F (Latin small letter long s) for S.
//
// The returned function is specialized for s and should only be called with
// s as its first argument. It is not a closure over s, for performance
// reasons.
func foldFunc(s []byte) func(s, t []byte) bool {
	var sawNonLetter, sawSpecial bool
	for i := 0; i < len(s); i++ {
		c := s[i]
		if c >= utf8.RuneSelf {
			// Any non-ASCII byte forces the fully general comparison.
			return bytes.EqualFold
		}
		u := c & caseMask
		switch {
		case u == 'K' || u == 'S':
			sawSpecial = true
		case u < 'A' || u > 'Z':
			sawNonLetter = true
		}
	}
	switch {
	case sawSpecial:
		return equalFoldRight
	case sawNonLetter:
		return asciiEqualFold
	default:
		return simpleLetterEqualFold
	}
}

// equalFoldRight is the variant of bytes.EqualFold used when s is known to
// be pure ASCII (letters, digits, punctuation) and contains at least one of
// 's', 'S', 'k' or 'K'. Only t may contain multi-byte runes, and the only
// ones that can match are U+017F (long s) against an s/S in s and U+212A
// (Kelvin sign) against a k/K in s.
// See the comments on foldFunc.
func equalFoldRight(s, t []byte) bool {
	pos := 0 // offset of the next unread byte of t
	for i := 0; i < len(s); i++ {
		if pos >= len(t) {
			return false
		}
		sc, tc := s[i], t[pos]
		if tc < utf8.RuneSelf {
			// Both bytes are ASCII: they must be identical, or the same
			// letter in different case.
			if sc != tc {
				up := sc & caseMask
				if up < 'A' || up > 'Z' || up != tc&caseMask {
					return false
				}
			}
			pos++
			continue
		}
		// sc is ASCII but t continues with a multi-byte rune. That can only
		// match if the rune is the special fold partner of sc.
		r, n := utf8.DecodeRune(t[pos:])
		var want rune
		switch sc {
		case 's', 'S':
			want = smallLongEss
		case 'k', 'K':
			want = kelvin
		default:
			return false
		}
		if r != want {
			return false
		}
		pos += n
	}
	// Any bytes left over in t mean t is longer than s.
	return pos == len(t)
}

// asciiEqualFold is the variant of bytes.EqualFold used when s is pure ASCII,
// may contain non-letters, and contains none of the special-folding letters
// ('k', 'K', 's', 'S'). Corresponding bytes of s and t must be identical, or
// the byte of s must be a letter and the two bytes must differ only in the
// ASCII case bit.
// See the comments on foldFunc.
func asciiEqualFold(s, t []byte) bool {
	if len(s) != len(t) {
		return false
	}
	for i := range t {
		x, y := s[i], t[i]
		if x == y {
			continue
		}
		// Case may only be ignored when the byte from s is a letter;
		// non-letters have to match exactly.
		up := x & caseMask
		isLetter := up >= 'A' && up <= 'Z'
		if !isLetter || up != y&caseMask {
			return false
		}
	}
	return true
}

// simpleLetterEqualFold is the variant of bytes.EqualFold used when s
// consists solely of ASCII letters (no digits, underscores, etc.) and
// contains none of 'k', 'K', 's' or 'S'. Under that precondition, two bytes
// match exactly when they agree in every bit other than the ASCII case bit.
// See the comments on foldFunc.
func simpleLetterEqualFold(s, t []byte) bool {
	n := len(s)
	if n != len(t) {
		return false
	}
	for i := 0; i < n; i++ {
		// XOR exposes the differing bits; anything outside the case bit
		// is a real mismatch.
		if (s[i]^t[i])&caseMask != 0 {
			return false
		}
	}
	return true
}
Loading

0 comments on commit 8ee8376

Please sign in to comment.