Skip to content

Commit

Permalink
Merge pull request #7698 from vmg/vmg/normalize
Browse files Browse the repository at this point in the history
perf: cache reserved bind variables in queries
  • Loading branch information
systay authored Mar 18, 2021
2 parents 0958ad5 + 00a6ebd commit 2a1c7b4
Show file tree
Hide file tree
Showing 28 changed files with 1,132 additions and 1,076 deletions.
4 changes: 2 additions & 2 deletions go/test/fuzzing/parser_fuzzer.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,13 @@ func FuzzIsDML(data []byte) int {
}

func FuzzNormalizer(data []byte) int {
stmt, err := sqlparser.Parse(string(data))
stmt, reservedVars, err := sqlparser.Parse2(string(data))
if err != nil {
return -1
}
prefix := "bv"
bv := make(map[string]*querypb.BindVariable)
sqlparser.Normalize(stmt, bv, prefix)
sqlparser.Normalize(stmt, reservedVars, bv, prefix)
return 1
}

Expand Down
4 changes: 2 additions & 2 deletions go/vt/sqlparser/ast_rewriting.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,9 @@ type RewriteASTResult struct {
}

// PrepareAST will normalize the query
func PrepareAST(in Statement, bindVars map[string]*querypb.BindVariable, prefix string, parameterize bool, keyspace string) (*RewriteASTResult, error) {
func PrepareAST(in Statement, reservedVars BindVars, bindVars map[string]*querypb.BindVariable, prefix string, parameterize bool, keyspace string) (*RewriteASTResult, error) {
if parameterize {
err := Normalize(in, bindVars, prefix)
err := Normalize(in, reservedVars, bindVars, prefix)
if err != nil {
return nil, err
}
Expand Down
13 changes: 7 additions & 6 deletions go/vt/sqlparser/normalizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,18 @@ import (
querypb "vitess.io/vitess/go/vt/proto/query"
)

// BindVars is a set of reserved bind variables from a SQL statement
type BindVars map[string]struct{}

// Normalize changes the statement to use bind values, and
// updates the bind vars to those values. The supplied prefix
// is used to generate the bind var names. The function ensures
// that there are no collisions with existing bind vars.
// Within Select constructs, bind vars are deduped. This allows
// us to identify vindex equality. Otherwise, every value is
// treated as distinct.
func Normalize(stmt Statement, bindVars map[string]*querypb.BindVariable, prefix string) error {
nz := newNormalizer(stmt, bindVars, prefix)
func Normalize(stmt Statement, known BindVars, bindVars map[string]*querypb.BindVariable, prefix string) error {
nz := newNormalizer(known, bindVars, prefix)
_, err := Rewrite(stmt, nz.WalkStatement, nil)
if err != nil {
return err
Expand All @@ -49,11 +52,11 @@ type normalizer struct {
err error
}

func newNormalizer(stmt Statement, bindVars map[string]*querypb.BindVariable, prefix string) *normalizer {
func newNormalizer(reserved map[string]struct{}, bindVars map[string]*querypb.BindVariable, prefix string) *normalizer {
return &normalizer{
bindVars: bindVars,
prefix: prefix,
reserved: GetBindvars(stmt),
reserved: reserved,
counter: 1,
vals: make(map[string]string),
}
Expand Down Expand Up @@ -226,8 +229,6 @@ func (nz *normalizer) newName() string {
}

// GetBindvars returns a map of the bind vars referenced in the statement.
// TODO(sougou); This function gets called again from vtgate/planbuilder.
// Ideally, this should be done only once.
func GetBindvars(stmt Statement) map[string]struct{} {
bindvars := make(map[string]struct{})
_ = Walk(func(node SQLNode) (kontinue bool, err error) {
Expand Down
38 changes: 35 additions & 3 deletions go/vt/sqlparser/normalizer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -230,9 +230,10 @@ func TestNormalize(t *testing.T) {
t.Error(err)
continue
}
known := GetBindvars(stmt)
bv := make(map[string]*querypb.BindVariable)
require.NoError(t,
Normalize(stmt, bv, prefix))
Normalize(stmt, known, bv, prefix))
outstmt := String(stmt)
if outstmt != tc.outstmt {
t.Errorf("Query:\n%s:\n%s, want\n%s", tc.in, outstmt, tc.outstmt)
Expand Down Expand Up @@ -269,12 +270,43 @@ BenchmarkNormalize-8 500000 3620 ns/op 1461 B/op
*/
func BenchmarkNormalize(b *testing.B) {
sql := "select 'abcd', 20, 30.0, eid from a where 1=eid and name='3'"
ast, err := Parse(sql)
ast, reservedVars, err := Parse2(sql)
if err != nil {
b.Fatal(err)
}
for i := 0; i < b.N; i++ {
require.NoError(b,
Normalize(ast, map[string]*querypb.BindVariable{}, ""))
Normalize(ast, reservedVars, map[string]*querypb.BindVariable{}, ""))
}
}

func BenchmarkNormalizeTraces(b *testing.B) {
for _, trace := range []string{"django_queries.txt", "lobsters.sql.gz"} {
b.Run(trace, func(b *testing.B) {
queries := loadQueries(b, trace)
if len(queries) > 10000 {
queries = queries[:10000]
}

parsed := make([]Statement, 0, len(queries))
reservedVars := make([]BindVars, 0, len(queries))
for _, q := range queries {
pp, kb, err := Parse2(q)
if err != nil {
b.Fatal(err)
}
parsed = append(parsed, pp)
reservedVars = append(reservedVars, kb)
}

b.ResetTimer()
b.ReportAllocs()

for i := 0; i < b.N; i++ {
for i, query := range parsed {
_ = Normalize(query, reservedVars[i], map[string]*querypb.BindVariable{}, "")
}
}
})
}
}
23 changes: 15 additions & 8 deletions go/vt/sqlparser/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,27 +68,34 @@ func yyParsePooled(yylex yyLexer) int {
// a set of types, define the function as iTypeName.
// This will help avoid name collisions.

// Parse parses the SQL in full and returns a Statement, which
// is the AST representation of the query. If a DDL statement
// Parse2 parses the SQL in full and returns a Statement, which
// is the AST representation of the query, and a set of BindVars, which are all the
// bind variables that were found in the original SQL query. If a DDL statement
// is partially parsed but still contains a syntax error, the
// error is ignored and the DDL is returned anyway.
func Parse(sql string) (Statement, error) {
func Parse2(sql string) (Statement, BindVars, error) {
tokenizer := NewStringTokenizer(sql)
if yyParsePooled(tokenizer) != 0 {
if tokenizer.partialDDL != nil {
if typ, val := tokenizer.Scan(); typ != 0 {
return nil, fmt.Errorf("extra characters encountered after end of DDL: '%s'", string(val))
return nil, nil, fmt.Errorf("extra characters encountered after end of DDL: '%s'", string(val))
}
log.Warningf("ignoring error parsing DDL '%s': %v", sql, tokenizer.LastError)
tokenizer.ParseTree = tokenizer.partialDDL
return tokenizer.ParseTree, nil
return tokenizer.ParseTree, tokenizer.BindVars, nil
}
return nil, vterrors.New(vtrpcpb.Code_INVALID_ARGUMENT, tokenizer.LastError.Error())
return nil, nil, vterrors.New(vtrpcpb.Code_INVALID_ARGUMENT, tokenizer.LastError.Error())
}
if tokenizer.ParseTree == nil {
return nil, ErrEmpty
return nil, nil, ErrEmpty
}
return tokenizer.ParseTree, nil
return tokenizer.ParseTree, tokenizer.BindVars, nil
}

// Parse behaves like Parse2 but does not return a set of bind variables
func Parse(sql string) (Statement, error) {
stmt, _, err := Parse2(sql)
return stmt, err
}

// ParseStrictDDL is the same as Parse except it errors on
Expand Down
4 changes: 2 additions & 2 deletions go/vt/sqlparser/redact_query.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,13 @@ func RedactSQLQuery(sql string) (string, error) {
bv := map[string]*querypb.BindVariable{}
sqlStripped, comments := SplitMarginComments(sql)

stmt, err := Parse(sqlStripped)
stmt, reservedVars, err := Parse2(sqlStripped)
if err != nil {
return "", err
}

prefix := "redacted"
err = Normalize(stmt, bv, prefix)
err = Normalize(stmt, reservedVars, bv, prefix)
if err != nil {
return "", err
}
Expand Down
Loading

0 comments on commit 2a1c7b4

Please sign in to comment.