diff --git a/go/vt/vtgate/planbuilder/operators/subquery_builder.go b/go/vt/vtgate/planbuilder/operators/subquery_builder.go index c2256df06f4..cd2abb66a48 100644 --- a/go/vt/vtgate/planbuilder/operators/subquery_builder.go +++ b/go/vt/vtgate/planbuilder/operators/subquery_builder.go @@ -169,7 +169,9 @@ func createSubquery( sqc := &SubQueryBuilder{totalID: totalID, subqID: subqID, outerID: outerID} predicates, joinCols := sqc.inspectStatement(ctx, subq.Select) - correlated := !ctx.SemTable.RecursiveDeps(subq).IsEmpty() + + subqDependencies := ctx.SemTable.RecursiveDeps(subq) + correlated := subqDependencies.KeepOnly(outerID).NotEmpty() opInner := translateQueryToOp(ctx, subq.Select) diff --git a/go/vt/vtgate/planbuilder/testdata/select_cases.json b/go/vt/vtgate/planbuilder/testdata/select_cases.json index ef6df93441e..11d15afd8bc 100644 --- a/go/vt/vtgate/planbuilder/testdata/select_cases.json +++ b/go/vt/vtgate/planbuilder/testdata/select_cases.json @@ -4321,6 +4321,85 @@ ] } }, + { + "comment": "Subquery with `IN` condition using columns with matching lookup vindexes, impossible conditions and limit clause", + "query": "SELECT music.id FROM music WHERE music.id IN (SELECT * FROM (SELECT music.id FROM music WHERE music.user_id IN (1, 2, 3) AND 1 = 0 AND music.col1 = 'bar' LIMIT 0, 100) _inner)", + "plan": { + "Type": "Complex", + "QueryType": "SELECT", + "Original": "SELECT music.id FROM music WHERE music.id IN (SELECT * FROM (SELECT music.id FROM music WHERE music.user_id IN (1, 2, 3) AND 1 = 0 AND music.col1 = 'bar' LIMIT 0, 100) _inner)", + "Instructions": { + "OperatorType": "UncorrelatedSubquery", + "Variant": "PulloutIn", + "PulloutVars": [ + "__sq_has_values", + "__sq1" + ], + "Inputs": [ + { + "InputName": "SubQuery", + "OperatorType": "Limit", + "Count": "100", + "Offset": "0", + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "None", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select id from (select music.id from music where 1 != 1) as _inner where 1 != 1", + "Query": "select id from (select music.id from music where 0) as _inner limit 100" + } + ] + }, + { + "InputName": "Outer", + "OperatorType": "VindexLookup", + "Variant": "IN", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "Values": [ + "::__sq1" + ], + "Vindex": "music_user_map", + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "IN", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select `name`, keyspace_id from name_user_vdx where 1 != 1", + "Query": "select `name`, keyspace_id from name_user_vdx where `name` in ::__vals", + "Values": [ + "::name" + ], + "Vindex": "user_index" + }, + { + "OperatorType": "Route", + "Variant": "ByDestination", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select music.id from music where 1 != 1", + "Query": "select music.id from music where :__sq_has_values and music.id in ::__vals" + } + ] + } + ] + }, + "TablesUsed": [ + "user.music" + ] + } + }, { "comment": "Subquery with `IN` condition using columns with matching lookup vindexes", "query": "SELECT music.id FROM music WHERE music.id IN (SELECT music.id FROM music WHERE music.user_id IN (1, 2, 3)) and music.user_id = 5", @@ -5879,8 +5958,231 @@ "Sharded": true }, "FieldQuery": "select 1 from (select id as uid from `user` where 1 != 1) as t, `user` where 1 != 1", - "Query": "select 1 from (select id as uid from `user`) as t, `user` where t.uid = `user`.id", - "Table": "`user`" + "Query": "select 1 from (select id as uid from `user`) as t, `user` where t.uid = `user`.id" + }, + "TablesUsed": [ + "user.user" + ] + } + }, + { + "comment": "Window function with IN clause - ROW_NUMBER with PARTITION BY routing column", + "query": "SELECT id, intcol, ROW_NUMBER() OVER (PARTITION BY id ORDER BY intcol) as rn FROM user WHERE id IN (1,2,3,4)", + "plan": { + "Type": "MultiShard", + "QueryType": "SELECT", + "Original": "SELECT id, intcol, ROW_NUMBER() OVER (PARTITION BY id ORDER BY intcol) as rn FROM user WHERE id IN (1,2,3,4)", + "Instructions": { + "OperatorType": "Route", + "Variant": "IN", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select id, intcol, row_number() over (partition by id order by intcol asc) as rn from `user` where 1 != 1", + "Query": "select id, intcol, row_number() over (partition by id order by intcol asc) as rn from `user` where id in ::__vals", + "Values": [ + "(1, 2, 3, 4)" + ], + "Vindex": "user_index" + }, + "TablesUsed": [ + "user.user" + ] + } + }, + { + "comment": "UNION ALL with window functions - each branch has window function with different primary vindex values", + "query": "SELECT id, intcol, ROW_NUMBER() OVER (PARTITION BY id ORDER BY intcol) as rn FROM user WHERE id = 1 UNION ALL SELECT id, intcol, ROW_NUMBER() OVER (PARTITION BY id ORDER BY intcol) as rn FROM user WHERE id = 2", + "plan": { + "Type": "Complex", + "QueryType": "SELECT", + "Original": "SELECT id, intcol, ROW_NUMBER() OVER (PARTITION BY id ORDER BY intcol) as rn FROM user WHERE id = 1 UNION ALL SELECT id, intcol, ROW_NUMBER() OVER (PARTITION BY id ORDER BY intcol) as rn FROM user WHERE id = 2", + "Instructions": { + "OperatorType": "Concatenate", + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "EqualUnique", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select id, intcol, row_number() over (partition by id order by intcol asc) as rn from `user` where 1 != 1", + "Query": "select id, intcol, row_number() over (partition by id order by intcol asc) as rn from `user` where id = 1", + "Values": [ + "1" + ], + "Vindex": "user_index" + }, + { + "OperatorType": "Route", + "Variant": "EqualUnique", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select id, intcol, row_number() over (partition by id order by intcol asc) as rn from `user` where 1 != 1", + "Query": "select id, intcol, row_number() over (partition by id order by intcol asc) as rn from `user` where id = 2", + "Values": [ + "2" + ], + "Vindex": "user_index" + } + ] + }, + "TablesUsed": [ + "user.user" + ] + } + }, + { + "comment": "Window function with RANK on single shard - PARTITION BY includes primary vindex", + "query": "SELECT id, textcol1, RANK() OVER (PARTITION BY id, textcol1 ORDER BY id) as rnk FROM user WHERE id = 5", + "plan": { + "Type": "Passthrough", + "QueryType": "SELECT", + "Original": "SELECT id, textcol1, RANK() OVER (PARTITION BY id, textcol1 ORDER BY id) as rnk FROM user WHERE id = 5", + "Instructions": { + "OperatorType": "Route", + "Variant": "EqualUnique", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select id, textcol1, rank() over (partition by id, textcol1 order by id asc) as rnk from `user` where 1 != 1", + "Query": "select id, textcol1, rank() over (partition by id, textcol1 order by id asc) as rnk from `user` where id = 5", + "Values": [ + "5" + ], + "Vindex": "user_index" + }, + "TablesUsed": [ + "user.user" + ] + } + }, + { + "comment": "Multiple window functions with same partition in single shard query", + "query": "SELECT id, intcol, ROW_NUMBER() OVER (PARTITION BY id ORDER BY intcol) as rn, RANK() OVER (PARTITION BY id ORDER BY intcol) as rnk FROM user WHERE id = 100", + "plan": { + "Type": "Passthrough", + "QueryType": "SELECT", + "Original": "SELECT id, intcol, ROW_NUMBER() OVER (PARTITION BY id ORDER BY intcol) as rn, RANK() OVER (PARTITION BY id ORDER BY intcol) as rnk FROM user WHERE id = 100", + "Instructions": { + "OperatorType": "Route", + "Variant": "EqualUnique", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select id, intcol, row_number() over (partition by id order by intcol asc) as rn, rank() over (partition by id order by intcol asc) as rnk from `user` where 1 != 1", + "Query": "select id, intcol, row_number() over (partition by id order by intcol asc) as rn, rank() over (partition by id order by intcol asc) as rnk from `user` where id = 100", + "Values": [ + "100" + ], + "Vindex": "user_index" + }, + "TablesUsed": [ + "user.user" + ] + } + }, + { + "comment": "Window function with composite vindex (multi-column) - PARTITION BY includes all primary vindex columns", + "query": "SELECT cola, colb, colc, RANK() OVER (PARTITION BY cola, colb ORDER BY colc) as rnk FROM multicol_tbl WHERE cola = 'A' AND colb = 'B'", + "plan": { + "Type": "Passthrough", + "QueryType": "SELECT", + "Original": "SELECT cola, colb, colc, RANK() OVER (PARTITION BY cola, colb ORDER BY colc) as rnk FROM multicol_tbl WHERE cola = 'A' AND colb = 'B'", + "Instructions": { + "OperatorType": "Route", + "Variant": "EqualUnique", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select cola, colb, colc, rank() over (partition by cola, colb order by colc asc) as rnk from multicol_tbl where 1 != 1", + "Query": "select cola, colb, colc, rank() over (partition by cola, colb order by colc asc) as rnk from multicol_tbl where cola = 'A' and colb = 'B'", + "Values": [ + "'A'", + "'B'" + ], + "Vindex": "multicolIdx" + }, + "TablesUsed": [ + "user.multicol_tbl" + ] + } + }, + { + "comment": "window function in subquery on unsharded table", + "query": "select * from (select rank() over (partition by col) as r from unsharded) as t", + "plan": { + "Type": "Passthrough", + "QueryType": "SELECT", + "Original": "select * from (select rank() over (partition by col) as r from unsharded) as t", + "Instructions": { + "OperatorType": "Route", + "Variant": "Unsharded", + "Keyspace": { + "Name": "main", + "Sharded": false + }, + "FieldQuery": "select * from (select rank() over (partition by col) as r from unsharded where 1 != 1) as t where 1 != 1", + "Query": "select * from (select rank() over (partition by col) as r from unsharded) as t" + }, + "TablesUsed": [ + "main.unsharded" + ] + } + }, + { + "comment": "window function in subquery on sharded table with single shard predicate", + "query": "select * from (select rank() over (partition by col) as r from user where id = 1) as t", + "plan": { + "Type": "Passthrough", + "QueryType": "SELECT", + "Original": "select * from (select rank() over (partition by col) as r from user where id = 1) as t", + "Instructions": { + "OperatorType": "Route", + "Variant": "EqualUnique", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select r from (select rank() over (partition by col) as r from `user` where 1 != 1) as t where 1 != 1", + "Query": "select r from (select rank() over (partition by col) as r from `user` where id = 1) as t", + "Values": [ + "1" + ], + "Vindex": "user_index" + }, + "TablesUsed": [ + "user.user" + ] + } + }, + { + "comment": "window function in subquery on sharded table with outer predicate", + "query": "select * from (select rank() over (partition by col) as r, id from user) as t where id = 1", + "plan": { + "Type": "Passthrough", + "QueryType": "SELECT", + "Original": "select * from (select rank() over (partition by col) as r, id from user) as t where id = 1", + "Instructions": { + "OperatorType": "Route", + "Variant": "EqualUnique", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select r, id from (select rank() over (partition by col) as r, id from `user` where 1 != 1) as t where 1 != 1", + "Query": "select r, id from (select rank() over (partition by col) as r, id from `user` where id = 1) as t", + "Values": [ + "1" + ], + "Vindex": "user_index" }, "TablesUsed": [ "user.user" diff --git a/go/vt/vtgate/semantics/semantic_table.go b/go/vt/vtgate/semantics/semantic_table.go index 6683fd5b2f9..e9c126f9db0 100644 --- a/go/vt/vtgate/semantics/semantic_table.go +++ b/go/vt/vtgate/semantics/semantic_table.go @@ -545,7 +545,20 @@ func (st *SemTable) CopySemanticInfo(from, to sqlparser.SQLNode) { if !ok { return } - st.CopyDependencies(f, t) + + // Not all expressions are valid map keys + if !ValidAsMapKey(t) || !ValidAsMapKey(f) { + return + } + + if _, ok := t.(*sqlparser.ColName); ok { + // If this is introducing a new column, we should copy all dependencies over + // as we can't recalculate them later + st.CopyDependencies(f, t) + } else { + // Otherwise, we only copy over the type information + st.CopyExprInfo(f, t) + } case *sqlparser.Union: t, ok := to.(*sqlparser.Union) if !ok { diff --git a/go/vt/vtgate/semantics/semantic_table_test.go b/go/vt/vtgate/semantics/semantic_table_test.go index 1f324215326..133d56e32f9 100644 --- a/go/vt/vtgate/semantics/semantic_table_test.go +++ b/go/vt/vtgate/semantics/semantic_table_test.go @@ -23,9 +23,12 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "vitess.io/vitess/go/mysql/collations" + "vitess.io/vitess/go/sqltypes" querypb "vitess.io/vitess/go/vt/proto/query" vschemapb "vitess.io/vitess/go/vt/proto/vschema" "vitess.io/vitess/go/vt/sqlparser" + "vitess.io/vitess/go/vt/vtgate/evalengine" "vitess.io/vitess/go/vt/vtgate/vindexes" ) @@ -979,3 +982,121 @@ func TestHasNonLiteralForeignKeyUpdate(t *testing.T) { }) } } + +func TestCopySemanticInfoNonColName(t *testing.T) { + t.Run("copies no semantic information when the source has no semantic information", func(t *testing.T) { + parser := sqlparser.NewTestParser() + + col, err := parser.ParseExpr("id") + require.NoError(t, err) + + from, err := parser.ParseExpr("lower(id)") + require.NoError(t, err) + + to, err := parser.ParseExpr("upper(id)") + require.NoError(t, err) + + semTable := EmptySemTable() + tableSet := SingleTableSet(0) + + semTable.Recursive[col] = tableSet + semTable.Direct[col] = tableSet + semTable.ExprTypes[col] = evalengine.NewType(sqltypes.VarChar, collations.CollationUtf8mb4ID) + + semTable.CopySemanticInfo(from, to) + + require.NotContains(t, semTable.ExprTypes, to) + require.NotContains(t, semTable.Recursive, to) + require.NotContains(t, semTable.Direct, to) + }) + + t.Run("copies only the expression type when the source has semantic information", func(t *testing.T) { + parser := sqlparser.NewTestParser() + + col, err := parser.ParseExpr("id") + require.NoError(t, err) + + from, err := parser.ParseExpr("lower(id)") + require.NoError(t, err) + + to, err := parser.ParseExpr("upper(id)") + require.NoError(t, err) + + semTable := EmptySemTable() + tableSet := SingleTableSet(0) + + semTable.Recursive[col] = tableSet + semTable.Direct[col] = tableSet + semTable.ExprTypes[col] = evalengine.NewType(sqltypes.VarChar, collations.CollationUtf8mb4ID) + + semTable.Recursive[from] = tableSet + semTable.Direct[from] = tableSet + semTable.ExprTypes[from] = evalengine.NewType(sqltypes.VarChar, collations.CollationUtf8mb4ID) + + semTable.CopySemanticInfo(from, to) + + require.Contains(t, semTable.ExprTypes, to) + require.Equal(t, semTable.ExprTypes[from], semTable.ExprTypes[to]) + require.NotContains(t, semTable.Recursive, to) + require.NotContains(t, semTable.Direct, to) + }) +} + +func TestCopySemanticInfoIntoColName(t *testing.T) { + t.Run("copies all semantic information when the source has semantic information", func(t *testing.T) { + parser := sqlparser.NewTestParser() + + col, err := parser.ParseExpr("id") + require.NoError(t, err) + + from, err := parser.ParseExpr("lower(id)") + require.NoError(t, err) + + to, err := parser.ParseExpr("derived.id") + require.NoError(t, err) + + semTable := EmptySemTable() + tableSet := SingleTableSet(0) + + semTable.Recursive[col] = tableSet + semTable.Direct[col] = tableSet + semTable.ExprTypes[col] = evalengine.NewType(sqltypes.VarChar, collations.CollationUtf8mb4ID) + + semTable.Recursive[from] = tableSet + semTable.Direct[from] = tableSet + semTable.ExprTypes[from] = evalengine.NewType(sqltypes.VarChar, collations.CollationUtf8mb4ID) + + semTable.CopySemanticInfo(from, to) + + require.Contains(t, semTable.ExprTypes, to) + require.Contains(t, semTable.Recursive, to) + require.Contains(t, semTable.Direct, to) + }) + + t.Run("does not copy semantic information when the source has no semantic information", func(t *testing.T) { + parser := sqlparser.NewTestParser() + + col, err := parser.ParseExpr("id") + require.NoError(t, err) + + from, err := parser.ParseExpr("lower(id)") + require.NoError(t, err) + + to, err := parser.ParseExpr("derived.id") + require.NoError(t, err) + + semTable := EmptySemTable() + + tableSet := SingleTableSet(0) + + semTable.Recursive[col] = tableSet + semTable.Direct[col] = tableSet + semTable.ExprTypes[col] = evalengine.NewType(sqltypes.VarChar, collations.CollationUtf8mb4ID) + + semTable.CopySemanticInfo(from, to) + + require.NotContains(t, semTable.ExprTypes, to) + require.NotContains(t, semTable.Recursive, to) + require.NotContains(t, semTable.Direct, to) + }) +}