From 4b9737042712cbd4c1500e9aa1d10ef8519eeef8 Mon Sep 17 00:00:00 2001 From: Nathan Lowe Date: Fri, 1 Apr 2022 14:19:20 -0700 Subject: [PATCH 01/12] pkg/cloud/azure: Support specifying Azure environments in storage URLs The Azure Storage cloud provider learned a new parameter, AZURE_ENVIRONMENT, which specifies which azure environment the storage account in question belongs to. This allows cockroach to backup and restore data to Azure Storage Accounts outside the main Azure Public Cloud. For backwards compatibility, this defaults to "AzurePublicCloud" if AZURE_ENVIRONMENT is not specified. Fixes #47163 Release note (general): When using Azure Cloud Storage for data operations, cockroach now calculates the Storage Account URL from the provided AZURE_ENVIRONMENT query parameter. This defaults to AzurePublicCloud if not specified to maintain backwards compatibility. --- pkg/cloud/azure/BUILD.bazel | 3 ++ pkg/cloud/azure/azure_storage.go | 14 +++++- pkg/cloud/azure/azure_storage_test.go | 70 ++++++++++++++++++++++++--- pkg/roachpb/api.proto | 1 + 4 files changed, 81 insertions(+), 7 deletions(-) diff --git a/pkg/cloud/azure/BUILD.bazel b/pkg/cloud/azure/BUILD.bazel index a1339dca3678..bfbd2b84b232 100644 --- a/pkg/cloud/azure/BUILD.bazel +++ b/pkg/cloud/azure/BUILD.bazel @@ -15,6 +15,7 @@ go_library( "//pkg/util/ioctx", "//pkg/util/tracing", "@com_github_azure_azure_storage_blob_go//azblob", + "@com_github_azure_go_autorest_autorest//azure", "@com_github_cockroachdb_errors//:errors", "@com_github_gogo_protobuf//types", ], @@ -27,10 +28,12 @@ go_test( deps = [ "//pkg/cloud", "//pkg/cloud/cloudtestutils", + "//pkg/roachpb", "//pkg/security", "//pkg/settings/cluster", "//pkg/testutils/skip", "//pkg/util/leaktest", + "@com_github_azure_go_autorest_autorest//azure", "@com_github_cockroachdb_errors//:errors", "@com_github_stretchr_testify//require", ], diff --git a/pkg/cloud/azure/azure_storage.go b/pkg/cloud/azure/azure_storage.go index 310124458493..c44d186c522a 100644 --- a/pkg/cloud/azure/azure_storage.go +++ b/pkg/cloud/azure/azure_storage.go @@ -19,6 +19,7 @@ import ( "strings" "github.com/Azure/azure-storage-blob-go/azblob" + "github.com/Azure/go-autorest/autorest/azure" "github.com/cockroachdb/cockroach/pkg/base" "github.com/cockroachdb/cockroach/pkg/cloud" "github.com/cockroachdb/cockroach/pkg/roachpb" @@ -36,6 +37,8 @@ const ( AzureAccountNameParam = "AZURE_ACCOUNT_NAME" // AzureAccountKeyParam is the query parameter for account_key in an azure URI. AzureAccountKeyParam = "AZURE_ACCOUNT_KEY" + // AzureEnvironmentKeyParam is the query parameter for the environment name in an azure URI. + AzureEnvironmentKeyParam = "AZURE_ENVIRONMENT" ) func parseAzureURL( @@ -48,6 +51,7 @@ func parseAzureURL( Prefix: uri.Path, AccountName: uri.Query().Get(AzureAccountNameParam), AccountKey: uri.Query().Get(AzureAccountKeyParam), + Environment: uri.Query().Get(AzureEnvironmentKeyParam), } if conf.AzureConfig.AccountName == "" { return conf, errors.Errorf("azure uri missing %q parameter", AzureAccountNameParam) @@ -55,6 +59,10 @@ func parseAzureURL( if conf.AzureConfig.AccountKey == "" { return conf, errors.Errorf("azure uri missing %q parameter", AzureAccountKeyParam) } + if conf.AzureConfig.Environment == "" { + // Default to AzurePublicCloud if not specified for backwards compatibility + conf.AzureConfig.Environment = azure.PublicCloud.Name + } conf.AzureConfig.Prefix = strings.TrimLeft(conf.AzureConfig.Prefix, "/") return conf, nil } @@ -81,8 +89,12 @@ func makeAzureStorage( if err != nil { return nil, errors.Wrap(err, "azure credential") } + env, err := azure.EnvironmentFromName(conf.Environment) + if err != nil { + return nil, errors.Wrap(err, "azure environment") + } p := azblob.NewPipeline(credential, azblob.PipelineOptions{}) - u, err := url.Parse(fmt.Sprintf("https://%s.blob.core.windows.net", conf.AccountName)) + u, err := url.Parse(fmt.Sprintf("https://%s.blob.%s", conf.AccountName, env.StorageEndpointSuffix)) if err != nil { return nil, errors.Wrap(err, "azure: account name is not valid") } diff --git a/pkg/cloud/azure/azure_storage_test.go b/pkg/cloud/azure/azure_storage_test.go index e120d98b6dc8..52447ed4e3d0 100644 --- a/pkg/cloud/azure/azure_storage_test.go +++ b/pkg/cloud/azure/azure_storage_test.go @@ -11,13 +11,17 @@ package azure import ( + "context" + "encoding/base64" "fmt" "net/url" "os" "testing" + "github.com/Azure/go-autorest/autorest/azure" "github.com/cockroachdb/cockroach/pkg/cloud" "github.com/cockroachdb/cockroach/pkg/cloud/cloudtestutils" + "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/security" "github.com/cockroachdb/cockroach/pkg/settings/cluster" "github.com/cockroachdb/cockroach/pkg/testutils/skip" @@ -27,25 +31,30 @@ import ( ) type azureConfig struct { - account, key, bucket string + account, key, bucket, environment string } func (a azureConfig) filePath(f string) string { - return fmt.Sprintf("azure://%s/%s?%s=%s&%s=%s", + return fmt.Sprintf("azure://%s/%s?%s=%s&%s=%s&%s=%s", a.bucket, f, AzureAccountKeyParam, url.QueryEscape(a.key), - AzureAccountNameParam, url.QueryEscape(a.account)) + AzureAccountNameParam, url.QueryEscape(a.account), + AzureEnvironmentKeyParam, url.QueryEscape(a.environment)) } func getAzureConfig() (azureConfig, error) { cfg := azureConfig{ - account: os.Getenv("AZURE_ACCOUNT_NAME"), - key: os.Getenv("AZURE_ACCOUNT_KEY"), - bucket: os.Getenv("AZURE_CONTAINER"), + account: os.Getenv("AZURE_ACCOUNT_NAME"), + key: os.Getenv("AZURE_ACCOUNT_KEY"), + bucket: os.Getenv("AZURE_CONTAINER"), + environment: azure.PublicCloud.Name, } if cfg.account == "" || cfg.key == "" || cfg.bucket == "" { return azureConfig{}, errors.New("AZURE_ACCOUNT_NAME, AZURE_ACCOUNT_KEY, AZURE_CONTAINER must all be set") } + if v, ok := os.LookupEnv(AzureEnvironmentKeyParam); ok { + cfg.environment = v + } return cfg, nil } func TestAzure(t *testing.T) { @@ -80,3 +89,52 @@ func TestAntagonisticAzureRead(t *testing.T) { cloudtestutils.CheckAntagonisticRead(t, conf, testSettings) } + +func TestParseAzureURL(t *testing.T) { + t.Run("Defaults to Public Cloud when AZURE_ENVIRONEMNT unset", func(t *testing.T) { + u, err := url.Parse("azure://container/path?AZURE_ACCOUNT_NAME=account&AZURE_ACCOUNT_KEY=key") + require.NoError(t, err) + + sut, err := parseAzureURL(cloud.ExternalStorageURIContext{}, u) + require.NoError(t, err) + + require.Equal(t, azure.PublicCloud.Name, sut.AzureConfig.Environment) + }) + + t.Run("Can Override AZURE_ENVIRONMENT", func(t *testing.T) { + u, err := url.Parse("azure://container/path?AZURE_ACCOUNT_NAME=account&AZURE_ACCOUNT_KEY=key&AZURE_ENVIRONMENT=AzureUSGovernmentCloud") + require.NoError(t, err) + + sut, err := parseAzureURL(cloud.ExternalStorageURIContext{}, u) + require.NoError(t, err) + + require.Equal(t, azure.USGovernmentCloud.Name, sut.AzureConfig.Environment) + }) +} + +func TestMakeAzureStorageURLFromEnvironment(t *testing.T) { + for _, tt := range []struct { + environment string + expected string + }{ + {environment: azure.PublicCloud.Name, expected: "https://account.blob.core.windows.net/container"}, + {environment: azure.USGovernmentCloud.Name, expected: "https://account.blob.core.usgovcloudapi.net/container"}, + } { + t.Run(tt.environment, func(t *testing.T) { + sut, err := makeAzureStorage(context.Background(), cloud.ExternalStorageContext{}, roachpb.ExternalStorage{ + AzureConfig: &roachpb.ExternalStorage_Azure{ + Container: "container", + Prefix: "path", + AccountName: "account", + AccountKey: base64.StdEncoding.EncodeToString([]byte("key")), + Environment: tt.environment, + }, + }) + + require.NoError(t, err) + + u := sut.(*azureStorage).container.URL() + require.Equal(t, tt.expected, u.String()) + }) + } +} diff --git a/pkg/roachpb/api.proto b/pkg/roachpb/api.proto index a2fb4ece8002..89504c588d62 100644 --- a/pkg/roachpb/api.proto +++ b/pkg/roachpb/api.proto @@ -1404,6 +1404,7 @@ message ExternalStorage { string account_name = 3; string account_key = 4; + string environment = 5; } message FileTable { // User interacting with the external storage. This is used to check access From 5bb488d8c54c9122fecf938d3694ab5d24b377ad Mon Sep 17 00:00:00 2001 From: Marcus Gartner Date: Fri, 8 Apr 2022 15:21:41 -0400 Subject: [PATCH 02/12] opt: simplify fetching outer column in CustomFuncs.findComputedColJoinEquality Previously, `CustomFuncs.findComputedColJoinEquality` used `CustomFuncs.OuterCols` to retrieve the outer columns of computed column expressions. `CustomFuncs.OuterCols` returns the cached outer columns in the expression if it is a `memo.ScalarPropsExpr`, and falls back to calculating the outer columns with `memo.BuildSharedProps` otherwise. Computed column expressions are never `memo.ScalarPropsExpr`s, so we use just use `memo.BuildSharedProps` directly. Release note: None --- pkg/sql/opt/xform/join_funcs.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pkg/sql/opt/xform/join_funcs.go b/pkg/sql/opt/xform/join_funcs.go index 444d9ada68e9..382aa387f1e9 100644 --- a/pkg/sql/opt/xform/join_funcs.go +++ b/pkg/sql/opt/xform/join_funcs.go @@ -1247,7 +1247,9 @@ func (c *CustomFuncs) findComputedColJoinEquality( if !ok { return nil, false } - if !c.OuterCols(expr).SubsetOf(eqCols) { + var sharedProps props.Shared + memo.BuildSharedProps(expr, &sharedProps, c.e.evalCtx) + if !sharedProps.OuterCols.SubsetOf(eqCols) { return nil, false } return expr, true From 063120ca8d076dbd00a817c6fa27214fa2ad32cb Mon Sep 17 00:00:00 2001 From: Marcus Gartner Date: Fri, 8 Apr 2022 15:45:44 -0400 Subject: [PATCH 03/12] opt: make RemapCols a method on Factory instead of CustomFuncs Release note: None --- pkg/sql/opt/norm/factory.go | 30 ++++++++++++++++++++++++++++++ pkg/sql/opt/norm/general_funcs.go | 26 +------------------------- pkg/sql/opt/norm/select_funcs.go | 4 ++-- pkg/sql/opt/xform/general_funcs.go | 4 ++-- pkg/sql/opt/xform/join_funcs.go | 8 ++++---- 5 files changed, 39 insertions(+), 33 deletions(-) diff --git a/pkg/sql/opt/norm/factory.go b/pkg/sql/opt/norm/factory.go index f65ba641c7ea..494518569dab 100644 --- a/pkg/sql/opt/norm/factory.go +++ b/pkg/sql/opt/norm/factory.go @@ -422,3 +422,33 @@ func (f *Factory) ConstructConstVal(d tree.Datum, t *types.T) opt.ScalarExpr { } return f.ConstructConst(d, t) } + +// ---------------------------------------------------------------------- +// +// Convenience functions. +// +// ---------------------------------------------------------------------- + +// RemapCols remaps columns IDs in the input ScalarExpr by replacing occurrences +// of the keys of colMap with the corresponding values. If column IDs are +// encountered in the input ScalarExpr that are not keys in colMap, they are not +// remapped. +func (f *Factory) RemapCols(scalar opt.ScalarExpr, colMap opt.ColMap) opt.ScalarExpr { + // Recursively walk the scalar sub-tree looking for references to columns + // that need to be replaced and then replace them appropriately. + var replace ReplaceFunc + replace = func(e opt.Expr) opt.Expr { + switch t := e.(type) { + case *memo.VariableExpr: + dstCol, ok := colMap.Get(int(t.Col)) + if !ok { + // The column ID is not in colMap so no replacement is required. + return e + } + return f.ConstructVariable(opt.ColumnID(dstCol)) + } + return f.Replace(e, replace) + } + + return replace(scalar).(opt.ScalarExpr) +} diff --git a/pkg/sql/opt/norm/general_funcs.go b/pkg/sql/opt/norm/general_funcs.go index 0cd3d5559bd9..a000649c8172 100644 --- a/pkg/sql/opt/norm/general_funcs.go +++ b/pkg/sql/opt/norm/general_funcs.go @@ -274,7 +274,7 @@ func (c *CustomFuncs) DuplicateColumnIDs( table opt.TableID, cols opt.ColSet, ) (opt.TableID, opt.ColSet) { md := c.mem.Metadata() - newTableID := md.DuplicateTable(table, c.RemapCols) + newTableID := md.DuplicateTable(table, c.f.RemapCols) // Build a new set of column IDs from the new TableMeta. var newColIDs opt.ColSet @@ -287,30 +287,6 @@ func (c *CustomFuncs) DuplicateColumnIDs( return newTableID, newColIDs } -// RemapCols remaps columns IDs in the input ScalarExpr by replacing occurrences -// of the keys of colMap with the corresponding values. If column IDs are -// encountered in the input ScalarExpr that are not keys in colMap, they are not -// remapped. -func (c *CustomFuncs) RemapCols(scalar opt.ScalarExpr, colMap opt.ColMap) opt.ScalarExpr { - // Recursively walk the scalar sub-tree looking for references to columns - // that need to be replaced and then replace them appropriately. - var replace ReplaceFunc - replace = func(e opt.Expr) opt.Expr { - switch t := e.(type) { - case *memo.VariableExpr: - dstCol, ok := colMap.Get(int(t.Col)) - if !ok { - // The column ID is not in colMap so no replacement is required. - return e - } - return c.f.ConstructVariable(opt.ColumnID(dstCol)) - } - return c.f.Replace(e, replace) - } - - return replace(scalar).(opt.ScalarExpr) -} - // ---------------------------------------------------------------------- // // Outer column functions diff --git a/pkg/sql/opt/norm/select_funcs.go b/pkg/sql/opt/norm/select_funcs.go index d2a0c4f0eb8f..8406bd278cc0 100644 --- a/pkg/sql/opt/norm/select_funcs.go +++ b/pkg/sql/opt/norm/select_funcs.go @@ -53,7 +53,7 @@ func (c *CustomFuncs) MapSetOpFilterLeft( filter *memo.FiltersItem, set *memo.SetPrivate, ) opt.ScalarExpr { colMap := makeMapFromColLists(set.OutCols, set.LeftCols) - return c.RemapCols(filter.Condition, colMap) + return c.f.RemapCols(filter.Condition, colMap) } // MapSetOpFilterRight maps the filter onto the right expression by replacing @@ -64,7 +64,7 @@ func (c *CustomFuncs) MapSetOpFilterRight( filter *memo.FiltersItem, set *memo.SetPrivate, ) opt.ScalarExpr { colMap := makeMapFromColLists(set.OutCols, set.RightCols) - return c.RemapCols(filter.Condition, colMap) + return c.f.RemapCols(filter.Condition, colMap) } // makeMapFromColLists maps each column ID in src to a column ID in dst. The diff --git a/pkg/sql/opt/xform/general_funcs.go b/pkg/sql/opt/xform/general_funcs.go index e84f8e4c5cae..da85ffd1959a 100644 --- a/pkg/sql/opt/xform/general_funcs.go +++ b/pkg/sql/opt/xform/general_funcs.go @@ -95,7 +95,7 @@ func (c *CustomFuncs) remapScanColsInScalarExpr( dstCol := dst.Table.ColumnID(ord) colMap.Set(int(srcCol), int(dstCol)) } - return c.RemapCols(scalar, colMap) + return c.e.f.RemapCols(scalar, colMap) } // RemapJoinColsInFilter returns a new FiltersExpr where columns in leftSrc's @@ -140,7 +140,7 @@ func (c *CustomFuncs) remapJoinColsInScalarExpr( dstCol := rightDst.Table.ColumnID(ord) colMap.Set(int(srcCol), int(dstCol)) } - return c.RemapCols(scalar, colMap) + return c.e.f.RemapCols(scalar, colMap) } // checkConstraintFilters generates all filters that we can derive from the diff --git a/pkg/sql/opt/xform/join_funcs.go b/pkg/sql/opt/xform/join_funcs.go index 382aa387f1e9..05083de68738 100644 --- a/pkg/sql/opt/xform/join_funcs.go +++ b/pkg/sql/opt/xform/join_funcs.go @@ -435,7 +435,7 @@ func (c *CustomFuncs) generateLookupJoinsImpl( // Project the computed column expression, mapping all columns // in rightEq to corresponding columns in leftEq. - projection := c.e.f.ConstructProjectionsItem(c.RemapCols(expr, eqColMap), compEqCol) + projection := c.e.f.ConstructProjectionsItem(c.e.f.RemapCols(expr, eqColMap), compEqCol) inputProjections = append(inputProjections, projection) lookupJoin.KeyCols = append(lookupJoin.KeyCols, compEqCol) rightSideCols = append(rightSideCols, idxCol) @@ -1175,11 +1175,11 @@ func (c *CustomFuncs) mapInvertedJoin( srcColsToDstCols.Set(int(invertedSourceCol), int(newInvertedSourceCol)) invertedJoin.Table = newTabID - invertedJoin.InvertedExpr = c.RemapCols(invertedJoin.InvertedExpr, srcColsToDstCols) + invertedJoin.InvertedExpr = c.e.f.RemapCols(invertedJoin.InvertedExpr, srcColsToDstCols) invertedJoin.Cols = invertedJoin.Cols.Difference(indexCols).Union(newIndexCols) - constFilters := c.RemapCols(&invertedJoin.ConstFilters, srcColsToDstCols).(*memo.FiltersExpr) + constFilters := c.e.f.RemapCols(&invertedJoin.ConstFilters, srcColsToDstCols).(*memo.FiltersExpr) invertedJoin.ConstFilters = *constFilters - on := c.RemapCols(&invertedJoin.On, srcColsToDstCols).(*memo.FiltersExpr) + on := c.e.f.RemapCols(&invertedJoin.On, srcColsToDstCols).(*memo.FiltersExpr) invertedJoin.On = *on } From ada09ad634a86789b8439c7aa8289d28475ff331 Mon Sep 17 00:00:00 2001 From: Marcus Gartner Date: Fri, 8 Apr 2022 16:10:55 -0400 Subject: [PATCH 04/12] opt: use partial-index-reduced filters when building lookup expressions This commit makes a minor change to `generateLookupJoinsImpl`. Previously, equality filters were extracted from the original `ON` filters. Now they are extracted from filters that have been reduced by partial index implication. This has no effect on behavior because equality filters that reference columns in two tables cannot exist in partial index predicates, so they will never be eliminated during partial index implication. Release note: None --- pkg/sql/opt/xform/join_funcs.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/sql/opt/xform/join_funcs.go b/pkg/sql/opt/xform/join_funcs.go index 05083de68738..43a4df14376d 100644 --- a/pkg/sql/opt/xform/join_funcs.go +++ b/pkg/sql/opt/xform/join_funcs.go @@ -505,7 +505,7 @@ func (c *CustomFuncs) generateLookupJoinsImpl( var eqFilters memo.FiltersExpr extractEqualityFilter := func(leftCol, rightCol opt.ColumnID) memo.FiltersItem { return memo.ExtractJoinEqualityFilter( - leftCol, rightCol, inputProps.OutputCols, rightCols, on, + leftCol, rightCol, inputProps.OutputCols, rightCols, onFilters, ) } eqFilters, constraintFilters, rightSideCols = c.findFiltersForIndexLookup( From d27f2c545b71695ab7979fd313eebfc21b9ecd51 Mon Sep 17 00:00:00 2001 From: Marcus Gartner Date: Wed, 27 Apr 2022 12:15:51 -0400 Subject: [PATCH 05/12] SQUASH BEFORE MERGING This commit will be squashed before merging. It copies code verbatim from join_funcs.go to lookupjoin/constraint_builder.go to make it easy to review the changes made when moving code in the following commit. --- pkg/sql/opt/lookupjoin/constraint_builder.go | 468 +++++++++++++++++++ pkg/sql/opt/xform/join_funcs.go | 383 --------------- 2 files changed, 468 insertions(+), 383 deletions(-) create mode 100644 pkg/sql/opt/lookupjoin/constraint_builder.go diff --git a/pkg/sql/opt/lookupjoin/constraint_builder.go b/pkg/sql/opt/lookupjoin/constraint_builder.go new file mode 100644 index 000000000000..28df6ebc6f6e --- /dev/null +++ b/pkg/sql/opt/lookupjoin/constraint_builder.go @@ -0,0 +1,468 @@ +// Copyright 2022 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package lookupjoin + +import ( + "fmt" + + "github.com/cockroachdb/cockroach/pkg/sql/opt" + "github.com/cockroachdb/cockroach/pkg/sql/opt/cat" + "github.com/cockroachdb/cockroach/pkg/sql/opt/constraint" + "github.com/cockroachdb/cockroach/pkg/sql/opt/memo" + "github.com/cockroachdb/cockroach/pkg/sql/opt/props" + "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" + "github.com/cockroachdb/cockroach/pkg/sql/types" +) + +func stub() { + // Find the longest prefix of index key columns that are constrained by + // an equality with another column or a constant. + numIndexKeyCols := index.LaxKeyColumnCount() + + var constraintFilters memo.FiltersExpr + allFilters := append(onFilters, optionalFilters...) + + // Check if the first column in the index either: + // + // 1. Has an equality constraint. + // 2. Is a computed column for which an equality constraint can be + // generated. + // 3. Is constrained to a constant value or values. + // + // This check doesn't guarantee that we will find lookup join key + // columns, but it avoids unnecessary work in most cases. + firstIdxCol := scanPrivate.Table.IndexColumnID(index, 0) + if _, ok := rightEq.Find(firstIdxCol); !ok { + if _, ok := c.findComputedColJoinEquality(scanPrivate.Table, firstIdxCol, rightEqSet); !ok { + if _, _, ok := c.findJoinFilterConstants(allFilters, firstIdxCol); !ok { + return + } + } + } + + shouldBuildMultiSpanLookupJoin := false + + // All the lookup conditions must apply to the prefix of the index and so + // the projected columns created must be created in order. + for j := 0; j < numIndexKeyCols; j++ { + idxCol := scanPrivate.Table.IndexColumnID(index, j) + if eqIdx, ok := rightEq.Find(idxCol); ok { + lookupJoin.KeyCols = append(lookupJoin.KeyCols, leftEq[eqIdx]) + rightSideCols = append(rightSideCols, idxCol) + continue + } + + // If the column is computed and an equality constraint can be + // synthesized for it, we can project a column from the join's input + // that can be used as a key column. We create the projection here, + // and construct a Project expression that wraps the join's input + // below. See findComputedColJoinEquality for the requirements to + // synthesize a computed column equality constraint. + if expr, ok := c.findComputedColJoinEquality(scanPrivate.Table, idxCol, rightEqSet); ok { + colMeta := md.ColumnMeta(idxCol) + compEqCol := md.AddColumn(fmt.Sprintf("%s_eq", colMeta.Alias), colMeta.Type) + + // Lazily initialize eqColMap. + if eqColMap.Empty() { + for i := range rightEq { + eqColMap.Set(int(rightEq[i]), int(leftEq[i])) + } + } + + // Project the computed column expression, mapping all columns + // in rightEq to corresponding columns in leftEq. + projection := c.e.f.ConstructProjectionsItem(c.e.f.RemapCols(expr, eqColMap), compEqCol) + inputProjections = append(inputProjections, projection) + lookupJoin.KeyCols = append(lookupJoin.KeyCols, compEqCol) + rightSideCols = append(rightSideCols, idxCol) + continue + } + + // Try to find a filter that constrains this column to non-NULL + // constant values. We cannot use a NULL value because the lookup + // join implements logic equivalent to simple equality between + // columns (where NULL never equals anything). + foundVals, allIdx, ok := c.findJoinFilterConstants(allFilters, idxCol) + if ok && len(foundVals) == 1 { + // If a single constant value was found, project it in the input + // and use it as an equality column. + idxColType := c.e.f.Metadata().ColumnMeta(idxCol).Type + constColID := c.e.f.Metadata().AddColumn( + fmt.Sprintf("lookup_join_const_col_@%d", idxCol), + idxColType, + ) + inputProjections = append(inputProjections, c.e.f.ConstructProjectionsItem( + c.e.f.ConstructConstVal(foundVals[0], idxColType), + constColID, + )) + constraintFilters = append(constraintFilters, allFilters[allIdx]) + lookupJoin.KeyCols = append(lookupJoin.KeyCols, constColID) + rightSideCols = append(rightSideCols, idxCol) + continue + } + + var foundRange bool + if !ok { + // If constant values were not found, try to find a filter that + // constrains this index column to a range. + _, foundRange = c.findJoinFilterRange(allFilters, idxCol) + } + + // If more than one constant value or a range to constrain the index + // column was found, use a LookupExpr rather than KeyCols. + if len(foundVals) > 1 || foundRange { + shouldBuildMultiSpanLookupJoin = true + } + + // Either multiple constant values or a range were found, or the + // index column cannot be constrained. In all cases, we cannot + // continue on to the next index column, so we break out of the + // loop. + break + } + + if shouldBuildMultiSpanLookupJoin { + // Some of the index columns were constrained to multiple constant + // values or a range expression, so we cannot build a lookup join + // with KeyCols. As an alternative, we store all the filters needed + // for the lookup in LookupExpr, which will be used to construct + // spans at execution time. Each input row will generate multiple + // spans to lookup in the index. + // + // For example, if the index cols are (region, id) and the + // LookupExpr is `region in ('east', 'west') AND id = input.id`, + // each input row will generate two spans to be scanned in the + // lookup: + // + // [/'east'/ - /'east'/] + // [/'west'/ - /'west'/] + // + // Where is the value of input.id for the current input row. + var eqFilters memo.FiltersExpr + extractEqualityFilter := func(leftCol, rightCol opt.ColumnID) memo.FiltersItem { + return memo.ExtractJoinEqualityFilter( + leftCol, rightCol, inputProps.OutputCols, rightCols, onFilters, + ) + } + eqFilters, constraintFilters, rightSideCols = c.findFiltersForIndexLookup( + allFilters, scanPrivate.Table, index, leftEq, rightEq, extractEqualityFilter, + ) + lookupJoin.LookupExpr = append(eqFilters, constraintFilters...) + + // Reset KeyCols since we're not using it anymore. + lookupJoin.KeyCols = opt.ColList{} + // Reset the input projections since we don't need any constant + // values projected. + inputProjections = nil + } +} + +// findComputedColJoinEquality returns the computed column expression of col and +// ok=true when a join equality constraint can be generated for the column. This +// is possible when: +// +// 1. col is non-nullable. +// 2. col is a computed column. +// 3. Columns referenced in the computed expression are a subset of columns +// that already have equality constraints. +// +// For example, consider the table and query: +// +// CREATE TABLE a ( +// a INT +// ) +// +// CREATE TABLE bc ( +// b INT, +// c INT NOT NULL AS (b + 1) STORED +// ) +// +// SELECT * FROM a JOIN b ON a = b +// +// We can add an equality constraint for c because c is a function of b and b +// has an equality constraint in the join predicate: +// +// SELECT * FROM a JOIN b ON a = b AND a + 1 = c +// +// Condition (1) is required to prevent generating invalid equality constraints +// for computed column expressions that can evaluate to NULL even when the +// columns referenced in the expression are non-NULL. For example, consider the +// table and query: +// +// CREATE TABLE a ( +// a INT +// ) +// +// CREATE TABLE bc ( +// b INT, +// c INT AS (CASE WHEN b > 0 THEN NULL ELSE -1 END) STORED +// ) +// +// SELECT a, b FROM a JOIN b ON a = b +// +// The following is an invalid transformation: a row such as (a=1, b=1) would no +// longer be returned because NULL=NULL is false. +// +// SELECT a, b FROM a JOIN b ON a = b AND (CASE WHEN a > 0 THEN NULL ELSE -1 END) = c +// +// TODO(mgartner): We can relax condition (1) to allow nullable columns if it +// can be proven that the expression will never evaluate to NULL. We can use +// memo.ExprIsNeverNull to determine this, passing both NOT NULL and equality +// columns as notNullCols. +func (c *CustomFuncs) findComputedColJoinEquality( + tabID opt.TableID, col opt.ColumnID, eqCols opt.ColSet, +) (_ opt.ScalarExpr, ok bool) { + tabMeta := c.e.mem.Metadata().TableMeta(tabID) + tab := c.e.mem.Metadata().Table(tabID) + if tab.Column(tabID.ColumnOrdinal(col)).IsNullable() { + return nil, false + } + expr, ok := tabMeta.ComputedColExpr(col) + if !ok { + return nil, false + } + var sharedProps props.Shared + memo.BuildSharedProps(expr, &sharedProps, c.e.evalCtx) + if !sharedProps.OuterCols.SubsetOf(eqCols) { + return nil, false + } + return expr, true +} + +// findFiltersForIndexLookup finds the equality and constraint filters in +// filters that can be used to constrain the given index. Constraint filters +// can be either constants or inequality conditions. +func (c *CustomFuncs) findFiltersForIndexLookup( + filters memo.FiltersExpr, + tabID opt.TableID, + index cat.Index, + leftEq, rightEq opt.ColList, + extractEqualityFilter func(opt.ColumnID, opt.ColumnID) memo.FiltersItem, +) (eqFilters, constraintFilters memo.FiltersExpr, rightSideCols opt.ColList) { + numIndexKeyCols := index.LaxKeyColumnCount() + + eqFilters = make(memo.FiltersExpr, 0, len(filters)) + rightSideCols = make(opt.ColList, 0, len(filters)) + + // All the lookup conditions must apply to the prefix of the index. + for j := 0; j < numIndexKeyCols; j++ { + idxCol := tabID.IndexColumnID(index, j) + if eqIdx, ok := rightEq.Find(idxCol); ok { + eqFilter := extractEqualityFilter(leftEq[eqIdx], rightEq[eqIdx]) + eqFilters = append(eqFilters, eqFilter) + rightSideCols = append(rightSideCols, idxCol) + continue + } + + var foundRange bool + // Try to find a filter that constrains this column to non-NULL + // constant values. We cannot use a NULL value because the lookup + // join implements logic equivalent to simple equality between + // columns (where NULL never equals anything). + values, allIdx, foundConstFilter := c.findJoinFilterConstants(filters, idxCol) + if !foundConstFilter { + // If there's no const filters look for an inequality range. + allIdx, foundRange = c.findJoinFilterRange(filters, idxCol) + if !foundRange { + break + } + } + + if constraintFilters == nil { + constraintFilters = make(memo.FiltersExpr, 0, numIndexKeyCols-j) + } + + // Construct a constant filter as an equality, IN expression, or + // inequality. These are the only types of expressions currently + // supported by the lookupJoiner for building lookup spans. + if foundConstFilter { + constFilter := filters[allIdx] + if !c.isCanonicalLookupJoinFilter(constFilter) { + constFilter = c.makeConstFilter(idxCol, values) + } + constraintFilters = append(constraintFilters, constFilter) + } + // Non-canonical range filters aren't supported and are already filtered + // out by findJoinFilterRange above. + if foundRange { + constraintFilters = append(constraintFilters, filters[allIdx]) + // Generating additional columns after a range isn't helpful so stop here. + break + } + } + + if len(eqFilters) == 0 { + // We couldn't find equality columns which we can lookup. + return nil, nil, nil + } + + return eqFilters, constraintFilters, rightSideCols +} + +// makeConstFilter builds a filter that constrains the given column to the given +// set of constant values. This is performed by either constructing an equality +// expression or an IN expression. +func (c *CustomFuncs) makeConstFilter(col opt.ColumnID, values tree.Datums) memo.FiltersItem { + if len(values) == 1 { + return c.e.f.ConstructFiltersItem(c.e.f.ConstructEq( + c.e.f.ConstructVariable(col), + c.e.f.ConstructConstVal(values[0], values[0].ResolvedType()), + )) + } + elems := make(memo.ScalarListExpr, len(values)) + elemTypes := make([]*types.T, len(values)) + for i := range values { + typ := values[i].ResolvedType() + elems[i] = c.e.f.ConstructConstVal(values[i], typ) + elemTypes[i] = typ + } + return c.e.f.ConstructFiltersItem(c.e.f.ConstructIn( + c.e.f.ConstructVariable(col), + c.e.f.ConstructTuple(elems, types.MakeTuple(elemTypes)), + )) +} + +// findJoinFilterConstants tries to find a filter that is exactly equivalent to +// constraining the given column to a constant value or a set of constant +// values. If successful, the constant values and the index of the constraining +// FiltersItem are returned. If multiple filters match, the one that minimizes +// the number of returned values is chosen. Note that the returned constant +// values do not contain NULL. +func (c *CustomFuncs) findJoinFilterConstants( + filters memo.FiltersExpr, col opt.ColumnID, +) (values tree.Datums, filterIdx int, ok bool) { + var bestValues tree.Datums + var bestFilterIdx int + for filterIdx := range filters { + props := filters[filterIdx].ScalarProps() + if props.TightConstraints { + constCol, constVals, ok := props.Constraints.HasSingleColumnConstValues(c.e.evalCtx) + if !ok || constCol != col { + continue + } + hasNull := false + for i := range constVals { + if constVals[i] == tree.DNull { + hasNull = true + break + } + } + if !hasNull && (bestValues == nil || len(bestValues) > len(constVals)) { + bestValues = constVals + bestFilterIdx = filterIdx + } + } + } + if bestValues == nil { + return nil, -1, false + } + return bestValues, bestFilterIdx, true +} + +// findJoinFilterRange tries to find an inequality range for this column. +func (c *CustomFuncs) findJoinFilterRange( + filters memo.FiltersExpr, col opt.ColumnID, +) (filterIdx int, ok bool) { + // canAdvance returns whether non-nil, non-NULL datum can be "advanced" + // (i.e. both Next and Prev can be called on it). + canAdvance := func(val tree.Datum) bool { + if val.IsMax(c.e.evalCtx) { + return false + } + _, ok := val.Next(c.e.evalCtx) + if !ok { + return false + } + if val.IsMin(c.e.evalCtx) { + return false + } + _, ok = val.Prev(c.e.evalCtx) + return ok + } + for filterIdx := range filters { + props := filters[filterIdx].ScalarProps() + if props.TightConstraints && props.Constraints.Length() > 0 { + constraintObj := props.Constraints.Constraint(0) + constraintCol := constraintObj.Columns.Get(0) + // Non-canonical filters aren't yet supported for range spans like + // they are for const spans so filter those out here (const spans + // from non-canonical filters can be turned into a canonical filter, + // see makeConstFilter). We only support 1 span in the execution + // engine so check that. + if constraintCol.ID() != col || constraintObj.Spans.Count() != 1 || + !c.isCanonicalLookupJoinFilter(filters[filterIdx]) { + continue + } + span := constraintObj.Spans.Get(0) + // If we have a datum for either end of the span, we have to ensure + // that it can be "advanced" if the corresponding span boundary is + // exclusive. + // + // This limitation comes from the execution that must be able to + // "advance" the start boundary, but since we don't know the + // direction of the index here, we have to check both ends of the + // span. + if !span.StartKey().IsEmpty() && !span.StartKey().IsNull() { + val := span.StartKey().Value(0) + if span.StartBoundary() == constraint.ExcludeBoundary { + if !canAdvance(val) { + continue + } + } + } + if !span.EndKey().IsEmpty() && !span.EndKey().IsNull() { + val := span.EndKey().Value(0) + if span.EndBoundary() == constraint.ExcludeBoundary { + if !canAdvance(val) { + continue + } + } + } + return filterIdx, true + } + } + return -1, false +} + +// isCanonicalLookupJoinFilter returns true for the limited set of expr's that are +// supported by the lookup joiner at execution time. +func (c *CustomFuncs) isCanonicalLookupJoinFilter(filter memo.FiltersItem) bool { + isVar := func(expr opt.Expr) bool { + _, ok := expr.(*memo.VariableExpr) + return ok + } + var isCanonicalInequality func(expr opt.Expr) bool + isCanonicalInequality = func(expr opt.Expr) bool { + switch t := expr.(type) { + case *memo.RangeExpr: + return isCanonicalInequality(t.And) + case *memo.AndExpr: + return isCanonicalInequality(t.Left) && isCanonicalInequality(t.Right) + case *memo.GeExpr: + return isCanonicalInequality(t.Left) && isCanonicalInequality(t.Right) + case *memo.GtExpr: + return isCanonicalInequality(t.Left) && isCanonicalInequality(t.Right) + case *memo.LeExpr: + return isCanonicalInequality(t.Left) && isCanonicalInequality(t.Right) + case *memo.LtExpr: + return isCanonicalInequality(t.Left) && isCanonicalInequality(t.Right) + } + return isVar(expr) || opt.IsConstValueOp(expr) + } + switch t := filter.Condition.(type) { + case *memo.EqExpr: + return isVar(t.Left) && opt.IsConstValueOp(t.Right) + case *memo.InExpr: + return isVar(t.Left) && memo.CanExtractConstTuple(t.Right) + default: + return isCanonicalInequality(t) + } +} diff --git a/pkg/sql/opt/xform/join_funcs.go b/pkg/sql/opt/xform/join_funcs.go index 43a4df14376d..10357e154f7e 100644 --- a/pkg/sql/opt/xform/join_funcs.go +++ b/pkg/sql/opt/xform/join_funcs.go @@ -368,31 +368,6 @@ func (c *CustomFuncs) generateLookupJoinsImpl( return } - // Find the longest prefix of index key columns that are constrained by - // an equality with another column or a constant. - numIndexKeyCols := index.LaxKeyColumnCount() - - var constraintFilters memo.FiltersExpr - allFilters := append(onFilters, optionalFilters...) - - // Check if the first column in the index either: - // - // 1. Has an equality constraint. - // 2. Is a computed column for which an equality constraint can be - // generated. - // 3. Is constrained to a constant value or values. - // - // This check doesn't guarantee that we will find lookup join key - // columns, but it avoids unnecessary work in most cases. - firstIdxCol := scanPrivate.Table.IndexColumnID(index, 0) - if _, ok := rightEq.Find(firstIdxCol); !ok { - if _, ok := c.findComputedColJoinEquality(scanPrivate.Table, firstIdxCol, rightEqSet); !ok { - if _, _, ok := c.findJoinFilterConstants(allFilters, firstIdxCol); !ok { - return - } - } - } - lookupJoin := memo.LookupJoinExpr{Input: input} lookupJoin.JoinPrivate = *joinPrivate lookupJoin.JoinType = joinType @@ -404,122 +379,6 @@ func (c *CustomFuncs) generateLookupJoinsImpl( rightSideCols := make(opt.ColList, 0, numIndexKeyCols) var inputProjections memo.ProjectionsExpr - shouldBuildMultiSpanLookupJoin := false - - // All the lookup conditions must apply to the prefix of the index and so - // the projected columns created must be created in order. - for j := 0; j < numIndexKeyCols; j++ { - idxCol := scanPrivate.Table.IndexColumnID(index, j) - if eqIdx, ok := rightEq.Find(idxCol); ok { - lookupJoin.KeyCols = append(lookupJoin.KeyCols, leftEq[eqIdx]) - rightSideCols = append(rightSideCols, idxCol) - continue - } - - // If the column is computed and an equality constraint can be - // synthesized for it, we can project a column from the join's input - // that can be used as a key column. We create the projection here, - // and construct a Project expression that wraps the join's input - // below. See findComputedColJoinEquality for the requirements to - // synthesize a computed column equality constraint. - if expr, ok := c.findComputedColJoinEquality(scanPrivate.Table, idxCol, rightEqSet); ok { - colMeta := md.ColumnMeta(idxCol) - compEqCol := md.AddColumn(fmt.Sprintf("%s_eq", colMeta.Alias), colMeta.Type) - - // Lazily initialize eqColMap. - if eqColMap.Empty() { - for i := range rightEq { - eqColMap.Set(int(rightEq[i]), int(leftEq[i])) - } - } - - // Project the computed column expression, mapping all columns - // in rightEq to corresponding columns in leftEq. - projection := c.e.f.ConstructProjectionsItem(c.e.f.RemapCols(expr, eqColMap), compEqCol) - inputProjections = append(inputProjections, projection) - lookupJoin.KeyCols = append(lookupJoin.KeyCols, compEqCol) - rightSideCols = append(rightSideCols, idxCol) - continue - } - - // Try to find a filter that constrains this column to non-NULL - // constant values. We cannot use a NULL value because the lookup - // join implements logic equivalent to simple equality between - // columns (where NULL never equals anything). - foundVals, allIdx, ok := c.findJoinFilterConstants(allFilters, idxCol) - if ok && len(foundVals) == 1 { - // If a single constant value was found, project it in the input - // and use it as an equality column. - idxColType := c.e.f.Metadata().ColumnMeta(idxCol).Type - constColID := c.e.f.Metadata().AddColumn( - fmt.Sprintf("lookup_join_const_col_@%d", idxCol), - idxColType, - ) - inputProjections = append(inputProjections, c.e.f.ConstructProjectionsItem( - c.e.f.ConstructConstVal(foundVals[0], idxColType), - constColID, - )) - constraintFilters = append(constraintFilters, allFilters[allIdx]) - lookupJoin.KeyCols = append(lookupJoin.KeyCols, constColID) - rightSideCols = append(rightSideCols, idxCol) - continue - } - - var foundRange bool - if !ok { - // If constant values were not found, try to find a filter that - // constrains this index column to a range. - _, foundRange = c.findJoinFilterRange(allFilters, idxCol) - } - - // If more than one constant value or a range to constrain the index - // column was found, use a LookupExpr rather than KeyCols. - if len(foundVals) > 1 || foundRange { - shouldBuildMultiSpanLookupJoin = true - } - - // Either multiple constant values or a range were found, or the - // index column cannot be constrained. In all cases, we cannot - // continue on to the next index column, so we break out of the - // loop. - break - } - - if shouldBuildMultiSpanLookupJoin { - // Some of the index columns were constrained to multiple constant - // values or a range expression, so we cannot build a lookup join - // with KeyCols. As an alternative, we store all the filters needed - // for the lookup in LookupExpr, which will be used to construct - // spans at execution time. Each input row will generate multiple - // spans to lookup in the index. - // - // For example, if the index cols are (region, id) and the - // LookupExpr is `region in ('east', 'west') AND id = input.id`, - // each input row will generate two spans to be scanned in the - // lookup: - // - // [/'east'/ - /'east'/] - // [/'west'/ - /'west'/] - // - // Where is the value of input.id for the current input row. - var eqFilters memo.FiltersExpr - extractEqualityFilter := func(leftCol, rightCol opt.ColumnID) memo.FiltersItem { - return memo.ExtractJoinEqualityFilter( - leftCol, rightCol, inputProps.OutputCols, rightCols, onFilters, - ) - } - eqFilters, constraintFilters, rightSideCols = c.findFiltersForIndexLookup( - allFilters, scanPrivate.Table, index, leftEq, rightEq, extractEqualityFilter, - ) - lookupJoin.LookupExpr = append(eqFilters, constraintFilters...) - - // Reset KeyCols since we're not using it anymore. - lookupJoin.KeyCols = opt.ColList{} - // Reset the input projections since we don't need any constant - // values projected. - inputProjections = nil - } - if len(lookupJoin.KeyCols) == 0 && len(lookupJoin.LookupExpr) == 0 { // We couldn't find equality columns which we can lookup. return @@ -745,111 +604,6 @@ func (c *CustomFuncs) generateLookupJoinsImpl( }) } -// findFiltersForIndexLookup finds the equality and constraint filters in -// filters that can be used to constrain the given index. Constraint filters -// can be either constants or inequality conditions. -func (c *CustomFuncs) findFiltersForIndexLookup( - filters memo.FiltersExpr, - tabID opt.TableID, - index cat.Index, - leftEq, rightEq opt.ColList, - extractEqualityFilter func(opt.ColumnID, opt.ColumnID) memo.FiltersItem, -) (eqFilters, constraintFilters memo.FiltersExpr, rightSideCols opt.ColList) { - numIndexKeyCols := index.LaxKeyColumnCount() - - eqFilters = make(memo.FiltersExpr, 0, len(filters)) - rightSideCols = make(opt.ColList, 0, len(filters)) - - // All the lookup conditions must apply to the prefix of the index. - for j := 0; j < numIndexKeyCols; j++ { - idxCol := tabID.IndexColumnID(index, j) - if eqIdx, ok := rightEq.Find(idxCol); ok { - eqFilter := extractEqualityFilter(leftEq[eqIdx], rightEq[eqIdx]) - eqFilters = append(eqFilters, eqFilter) - rightSideCols = append(rightSideCols, idxCol) - continue - } - - var foundRange bool - // Try to find a filter that constrains this column to non-NULL - // constant values. We cannot use a NULL value because the lookup - // join implements logic equivalent to simple equality between - // columns (where NULL never equals anything). - values, allIdx, foundConstFilter := c.findJoinFilterConstants(filters, idxCol) - if !foundConstFilter { - // If there's no const filters look for an inequality range. - allIdx, foundRange = c.findJoinFilterRange(filters, idxCol) - if !foundRange { - break - } - } - - if constraintFilters == nil { - constraintFilters = make(memo.FiltersExpr, 0, numIndexKeyCols-j) - } - - // Construct a constant filter as an equality, IN expression, or - // inequality. These are the only types of expressions currently - // supported by the lookupJoiner for building lookup spans. - if foundConstFilter { - constFilter := filters[allIdx] - if !c.isCanonicalLookupJoinFilter(constFilter) { - constFilter = c.makeConstFilter(idxCol, values) - } - constraintFilters = append(constraintFilters, constFilter) - } - // Non-canonical range filters aren't supported and are already filtered - // out by findJoinFilterRange above. - if foundRange { - constraintFilters = append(constraintFilters, filters[allIdx]) - // Generating additional columns after a range isn't helpful so stop here. - break - } - } - - if len(eqFilters) == 0 { - // We couldn't find equality columns which we can lookup. - return nil, nil, nil - } - - return eqFilters, constraintFilters, rightSideCols -} - -// isCanonicalLookupJoinFilter returns true for the limited set of expr's that are -// supported by the lookup joiner at execution time. -func (c *CustomFuncs) isCanonicalLookupJoinFilter(filter memo.FiltersItem) bool { - isVar := func(expr opt.Expr) bool { - _, ok := expr.(*memo.VariableExpr) - return ok - } - var isCanonicalInequality func(expr opt.Expr) bool - isCanonicalInequality = func(expr opt.Expr) bool { - switch t := expr.(type) { - case *memo.RangeExpr: - return isCanonicalInequality(t.And) - case *memo.AndExpr: - return isCanonicalInequality(t.Left) && isCanonicalInequality(t.Right) - case *memo.GeExpr: - return isCanonicalInequality(t.Left) && isCanonicalInequality(t.Right) - case *memo.GtExpr: - return isCanonicalInequality(t.Left) && isCanonicalInequality(t.Right) - case *memo.LeExpr: - return isCanonicalInequality(t.Left) && isCanonicalInequality(t.Right) - case *memo.LtExpr: - return isCanonicalInequality(t.Left) && isCanonicalInequality(t.Right) - } - return isVar(expr) || opt.IsConstValueOp(expr) - } - switch t := filter.Condition.(type) { - case *memo.EqExpr: - return isVar(t.Left) && opt.IsConstValueOp(t.Right) - case *memo.InExpr: - return isVar(t.Left) && memo.CanExtractConstTuple(t.Right) - default: - return isCanonicalInequality(t) - } -} - // makeConstFilter builds a filter that constrains the given column to the given // set of constant values. This is performed by either constructing an equality // expression or an IN expression. @@ -1183,78 +937,6 @@ func (c *CustomFuncs) mapInvertedJoin( invertedJoin.On = *on } -// findComputedColJoinEquality returns the computed column expression of col and -// ok=true when a join equality constraint can be generated for the column. This -// is possible when: -// -// 1. col is non-nullable. -// 2. col is a computed column. -// 3. Columns referenced in the computed expression are a subset of columns -// that already have equality constraints. -// -// For example, consider the table and query: -// -// CREATE TABLE a ( -// a INT -// ) -// -// CREATE TABLE bc ( -// b INT, -// c INT NOT NULL AS (b + 1) STORED -// ) -// -// SELECT * FROM a JOIN b ON a = b -// -// We can add an equality constraint for c because c is a function of b and b -// has an equality constraint in the join predicate: -// -// SELECT * FROM a JOIN b ON a = b AND a + 1 = c -// -// Condition (1) is required to prevent generating invalid equality constraints -// for computed column expressions that can evaluate to NULL even when the -// columns referenced in the expression are non-NULL. For example, consider the -// table and query: -// -// CREATE TABLE a ( -// a INT -// ) -// -// CREATE TABLE bc ( -// b INT, -// c INT AS (CASE WHEN b > 0 THEN NULL ELSE -1 END) STORED -// ) -// -// SELECT a, b FROM a JOIN b ON a = b -// -// The following is an invalid transformation: a row such as (a=1, b=1) would no -// longer be returned because NULL=NULL is false. -// -// SELECT a, b FROM a JOIN b ON a = b AND (CASE WHEN a > 0 THEN NULL ELSE -1 END) = c -// -// TODO(mgartner): We can relax condition (1) to allow nullable columns if it -// can be proven that the expression will never evaluate to NULL. We can use -// memo.ExprIsNeverNull to determine this, passing both NOT NULL and equality -// columns as notNullCols. -func (c *CustomFuncs) findComputedColJoinEquality( - tabID opt.TableID, col opt.ColumnID, eqCols opt.ColSet, -) (_ opt.ScalarExpr, ok bool) { - tabMeta := c.e.mem.Metadata().TableMeta(tabID) - tab := c.e.mem.Metadata().Table(tabID) - if tab.Column(tabID.ColumnOrdinal(col)).IsNullable() { - return nil, false - } - expr, ok := tabMeta.ComputedColExpr(col) - if !ok { - return nil, false - } - var sharedProps props.Shared - memo.BuildSharedProps(expr, &sharedProps, c.e.evalCtx) - if !sharedProps.OuterCols.SubsetOf(eqCols) { - return nil, false - } - return expr, true -} - // findJoinFilterConstants tries to find a filter that is exactly equivalent to // constraining the given column to a constant value or a set of constant // values. If successful, the constant values and the index of the constraining @@ -1292,71 +974,6 @@ func (c *CustomFuncs) findJoinFilterConstants( return bestValues, bestFilterIdx, true } -// findJoinFilterRange tries to find an inequality range for this column. -func (c *CustomFuncs) findJoinFilterRange( - filters memo.FiltersExpr, col opt.ColumnID, -) (filterIdx int, ok bool) { - // canAdvance returns whether non-nil, non-NULL datum can be "advanced" - // (i.e. both Next and Prev can be called on it). - canAdvance := func(val tree.Datum) bool { - if val.IsMax(c.e.evalCtx) { - return false - } - _, ok := val.Next(c.e.evalCtx) - if !ok { - return false - } - if val.IsMin(c.e.evalCtx) { - return false - } - _, ok = val.Prev(c.e.evalCtx) - return ok - } - for filterIdx := range filters { - props := filters[filterIdx].ScalarProps() - if props.TightConstraints && props.Constraints.Length() > 0 { - constraintObj := props.Constraints.Constraint(0) - constraintCol := constraintObj.Columns.Get(0) - // Non-canonical filters aren't yet supported for range spans like - // they are for const spans so filter those out here (const spans - // from non-canonical filters can be turned into a canonical filter, - // see makeConstFilter). We only support 1 span in the execution - // engine so check that. - if constraintCol.ID() != col || constraintObj.Spans.Count() != 1 || - !c.isCanonicalLookupJoinFilter(filters[filterIdx]) { - continue - } - span := constraintObj.Spans.Get(0) - // If we have a datum for either end of the span, we have to ensure - // that it can be "advanced" if the corresponding span boundary is - // exclusive. - // - // This limitation comes from the execution that must be able to - // "advance" the start boundary, but since we don't know the - // direction of the index here, we have to check both ends of the - // span. - if !span.StartKey().IsEmpty() && !span.StartKey().IsNull() { - val := span.StartKey().Value(0) - if span.StartBoundary() == constraint.ExcludeBoundary { - if !canAdvance(val) { - continue - } - } - } - if !span.EndKey().IsEmpty() && !span.EndKey().IsNull() { - val := span.EndKey().Value(0) - if span.EndBoundary() == constraint.ExcludeBoundary { - if !canAdvance(val) { - continue - } - } - } - return filterIdx, true - } - } - return -1, false -} - // constructJoinWithConstants constructs a cross join that joins every row in // the input with every value in vals. The cross join will be converted into a // projection by inlining normalization rules if vals contains only a single From dc16841840c1258467f3a2d48e581b4180c66994 Mon Sep 17 00:00:00 2001 From: Marcus Gartner Date: Tue, 12 Apr 2022 19:21:38 -0400 Subject: [PATCH 06/12] opt: moves some lookup join generation logic to lookup join package This commit adds a new `lookupjoin` package. Logic for determining the key columns and lookup expressions for lookup joins has been moved to `lookupJoin.ConstraintBuilder`. The code was moved with as few changes as possible, and the behavior does not change in any way. This move will make it easier to test this code in isolation in the future, and allow for further refactoring. Release note: None --- pkg/BUILD.bazel | 1 + pkg/sql/opt/lookupjoin/BUILD.bazel | 40 ++++ pkg/sql/opt/lookupjoin/constraint_builder.go | 213 ++++++++++++------ .../constraint_builder_test.go} | 80 ++++--- .../exports_test.go} | 6 +- pkg/sql/opt/xform/BUILD.bazel | 3 +- pkg/sql/opt/xform/join_funcs.go | 28 +-- 7 files changed, 241 insertions(+), 130 deletions(-) create mode 100644 pkg/sql/opt/lookupjoin/BUILD.bazel rename pkg/sql/opt/{xform/join_funcs_test.go => lookupjoin/constraint_builder_test.go} (77%) rename pkg/sql/opt/{xform/join_funcs_export_test.go => lookupjoin/exports_test.go} (70%) diff --git a/pkg/BUILD.bazel b/pkg/BUILD.bazel index fa908fefb103..e629cf5fa907 100644 --- a/pkg/BUILD.bazel +++ b/pkg/BUILD.bazel @@ -318,6 +318,7 @@ ALL_TESTS = [ "//pkg/sql/opt/indexrec:indexrec_test", "//pkg/sql/opt/invertedexpr:invertedexpr_test", "//pkg/sql/opt/invertedidx:invertedidx_test", + "//pkg/sql/opt/lookupjoin:lookupjoin_test", "//pkg/sql/opt/memo:memo_test", "//pkg/sql/opt/norm:norm_test", "//pkg/sql/opt/opbench:opbench_test", diff --git a/pkg/sql/opt/lookupjoin/BUILD.bazel b/pkg/sql/opt/lookupjoin/BUILD.bazel new file mode 100644 index 000000000000..649a49a0e21d --- /dev/null +++ b/pkg/sql/opt/lookupjoin/BUILD.bazel @@ -0,0 +1,40 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") + +go_library( + name = "lookupjoin", + srcs = ["constraint_builder.go"], + importpath = "github.com/cockroachdb/cockroach/pkg/sql/opt/lookupjoin", + visibility = ["//visibility:public"], + deps = [ + "//pkg/sql/opt", + "//pkg/sql/opt/cat", + "//pkg/sql/opt/constraint", + "//pkg/sql/opt/memo", + "//pkg/sql/opt/norm", + "//pkg/sql/opt/props", + "//pkg/sql/sem/tree", + "//pkg/sql/types", + "@com_github_cockroachdb_errors//:errors", + ], +) + +go_test( + name = "lookupjoin_test", + srcs = [ + "constraint_builder_test.go", + "exports_test.go", + ], + data = glob(["testdata/**"]), + embed = [":lookupjoin"], + deps = [ + "//pkg/settings/cluster", + "//pkg/sql/opt", + "//pkg/sql/opt/constraint", + "//pkg/sql/opt/memo", + "//pkg/sql/opt/norm", + "//pkg/sql/opt/testutils", + "//pkg/sql/opt/testutils/testcat", + "//pkg/sql/sem/tree", + "//pkg/util/leaktest", + ], +) diff --git a/pkg/sql/opt/lookupjoin/constraint_builder.go b/pkg/sql/opt/lookupjoin/constraint_builder.go index 28df6ebc6f6e..2fdb18cf8d52 100644 --- a/pkg/sql/opt/lookupjoin/constraint_builder.go +++ b/pkg/sql/opt/lookupjoin/constraint_builder.go @@ -17,17 +17,82 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/opt/cat" "github.com/cockroachdb/cockroach/pkg/sql/opt/constraint" "github.com/cockroachdb/cockroach/pkg/sql/opt/memo" + "github.com/cockroachdb/cockroach/pkg/sql/opt/norm" "github.com/cockroachdb/cockroach/pkg/sql/opt/props" + "github.com/cockroachdb/cockroach/pkg/sql/sem/eval" "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" "github.com/cockroachdb/cockroach/pkg/sql/types" ) -func stub() { - // Find the longest prefix of index key columns that are constrained by - // an equality with another column or a constant. - numIndexKeyCols := index.LaxKeyColumnCount() +// ConstraintBuilder determines how to constrain index key columns for a lookup +// join. See Build for more details. +type ConstraintBuilder struct { + f *norm.Factory + md *opt.Metadata + evalCtx *eval.Context + + inputProps *props.Relational + sp *memo.ScanPrivate + rightCols opt.ColSet + leftEq, rightEq opt.ColList + rightEqSet opt.ColSet + eqColMap opt.ColMap +} + +// Init initializes a ConstraintBuilder. Once initialized, a ConstraintBuilder +// can be reused to build lookup join constraints for all indexes in the given +// ScanPrivate, as long as the join input and ON condition do not change. +func (b *ConstraintBuilder) Init( + f *norm.Factory, + md *opt.Metadata, + evalCtx *eval.Context, + inputProps *props.Relational, + sp *memo.ScanPrivate, + rightCols opt.ColSet, + leftEq, rightEq opt.ColList, +) { + // This initialization pattern ensures that fields are not unwittingly + // reused. Field reuse must be explicit. + *b = ConstraintBuilder{ + f: f, + md: md, + evalCtx: evalCtx, + inputProps: inputProps, + sp: sp, + rightCols: rightCols, + leftEq: leftEq, + rightEq: rightEq, + rightEqSet: rightEq.ToSet(), + } +} - var constraintFilters memo.FiltersExpr +// Build determines how to constrain index key columns for a lookup join. It +// returns either a list of key columns or a lookup expression. It has several +// return values: +// +// keyCols - An ordered list of columns from the left side of the +// join that correspond to a prefix of columns in the given +// index. +// lookupExpr - A lookup expression for multi-span lookup joins. +// inputProjections - Projections of constant values and computed columns that +// must be projected on the lookup join's input. +// constFilters - Filters representing constant equalities and ranges in +// either keyCols or lookupExpr that are used to aid +// selectivity estimation. +// rightSideCols - A list of constrained index columns. +// +// Build will return either non-nil keyCols or a non-nil lookupExpr, but not +// both. If both keyCols and lookupExpr are nil, then the index cannot be used +// for a lookup join. +func (b *ConstraintBuilder) Build( + index cat.Index, onFilters, optionalFilters memo.FiltersExpr, +) ( + keyCols opt.ColList, + lookupExpr memo.FiltersExpr, + inputProjections memo.ProjectionsExpr, + constFilters memo.FiltersExpr, + rightSideCols opt.ColList, +) { allFilters := append(onFilters, optionalFilters...) // Check if the first column in the index either: @@ -39,23 +104,29 @@ func stub() { // // This check doesn't guarantee that we will find lookup join key // columns, but it avoids unnecessary work in most cases. - firstIdxCol := scanPrivate.Table.IndexColumnID(index, 0) - if _, ok := rightEq.Find(firstIdxCol); !ok { - if _, ok := c.findComputedColJoinEquality(scanPrivate.Table, firstIdxCol, rightEqSet); !ok { - if _, _, ok := c.findJoinFilterConstants(allFilters, firstIdxCol); !ok { + firstIdxCol := b.sp.Table.IndexColumnID(index, 0) + if _, ok := b.rightEq.Find(firstIdxCol); !ok { + if _, ok := b.findComputedColJoinEquality(b.sp.Table, firstIdxCol, b.rightEqSet); !ok { + if _, _, ok := b.findJoinFilterConstants(allFilters, firstIdxCol); !ok { return } } } + // Find the longest prefix of index key columns that are constrained by + // an equality with another column or a constant. + numIndexKeyCols := index.LaxKeyColumnCount() + + keyCols = make(opt.ColList, 0, numIndexKeyCols) + rightSideCols = make(opt.ColList, 0, numIndexKeyCols) shouldBuildMultiSpanLookupJoin := false // All the lookup conditions must apply to the prefix of the index and so // the projected columns created must be created in order. for j := 0; j < numIndexKeyCols; j++ { - idxCol := scanPrivate.Table.IndexColumnID(index, j) - if eqIdx, ok := rightEq.Find(idxCol); ok { - lookupJoin.KeyCols = append(lookupJoin.KeyCols, leftEq[eqIdx]) + idxCol := b.sp.Table.IndexColumnID(index, j) + if eqIdx, ok := b.rightEq.Find(idxCol); ok { + keyCols = append(keyCols, b.leftEq[eqIdx]) rightSideCols = append(rightSideCols, idxCol) continue } @@ -66,22 +137,22 @@ func stub() { // and construct a Project expression that wraps the join's input // below. See findComputedColJoinEquality for the requirements to // synthesize a computed column equality constraint. - if expr, ok := c.findComputedColJoinEquality(scanPrivate.Table, idxCol, rightEqSet); ok { - colMeta := md.ColumnMeta(idxCol) - compEqCol := md.AddColumn(fmt.Sprintf("%s_eq", colMeta.Alias), colMeta.Type) + if expr, ok := b.findComputedColJoinEquality(b.sp.Table, idxCol, b.rightEqSet); ok { + colMeta := b.md.ColumnMeta(idxCol) + compEqCol := b.md.AddColumn(fmt.Sprintf("%s_eq", colMeta.Alias), colMeta.Type) // Lazily initialize eqColMap. - if eqColMap.Empty() { - for i := range rightEq { - eqColMap.Set(int(rightEq[i]), int(leftEq[i])) + if b.eqColMap.Empty() { + for i := range b.rightEq { + b.eqColMap.Set(int(b.rightEq[i]), int(b.leftEq[i])) } } // Project the computed column expression, mapping all columns // in rightEq to corresponding columns in leftEq. - projection := c.e.f.ConstructProjectionsItem(c.e.f.RemapCols(expr, eqColMap), compEqCol) + projection := b.f.ConstructProjectionsItem(b.f.RemapCols(expr, b.eqColMap), compEqCol) inputProjections = append(inputProjections, projection) - lookupJoin.KeyCols = append(lookupJoin.KeyCols, compEqCol) + keyCols = append(keyCols, compEqCol) rightSideCols = append(rightSideCols, idxCol) continue } @@ -90,21 +161,21 @@ func stub() { // constant values. We cannot use a NULL value because the lookup // join implements logic equivalent to simple equality between // columns (where NULL never equals anything). - foundVals, allIdx, ok := c.findJoinFilterConstants(allFilters, idxCol) + foundVals, allIdx, ok := b.findJoinFilterConstants(allFilters, idxCol) if ok && len(foundVals) == 1 { // If a single constant value was found, project it in the input // and use it as an equality column. - idxColType := c.e.f.Metadata().ColumnMeta(idxCol).Type - constColID := c.e.f.Metadata().AddColumn( + idxColType := b.md.ColumnMeta(idxCol).Type + constColID := b.md.AddColumn( fmt.Sprintf("lookup_join_const_col_@%d", idxCol), idxColType, ) - inputProjections = append(inputProjections, c.e.f.ConstructProjectionsItem( - c.e.f.ConstructConstVal(foundVals[0], idxColType), + inputProjections = append(inputProjections, b.f.ConstructProjectionsItem( + b.f.ConstructConstVal(foundVals[0], idxColType), constColID, )) - constraintFilters = append(constraintFilters, allFilters[allIdx]) - lookupJoin.KeyCols = append(lookupJoin.KeyCols, constColID) + constFilters = append(constFilters, allFilters[allIdx]) + keyCols = append(keyCols, constColID) rightSideCols = append(rightSideCols, idxCol) continue } @@ -113,7 +184,7 @@ func stub() { if !ok { // If constant values were not found, try to find a filter that // constrains this index column to a range. - _, foundRange = c.findJoinFilterRange(allFilters, idxCol) + _, foundRange = b.findJoinFilterRange(allFilters, idxCol) } // If more than one constant value or a range to constrain the index @@ -149,20 +220,22 @@ func stub() { var eqFilters memo.FiltersExpr extractEqualityFilter := func(leftCol, rightCol opt.ColumnID) memo.FiltersItem { return memo.ExtractJoinEqualityFilter( - leftCol, rightCol, inputProps.OutputCols, rightCols, onFilters, + leftCol, rightCol, b.inputProps.OutputCols, b.rightCols, onFilters, ) } - eqFilters, constraintFilters, rightSideCols = c.findFiltersForIndexLookup( - allFilters, scanPrivate.Table, index, leftEq, rightEq, extractEqualityFilter, + eqFilters, constFilters, rightSideCols = b.findFiltersForIndexLookup( + allFilters, b.sp.Table, index, b.leftEq, b.rightEq, extractEqualityFilter, ) - lookupJoin.LookupExpr = append(eqFilters, constraintFilters...) + lookupExpr = append(eqFilters, constFilters...) - // Reset KeyCols since we're not using it anymore. - lookupJoin.KeyCols = opt.ColList{} - // Reset the input projections since we don't need any constant - // values projected. - inputProjections = nil + // A multi-span lookup join with a lookup expression has no key columns + // and requires no projections on the input. + return nil /* keyCols */, lookupExpr, nil /* inputProjections */, constFilters, rightSideCols } + + // If we did not build a lookup expression, return the key columns we found, + // if any. + return keyCols, nil /* lookupExpr */, inputProjections, constFilters, rightSideCols } // findComputedColJoinEquality returns the computed column expression of col and @@ -217,11 +290,11 @@ func stub() { // can be proven that the expression will never evaluate to NULL. We can use // memo.ExprIsNeverNull to determine this, passing both NOT NULL and equality // columns as notNullCols. -func (c *CustomFuncs) findComputedColJoinEquality( +func (b *ConstraintBuilder) findComputedColJoinEquality( tabID opt.TableID, col opt.ColumnID, eqCols opt.ColSet, ) (_ opt.ScalarExpr, ok bool) { - tabMeta := c.e.mem.Metadata().TableMeta(tabID) - tab := c.e.mem.Metadata().Table(tabID) + tabMeta := b.md.TableMeta(tabID) + tab := b.md.Table(tabID) if tab.Column(tabID.ColumnOrdinal(col)).IsNullable() { return nil, false } @@ -230,7 +303,7 @@ func (c *CustomFuncs) findComputedColJoinEquality( return nil, false } var sharedProps props.Shared - memo.BuildSharedProps(expr, &sharedProps, c.e.evalCtx) + memo.BuildSharedProps(expr, &sharedProps, b.evalCtx) if !sharedProps.OuterCols.SubsetOf(eqCols) { return nil, false } @@ -240,13 +313,13 @@ func (c *CustomFuncs) findComputedColJoinEquality( // findFiltersForIndexLookup finds the equality and constraint filters in // filters that can be used to constrain the given index. Constraint filters // can be either constants or inequality conditions. -func (c *CustomFuncs) findFiltersForIndexLookup( +func (b *ConstraintBuilder) findFiltersForIndexLookup( filters memo.FiltersExpr, tabID opt.TableID, index cat.Index, leftEq, rightEq opt.ColList, extractEqualityFilter func(opt.ColumnID, opt.ColumnID) memo.FiltersItem, -) (eqFilters, constraintFilters memo.FiltersExpr, rightSideCols opt.ColList) { +) (eqFilters, constFilters memo.FiltersExpr, rightSideCols opt.ColList) { numIndexKeyCols := index.LaxKeyColumnCount() eqFilters = make(memo.FiltersExpr, 0, len(filters)) @@ -267,17 +340,17 @@ func (c *CustomFuncs) findFiltersForIndexLookup( // constant values. We cannot use a NULL value because the lookup // join implements logic equivalent to simple equality between // columns (where NULL never equals anything). - values, allIdx, foundConstFilter := c.findJoinFilterConstants(filters, idxCol) + values, allIdx, foundConstFilter := b.findJoinFilterConstants(filters, idxCol) if !foundConstFilter { // If there's no const filters look for an inequality range. - allIdx, foundRange = c.findJoinFilterRange(filters, idxCol) + allIdx, foundRange = b.findJoinFilterRange(filters, idxCol) if !foundRange { break } } - if constraintFilters == nil { - constraintFilters = make(memo.FiltersExpr, 0, numIndexKeyCols-j) + if constFilters == nil { + constFilters = make(memo.FiltersExpr, 0, numIndexKeyCols-j) } // Construct a constant filter as an equality, IN expression, or @@ -285,15 +358,15 @@ func (c *CustomFuncs) findFiltersForIndexLookup( // supported by the lookupJoiner for building lookup spans. if foundConstFilter { constFilter := filters[allIdx] - if !c.isCanonicalLookupJoinFilter(constFilter) { - constFilter = c.makeConstFilter(idxCol, values) + if !isCanonicalFilter(constFilter) { + constFilter = b.makeConstFilter(idxCol, values) } - constraintFilters = append(constraintFilters, constFilter) + constFilters = append(constFilters, constFilter) } // Non-canonical range filters aren't supported and are already filtered // out by findJoinFilterRange above. if foundRange { - constraintFilters = append(constraintFilters, filters[allIdx]) + constFilters = append(constFilters, filters[allIdx]) // Generating additional columns after a range isn't helpful so stop here. break } @@ -304,29 +377,29 @@ func (c *CustomFuncs) findFiltersForIndexLookup( return nil, nil, nil } - return eqFilters, constraintFilters, rightSideCols + return eqFilters, constFilters, rightSideCols } // makeConstFilter builds a filter that constrains the given column to the given // set of constant values. This is performed by either constructing an equality // expression or an IN expression. -func (c *CustomFuncs) makeConstFilter(col opt.ColumnID, values tree.Datums) memo.FiltersItem { +func (b *ConstraintBuilder) makeConstFilter(col opt.ColumnID, values tree.Datums) memo.FiltersItem { if len(values) == 1 { - return c.e.f.ConstructFiltersItem(c.e.f.ConstructEq( - c.e.f.ConstructVariable(col), - c.e.f.ConstructConstVal(values[0], values[0].ResolvedType()), + return b.f.ConstructFiltersItem(b.f.ConstructEq( + b.f.ConstructVariable(col), + b.f.ConstructConstVal(values[0], values[0].ResolvedType()), )) } elems := make(memo.ScalarListExpr, len(values)) elemTypes := make([]*types.T, len(values)) for i := range values { typ := values[i].ResolvedType() - elems[i] = c.e.f.ConstructConstVal(values[i], typ) + elems[i] = b.f.ConstructConstVal(values[i], typ) elemTypes[i] = typ } - return c.e.f.ConstructFiltersItem(c.e.f.ConstructIn( - c.e.f.ConstructVariable(col), - c.e.f.ConstructTuple(elems, types.MakeTuple(elemTypes)), + return b.f.ConstructFiltersItem(b.f.ConstructIn( + b.f.ConstructVariable(col), + b.f.ConstructTuple(elems, types.MakeTuple(elemTypes)), )) } @@ -336,7 +409,7 @@ func (c *CustomFuncs) makeConstFilter(col opt.ColumnID, values tree.Datums) memo // FiltersItem are returned. If multiple filters match, the one that minimizes // the number of returned values is chosen. Note that the returned constant // values do not contain NULL. -func (c *CustomFuncs) findJoinFilterConstants( +func (b *ConstraintBuilder) findJoinFilterConstants( filters memo.FiltersExpr, col opt.ColumnID, ) (values tree.Datums, filterIdx int, ok bool) { var bestValues tree.Datums @@ -344,7 +417,7 @@ func (c *CustomFuncs) findJoinFilterConstants( for filterIdx := range filters { props := filters[filterIdx].ScalarProps() if props.TightConstraints { - constCol, constVals, ok := props.Constraints.HasSingleColumnConstValues(c.e.evalCtx) + constCol, constVals, ok := props.Constraints.HasSingleColumnConstValues(b.evalCtx) if !ok || constCol != col { continue } @@ -368,23 +441,23 @@ func (c *CustomFuncs) findJoinFilterConstants( } // findJoinFilterRange tries to find an inequality range for this column. -func (c *CustomFuncs) findJoinFilterRange( +func (b *ConstraintBuilder) findJoinFilterRange( filters memo.FiltersExpr, col opt.ColumnID, ) (filterIdx int, ok bool) { // canAdvance returns whether non-nil, non-NULL datum can be "advanced" // (i.e. both Next and Prev can be called on it). canAdvance := func(val tree.Datum) bool { - if val.IsMax(c.e.evalCtx) { + if val.IsMax(b.evalCtx) { return false } - _, ok := val.Next(c.e.evalCtx) + _, ok := val.Next(b.evalCtx) if !ok { return false } - if val.IsMin(c.e.evalCtx) { + if val.IsMin(b.evalCtx) { return false } - _, ok = val.Prev(c.e.evalCtx) + _, ok = val.Prev(b.evalCtx) return ok } for filterIdx := range filters { @@ -398,7 +471,7 @@ func (c *CustomFuncs) findJoinFilterRange( // see makeConstFilter). We only support 1 span in the execution // engine so check that. if constraintCol.ID() != col || constraintObj.Spans.Count() != 1 || - !c.isCanonicalLookupJoinFilter(filters[filterIdx]) { + !isCanonicalFilter(filters[filterIdx]) { continue } span := constraintObj.Spans.Get(0) @@ -432,9 +505,9 @@ func (c *CustomFuncs) findJoinFilterRange( return -1, false } -// isCanonicalLookupJoinFilter returns true for the limited set of expr's that are +// isCanonicalFilter returns true for the limited set of expr's that are // supported by the lookup joiner at execution time. -func (c *CustomFuncs) isCanonicalLookupJoinFilter(filter memo.FiltersItem) bool { +func isCanonicalFilter(filter memo.FiltersItem) bool { isVar := func(expr opt.Expr) bool { _, ok := expr.(*memo.VariableExpr) return ok diff --git a/pkg/sql/opt/xform/join_funcs_test.go b/pkg/sql/opt/lookupjoin/constraint_builder_test.go similarity index 77% rename from pkg/sql/opt/xform/join_funcs_test.go rename to pkg/sql/opt/lookupjoin/constraint_builder_test.go index 718bdd8ec5e6..22a9f30b4c65 100644 --- a/pkg/sql/opt/xform/join_funcs_test.go +++ b/pkg/sql/opt/lookupjoin/constraint_builder_test.go @@ -1,4 +1,4 @@ -// Copyright 2021 The Cockroach Authors. +// Copyright 2022 The Cockroach Authors. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt. @@ -8,58 +8,24 @@ // by the Apache License, Version 2.0, included in the file // licenses/APL.txt. -package xform_test +package lookupjoin_test import ( "testing" "github.com/cockroachdb/cockroach/pkg/settings/cluster" "github.com/cockroachdb/cockroach/pkg/sql/opt" + "github.com/cockroachdb/cockroach/pkg/sql/opt/lookupjoin" "github.com/cockroachdb/cockroach/pkg/sql/opt/memo" "github.com/cockroachdb/cockroach/pkg/sql/opt/norm" "github.com/cockroachdb/cockroach/pkg/sql/opt/testutils" "github.com/cockroachdb/cockroach/pkg/sql/opt/testutils/testcat" - "github.com/cockroachdb/cockroach/pkg/sql/opt/xform" "github.com/cockroachdb/cockroach/pkg/sql/sem/eval" "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" "github.com/cockroachdb/cockroach/pkg/util/leaktest" ) -type testFilterBuilder struct { - t *testing.T - semaCtx *tree.SemaContext - ctx *eval.Context - o *xform.Optimizer - f *norm.Factory - tbl opt.TableID -} - -func makeFilterBuilder(t *testing.T) testFilterBuilder { - var o xform.Optimizer - ctx := eval.MakeTestingEvalContext(cluster.MakeTestingClusterSettings()) - o.Init(&ctx, nil) - f := o.Factory() - cat := testcat.New() - if _, err := cat.ExecuteDDL("CREATE TABLE a (i INT PRIMARY KEY, b BOOL)"); err != nil { - t.Fatal(err) - } - tn := tree.NewTableNameWithSchema("t", tree.PublicSchemaName, "a") - tbl := f.Metadata().AddTable(cat.Table(tn), tn) - return testFilterBuilder{ - t: t, - semaCtx: &tree.SemaContext{}, - ctx: &ctx, - o: &o, - f: f, - tbl: tbl, - } -} - -func (fb *testFilterBuilder) buildFilter(str string) memo.FiltersItem { - return testutils.BuildFilters(fb.t, fb.f, fb.semaCtx, fb.ctx, str)[0] -} - -func TestCustomFuncs_isCanonicalFilter(t *testing.T) { +func TestIsCanonicalFilter(t *testing.T) { defer leaktest.AfterTest(t)() fb := makeFilterBuilder(t) @@ -102,14 +68,44 @@ func TestCustomFuncs_isCanonicalFilter(t *testing.T) { want: false, }, } - fut := xform.TestingIsCanonicalLookupJoinFilter for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - c := fb.o.CustomFuncs() filter := fb.buildFilter(tt.filter) - if got := fut(c, filter); got != tt.want { - t.Errorf("isCanonicalLookupJoinFilter() = %v, want %v", got, tt.want) + if got := lookupjoin.TestingIsCanonicalLookupJoinFilter(filter); got != tt.want { + t.Errorf("isCanonicalFilter() = %v, want %v", got, tt.want) } }) } } + +type testFilterBuilder struct { + t *testing.T + semaCtx *tree.SemaContext + evalCtx *eval.Context + f *norm.Factory + tbl opt.TableID +} + +func makeFilterBuilder(t *testing.T) testFilterBuilder { + evalCtx := eval.MakeTestingEvalContext(cluster.MakeTestingClusterSettings()) + var f norm.Factory + f.Init(&evalCtx, nil) + cat := testcat.New() + if _, err := cat.ExecuteDDL("CREATE TABLE a (i INT PRIMARY KEY, b BOOL)"); err != nil { + t.Fatal(err) + } + tn := tree.NewTableNameWithSchema("t", tree.PublicSchemaName, "a") + tbl := f.Metadata().AddTable(cat.Table(tn), tn) + return testFilterBuilder{ + t: t, + semaCtx: &tree.SemaContext{}, + evalCtx: &evalCtx, + // o: &o, + f: &f, + tbl: tbl, + } +} + +func (fb *testFilterBuilder) buildFilter(str string) memo.FiltersItem { + return testutils.BuildFilters(fb.t, fb.f, fb.semaCtx, fb.evalCtx, str)[0] +} diff --git a/pkg/sql/opt/xform/join_funcs_export_test.go b/pkg/sql/opt/lookupjoin/exports_test.go similarity index 70% rename from pkg/sql/opt/xform/join_funcs_export_test.go rename to pkg/sql/opt/lookupjoin/exports_test.go index 1d97e9d402e3..fe63f93c2246 100644 --- a/pkg/sql/opt/xform/join_funcs_export_test.go +++ b/pkg/sql/opt/lookupjoin/exports_test.go @@ -1,4 +1,4 @@ -// Copyright 2021 The Cockroach Authors. +// Copyright 2022 The Cockroach Authors. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt. @@ -8,6 +8,6 @@ // by the Apache License, Version 2.0, included in the file // licenses/APL.txt. -package xform +package lookupjoin -var TestingIsCanonicalLookupJoinFilter = (*CustomFuncs).isCanonicalLookupJoinFilter +var TestingIsCanonicalLookupJoinFilter = isCanonicalFilter diff --git a/pkg/sql/opt/xform/BUILD.bazel b/pkg/sql/opt/xform/BUILD.bazel index 15704af86fba..f51af7ab55bc 100644 --- a/pkg/sql/opt/xform/BUILD.bazel +++ b/pkg/sql/opt/xform/BUILD.bazel @@ -34,6 +34,7 @@ go_library( "//pkg/sql/opt/idxconstraint", "//pkg/sql/opt/invertedexpr", "//pkg/sql/opt/invertedidx", + "//pkg/sql/opt/lookupjoin", "//pkg/sql/opt/memo", "//pkg/sql/opt/norm", "//pkg/sql/opt/ordering", @@ -62,8 +63,6 @@ go_test( srcs = [ "coster_test.go", "general_funcs_test.go", - "join_funcs_export_test.go", - "join_funcs_test.go", "join_order_builder_test.go", "main_test.go", "optimizer_test.go", diff --git a/pkg/sql/opt/xform/join_funcs.go b/pkg/sql/opt/xform/join_funcs.go index 10357e154f7e..ab648ff02065 100644 --- a/pkg/sql/opt/xform/join_funcs.go +++ b/pkg/sql/opt/xform/join_funcs.go @@ -17,6 +17,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/opt/cat" "github.com/cockroachdb/cockroach/pkg/sql/opt/constraint" "github.com/cockroachdb/cockroach/pkg/sql/opt/invertedidx" + "github.com/cockroachdb/cockroach/pkg/sql/opt/lookupjoin" "github.com/cockroachdb/cockroach/pkg/sql/opt/memo" "github.com/cockroachdb/cockroach/pkg/sql/opt/ordering" "github.com/cockroachdb/cockroach/pkg/sql/opt/partition" @@ -344,7 +345,6 @@ func (c *CustomFuncs) generateLookupJoinsImpl( if len(leftEq) == 0 { return } - rightEqSet := rightEq.ToSet() // Generate implicit filters from CHECK constraints and computed columns as // optional filters to help generate lookup join keys. @@ -353,8 +353,9 @@ func (c *CustomFuncs) generateLookupJoinsImpl( optionalFilters = append(optionalFilters, computedColFilters...) var pkCols opt.ColList - var eqColMap opt.ColMap var iter scanIndexIter + var cb lookupjoin.ConstraintBuilder + cb.Init(c.e.f, c.e.mem.Metadata(), c.e.evalCtx, inputProps, scanPrivate, rightCols, leftEq, rightEq) iter.Init(c.e.evalCtx, c.e.f, c.e.mem, &c.im, scanPrivate, on, rejectInvertedIndexes) iter.ForEach(func(index cat.Index, onFilters memo.FiltersExpr, indexCols opt.ColSet, _ bool, _ memo.ProjectionsExpr) { // Skip indexes that do no cover all virtual projection columns, if @@ -368,21 +369,22 @@ func (c *CustomFuncs) generateLookupJoinsImpl( return } + keyCols, lookupExpr, inputProjections, constFilters, rightSideCols := + cb.Build(index, onFilters, optionalFilters) + if len(keyCols) == 0 && len(lookupExpr) == 0 { + // We couldn't find equality columns or a lookup expression to + // perform a lookup join on this index. + return + } + lookupJoin := memo.LookupJoinExpr{Input: input} lookupJoin.JoinPrivate = *joinPrivate lookupJoin.JoinType = joinType lookupJoin.Table = scanPrivate.Table lookupJoin.Index = index.Ordinal() lookupJoin.Locking = scanPrivate.Locking - - lookupJoin.KeyCols = make(opt.ColList, 0, numIndexKeyCols) - rightSideCols := make(opt.ColList, 0, numIndexKeyCols) - var inputProjections memo.ProjectionsExpr - - if len(lookupJoin.KeyCols) == 0 && len(lookupJoin.LookupExpr) == 0 { - // We couldn't find equality columns which we can lookup. - return - } + lookupJoin.KeyCols = keyCols + lookupJoin.LookupExpr = lookupExpr // Wrap the input in a Project if any projections are required. The // lookup join will project away these synthesized columns. @@ -406,8 +408,8 @@ func (c *CustomFuncs) generateLookupJoinsImpl( lookupJoin.On = memo.ExtractRemainingJoinFilters(lookupJoin.On, lookupJoin.KeyCols, rightSideCols) } lookupJoin.On = lookupJoin.On.Difference(lookupJoin.LookupExpr) - lookupJoin.On = lookupJoin.On.Difference(constraintFilters) - lookupJoin.ConstFilters = constraintFilters + lookupJoin.On = lookupJoin.On.Difference(constFilters) + lookupJoin.ConstFilters = constFilters // Add input columns and lookup expression columns, since these will be // needed for all join types and cases. From ce650e32fed4badc0f013f02f9d7b9767bd52b7a Mon Sep 17 00:00:00 2001 From: Marcus Gartner Date: Wed, 13 Apr 2022 11:06:12 -0400 Subject: [PATCH 07/12] opt: generalize lookupjoin.ConstraintBuilder API This commit makes the lookupjoin.ConstraintBuilder API more general to make unit testing easier in a future commit. Release note: None --- pkg/sql/opt/lookupjoin/constraint_builder.go | 41 ++++++++++++-------- pkg/sql/opt/xform/join_funcs.go | 3 +- 2 files changed, 26 insertions(+), 18 deletions(-) diff --git a/pkg/sql/opt/lookupjoin/constraint_builder.go b/pkg/sql/opt/lookupjoin/constraint_builder.go index 2fdb18cf8d52..16e1411043ae 100644 --- a/pkg/sql/opt/lookupjoin/constraint_builder.go +++ b/pkg/sql/opt/lookupjoin/constraint_builder.go @@ -31,24 +31,31 @@ type ConstraintBuilder struct { md *opt.Metadata evalCtx *eval.Context - inputProps *props.Relational - sp *memo.ScanPrivate - rightCols opt.ColSet + // The table on the right side of the join to perform the lookup into. + table opt.TableID + // The columns on the left and right side of the join. + leftCols, rightCols opt.ColSet + // The columns on the left and right side of the join with equality + // conditions, i.e., the column at leftEq[i] is held equal to the column at + // rightEq[i]. leftEq, rightEq opt.ColList - rightEqSet opt.ColSet - eqColMap opt.ColMap + // The set of columns in rightEq. + rightEqSet opt.ColSet + // A map of columns in rightEq to their corresponding columns in leftEq. + // This is used to remap computed column expressions, and is only + // initialized if needed. + eqColMap opt.ColMap } // Init initializes a ConstraintBuilder. Once initialized, a ConstraintBuilder // can be reused to build lookup join constraints for all indexes in the given -// ScanPrivate, as long as the join input and ON condition do not change. +// table, as long as the join input and ON condition do not change. func (b *ConstraintBuilder) Init( f *norm.Factory, md *opt.Metadata, evalCtx *eval.Context, - inputProps *props.Relational, - sp *memo.ScanPrivate, - rightCols opt.ColSet, + table opt.TableID, + leftCols, rightCols opt.ColSet, leftEq, rightEq opt.ColList, ) { // This initialization pattern ensures that fields are not unwittingly @@ -57,8 +64,8 @@ func (b *ConstraintBuilder) Init( f: f, md: md, evalCtx: evalCtx, - inputProps: inputProps, - sp: sp, + table: table, + leftCols: leftCols, rightCols: rightCols, leftEq: leftEq, rightEq: rightEq, @@ -104,9 +111,9 @@ func (b *ConstraintBuilder) Build( // // This check doesn't guarantee that we will find lookup join key // columns, but it avoids unnecessary work in most cases. - firstIdxCol := b.sp.Table.IndexColumnID(index, 0) + firstIdxCol := b.table.IndexColumnID(index, 0) if _, ok := b.rightEq.Find(firstIdxCol); !ok { - if _, ok := b.findComputedColJoinEquality(b.sp.Table, firstIdxCol, b.rightEqSet); !ok { + if _, ok := b.findComputedColJoinEquality(b.table, firstIdxCol, b.rightEqSet); !ok { if _, _, ok := b.findJoinFilterConstants(allFilters, firstIdxCol); !ok { return } @@ -124,7 +131,7 @@ func (b *ConstraintBuilder) Build( // All the lookup conditions must apply to the prefix of the index and so // the projected columns created must be created in order. for j := 0; j < numIndexKeyCols; j++ { - idxCol := b.sp.Table.IndexColumnID(index, j) + idxCol := b.table.IndexColumnID(index, j) if eqIdx, ok := b.rightEq.Find(idxCol); ok { keyCols = append(keyCols, b.leftEq[eqIdx]) rightSideCols = append(rightSideCols, idxCol) @@ -137,7 +144,7 @@ func (b *ConstraintBuilder) Build( // and construct a Project expression that wraps the join's input // below. See findComputedColJoinEquality for the requirements to // synthesize a computed column equality constraint. - if expr, ok := b.findComputedColJoinEquality(b.sp.Table, idxCol, b.rightEqSet); ok { + if expr, ok := b.findComputedColJoinEquality(b.table, idxCol, b.rightEqSet); ok { colMeta := b.md.ColumnMeta(idxCol) compEqCol := b.md.AddColumn(fmt.Sprintf("%s_eq", colMeta.Alias), colMeta.Type) @@ -220,11 +227,11 @@ func (b *ConstraintBuilder) Build( var eqFilters memo.FiltersExpr extractEqualityFilter := func(leftCol, rightCol opt.ColumnID) memo.FiltersItem { return memo.ExtractJoinEqualityFilter( - leftCol, rightCol, b.inputProps.OutputCols, b.rightCols, onFilters, + leftCol, rightCol, b.leftCols, b.rightCols, onFilters, ) } eqFilters, constFilters, rightSideCols = b.findFiltersForIndexLookup( - allFilters, b.sp.Table, index, b.leftEq, b.rightEq, extractEqualityFilter, + allFilters, b.table, index, b.leftEq, b.rightEq, extractEqualityFilter, ) lookupExpr = append(eqFilters, constFilters...) diff --git a/pkg/sql/opt/xform/join_funcs.go b/pkg/sql/opt/xform/join_funcs.go index ab648ff02065..446fdc0dd928 100644 --- a/pkg/sql/opt/xform/join_funcs.go +++ b/pkg/sql/opt/xform/join_funcs.go @@ -355,7 +355,8 @@ func (c *CustomFuncs) generateLookupJoinsImpl( var pkCols opt.ColList var iter scanIndexIter var cb lookupjoin.ConstraintBuilder - cb.Init(c.e.f, c.e.mem.Metadata(), c.e.evalCtx, inputProps, scanPrivate, rightCols, leftEq, rightEq) + cb.Init(c.e.f, c.e.mem.Metadata(), c.e.evalCtx, scanPrivate.Table, + inputProps.OutputCols, rightCols, leftEq, rightEq) iter.Init(c.e.evalCtx, c.e.f, c.e.mem, &c.im, scanPrivate, on, rejectInvertedIndexes) iter.ForEach(func(index cat.Index, onFilters memo.FiltersExpr, indexCols opt.ColSet, _ bool, _ memo.ProjectionsExpr) { // Skip indexes that do no cover all virtual projection columns, if From d79341526e91383af50a1fe72bf71d83f6f17ac2 Mon Sep 17 00:00:00 2001 From: Marcus Gartner Date: Wed, 13 Apr 2022 14:26:21 -0400 Subject: [PATCH 08/12] opt: add data-driven tests for lookupjoin.ConstraintBuilder Release note: None --- pkg/sql/opt/lookupjoin/BUILD.bazel | 6 + .../opt/lookupjoin/constraint_builder_test.go | 257 ++++++++++++++++++ pkg/sql/opt/lookupjoin/testdata/computed | 62 +++++ pkg/sql/opt/lookupjoin/testdata/key_cols | 90 ++++++ pkg/sql/opt/lookupjoin/testdata/lookup_expr | 162 +++++++++++ 5 files changed, 577 insertions(+) create mode 100644 pkg/sql/opt/lookupjoin/testdata/computed create mode 100644 pkg/sql/opt/lookupjoin/testdata/key_cols create mode 100644 pkg/sql/opt/lookupjoin/testdata/lookup_expr diff --git a/pkg/sql/opt/lookupjoin/BUILD.bazel b/pkg/sql/opt/lookupjoin/BUILD.bazel index 649a49a0e21d..20182fa5be74 100644 --- a/pkg/sql/opt/lookupjoin/BUILD.bazel +++ b/pkg/sql/opt/lookupjoin/BUILD.bazel @@ -30,11 +30,17 @@ go_test( "//pkg/settings/cluster", "//pkg/sql/opt", "//pkg/sql/opt/constraint", + "//pkg/sql/opt/exec/execbuilder", "//pkg/sql/opt/memo", "//pkg/sql/opt/norm", + "//pkg/sql/opt/optbuilder", + "//pkg/sql/opt/props", "//pkg/sql/opt/testutils", "//pkg/sql/opt/testutils/testcat", + "//pkg/sql/parser", "//pkg/sql/sem/tree", + "//pkg/testutils", "//pkg/util/leaktest", + "@com_github_cockroachdb_datadriven//:datadriven", ], ) diff --git a/pkg/sql/opt/lookupjoin/constraint_builder_test.go b/pkg/sql/opt/lookupjoin/constraint_builder_test.go index 22a9f30b4c65..690af727666e 100644 --- a/pkg/sql/opt/lookupjoin/constraint_builder_test.go +++ b/pkg/sql/opt/lookupjoin/constraint_builder_test.go @@ -11,20 +11,184 @@ package lookupjoin_test import ( + "context" + "fmt" + "strings" "testing" "github.com/cockroachdb/cockroach/pkg/settings/cluster" "github.com/cockroachdb/cockroach/pkg/sql/opt" + "github.com/cockroachdb/cockroach/pkg/sql/opt/exec/execbuilder" "github.com/cockroachdb/cockroach/pkg/sql/opt/lookupjoin" "github.com/cockroachdb/cockroach/pkg/sql/opt/memo" "github.com/cockroachdb/cockroach/pkg/sql/opt/norm" + "github.com/cockroachdb/cockroach/pkg/sql/opt/optbuilder" + "github.com/cockroachdb/cockroach/pkg/sql/opt/props" "github.com/cockroachdb/cockroach/pkg/sql/opt/testutils" "github.com/cockroachdb/cockroach/pkg/sql/opt/testutils/testcat" + "github.com/cockroachdb/cockroach/pkg/sql/parser" "github.com/cockroachdb/cockroach/pkg/sql/sem/eval" "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" + tu "github.com/cockroachdb/cockroach/pkg/testutils" "github.com/cockroachdb/cockroach/pkg/util/leaktest" + "github.com/cockroachdb/datadriven" ) +// The test files support only one command: +// +// - lookup-constraints [arg | arg=val | arg=(val1,val2, ...)]... +// +// Builds lookup join constrains for the given left and right table +// definitions, and an index for the right table.Arguments: +// +// - left=( [not null] [as [stored|virtual], ...) +// +// Information about the left columns. + +// - right=( [not null] [as [stored|virtual], ...) +// +// Information about the left columns. +// +// - index=( [asc|desc], ...) +// +// Information for the index on the right table. +// +func TestLookupConstraints(t *testing.T) { + defer leaktest.AfterTest(t)() + + datadriven.Walk(t, tu.TestDataPath(t), func(t *testing.T, path string) { + semaCtx := tree.MakeSemaContext() + evalCtx := eval.MakeTestingEvalContext(cluster.MakeTestingClusterSettings()) + + datadriven.RunTest(t, path, func(t *testing.T, d *datadriven.TestData) string { + testCatalog := testcat.New() + var f norm.Factory + f.Init(&evalCtx, testCatalog) + md := f.Metadata() + + for _, arg := range d.CmdArgs { + key, vals := arg.Key, arg.Vals + cols := strings.Join(vals, ", ") + switch key { + case "left", "right": + fmt.Printf("CREATE TABLE %s (%s)\n", key, cols) + _, err := testCatalog.ExecuteDDL(fmt.Sprintf("CREATE TABLE %s_table (%s)", key, cols)) + if err != nil { + d.Fatalf(t, "%v", err) + } + + case "index": + _, err := testCatalog.ExecuteDDL(fmt.Sprintf("CREATE INDEX ON right_table (%s)", cols)) + if err != nil { + d.Fatalf(t, "%v", err) + } + + default: + d.Fatalf(t, "unknown argument: %s", key) + } + } + + addTable := func(name string) (_ opt.TableID, cols opt.ColSet, _ error) { + tn := tree.NewUnqualifiedTableName(tree.Name(name)) + table := testCatalog.Table(tn) + tableID := md.AddTable(table, tn) + for i, n := 0, md.Table(tableID).ColumnCount(); i < n; i++ { + colID := tableID.ColumnID(i) + cols.Add(colID) + col := md.Table(tableID).Column(i) + if col.IsComputed() { + expr, err := parser.ParseExpr(col.ComputedExprStr()) + if err != nil { + return 0, opt.ColSet{}, err + } + b := optbuilder.NewScalar(context.Background(), &semaCtx, &evalCtx, &f) + if err := b.Build(expr); err != nil { + return 0, opt.ColSet{}, err + } + compExpr := f.Memo().RootExpr().(opt.ScalarExpr) + md.TableMeta(tableID).AddComputedCol(colID, compExpr) + } + } + return tableID, cols, nil + } + + _, leftCols, err := addTable("left_table") + if err != nil { + t.Fatal(err) + } + rightTable, rightCols, err := addTable("right_table") + if err != nil { + t.Fatal(err) + } + index := md.Table(rightTable).Index(1) + allCols := leftCols.Union(rightCols) + + switch d.Cmd { + case "lookup-constraints": + // Allow specifying optional filters using the "optional:" delimiter. + var filters, optionalFilters memo.FiltersExpr + var err error + if idx := strings.Index(d.Input, "optional:"); idx >= 0 { + optional := d.Input[idx+len("optional:"):] + optionalFilters, err = makeFilters(optional, allCols, &semaCtx, &evalCtx, &f) + if err != nil { + d.Fatalf(t, "%v", err) + } + d.Input = d.Input[:idx] + } + if filters, err = makeFilters(d.Input, allCols, &semaCtx, &evalCtx, &f); err != nil { + d.Fatalf(t, "%v", err) + } + + leftEq, rightEq := memo.ExtractJoinEqualityColumns(leftCols, rightCols, filters) + if len(leftEq) == 0 { + return "lookup join requires equality columns" + } + + var cb lookupjoin.ConstraintBuilder + cb.Init(&f, md, f.EvalContext(), rightTable, leftCols, rightCols, leftEq, rightEq) + keyCols, lookupExpr, inputProjections, _, _ := + cb.Build(index, filters, optionalFilters) + var b strings.Builder + if len(keyCols) == 0 && len(lookupExpr) == 0 { + b.WriteString("lookup join not possible") + } + if len(keyCols) > 0 { + b.WriteString("key cols:\n") + for i := range keyCols { + b.WriteString(" ") + b.WriteString(string(index.Column(i).ColName())) + b.WriteString(" = ") + b.WriteString(md.ColumnMeta(keyCols[i]).Alias) + b.WriteString("\n") + } + } + if len(inputProjections) > 0 { + b.WriteString("input projections:\n") + for i := range inputProjections { + col := inputProjections[i].Col + b.WriteString(" ") + b.WriteString(md.ColumnMeta(col).Alias) + b.WriteString(" = ") + b.WriteString(formatScalar(inputProjections[i].Element, &f, &evalCtx)) + b.WriteString("\n") + } + } + if len(lookupExpr) > 0 { + b.WriteString("lookup expression:\n ") + b.WriteString(formatScalar(&lookupExpr, &f, &evalCtx)) + b.WriteString("\n") + } + return b.String() + + default: + d.Fatalf(t, "unsupported command: %s", d.Cmd) + return "" + } + }) + }) +} + func TestIsCanonicalFilter(t *testing.T) { defer leaktest.AfterTest(t)() fb := makeFilterBuilder(t) @@ -78,6 +242,99 @@ func TestIsCanonicalFilter(t *testing.T) { } } +// makeFilters returns a FiltersExpr generated from the input string that is +// normalized within the context of a Select. By normalizing within a Select, +// rules that only match on Selects are applied, such as SimplifySelectFilters. +// This ensures that these test filters mimic the filters that will be created +// during a real query. +// TODO(mgartner): This function is a duplicate of one in the partialidx_test +// package. Extract this to a utility that can be used in both packages. +func makeFilters( + input string, cols opt.ColSet, semaCtx *tree.SemaContext, evalCtx *eval.Context, f *norm.Factory, +) (memo.FiltersExpr, error) { + filters, err := makeFiltersExpr(input, semaCtx, evalCtx, f) + if err != nil { + return nil, err + } + + // Create an output set of columns for the fake relation with all the + // columns in the test case. + colStatsMap := props.ColStatsMap{} + cols.ForEach(func(col opt.ColumnID) { + colStat, _ := colStatsMap.Add(opt.MakeColSet(col)) + colStat.DistinctCount = 100 + colStat.NullCount = 10 + }) + + // Create a non-zero cardinality to prevent the fake Select from + // simplifying into a ValuesExpr. + card := props.Cardinality{Min: 0, Max: 1} + + // Create stats for the fake relation. + stats := props.Statistics{ + Available: true, + RowCount: 1000, + ColStats: colStatsMap, + Selectivity: props.OneSelectivity, + } + + // Create a fake Select and input so that normalization rules are run. + p := &props.Relational{OutputCols: cols, Cardinality: card, Stats: stats} + fakeRel := f.ConstructFakeRel(&memo.FakeRelPrivate{Props: p}) + sel := f.ConstructSelect(fakeRel, filters) + + // If the normalized relational expression is a Select, return the filters. + if s, ok := sel.(*memo.SelectExpr); ok { + return s.Filters, nil + } + + // Otherwise, the filters may be either true or false. Check the cardinality + // to determine which one. + if sel.Relational().Cardinality.IsZero() { + return memo.FiltersExpr{f.ConstructFiltersItem(memo.FalseSingleton)}, nil + } + + return memo.TrueFilter, nil +} + +// makeFiltersExpr returns a FiltersExpr generated from the input string. +func makeFiltersExpr( + input string, semaCtx *tree.SemaContext, evalCtx *eval.Context, f *norm.Factory, +) (memo.FiltersExpr, error) { + expr, err := parser.ParseExpr(input) + if err != nil { + return nil, err + } + + b := optbuilder.NewScalar(context.Background(), semaCtx, evalCtx, f) + if err := b.Build(expr); err != nil { + return nil, err + } + + root := f.Memo().RootExpr().(opt.ScalarExpr) + + return memo.FiltersExpr{f.ConstructFiltersItem(root)}, nil +} + +func formatScalar(e opt.Expr, f *norm.Factory, evalCtx *eval.Context) string { + execBld := execbuilder.New( + nil /* execFactory */, nil /* optimizer */, f.Memo(), nil, /* catalog */ + e, evalCtx, false, /* allowAutoCommit */ + ) + expr, err := execBld.BuildScalar() + if err != nil { + return fmt.Sprintf("error: %v\n", err) + } + fmtCtx := tree.NewFmtCtx( + tree.FmtSimple, + tree.FmtIndexedVarFormat(func(ctx *tree.FmtCtx, idx int) { + ctx.WriteString(f.Metadata().ColumnMeta(opt.ColumnID(idx + 1)).Alias) + }), + ) + expr.Format(fmtCtx) + return fmtCtx.String() +} + type testFilterBuilder struct { t *testing.T semaCtx *tree.SemaContext diff --git a/pkg/sql/opt/lookupjoin/testdata/computed b/pkg/sql/opt/lookupjoin/testdata/computed new file mode 100644 index 000000000000..a657e7102fd0 --- /dev/null +++ b/pkg/sql/opt/lookupjoin/testdata/computed @@ -0,0 +1,62 @@ +# Tests for computed columns. + +lookup-constraints left=(a int, b int) right=(x int, v int not null as (x + 10) stored) index=(v, x) +x = a +---- +key cols: + v = v_eq + x = a +input projections: + v_eq = a + 10 + +lookup-constraints left=(a int, b int) right=(x int, v int not null as (x + 10) virtual) index=(v, x) +x = a +---- +key cols: + v = v_eq + x = a +input projections: + v_eq = a + 10 + +# TODO(mgartner): We should be able to generate a lookup join by determining +# that v is not null because the filter demands that x is not null, and v is +# calculated from x. +lookup-constraints left=(a int, b int) right=(x int, v int as (x + 10) virtual) index=(v, x) +x = a +---- +lookup join not possible + +lookup-constraints left=(a int, b int) right=(x int, y int, v int not null as (x + 10) virtual) index=(v, x, y) +x = a AND y = b +---- +key cols: + v = v_eq + x = a + y = b +input projections: + v_eq = a + 10 + +lookup-constraints left=(a int, b int) right=(x int, y int, v int not null as (x + 10) virtual) index=(v, x, y) +x = a AND y = 1 +---- +key cols: + v = v_eq + x = a + y = lookup_join_const_col_@7 +input projections: + v_eq = a + 10 + lookup_join_const_col_@7 = 1 + +# TODO(mgartner): We should be able to generate a lookup join constraint by +# projecting an expression for v. +lookup-constraints left=(a int, b int) right=(x int, y int, v int not null as (x + 10) virtual) index=(v, x, y) +x = a AND y IN (1, 2) +---- +lookup join not possible + +# TODO(mgartner): We should be able to generate a lookup join constraint by +# projecting an expression for v. +lookup-constraints left=(a int, b int) right=(x int, y int, v int not null as (x + 10) virtual) index=(v, x, y) +x = a AND y > 0 +---- +lookup join not possible diff --git a/pkg/sql/opt/lookupjoin/testdata/key_cols b/pkg/sql/opt/lookupjoin/testdata/key_cols new file mode 100644 index 000000000000..c903b232c4d4 --- /dev/null +++ b/pkg/sql/opt/lookupjoin/testdata/key_cols @@ -0,0 +1,90 @@ +# This is a comment. +lookup-constraints left=(a int) right=(x int) index=(x) +x = a +---- +key cols: + x = a + +lookup-constraints left=(a int) right=(x int, y int) index=(x) +y = a +---- +lookup join not possible + +lookup-constraints left=(a int, b int, c int) right=(x int, y int, z int) index=(x, y, z) +x = a +---- +key cols: + x = a + +lookup-constraints left=(a int, b int, c int) right=(x int, y int, z int) index=(x, y, z) +x = a AND y = b +---- +key cols: + x = a + y = b + +lookup-constraints left=(a int, b int, c int) right=(x int, y int, z int) index=(x, y, z) +x = a AND z = b +---- +key cols: + x = a + +lookup-constraints left=(a int, b int, c int) right=(x int, y int, z int) index=(x, y, z) +x = a AND y = b AND z = c +---- +key cols: + x = a + y = b + z = c + +lookup-constraints left=(a int, b int, c int) right=(x int, y int, z int) index=(x, y) +x = a AND y = b AND z = c +---- +key cols: + x = a + y = b + +lookup-constraints left=(a int, b int, c int) right=(x int, y int, z int) index=(x, y, z) +y = b AND z = c +---- +lookup join not possible + +lookup-constraints left=(a int, b int, c int) right=(x int, y int, z int) index=(x, y, z) +x = 1 AND y = b +---- +key cols: + x = lookup_join_const_col_@7 + y = b +input projections: + lookup_join_const_col_@7 = 1 + +lookup-constraints left=(a int, b int, c int) right=(x int, y int, z int) index=(x, y, z) +x = 1 AND y = 2 AND z = c +---- +key cols: + x = lookup_join_const_col_@7 + y = lookup_join_const_col_@8 + z = c +input projections: + lookup_join_const_col_@7 = 1 + lookup_join_const_col_@8 = 2 + +lookup-constraints left=(a int, b int, c int) right=(x int, y int, z int) index=(x, y, z) +x = 1 AND y = b AND z = 3 +---- +key cols: + x = lookup_join_const_col_@7 + y = b + z = lookup_join_const_col_@9 +input projections: + lookup_join_const_col_@7 = 1 + lookup_join_const_col_@9 = 3 + +lookup-constraints left=(a int, b int, c int) right=(x int, y int, z int) index=(x, y) +x = 1 AND y = b AND z = 3 +---- +key cols: + x = lookup_join_const_col_@7 + y = b +input projections: + lookup_join_const_col_@7 = 1 diff --git a/pkg/sql/opt/lookupjoin/testdata/lookup_expr b/pkg/sql/opt/lookupjoin/testdata/lookup_expr new file mode 100644 index 000000000000..889dc27159c9 --- /dev/null +++ b/pkg/sql/opt/lookupjoin/testdata/lookup_expr @@ -0,0 +1,162 @@ +# Tests for IN filters. + +lookup-constraints left=(a int, b int) right=(x int, y int, z int) index=(x, y) +x IN (1, 2, 3) AND y = b +---- +lookup expression: + (y = b) AND (x IN (1, 2, 3)) + +lookup-constraints left=(a int, b int, c int) right=(x int, y int, z int) index=(x, y, z) +x IN (1, 2, 3) AND y = b AND z = c +---- +lookup expression: + ((y = b) AND (z = c)) AND (x IN (1, 2, 3)) + +lookup-constraints left=(a int, b int, c int) right=(x int, y int, z int) index=(x, y) +x IN (1, 2, 3) AND y = b AND z = c +---- +lookup expression: + (y = b) AND (x IN (1, 2, 3)) + +lookup-constraints left=(a int, b int, c int) right=(x int, y int, z int) index=(x, y, z) +x IN (1, 2, 3) AND y = 4 AND z = c +---- +lookup expression: + ((z = c) AND (x IN (1, 2, 3))) AND (y = 4) + +lookup-constraints left=(a int, b int, c int) right=(x int, y int, z int) index=(x, y, z) +x IN (1, 2, 3) AND y = b AND z = 1 +---- +lookup expression: + ((y = b) AND (x IN (1, 2, 3))) AND (z = 1) + +lookup-constraints left=(a int, b int, c int) right=(x int, y int, z int) index=(x, y, z) +x IN (1, 2, 3) AND y = b AND z IN (4, 5, 6) +---- +lookup expression: + ((y = b) AND (x IN (1, 2, 3))) AND (z IN (4, 5, 6)) + +lookup-constraints left=(a int, b int, c int) right=(x int, y int, z int) index=(x, y, z) +x IN (1, 2, 3) AND y = b AND z IN (4, 5, 6) +---- +lookup expression: + ((y = b) AND (x IN (1, 2, 3))) AND (z IN (4, 5, 6)) + +lookup-constraints left=(a int, b int) right=(x int, y int) index=(x, y) +y = b +optional: x IN (1, 2, 3) +---- +lookup expression: + (y = b) AND (x IN (1, 2, 3)) + +lookup-constraints left=(a int, b int) right=(x int, y int) index=(x, y) +x = a +optional: y IN (1, 2, 3) +---- +lookup expression: + (x = a) AND (y IN (1, 2, 3)) + +lookup-constraints left=(a int, b int, c int) right=(x int, y int, z int) index=(x, y, z) +x = 1 AND z = c +optional: y IN (3, 4) +---- +lookup expression: + ((z = c) AND (x = 1)) AND (y IN (3, 4)) + +lookup-constraints left=(a int, b int, c int) right=(x int, y int, z int) index=(x, y, z) +z = c +optional: x IN (1, 2) AND y IN (3, 4) +---- +lookup expression: + ((z = c) AND (x IN (1, 2))) AND (y IN (3, 4)) + +lookup-constraints left=(a int, b int, c int) right=(x int, y int, z int) index=(x, y, z) +y = b +optional: x IN (1, 2) AND z IN (3, 4) +---- +lookup expression: + ((y = b) AND (x IN (1, 2))) AND (z IN (3, 4)) + +# TODO(#75596): The lookup expression should not contain (z IN (3, 4)) because +# it is an optional filter from a CHECK constraint. It will only increase the +# number of lookup spans generated without increasing the selectivity of the +# lookup. +lookup-constraints left=(a int, b int, c int) right=(x int, y int, z int) index=(x, y, z) +x = 1 AND y = b +optional: z IN (3, 4) +---- +lookup expression: + ((y = b) AND (x = 1)) AND (z IN (3, 4)) + + +# Test for range filters. + +lookup-constraints left=(a int, b int) right=(x int, y int) index=(x, y) +x = a AND y > 0 +---- +lookup expression: + (x = a) AND (y > 0) + +lookup-constraints left=(a int, b int) right=(x int, y int) index=(x desc, y desc) +x = a AND y > 0 +---- +lookup expression: + (x = a) AND (y > 0) + +lookup-constraints left=(a int, b int) right=(x int, y int) index=(x, y) +x > 0 AND y = b +---- +lookup join not possible + +lookup-constraints left=(a int, b int) right=(x int, y int) index=(x, y) +x = a +optional: y > 0 +---- +lookup expression: + (x = a) AND (y > 0) + +lookup-constraints left=(a int, b int, c int) right=(x int, y int, z int) index=(x, y, z) +x = a AND y = b AND z > 0 +---- +lookup expression: + ((x = a) AND (y = b)) AND (z > 0) + +lookup-constraints left=(a int, b int, c int) right=(x int, y int, z int) index=(x, y, z) +x = 1 AND y = b AND z > 0 +---- +lookup expression: + ((y = b) AND (x = 1)) AND (z > 0) + +lookup-constraints left=(a int, b int, c int) right=(x int, y int, z int) index=(x, y, z) +x = a AND y = 1 AND z > 0 +---- +lookup expression: + ((x = a) AND (y = 1)) AND (z > 0) + +lookup-constraints left=(a int, b int, c int) right=(x int, y int, z int) index=(x, y, z) +x = 1 AND y = b +optional: z > 0 +---- +lookup expression: + ((y = b) AND (x = 1)) AND (z > 0) + + +# Test for range filters and IN filters. + +lookup-constraints left=(a int, b int, c int) right=(x int, y int, z int) index=(x, y, z) +x IN (1, 2) AND y = b AND z > 0 +---- +lookup expression: + ((y = b) AND (x IN (1, 2))) AND (z > 0) + +lookup-constraints left=(a int, b int, c int) right=(x int, y int, z int) index=(x, y, z) +x IN (1, 2) AND y > 0 AND z = c +---- +lookup join not possible + +lookup-constraints left=(a int, b int, c int) right=(x int, y int, z int) index=(x, y, z) +y = b AND z > 0 +optional: x IN (1, 2) +---- +lookup expression: + ((y = b) AND (x IN (1, 2))) AND (z > 0) From 84a67d15ac10fe25bedd496f1bd8735c0a43942d Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Thu, 28 Apr 2022 14:13:30 +0000 Subject: [PATCH 09/12] kvclient: fix gRPC stream leak in rangefeed client When the DistSender rangefeed client received a `RangeFeedError` message and propagated a retryable error up the stack, it would fail to close the existing gRPC stream, causing stream/goroutine leaks. Release note (bug fix): Fixed a goroutine leak when internal rangefeed clients received certain kinds of retriable errors. --- pkg/kv/kvclient/kvcoord/dist_sender_rangefeed.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pkg/kv/kvclient/kvcoord/dist_sender_rangefeed.go b/pkg/kv/kvclient/kvcoord/dist_sender_rangefeed.go index 5228c57301a7..b7b8fad40499 100644 --- a/pkg/kv/kvclient/kvcoord/dist_sender_rangefeed.go +++ b/pkg/kv/kvclient/kvcoord/dist_sender_rangefeed.go @@ -388,6 +388,10 @@ func (ds *DistSender) singleRangeFeed( eventCh chan<- *roachpb.RangeFeedEvent, onRangeEvent onRangeEventCb, ) (hlc.Timestamp, error) { + // Ensure context is cancelled on all errors, to prevent gRPC stream leaks. + ctx, cancel := context.WithCancel(ctx) + defer cancel() + args := roachpb.RangeFeedRequest{ Span: span, Header: roachpb.Header{ From 5033bb4aadace5f649b6e87a7581f11bfd7aeaf8 Mon Sep 17 00:00:00 2001 From: Aditya Maru Date: Thu, 28 Apr 2022 20:10:06 -0400 Subject: [PATCH 10/12] joberror: add ConnectionReset/ConnectionRefused to retryable err allow list Bulk jobs will no longer treat `sysutil.IsErrConnectionReset` and `sysutil.IsErrConnectionRefused` as permanent errors. IMPORT, RESTORE and BACKUP will treat this error as transient and retry. Release note: None --- pkg/jobs/joberror/BUILD.bazel | 1 + pkg/jobs/joberror/errors.go | 11 +++++++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/pkg/jobs/joberror/BUILD.bazel b/pkg/jobs/joberror/BUILD.bazel index 2b6cac7e92bd..5774616a65ae 100644 --- a/pkg/jobs/joberror/BUILD.bazel +++ b/pkg/jobs/joberror/BUILD.bazel @@ -10,6 +10,7 @@ go_library( "//pkg/sql/flowinfra", "//pkg/util/circuit", "//pkg/util/grpcutil", + "//pkg/util/sysutil", "@com_github_cockroachdb_errors//:errors", ], ) diff --git a/pkg/jobs/joberror/errors.go b/pkg/jobs/joberror/errors.go index 40f3b36ce3f4..7d2d7a76d0a9 100644 --- a/pkg/jobs/joberror/errors.go +++ b/pkg/jobs/joberror/errors.go @@ -17,6 +17,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/flowinfra" "github.com/cockroachdb/cockroach/pkg/util/circuit" "github.com/cockroachdb/cockroach/pkg/util/grpcutil" + "github.com/cockroachdb/cockroach/pkg/util/sysutil" "github.com/cockroachdb/errors" ) @@ -45,9 +46,9 @@ func isBreakerOpenError(err error) bool { } // IsPermanentBulkJobError returns true if the error results in a permanent -// failure of a bulk job (IMPORT, BACKUP, RESTORE). This function is a allowlist -// instead of a blocklist: only known safe errors are confirmed to not be -// permanent errors. Anything unknown is assumed to be permanent. +// failure of a bulk job (IMPORT, BACKUP, RESTORE). This function is an +// allowlist instead of a blocklist: only known safe errors are confirmed to not +// be permanent errors. Anything unknown is assumed to be permanent. func IsPermanentBulkJobError(err error) bool { if err == nil { return false @@ -57,5 +58,7 @@ func IsPermanentBulkJobError(err error) bool { !grpcutil.IsClosedConnection(err) && !flowinfra.IsNoInboundStreamConnectionError(err) && !kvcoord.IsSendError(err) && - !isBreakerOpenError(err) + !isBreakerOpenError(err) && + !sysutil.IsErrConnectionReset(err) && + !sysutil.IsErrConnectionRefused(err) } From bd36058ea99159404518bf9a724cf989afc1a3f4 Mon Sep 17 00:00:00 2001 From: irfan sharif Date: Fri, 29 Apr 2022 02:21:40 -0400 Subject: [PATCH 11/12] backupccl: break dependency to testcluster Noticed we were building testing library packages when building CRDB binaries. $ bazel query "somepath(//pkg/cmd/cockroach-short, //pkg/testutils/testcluster)" //pkg/cmd/cockroach-short:cockroach-short //pkg/cmd/cockroach-short:cockroach-short_lib //pkg/ccl:ccl //pkg/ccl/backupccl:backupccl //pkg/testutils/testcluster:testcluster Release note: None --- pkg/ccl/backupccl/BUILD.bazel | 12 ++---------- pkg/ccl/backupccl/{testutils.go => utils_test.go} | 0 2 files changed, 2 insertions(+), 10 deletions(-) rename pkg/ccl/backupccl/{testutils.go => utils_test.go} (100%) diff --git a/pkg/ccl/backupccl/BUILD.bazel b/pkg/ccl/backupccl/BUILD.bazel index 5342ee295367..7168db303adc 100644 --- a/pkg/ccl/backupccl/BUILD.bazel +++ b/pkg/ccl/backupccl/BUILD.bazel @@ -33,7 +33,6 @@ go_library( "split_and_scatter_processor.go", "system_schema.go", "targets.go", - "testutils.go", ], embed = [":backupccl_go_proto"], importpath = "github.com/cockroachdb/cockroach/pkg/ccl/backupccl", @@ -79,7 +78,6 @@ go_library( "//pkg/sql/catalog/descidgen", "//pkg/sql/catalog/descpb", "//pkg/sql/catalog/descs", - "//pkg/sql/catalog/desctestutils", "//pkg/sql/catalog/ingesting", "//pkg/sql/catalog/multiregion", "//pkg/sql/catalog/nstree", @@ -111,10 +109,6 @@ go_library( "//pkg/sql/stats", "//pkg/sql/types", "//pkg/storage", - "//pkg/testutils", - "//pkg/testutils/serverutils", - "//pkg/testutils/sqlutils", - "//pkg/testutils/testcluster", "//pkg/util", "//pkg/util/admission", "//pkg/util/contextutil", @@ -129,7 +123,6 @@ go_library( "//pkg/util/metric", "//pkg/util/mon", "//pkg/util/protoutil", - "//pkg/util/randutil", "//pkg/util/retry", "//pkg/util/span", "//pkg/util/stop", @@ -137,15 +130,12 @@ go_library( "//pkg/util/timeutil", "//pkg/util/tracing", "//pkg/util/uuid", - "//pkg/workload/bank", - "//pkg/workload/workloadsql", "@com_github_cockroachdb_errors//:errors", "@com_github_cockroachdb_logtags//:logtags", "@com_github_gogo_protobuf//jsonpb", "@com_github_gogo_protobuf//types", "@com_github_kr_pretty//:pretty", "@com_github_robfig_cron_v3//:cron", - "@com_github_stretchr_testify//require", ], ) @@ -180,6 +170,7 @@ go_test( "show_test.go", "split_and_scatter_processor_test.go", "system_schema_test.go", + "utils_test.go", ], data = glob(["testdata/**"]) + ["//c-deps:libgeos"], embed = [":backupccl"], @@ -271,6 +262,7 @@ go_test( "//pkg/util/timeutil", "//pkg/util/uuid", "//pkg/workload/bank", + "//pkg/workload/workloadsql", "@com_github_aws_aws_sdk_go//aws/credentials", "@com_github_cockroachdb_cockroach_go_v2//crdb", "@com_github_cockroachdb_datadriven//:datadriven", diff --git a/pkg/ccl/backupccl/testutils.go b/pkg/ccl/backupccl/utils_test.go similarity index 100% rename from pkg/ccl/backupccl/testutils.go rename to pkg/ccl/backupccl/utils_test.go From 6f3230e4d558b83d0482d719caaa37d488642f1a Mon Sep 17 00:00:00 2001 From: Marcus Gartner Date: Wed, 27 Apr 2022 13:17:03 -0400 Subject: [PATCH 12/12] opt: add lookupjoin.Constraint struct The `lookupjoin.Constraint` struct has been added to encapsulate multiple data structures that represent a strategy for constraining a lookup join. Release note: None --- pkg/sql/opt/lookupjoin/BUILD.bazel | 4 +- pkg/sql/opt/lookupjoin/constraint_builder.go | 93 +++++++++++++------ .../opt/lookupjoin/constraint_builder_test.go | 23 +++-- pkg/sql/opt/xform/join_funcs.go | 21 ++--- 4 files changed, 86 insertions(+), 55 deletions(-) diff --git a/pkg/sql/opt/lookupjoin/BUILD.bazel b/pkg/sql/opt/lookupjoin/BUILD.bazel index 20182fa5be74..3f59c0b11589 100644 --- a/pkg/sql/opt/lookupjoin/BUILD.bazel +++ b/pkg/sql/opt/lookupjoin/BUILD.bazel @@ -12,9 +12,9 @@ go_library( "//pkg/sql/opt/memo", "//pkg/sql/opt/norm", "//pkg/sql/opt/props", + "//pkg/sql/sem/eval", "//pkg/sql/sem/tree", "//pkg/sql/types", - "@com_github_cockroachdb_errors//:errors", ], ) @@ -29,7 +29,6 @@ go_test( deps = [ "//pkg/settings/cluster", "//pkg/sql/opt", - "//pkg/sql/opt/constraint", "//pkg/sql/opt/exec/execbuilder", "//pkg/sql/opt/memo", "//pkg/sql/opt/norm", @@ -38,6 +37,7 @@ go_test( "//pkg/sql/opt/testutils", "//pkg/sql/opt/testutils/testcat", "//pkg/sql/parser", + "//pkg/sql/sem/eval", "//pkg/sql/sem/tree", "//pkg/testutils", "//pkg/util/leaktest", diff --git a/pkg/sql/opt/lookupjoin/constraint_builder.go b/pkg/sql/opt/lookupjoin/constraint_builder.go index 16e1411043ae..7bb7f939ed52 100644 --- a/pkg/sql/opt/lookupjoin/constraint_builder.go +++ b/pkg/sql/opt/lookupjoin/constraint_builder.go @@ -24,6 +24,49 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/types" ) +// Constraint is used to constrain a lookup join. There are two types of +// constraints: +// +// 1. Constraints with KeyCols use columns from the input to directly +// constrain lookups into a target index. +// 2. Constraints with a LookupExpr build multiple spans from an expression +// that is evaluated for each input row. These spans are used to perform +// lookups into a target index. +// +// A constraint is not constraining if both KeyCols and LookupExpr are empty. +// See IsUnconstrained. +type Constraint struct { + // KeyCols is an ordered list of columns from the left side of the join to + // be used as lookup join key columns. This list corresponds to the columns + // in RightSideCols. It will be nil if LookupExpr is non-nil. + KeyCols opt.ColList + + // RightSideCols is an ordered list of prefix index columns that are + // constrained by this constraint. It corresponds 1:1 with the columns in + // KeyCols if KeyCols is non-nil. Otherwise, it includes the prefix of index + // columns constrained by LookupExpr. + RightSideCols opt.ColList + + // LookupExpr is a lookup expression for multi-span lookup joins. It will be + // nil if KeyCols is non-nil. + LookupExpr memo.FiltersExpr + + // InputProjections contains constant values and computed columns that must + // be projected on the lookup join's input. + InputProjections memo.ProjectionsExpr + + // ConstFilters contains constant equalities and ranges in either KeyCols or + // LookupExpr that are used to aid selectivity estimation. See + // memo.LookupJoinPrivate.ConstFilters. + ConstFilters memo.FiltersExpr +} + +// IsUnconstrained returns true if the constraint does not constrain a lookup +// join. +func (c *Constraint) IsUnconstrained() bool { + return len(c.KeyCols) == 0 && len(c.LookupExpr) == 0 +} + // ConstraintBuilder determines how to constrain index key columns for a lookup // join. See Build for more details. type ConstraintBuilder struct { @@ -73,33 +116,11 @@ func (b *ConstraintBuilder) Init( } } -// Build determines how to constrain index key columns for a lookup join. It -// returns either a list of key columns or a lookup expression. It has several -// return values: -// -// keyCols - An ordered list of columns from the left side of the -// join that correspond to a prefix of columns in the given -// index. -// lookupExpr - A lookup expression for multi-span lookup joins. -// inputProjections - Projections of constant values and computed columns that -// must be projected on the lookup join's input. -// constFilters - Filters representing constant equalities and ranges in -// either keyCols or lookupExpr that are used to aid -// selectivity estimation. -// rightSideCols - A list of constrained index columns. -// -// Build will return either non-nil keyCols or a non-nil lookupExpr, but not -// both. If both keyCols and lookupExpr are nil, then the index cannot be used -// for a lookup join. +// Build returns a Constraint that constrains a lookup join on the given index. +// The constraint returned may be unconstrained if no constraint could be built. func (b *ConstraintBuilder) Build( index cat.Index, onFilters, optionalFilters memo.FiltersExpr, -) ( - keyCols opt.ColList, - lookupExpr memo.FiltersExpr, - inputProjections memo.ProjectionsExpr, - constFilters memo.FiltersExpr, - rightSideCols opt.ColList, -) { +) Constraint { allFilters := append(onFilters, optionalFilters...) // Check if the first column in the index either: @@ -115,7 +136,7 @@ func (b *ConstraintBuilder) Build( if _, ok := b.rightEq.Find(firstIdxCol); !ok { if _, ok := b.findComputedColJoinEquality(b.table, firstIdxCol, b.rightEqSet); !ok { if _, _, ok := b.findJoinFilterConstants(allFilters, firstIdxCol); !ok { - return + return Constraint{} } } } @@ -124,8 +145,11 @@ func (b *ConstraintBuilder) Build( // an equality with another column or a constant. numIndexKeyCols := index.LaxKeyColumnCount() - keyCols = make(opt.ColList, 0, numIndexKeyCols) - rightSideCols = make(opt.ColList, 0, numIndexKeyCols) + keyCols := make(opt.ColList, 0, numIndexKeyCols) + rightSideCols := make(opt.ColList, 0, numIndexKeyCols) + var inputProjections memo.ProjectionsExpr + var lookupExpr memo.FiltersExpr + var constFilters memo.FiltersExpr shouldBuildMultiSpanLookupJoin := false // All the lookup conditions must apply to the prefix of the index and so @@ -237,12 +261,21 @@ func (b *ConstraintBuilder) Build( // A multi-span lookup join with a lookup expression has no key columns // and requires no projections on the input. - return nil /* keyCols */, lookupExpr, nil /* inputProjections */, constFilters, rightSideCols + return Constraint{ + RightSideCols: rightSideCols, + LookupExpr: lookupExpr, + ConstFilters: constFilters, + } } // If we did not build a lookup expression, return the key columns we found, // if any. - return keyCols, nil /* lookupExpr */, inputProjections, constFilters, rightSideCols + return Constraint{ + KeyCols: keyCols, + RightSideCols: rightSideCols, + InputProjections: inputProjections, + ConstFilters: constFilters, + } } // findComputedColJoinEquality returns the computed column expression of col and diff --git a/pkg/sql/opt/lookupjoin/constraint_builder_test.go b/pkg/sql/opt/lookupjoin/constraint_builder_test.go index 690af727666e..bafdd944cc48 100644 --- a/pkg/sql/opt/lookupjoin/constraint_builder_test.go +++ b/pkg/sql/opt/lookupjoin/constraint_builder_test.go @@ -147,36 +147,35 @@ func TestLookupConstraints(t *testing.T) { var cb lookupjoin.ConstraintBuilder cb.Init(&f, md, f.EvalContext(), rightTable, leftCols, rightCols, leftEq, rightEq) - keyCols, lookupExpr, inputProjections, _, _ := - cb.Build(index, filters, optionalFilters) + lookupConstraint := cb.Build(index, filters, optionalFilters) var b strings.Builder - if len(keyCols) == 0 && len(lookupExpr) == 0 { + if lookupConstraint.IsUnconstrained() { b.WriteString("lookup join not possible") } - if len(keyCols) > 0 { + if len(lookupConstraint.KeyCols) > 0 { b.WriteString("key cols:\n") - for i := range keyCols { + for i := range lookupConstraint.KeyCols { b.WriteString(" ") b.WriteString(string(index.Column(i).ColName())) b.WriteString(" = ") - b.WriteString(md.ColumnMeta(keyCols[i]).Alias) + b.WriteString(md.ColumnMeta(lookupConstraint.KeyCols[i]).Alias) b.WriteString("\n") } } - if len(inputProjections) > 0 { + if len(lookupConstraint.InputProjections) > 0 { b.WriteString("input projections:\n") - for i := range inputProjections { - col := inputProjections[i].Col + for i := range lookupConstraint.InputProjections { + col := lookupConstraint.InputProjections[i].Col b.WriteString(" ") b.WriteString(md.ColumnMeta(col).Alias) b.WriteString(" = ") - b.WriteString(formatScalar(inputProjections[i].Element, &f, &evalCtx)) + b.WriteString(formatScalar(lookupConstraint.InputProjections[i].Element, &f, &evalCtx)) b.WriteString("\n") } } - if len(lookupExpr) > 0 { + if len(lookupConstraint.LookupExpr) > 0 { b.WriteString("lookup expression:\n ") - b.WriteString(formatScalar(&lookupExpr, &f, &evalCtx)) + b.WriteString(formatScalar(&lookupConstraint.LookupExpr, &f, &evalCtx)) b.WriteString("\n") } return b.String() diff --git a/pkg/sql/opt/xform/join_funcs.go b/pkg/sql/opt/xform/join_funcs.go index 446fdc0dd928..5cb1f0038afd 100644 --- a/pkg/sql/opt/xform/join_funcs.go +++ b/pkg/sql/opt/xform/join_funcs.go @@ -370,9 +370,8 @@ func (c *CustomFuncs) generateLookupJoinsImpl( return } - keyCols, lookupExpr, inputProjections, constFilters, rightSideCols := - cb.Build(index, onFilters, optionalFilters) - if len(keyCols) == 0 && len(lookupExpr) == 0 { + lookupConstraint := cb.Build(index, onFilters, optionalFilters) + if lookupConstraint.IsUnconstrained() { // We couldn't find equality columns or a lookup expression to // perform a lookup join on this index. return @@ -384,15 +383,15 @@ func (c *CustomFuncs) generateLookupJoinsImpl( lookupJoin.Table = scanPrivate.Table lookupJoin.Index = index.Ordinal() lookupJoin.Locking = scanPrivate.Locking - lookupJoin.KeyCols = keyCols - lookupJoin.LookupExpr = lookupExpr + lookupJoin.KeyCols = lookupConstraint.KeyCols + lookupJoin.LookupExpr = lookupConstraint.LookupExpr // Wrap the input in a Project if any projections are required. The // lookup join will project away these synthesized columns. - if len(inputProjections) > 0 { + if len(lookupConstraint.InputProjections) > 0 { lookupJoin.Input = c.e.f.ConstructProject( lookupJoin.Input, - inputProjections, + lookupConstraint.InputProjections, lookupJoin.Input.Relational().OutputCols, ) } @@ -400,17 +399,17 @@ func (c *CustomFuncs) generateLookupJoinsImpl( tableFDs := memo.MakeTableFuncDep(md, scanPrivate.Table) // A lookup join will drop any input row which contains NULLs, so a lax key // is sufficient. - lookupJoin.LookupColsAreTableKey = tableFDs.ColsAreLaxKey(rightSideCols.ToSet()) + lookupJoin.LookupColsAreTableKey = tableFDs.ColsAreLaxKey(lookupConstraint.RightSideCols.ToSet()) // Remove redundant filters from the ON condition if columns were // constrained by equality filters or constant filters. lookupJoin.On = onFilters if len(lookupJoin.KeyCols) > 0 { - lookupJoin.On = memo.ExtractRemainingJoinFilters(lookupJoin.On, lookupJoin.KeyCols, rightSideCols) + lookupJoin.On = memo.ExtractRemainingJoinFilters(lookupJoin.On, lookupJoin.KeyCols, lookupConstraint.RightSideCols) } lookupJoin.On = lookupJoin.On.Difference(lookupJoin.LookupExpr) - lookupJoin.On = lookupJoin.On.Difference(constFilters) - lookupJoin.ConstFilters = constFilters + lookupJoin.On = lookupJoin.On.Difference(lookupConstraint.ConstFilters) + lookupJoin.ConstFilters = lookupConstraint.ConstFilters // Add input columns and lookup expression columns, since these will be // needed for all join types and cases.