diff --git a/DEPS.bzl b/DEPS.bzl index 96543cf51c60..90743b8ffbcc 100644 --- a/DEPS.bzl +++ b/DEPS.bzl @@ -628,8 +628,8 @@ def go_deps(): name = "com_github_cockroachdb_pebble", build_file_proto_mode = "disable_global", importpath = "github.com/cockroachdb/pebble", - sum = "h1:5qIOzg4DH0pJqNFV5KyqtfIJalHGorKbsMQlog8sO1g=", - version = "v0.0.0-20210712141052-2215b8d4c8ab", + sum = "h1:Igd6YmtOZ77EgLAIaE9+mHl7+sAKaZ5m4iMI0Dz/J2A=", + version = "v0.0.0-20210719141320-8c3bd06debb5", ) go_repository( diff --git a/Makefile b/Makefile index 1477fcb73a3a..e16a0250b79c 100644 --- a/Makefile +++ b/Makefile @@ -879,9 +879,12 @@ EXECGEN_TARGETS = \ pkg/sql/colexec/colexecsel/default_cmp_sel_ops.eg.go \ pkg/sql/colexec/colexecsel/selection_ops.eg.go \ pkg/sql/colexec/colexecsel/sel_like_ops.eg.go \ + pkg/sql/colexec/colexecwindow/first_value.eg.go \ pkg/sql/colexec/colexecwindow/lag.eg.go \ + pkg/sql/colexec/colexecwindow/last_value.eg.go \ pkg/sql/colexec/colexecwindow/lead.eg.go \ pkg/sql/colexec/colexecwindow/ntile.eg.go \ + pkg/sql/colexec/colexecwindow/nth_value.eg.go \ pkg/sql/colexec/colexecwindow/range_offset_handler.eg.go \ pkg/sql/colexec/colexecwindow/rank.eg.go \ pkg/sql/colexec/colexecwindow/relative_rank.eg.go \ diff --git a/cloud/kubernetes/bring-your-own-certs/client.yaml b/cloud/kubernetes/bring-your-own-certs/client.yaml index 82c3d9a0c594..f4dc4eb23345 100644 --- a/cloud/kubernetes/bring-your-own-certs/client.yaml +++ b/cloud/kubernetes/bring-your-own-certs/client.yaml @@ -19,7 +19,7 @@ spec: serviceAccountName: cockroachdb containers: - name: cockroachdb-client - image: cockroachdb/cockroach:v21.1.5 + image: cockroachdb/cockroach:v21.1.6 # Keep a pod open indefinitely so kubectl exec can be used to get a shell to it # and run cockroach client commands, such as cockroach sql, cockroach node status, etc. command: diff --git a/cloud/kubernetes/bring-your-own-certs/cockroachdb-statefulset.yaml b/cloud/kubernetes/bring-your-own-certs/cockroachdb-statefulset.yaml index feee1345712a..d0c0b8d6e493 100644 --- a/cloud/kubernetes/bring-your-own-certs/cockroachdb-statefulset.yaml +++ b/cloud/kubernetes/bring-your-own-certs/cockroachdb-statefulset.yaml @@ -152,7 +152,7 @@ spec: topologyKey: kubernetes.io/hostname containers: - name: cockroachdb - image: cockroachdb/cockroach:v21.1.5 + image: cockroachdb/cockroach:v21.1.6 imagePullPolicy: IfNotPresent # TODO: Change these to appropriate values for the hardware that you're running. 
You can see # the resources that can be allocated on each of your Kubernetes nodes by running: diff --git a/cloud/kubernetes/client-secure.yaml b/cloud/kubernetes/client-secure.yaml index 6ec53aabe0eb..76e89fcbe584 100644 --- a/cloud/kubernetes/client-secure.yaml +++ b/cloud/kubernetes/client-secure.yaml @@ -31,7 +31,7 @@ spec: mountPath: /cockroach-certs containers: - name: cockroachdb-client - image: cockroachdb/cockroach:v21.1.5 + image: cockroachdb/cockroach:v21.1.6 imagePullPolicy: IfNotPresent volumeMounts: - name: client-certs diff --git a/cloud/kubernetes/cluster-init-secure.yaml b/cloud/kubernetes/cluster-init-secure.yaml index 7d0e469cd23b..711438306c76 100644 --- a/cloud/kubernetes/cluster-init-secure.yaml +++ b/cloud/kubernetes/cluster-init-secure.yaml @@ -33,7 +33,7 @@ spec: mountPath: /cockroach-certs containers: - name: cluster-init - image: cockroachdb/cockroach:v21.1.5 + image: cockroachdb/cockroach:v21.1.6 imagePullPolicy: IfNotPresent volumeMounts: - name: client-certs diff --git a/cloud/kubernetes/cluster-init.yaml b/cloud/kubernetes/cluster-init.yaml index 8706c87e5283..1428e32457d0 100644 --- a/cloud/kubernetes/cluster-init.yaml +++ b/cloud/kubernetes/cluster-init.yaml @@ -9,7 +9,7 @@ spec: spec: containers: - name: cluster-init - image: cockroachdb/cockroach:v21.1.5 + image: cockroachdb/cockroach:v21.1.6 imagePullPolicy: IfNotPresent command: - "/cockroach/cockroach" diff --git a/cloud/kubernetes/cockroachdb-statefulset-secure.yaml b/cloud/kubernetes/cockroachdb-statefulset-secure.yaml index 88c35c0aadfa..22dc7bab9a40 100644 --- a/cloud/kubernetes/cockroachdb-statefulset-secure.yaml +++ b/cloud/kubernetes/cockroachdb-statefulset-secure.yaml @@ -194,7 +194,7 @@ spec: topologyKey: kubernetes.io/hostname containers: - name: cockroachdb - image: cockroachdb/cockroach:v21.1.5 + image: cockroachdb/cockroach:v21.1.6 imagePullPolicy: IfNotPresent # TODO: Change these to appropriate values for the hardware that you're running. You can see # the resources that can be allocated on each of your Kubernetes nodes by running: diff --git a/cloud/kubernetes/cockroachdb-statefulset.yaml b/cloud/kubernetes/cockroachdb-statefulset.yaml index 42f3995fad64..277de94bbad4 100644 --- a/cloud/kubernetes/cockroachdb-statefulset.yaml +++ b/cloud/kubernetes/cockroachdb-statefulset.yaml @@ -97,7 +97,7 @@ spec: topologyKey: kubernetes.io/hostname containers: - name: cockroachdb - image: cockroachdb/cockroach:v21.1.5 + image: cockroachdb/cockroach:v21.1.6 imagePullPolicy: IfNotPresent # TODO: Change these to appropriate values for the hardware that you're running. 
You can see # the resources that can be allocated on each of your Kubernetes nodes by running: diff --git a/cloud/kubernetes/multiregion/client-secure.yaml b/cloud/kubernetes/multiregion/client-secure.yaml index a3c6ecb25ed1..55774211ffdd 100644 --- a/cloud/kubernetes/multiregion/client-secure.yaml +++ b/cloud/kubernetes/multiregion/client-secure.yaml @@ -8,7 +8,7 @@ spec: serviceAccountName: cockroachdb containers: - name: cockroachdb-client - image: cockroachdb/cockroach:v21.1.5 + image: cockroachdb/cockroach:v21.1.6 imagePullPolicy: IfNotPresent volumeMounts: - name: client-certs diff --git a/cloud/kubernetes/multiregion/cluster-init-secure.yaml b/cloud/kubernetes/multiregion/cluster-init-secure.yaml index e5a4b4dd32b6..ba006b5dd1db 100644 --- a/cloud/kubernetes/multiregion/cluster-init-secure.yaml +++ b/cloud/kubernetes/multiregion/cluster-init-secure.yaml @@ -10,7 +10,7 @@ spec: serviceAccountName: cockroachdb containers: - name: cluster-init - image: cockroachdb/cockroach:v21.1.5 + image: cockroachdb/cockroach:v21.1.6 imagePullPolicy: IfNotPresent volumeMounts: - name: client-certs diff --git a/cloud/kubernetes/multiregion/cockroachdb-statefulset-secure.yaml b/cloud/kubernetes/multiregion/cockroachdb-statefulset-secure.yaml index 15cefc648424..adaf2c110362 100644 --- a/cloud/kubernetes/multiregion/cockroachdb-statefulset-secure.yaml +++ b/cloud/kubernetes/multiregion/cockroachdb-statefulset-secure.yaml @@ -166,7 +166,7 @@ spec: topologyKey: kubernetes.io/hostname containers: - name: cockroachdb - image: cockroachdb/cockroach:v21.1.5 + image: cockroachdb/cockroach:v21.1.6 imagePullPolicy: IfNotPresent ports: - containerPort: 26257 diff --git a/cloud/kubernetes/multiregion/eks/cockroachdb-statefulset-secure-eks.yaml b/cloud/kubernetes/multiregion/eks/cockroachdb-statefulset-secure-eks.yaml index 9ee699ee23e1..f80e82153637 100644 --- a/cloud/kubernetes/multiregion/eks/cockroachdb-statefulset-secure-eks.yaml +++ b/cloud/kubernetes/multiregion/eks/cockroachdb-statefulset-secure-eks.yaml @@ -184,7 +184,7 @@ spec: name: cockroach-env containers: - name: cockroachdb - image: cockroachdb/cockroach:v21.1.5 + image: cockroachdb/cockroach:v21.1.6 imagePullPolicy: IfNotPresent # TODO: Change these to appropriate values for the hardware that you're running. You can see # the resources that can be allocated on each of your Kubernetes nodes by running: diff --git a/cloud/kubernetes/performance/cockroachdb-daemonset-insecure.yaml b/cloud/kubernetes/performance/cockroachdb-daemonset-insecure.yaml index 5f19c0b7c98c..451f83d53fba 100644 --- a/cloud/kubernetes/performance/cockroachdb-daemonset-insecure.yaml +++ b/cloud/kubernetes/performance/cockroachdb-daemonset-insecure.yaml @@ -81,7 +81,7 @@ spec: hostNetwork: true containers: - name: cockroachdb - image: cockroachdb/cockroach:v21.1.5 + image: cockroachdb/cockroach:v21.1.6 imagePullPolicy: IfNotPresent # TODO: If you configured taints to give CockroachDB exclusive access to nodes, feel free # to remove the requests and limits sections. 
If you didn't, you'll need to change these to diff --git a/cloud/kubernetes/performance/cockroachdb-daemonset-secure.yaml b/cloud/kubernetes/performance/cockroachdb-daemonset-secure.yaml index 8f946af9981f..2b7f0ad8c369 100644 --- a/cloud/kubernetes/performance/cockroachdb-daemonset-secure.yaml +++ b/cloud/kubernetes/performance/cockroachdb-daemonset-secure.yaml @@ -197,7 +197,7 @@ spec: topologyKey: kubernetes.io/hostname containers: - name: cockroachdb - image: cockroachdb/cockroach:v21.1.5 + image: cockroachdb/cockroach:v21.1.6 imagePullPolicy: IfNotPresent # TODO: If you configured taints to give CockroachDB exclusive access to nodes, feel free # to remove the requests and limits sections. If you didn't, you'll need to change these to diff --git a/cloud/kubernetes/performance/cockroachdb-statefulset-insecure.yaml b/cloud/kubernetes/performance/cockroachdb-statefulset-insecure.yaml index f68720862023..f51bc90431ee 100644 --- a/cloud/kubernetes/performance/cockroachdb-statefulset-insecure.yaml +++ b/cloud/kubernetes/performance/cockroachdb-statefulset-insecure.yaml @@ -140,7 +140,7 @@ spec: - name: cockroachdb # NOTE: Always use the most recent version of CockroachDB for the best # performance and reliability. - image: cockroachdb/cockroach:v21.1.5 + image: cockroachdb/cockroach:v21.1.6 imagePullPolicy: IfNotPresent # TODO: Change these to appropriate values for the hardware that you're running. You can see # the resources that can be allocated on each of your Kubernetes nodes by running: diff --git a/cloud/kubernetes/performance/cockroachdb-statefulset-secure.yaml b/cloud/kubernetes/performance/cockroachdb-statefulset-secure.yaml index 238b7639c53d..90e23395e57e 100644 --- a/cloud/kubernetes/performance/cockroachdb-statefulset-secure.yaml +++ b/cloud/kubernetes/performance/cockroachdb-statefulset-secure.yaml @@ -231,7 +231,7 @@ spec: - name: cockroachdb # NOTE: Always use the most recent version of CockroachDB for the best # performance and reliability. - image: cockroachdb/cockroach:v21.1.5 + image: cockroachdb/cockroach:v21.1.6 imagePullPolicy: IfNotPresent # TODO: Change these to appropriate values for the hardware that you're running. 
You can see # the resources that can be allocated on each of your Kubernetes nodes by running: diff --git a/cloud/kubernetes/v1.6/client-secure.yaml b/cloud/kubernetes/v1.6/client-secure.yaml index 6ec53aabe0eb..76e89fcbe584 100644 --- a/cloud/kubernetes/v1.6/client-secure.yaml +++ b/cloud/kubernetes/v1.6/client-secure.yaml @@ -31,7 +31,7 @@ spec: mountPath: /cockroach-certs containers: - name: cockroachdb-client - image: cockroachdb/cockroach:v21.1.5 + image: cockroachdb/cockroach:v21.1.6 imagePullPolicy: IfNotPresent volumeMounts: - name: client-certs diff --git a/cloud/kubernetes/v1.6/cluster-init-secure.yaml b/cloud/kubernetes/v1.6/cluster-init-secure.yaml index 7d0e469cd23b..711438306c76 100644 --- a/cloud/kubernetes/v1.6/cluster-init-secure.yaml +++ b/cloud/kubernetes/v1.6/cluster-init-secure.yaml @@ -33,7 +33,7 @@ spec: mountPath: /cockroach-certs containers: - name: cluster-init - image: cockroachdb/cockroach:v21.1.5 + image: cockroachdb/cockroach:v21.1.6 imagePullPolicy: IfNotPresent volumeMounts: - name: client-certs diff --git a/cloud/kubernetes/v1.6/cluster-init.yaml b/cloud/kubernetes/v1.6/cluster-init.yaml index 8706c87e5283..1428e32457d0 100644 --- a/cloud/kubernetes/v1.6/cluster-init.yaml +++ b/cloud/kubernetes/v1.6/cluster-init.yaml @@ -9,7 +9,7 @@ spec: spec: containers: - name: cluster-init - image: cockroachdb/cockroach:v21.1.5 + image: cockroachdb/cockroach:v21.1.6 imagePullPolicy: IfNotPresent command: - "/cockroach/cockroach" diff --git a/cloud/kubernetes/v1.6/cockroachdb-statefulset-secure.yaml b/cloud/kubernetes/v1.6/cockroachdb-statefulset-secure.yaml index 75c5ce130874..42a63a2d28c4 100644 --- a/cloud/kubernetes/v1.6/cockroachdb-statefulset-secure.yaml +++ b/cloud/kubernetes/v1.6/cockroachdb-statefulset-secure.yaml @@ -177,7 +177,7 @@ spec: topologyKey: kubernetes.io/hostname containers: - name: cockroachdb - image: cockroachdb/cockroach:v21.1.5 + image: cockroachdb/cockroach:v21.1.6 imagePullPolicy: IfNotPresent ports: - containerPort: 26257 diff --git a/cloud/kubernetes/v1.6/cockroachdb-statefulset.yaml b/cloud/kubernetes/v1.6/cockroachdb-statefulset.yaml index be90eeb890b3..77ad86c3b3c9 100644 --- a/cloud/kubernetes/v1.6/cockroachdb-statefulset.yaml +++ b/cloud/kubernetes/v1.6/cockroachdb-statefulset.yaml @@ -80,7 +80,7 @@ spec: topologyKey: kubernetes.io/hostname containers: - name: cockroachdb - image: cockroachdb/cockroach:v21.1.5 + image: cockroachdb/cockroach:v21.1.6 imagePullPolicy: IfNotPresent ports: - containerPort: 26257 diff --git a/cloud/kubernetes/v1.7/client-secure.yaml b/cloud/kubernetes/v1.7/client-secure.yaml index 6ec53aabe0eb..76e89fcbe584 100644 --- a/cloud/kubernetes/v1.7/client-secure.yaml +++ b/cloud/kubernetes/v1.7/client-secure.yaml @@ -31,7 +31,7 @@ spec: mountPath: /cockroach-certs containers: - name: cockroachdb-client - image: cockroachdb/cockroach:v21.1.5 + image: cockroachdb/cockroach:v21.1.6 imagePullPolicy: IfNotPresent volumeMounts: - name: client-certs diff --git a/cloud/kubernetes/v1.7/cluster-init-secure.yaml b/cloud/kubernetes/v1.7/cluster-init-secure.yaml index 7d0e469cd23b..711438306c76 100644 --- a/cloud/kubernetes/v1.7/cluster-init-secure.yaml +++ b/cloud/kubernetes/v1.7/cluster-init-secure.yaml @@ -33,7 +33,7 @@ spec: mountPath: /cockroach-certs containers: - name: cluster-init - image: cockroachdb/cockroach:v21.1.5 + image: cockroachdb/cockroach:v21.1.6 imagePullPolicy: IfNotPresent volumeMounts: - name: client-certs diff --git a/cloud/kubernetes/v1.7/cluster-init.yaml b/cloud/kubernetes/v1.7/cluster-init.yaml index 
8706c87e5283..1428e32457d0 100644 --- a/cloud/kubernetes/v1.7/cluster-init.yaml +++ b/cloud/kubernetes/v1.7/cluster-init.yaml @@ -9,7 +9,7 @@ spec: spec: containers: - name: cluster-init - image: cockroachdb/cockroach:v21.1.5 + image: cockroachdb/cockroach:v21.1.6 imagePullPolicy: IfNotPresent command: - "/cockroach/cockroach" diff --git a/cloud/kubernetes/v1.7/cockroachdb-statefulset-secure.yaml b/cloud/kubernetes/v1.7/cockroachdb-statefulset-secure.yaml index 851d14d528b7..211ad604941a 100644 --- a/cloud/kubernetes/v1.7/cockroachdb-statefulset-secure.yaml +++ b/cloud/kubernetes/v1.7/cockroachdb-statefulset-secure.yaml @@ -189,7 +189,7 @@ spec: topologyKey: kubernetes.io/hostname containers: - name: cockroachdb - image: cockroachdb/cockroach:v21.1.5 + image: cockroachdb/cockroach:v21.1.6 imagePullPolicy: IfNotPresent ports: - containerPort: 26257 diff --git a/cloud/kubernetes/v1.7/cockroachdb-statefulset.yaml b/cloud/kubernetes/v1.7/cockroachdb-statefulset.yaml index 2d0eaaae343c..c6577e9c1dfd 100644 --- a/cloud/kubernetes/v1.7/cockroachdb-statefulset.yaml +++ b/cloud/kubernetes/v1.7/cockroachdb-statefulset.yaml @@ -92,7 +92,7 @@ spec: topologyKey: kubernetes.io/hostname containers: - name: cockroachdb - image: cockroachdb/cockroach:v21.1.5 + image: cockroachdb/cockroach:v21.1.6 imagePullPolicy: IfNotPresent ports: - containerPort: 26257 diff --git a/go.mod b/go.mod index f6253abdd409..cddf75430a61 100644 --- a/go.mod +++ b/go.mod @@ -42,7 +42,7 @@ require ( github.com/cockroachdb/go-test-teamcity v0.0.0-20191211140407-cff980ad0a55 github.com/cockroachdb/gostdlib v1.13.0 github.com/cockroachdb/logtags v0.0.0-20190617123548-eb05cc24525f - github.com/cockroachdb/pebble v0.0.0-20210712141052-2215b8d4c8ab + github.com/cockroachdb/pebble v0.0.0-20210719141320-8c3bd06debb5 github.com/cockroachdb/redact v1.1.3 github.com/cockroachdb/returncheck v0.0.0-20200612231554-92cdbca611dd github.com/cockroachdb/sentry-go v0.6.1-cockroachdb.2 diff --git a/go.sum b/go.sum index 8a39d57318aa..43fae311d700 100644 --- a/go.sum +++ b/go.sum @@ -263,8 +263,8 @@ github.com/cockroachdb/gostdlib v1.13.0 h1:TzSEPYgkKDNei3gbLc0rrHu4iHyBp7/+NxPOF github.com/cockroachdb/gostdlib v1.13.0/go.mod h1:eXX95p9QDrYwJfJ6AgeN9QnRa/lqqid9LAzWz/l5OgA= github.com/cockroachdb/logtags v0.0.0-20190617123548-eb05cc24525f h1:o/kfcElHqOiXqcou5a3rIlMc7oJbMQkeLk0VQJ7zgqY= github.com/cockroachdb/logtags v0.0.0-20190617123548-eb05cc24525f/go.mod h1:i/u985jwjWRlyHXQbwatDASoW0RMlZ/3i9yJHE2xLkI= -github.com/cockroachdb/pebble v0.0.0-20210712141052-2215b8d4c8ab h1:5qIOzg4DH0pJqNFV5KyqtfIJalHGorKbsMQlog8sO1g= -github.com/cockroachdb/pebble v0.0.0-20210712141052-2215b8d4c8ab/go.mod h1:JXfQr3d+XO4bL1pxGwKKo09xylQSdZ/mpZ9b2wfVcPs= +github.com/cockroachdb/pebble v0.0.0-20210719141320-8c3bd06debb5 h1:Igd6YmtOZ77EgLAIaE9+mHl7+sAKaZ5m4iMI0Dz/J2A= +github.com/cockroachdb/pebble v0.0.0-20210719141320-8c3bd06debb5/go.mod h1:JXfQr3d+XO4bL1pxGwKKo09xylQSdZ/mpZ9b2wfVcPs= github.com/cockroachdb/pq v0.0.0-20210517091544-990dd3347596 h1:xTc0ViFhuelzQZAYQOxMR2J5QDO9/C+0L0fkPXIcoMI= github.com/cockroachdb/pq v0.0.0-20210517091544-990dd3347596/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= github.com/cockroachdb/redact v1.0.8/go.mod h1:BVNblN9mBWFyMyqK1k3AAiSxhvhfK2oOZZ2lK+dpvRg= diff --git a/pkg/ccl/logictestccl/testdata/logic_test/regional_by_row b/pkg/ccl/logictestccl/testdata/logic_test/regional_by_row index efa6b9b67ec0..a8b94d521104 100644 --- a/pkg/ccl/logictestccl/testdata/logic_test/regional_by_row +++ 
b/pkg/ccl/logictestccl/testdata/logic_test/regional_by_row @@ -1652,6 +1652,9 @@ ALTER TABLE regional_by_row_table ADD CONSTRAINT unique_b_a UNIQUE(b, a) # We should plan uniqueness checks for all unique indexes in # REGIONAL BY ROW tables. +# TODO(treilly): The constraint check for uniq_idx should use uniq_idx but due +# to stats issues w/ empty stats, partial indexes and multicol stats it's not. +# Hopefully fixing #67583 (and possibly #67479) will resolve this. query T SELECT * FROM [EXPLAIN INSERT INTO regional_by_row_table (pk, pk2, a, b) VALUES (1, 1, 1, 1)] OFFSET 2 ---- @@ -1698,9 +1701,9 @@ SELECT * FROM [EXPLAIN INSERT INTO regional_by_row_table (pk, pk2, a, b) VALUES │ └── • error if rows │ │ │ └── • lookup join (semi) -│ │ table: regional_by_row_table@uniq_idx (partial index) -│ │ lookup condition: (column3 = a) AND (crdb_region = 'ap-southeast-2') -│ │ remote lookup condition: (column3 = a) AND (crdb_region IN ('ca-central-1', 'us-east-1')) +│ │ table: regional_by_row_table@new_idx +│ │ lookup condition: ((column3 = a) AND (crdb_region = 'ap-southeast-2')) AND (b > 0) +│ │ remote lookup condition: ((column3 = a) AND (crdb_region IN ('ca-central-1', 'us-east-1'))) AND (b > 0) │ │ pred: (column1 != pk) OR (crdb_region_default != crdb_region) │ │ │ └── • filter @@ -1728,6 +1731,9 @@ INSERT INTO regional_by_row_table (crdb_region, pk, pk2, a, b) VALUES ('us-east- # The conflict columns in an upsert should only include the primary key, # not the region column. +# TODO(treilly): The constraint check for uniq_idx should use uniq_idx but due +# to stats issues w/ empty stats, partial indexes and multicol stats it's not. +# Hopefully fixing #67583 (and possibly #67479) will resolve this. query T SELECT * FROM [EXPLAIN UPSERT INTO regional_by_row_table (crdb_region, pk, pk2, a, b) VALUES ('us-east-1', 2, 3, 2, 3)] OFFSET 2 ---- @@ -1779,9 +1785,9 @@ SELECT * FROM [EXPLAIN UPSERT INTO regional_by_row_table (crdb_region, pk, pk2, │ └── • error if rows │ │ │ └── • lookup join (semi) -│ │ table: regional_by_row_table@uniq_idx (partial index) -│ │ lookup condition: (column4 = a) AND (crdb_region = 'ap-southeast-2') -│ │ remote lookup condition: (column4 = a) AND (crdb_region IN ('ca-central-1', 'us-east-1')) +│ │ table: regional_by_row_table@new_idx +│ │ lookup condition: ((column4 = a) AND (crdb_region = 'ap-southeast-2')) AND (b > 0) +│ │ remote lookup condition: ((column4 = a) AND (crdb_region IN ('ca-central-1', 'us-east-1'))) AND (b > 0) │ │ pred: (upsert_pk != pk) OR (column1 != crdb_region) │ │ │ └── • filter @@ -1803,6 +1809,9 @@ SELECT * FROM [EXPLAIN UPSERT INTO regional_by_row_table (crdb_region, pk, pk2, └── • scan buffer label: buffer 1 +# TODO(treilly): The constraint check for uniq_idx should use uniq_idx but due +# to stats issues w/ empty stats, partial indexes and multicol stats it's not. +# Hopefully fixing #67583 (and possibly #67479) will resolve this. 
query T SELECT * FROM [EXPLAIN UPSERT INTO regional_by_row_table (crdb_region, pk, pk2, a, b) VALUES ('us-east-1', 23, 24, 25, 26), ('ca-central-1', 30, 30, 31, 32)] OFFSET 2 @@ -1850,9 +1859,9 @@ VALUES ('us-east-1', 23, 24, 25, 26), ('ca-central-1', 30, 30, 31, 32)] OFFSET 2 │ └── • error if rows │ │ │ └── • lookup join (semi) -│ │ table: regional_by_row_table@uniq_idx (partial index) -│ │ lookup condition: (column4 = a) AND (crdb_region = 'ap-southeast-2') -│ │ remote lookup condition: (column4 = a) AND (crdb_region IN ('ca-central-1', 'us-east-1')) +│ │ table: regional_by_row_table@new_idx +│ │ lookup condition: ((column4 = a) AND (crdb_region = 'ap-southeast-2')) AND (b > 0) +│ │ remote lookup condition: ((column4 = a) AND (crdb_region IN ('ca-central-1', 'us-east-1'))) AND (b > 0) │ │ pred: (upsert_pk != pk) OR (column1 != crdb_region) │ │ │ └── • filter diff --git a/pkg/cmd/roachtest/tests/predecessor_version.go b/pkg/cmd/roachtest/tests/predecessor_version.go index 48d943ae9ed4..0ae0340f4cb0 100644 --- a/pkg/cmd/roachtest/tests/predecessor_version.go +++ b/pkg/cmd/roachtest/tests/predecessor_version.go @@ -34,7 +34,7 @@ func PredecessorVersion(buildVersion version.Version) (string, error) { // (see runVersionUpgrade). The same is true for adding a new key to this // map. verMap := map[string]string{ - "21.2": "21.1.5", + "21.2": "21.1.6", "21.1": "20.2.12", "20.2": "20.1.16", "20.1": "19.2.11", diff --git a/pkg/roachpb/data.go b/pkg/roachpb/data.go index 0caa0453e205..a01b4df3a4df 100644 --- a/pkg/roachpb/data.go +++ b/pkg/roachpb/data.go @@ -2244,6 +2244,18 @@ func (s Span) ContainsKey(key Key) bool { return bytes.Compare(key, s.Key) >= 0 && bytes.Compare(key, s.EndKey) < 0 } +// CompareKey returns -1 if the key precedes the span start, 0 if it's contained +// by the span, and 1 if it's after the end of the span. +func (s Span) CompareKey(key Key) int { + if bytes.Compare(key, s.Key) >= 0 { + if bytes.Compare(key, s.EndKey) < 0 { + return 0 + } + return 1 + } + return -1 +} + // ProperlyContainsKey returns whether the span properly contains the given key. func (s Span) ProperlyContainsKey(key Key) bool { return bytes.Compare(key, s.Key) > 0 && bytes.Compare(key, s.EndKey) < 0 diff --git a/pkg/sql/colexec/colbuilder/execplan.go b/pkg/sql/colexec/colbuilder/execplan.go index 477be56cb90a..1053ba2bd0ef 100644 --- a/pkg/sql/colexec/colbuilder/execplan.go +++ b/pkg/sql/colexec/colbuilder/execplan.go @@ -211,15 +211,6 @@ func supportedNatively(spec *execinfrapb.ProcessorSpec) error { case spec.Core.Windower != nil: for _, wf := range spec.Core.Windower.WindowFns { - if wf.Frame != nil { - frame, err := wf.Frame.ConvertToAST() - if err != nil { - return err - } - if !frame.IsDefaultFrame() { - return errors.Newf("window functions with non-default window frames are not supported") - } - } if wf.FilterColIdx != tree.NoColumnIdx { return errors.Newf("window functions with FILTER clause are not supported") } @@ -1186,6 +1177,9 @@ func NewColOperator( typs := make([]*types.T, len(result.ColumnTypes), len(result.ColumnTypes)+len(wf.ArgsIdxs)+2) copy(typs, result.ColumnTypes) + // Set any nil values in the window frame to their default values. 
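Note on the pkg/roachpb/data.go hunk above: the new Span.CompareKey generalizes ContainsKey into a three-way comparison, so callers can learn whether a key falls before, inside, or after a span with a single call. A minimal self-contained sketch of the same logic over plain byte slices (roachpb.Span and Key are byte-slice-backed; the real types carry more methods):

package main

import (
	"bytes"
	"fmt"
)

// compareKey mirrors Span.CompareKey: -1 if key sorts before start, 0 if the
// half-open span [start, end) contains key, and 1 if key is at or past end.
func compareKey(start, end, key []byte) int {
	if bytes.Compare(key, start) >= 0 {
		if bytes.Compare(key, end) < 0 {
			return 0
		}
		return 1
	}
	return -1
}

func main() {
	start, end := []byte("b"), []byte("d")
	fmt.Println(compareKey(start, end, []byte("a"))) // -1: precedes the span
	fmt.Println(compareKey(start, end, []byte("c"))) //  0: contained
	fmt.Println(compareKey(start, end, []byte("d"))) //  1: EndKey is exclusive
}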
+ wf.Frame = colexecwindow.NormalizeWindowFrame(wf.Frame) + tempColOffset := uint32(0) argTypes := make([]*types.T, len(wf.ArgsIdxs)) argIdxs := make([]int, len(wf.ArgsIdxs)) @@ -1246,7 +1240,7 @@ func NewColOperator( if err != nil { return r, err } - if colexecwindow.WindowFnNeedsPeersInfo(*wf.Func.WindowFunc) { + if colexecwindow.WindowFnNeedsPeersInfo(&wf) { peersColIdx = int(wf.OutputColIdx + tempColOffset) input, err = colexecwindow.NewWindowPeerGrouper( streamingAllocator, input, typs, wf.Ordering.Columns, @@ -1324,6 +1318,45 @@ func NewColOperator( args.DiskQueueCfg, args.FDSemaphore, diskAcc, input, typs, outputIdx, partitionColIdx, argIdxs[0], argIdxs[1], argIdxs[2], ) + case execinfrapb.WindowerSpec_FIRST_VALUE: + opName := opNamePrefix + "first_value" + unlimitedAllocator, diskAcc := result.getDiskBackedWindowFnFields( + ctx, opName, flowCtx, spec.ProcessorID, factory) + // FirstValue operators need an extra allocator. + bufferAllocator := colmem.NewAllocator( + ctx, result.createBufferingUnlimitedMemAccount(ctx, flowCtx, opName, spec.ProcessorID), factory, + ) + result.Root, err = colexecwindow.NewFirstValueOperator( + evalCtx, wf.Frame, &wf.Ordering, unlimitedAllocator, bufferAllocator, + execinfra.GetWorkMemLimit(flowCtx), args.DiskQueueCfg, args.FDSemaphore, + diskAcc, input, typs, outputIdx, partitionColIdx, peersColIdx, argIdxs, + ) + case execinfrapb.WindowerSpec_LAST_VALUE: + opName := opNamePrefix + "last_value" + unlimitedAllocator, diskAcc := result.getDiskBackedWindowFnFields( + ctx, opName, flowCtx, spec.ProcessorID, factory) + // LastValue operators need an extra allocator. + bufferAllocator := colmem.NewAllocator( + ctx, result.createBufferingUnlimitedMemAccount(ctx, flowCtx, opName, spec.ProcessorID), factory, + ) + result.Root, err = colexecwindow.NewLastValueOperator( + evalCtx, wf.Frame, &wf.Ordering, unlimitedAllocator, bufferAllocator, + execinfra.GetWorkMemLimit(flowCtx), args.DiskQueueCfg, args.FDSemaphore, + diskAcc, input, typs, outputIdx, partitionColIdx, peersColIdx, argIdxs, + ) + case execinfrapb.WindowerSpec_NTH_VALUE: + opName := opNamePrefix + "nth_value" + unlimitedAllocator, diskAcc := result.getDiskBackedWindowFnFields( + ctx, opName, flowCtx, spec.ProcessorID, factory) + // NthValue operators need an extra allocator. + bufferAllocator := colmem.NewAllocator( + ctx, result.createBufferingUnlimitedMemAccount(ctx, flowCtx, opName, spec.ProcessorID), factory, + ) + result.Root, err = colexecwindow.NewNthValueOperator( + evalCtx, wf.Frame, &wf.Ordering, unlimitedAllocator, bufferAllocator, + execinfra.GetWorkMemLimit(flowCtx), args.DiskQueueCfg, args.FDSemaphore, + diskAcc, input, typs, outputIdx, partitionColIdx, peersColIdx, argIdxs, + ) default: return r, errors.AssertionFailedf("window function %s is not supported", wf.String()) } diff --git a/pkg/sql/colexec/colexecproj/proj_const_left_ops.eg.go b/pkg/sql/colexec/colexecproj/proj_const_left_ops.eg.go index 3ea1420d528d..42b0dca5ffcf 100644 --- a/pkg/sql/colexec/colexecproj/proj_const_left_ops.eg.go +++ b/pkg/sql/colexec/colexecproj/proj_const_left_ops.eg.go @@ -5853,7 +5853,12 @@ func (p projPlusTimestampConstIntervalOp) Next() coldata.Batch { if !colNulls.NullAt(i) { // We only want to perform the projection operation if the value is not null. 
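The execplan.go changes above drop the supportedNatively gate that rejected non-default window frames and instead normalize the frame before planning. NormalizeWindowFrame itself is not shown in this diff; the following is only a plausible sketch of what "set any nil values in the window frame to their default values" could mean, assuming the SQL-standard default frame (RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) and simplified stand-in types:

package main

import "fmt"

// Simplified stand-ins for execinfrapb.WindowerSpec_Frame and its bounds; the
// real protobuf types also carry exclusion clauses, offsets, and typed bounds.
type bound struct{ kind string }

type frame struct {
	mode       string
	start, end *bound
}

// normalizeFrame sketches the normalization step: a missing frame becomes the
// SQL-standard default, and a missing end bound defaults to CURRENT ROW.
func normalizeFrame(f *frame) *frame {
	if f == nil {
		return &frame{
			mode:  "RANGE",
			start: &bound{kind: "UNBOUNDED PRECEDING"},
			end:   &bound{kind: "CURRENT ROW"},
		}
	}
	if f.end == nil {
		f.end = &bound{kind: "CURRENT ROW"}
	}
	return f
}

func main() {
	f := normalizeFrame(nil)
	fmt.Println(f.mode, f.start.kind, "/", f.end.kind) // RANGE UNBOUNDED PRECEDING / CURRENT ROW
}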
arg := col.Get(i) - projCol[i] = duration.Add(p.constArg, arg) + t_res := duration.Add(p.constArg, arg) + rounded_res := t_res.Round(time.Microsecond) + if rounded_res.After(tree.MaxSupportedTime) || rounded_res.Before(tree.MinSupportedTime) { + colexecerror.ExpectedError(errors.Newf("timestamp %q exceeds supported timestamp bounds", t_res.Format(time.RFC3339))) + } + projCol[i] = t_res } } } else { @@ -5864,7 +5869,12 @@ func (p projPlusTimestampConstIntervalOp) Next() coldata.Batch { // We only want to perform the projection operation if the value is not null. //gcassert:bce arg := col.Get(i) - projCol[i] = duration.Add(p.constArg, arg) + t_res := duration.Add(p.constArg, arg) + rounded_res := t_res.Round(time.Microsecond) + if rounded_res.After(tree.MaxSupportedTime) || rounded_res.Before(tree.MinSupportedTime) { + colexecerror.ExpectedError(errors.Newf("timestamp %q exceeds supported timestamp bounds", t_res.Format(time.RFC3339))) + } + projCol[i] = t_res } } } @@ -5879,7 +5889,12 @@ func (p projPlusTimestampConstIntervalOp) Next() coldata.Batch { sel = sel[:n] for _, i := range sel { arg := col.Get(i) - projCol[i] = duration.Add(p.constArg, arg) + t_res := duration.Add(p.constArg, arg) + rounded_res := t_res.Round(time.Microsecond) + if rounded_res.After(tree.MaxSupportedTime) || rounded_res.Before(tree.MinSupportedTime) { + colexecerror.ExpectedError(errors.Newf("timestamp %q exceeds supported timestamp bounds", t_res.Format(time.RFC3339))) + } + projCol[i] = t_res } } else { _ = projCol.Get(n - 1) @@ -5887,7 +5902,12 @@ func (p projPlusTimestampConstIntervalOp) Next() coldata.Batch { for i := 0; i < n; i++ { //gcassert:bce arg := col.Get(i) - projCol[i] = duration.Add(p.constArg, arg) + t_res := duration.Add(p.constArg, arg) + rounded_res := t_res.Round(time.Microsecond) + if rounded_res.After(tree.MaxSupportedTime) || rounded_res.Before(tree.MinSupportedTime) { + colexecerror.ExpectedError(errors.Newf("timestamp %q exceeds supported timestamp bounds", t_res.Format(time.RFC3339))) + } + projCol[i] = t_res } } // _outNulls has been updated from within the _ASSIGN function to include @@ -5947,7 +5967,12 @@ func (p projPlusIntervalConstTimestampOp) Next() coldata.Batch { if !colNulls.NullAt(i) { // We only want to perform the projection operation if the value is not null. arg := col.Get(i) - projCol[i] = duration.Add(arg, p.constArg) + t_res := duration.Add(arg, p.constArg) + rounded_res := t_res.Round(time.Microsecond) + if rounded_res.After(tree.MaxSupportedTime) || rounded_res.Before(tree.MinSupportedTime) { + colexecerror.ExpectedError(errors.Newf("timestamp %q exceeds supported timestamp bounds", t_res.Format(time.RFC3339))) + } + projCol[i] = t_res } } } else { @@ -5958,7 +5983,12 @@ func (p projPlusIntervalConstTimestampOp) Next() coldata.Batch { // We only want to perform the projection operation if the value is not null. 
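The hunks above, and the analogous ones through proj_const_right_ops.eg.go and proj_non_const_ops.eg.go below, stamp the same overflow guard into every generated timestamp plus/minus interval projection: compute the result, round to microsecond precision, and raise an expected error when the rounded value leaves the supported timestamp range. A standalone sketch of the pattern, with hypothetical bounds standing in for tree.MinSupportedTime and tree.MaxSupportedTime, and time.Duration standing in for the duration package's mixed month/day/nanosecond intervals:

package main

import (
	"fmt"
	"time"
)

// Hypothetical stand-ins for tree.MinSupportedTime and tree.MaxSupportedTime;
// the real bounds are defined in pkg/sql/sem/tree.
var (
	minSupportedTime = time.Date(-4713, time.November, 24, 0, 0, 0, 0, time.UTC)
	maxSupportedTime = time.Date(294276, time.December, 31, 23, 59, 59, 999999000, time.UTC)
)

// addInterval mirrors the generated guard: add, round to the microsecond
// precision that SQL timestamps carry, and reject results outside the
// supported range instead of letting them flow downstream.
func addInterval(ts time.Time, d time.Duration) (time.Time, error) {
	res := ts.Add(d)
	rounded := res.Round(time.Microsecond)
	if rounded.After(maxSupportedTime) || rounded.Before(minSupportedTime) {
		return time.Time{}, fmt.Errorf(
			"timestamp %q exceeds supported timestamp bounds", res.Format(time.RFC3339))
	}
	return res, nil
}

func main() {
	ts := time.Date(2021, time.July, 19, 12, 0, 0, 0, time.UTC)
	res, err := addInterval(ts, 48*time.Hour)
	fmt.Println(res, err) // 2021-07-21 12:00:00 +0000 UTC <nil>
}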
//gcassert:bce arg := col.Get(i) - projCol[i] = duration.Add(arg, p.constArg) + t_res := duration.Add(arg, p.constArg) + rounded_res := t_res.Round(time.Microsecond) + if rounded_res.After(tree.MaxSupportedTime) || rounded_res.Before(tree.MinSupportedTime) { + colexecerror.ExpectedError(errors.Newf("timestamp %q exceeds supported timestamp bounds", t_res.Format(time.RFC3339))) + } + projCol[i] = t_res } } } @@ -5973,7 +6003,12 @@ func (p projPlusIntervalConstTimestampOp) Next() coldata.Batch { sel = sel[:n] for _, i := range sel { arg := col.Get(i) - projCol[i] = duration.Add(arg, p.constArg) + t_res := duration.Add(arg, p.constArg) + rounded_res := t_res.Round(time.Microsecond) + if rounded_res.After(tree.MaxSupportedTime) || rounded_res.Before(tree.MinSupportedTime) { + colexecerror.ExpectedError(errors.Newf("timestamp %q exceeds supported timestamp bounds", t_res.Format(time.RFC3339))) + } + projCol[i] = t_res } } else { _ = projCol.Get(n - 1) @@ -5981,7 +6016,12 @@ func (p projPlusIntervalConstTimestampOp) Next() coldata.Batch { for i := 0; i < n; i++ { //gcassert:bce arg := col.Get(i) - projCol[i] = duration.Add(arg, p.constArg) + t_res := duration.Add(arg, p.constArg) + rounded_res := t_res.Round(time.Microsecond) + if rounded_res.After(tree.MaxSupportedTime) || rounded_res.Before(tree.MinSupportedTime) { + colexecerror.ExpectedError(errors.Newf("timestamp %q exceeds supported timestamp bounds", t_res.Format(time.RFC3339))) + } + projCol[i] = t_res } } // _outNulls has been updated from within the _ASSIGN function to include @@ -9599,7 +9639,12 @@ func (p projMinusTimestampConstIntervalOp) Next() coldata.Batch { if !colNulls.NullAt(i) { // We only want to perform the projection operation if the value is not null. arg := col.Get(i) - projCol[i] = duration.Add(p.constArg, arg.Mul(-1)) + t_res := duration.Add(p.constArg, arg.Mul(-1)) + rounded_res := t_res.Round(time.Microsecond) + if rounded_res.After(tree.MaxSupportedTime) || rounded_res.Before(tree.MinSupportedTime) { + colexecerror.ExpectedError(errors.Newf("timestamp %q exceeds supported timestamp bounds", t_res.Format(time.RFC3339))) + } + projCol[i] = t_res } } } else { @@ -9610,7 +9655,12 @@ func (p projMinusTimestampConstIntervalOp) Next() coldata.Batch { // We only want to perform the projection operation if the value is not null. 
//gcassert:bce arg := col.Get(i) - projCol[i] = duration.Add(p.constArg, arg.Mul(-1)) + t_res := duration.Add(p.constArg, arg.Mul(-1)) + rounded_res := t_res.Round(time.Microsecond) + if rounded_res.After(tree.MaxSupportedTime) || rounded_res.Before(tree.MinSupportedTime) { + colexecerror.ExpectedError(errors.Newf("timestamp %q exceeds supported timestamp bounds", t_res.Format(time.RFC3339))) + } + projCol[i] = t_res } } } @@ -9625,7 +9675,12 @@ func (p projMinusTimestampConstIntervalOp) Next() coldata.Batch { sel = sel[:n] for _, i := range sel { arg := col.Get(i) - projCol[i] = duration.Add(p.constArg, arg.Mul(-1)) + t_res := duration.Add(p.constArg, arg.Mul(-1)) + rounded_res := t_res.Round(time.Microsecond) + if rounded_res.After(tree.MaxSupportedTime) || rounded_res.Before(tree.MinSupportedTime) { + colexecerror.ExpectedError(errors.Newf("timestamp %q exceeds supported timestamp bounds", t_res.Format(time.RFC3339))) + } + projCol[i] = t_res } } else { _ = projCol.Get(n - 1) @@ -9633,7 +9688,12 @@ func (p projMinusTimestampConstIntervalOp) Next() coldata.Batch { for i := 0; i < n; i++ { //gcassert:bce arg := col.Get(i) - projCol[i] = duration.Add(p.constArg, arg.Mul(-1)) + t_res := duration.Add(p.constArg, arg.Mul(-1)) + rounded_res := t_res.Round(time.Microsecond) + if rounded_res.After(tree.MaxSupportedTime) || rounded_res.Before(tree.MinSupportedTime) { + colexecerror.ExpectedError(errors.Newf("timestamp %q exceeds supported timestamp bounds", t_res.Format(time.RFC3339))) + } + projCol[i] = t_res } } // _outNulls has been updated from within the _ASSIGN function to include diff --git a/pkg/sql/colexec/colexecproj/proj_const_right_ops.eg.go b/pkg/sql/colexec/colexecproj/proj_const_right_ops.eg.go index eecde553bbe8..e93c73f38116 100644 --- a/pkg/sql/colexec/colexecproj/proj_const_right_ops.eg.go +++ b/pkg/sql/colexec/colexecproj/proj_const_right_ops.eg.go @@ -5849,7 +5849,12 @@ func (p projPlusTimestampIntervalConstOp) Next() coldata.Batch { if !colNulls.NullAt(i) { // We only want to perform the projection operation if the value is not null. arg := col.Get(i) - projCol[i] = duration.Add(arg, p.constArg) + t_res := duration.Add(arg, p.constArg) + rounded_res := t_res.Round(time.Microsecond) + if rounded_res.After(tree.MaxSupportedTime) || rounded_res.Before(tree.MinSupportedTime) { + colexecerror.ExpectedError(errors.Newf("timestamp %q exceeds supported timestamp bounds", t_res.Format(time.RFC3339))) + } + projCol[i] = t_res } } } else { @@ -5860,7 +5865,12 @@ func (p projPlusTimestampIntervalConstOp) Next() coldata.Batch { // We only want to perform the projection operation if the value is not null. 
//gcassert:bce arg := col.Get(i) - projCol[i] = duration.Add(arg, p.constArg) + t_res := duration.Add(arg, p.constArg) + rounded_res := t_res.Round(time.Microsecond) + if rounded_res.After(tree.MaxSupportedTime) || rounded_res.Before(tree.MinSupportedTime) { + colexecerror.ExpectedError(errors.Newf("timestamp %q exceeds supported timestamp bounds", t_res.Format(time.RFC3339))) + } + projCol[i] = t_res } } } @@ -5875,7 +5885,12 @@ func (p projPlusTimestampIntervalConstOp) Next() coldata.Batch { sel = sel[:n] for _, i := range sel { arg := col.Get(i) - projCol[i] = duration.Add(arg, p.constArg) + t_res := duration.Add(arg, p.constArg) + rounded_res := t_res.Round(time.Microsecond) + if rounded_res.After(tree.MaxSupportedTime) || rounded_res.Before(tree.MinSupportedTime) { + colexecerror.ExpectedError(errors.Newf("timestamp %q exceeds supported timestamp bounds", t_res.Format(time.RFC3339))) + } + projCol[i] = t_res } } else { _ = projCol.Get(n - 1) @@ -5883,7 +5898,12 @@ func (p projPlusTimestampIntervalConstOp) Next() coldata.Batch { for i := 0; i < n; i++ { //gcassert:bce arg := col.Get(i) - projCol[i] = duration.Add(arg, p.constArg) + t_res := duration.Add(arg, p.constArg) + rounded_res := t_res.Round(time.Microsecond) + if rounded_res.After(tree.MaxSupportedTime) || rounded_res.Before(tree.MinSupportedTime) { + colexecerror.ExpectedError(errors.Newf("timestamp %q exceeds supported timestamp bounds", t_res.Format(time.RFC3339))) + } + projCol[i] = t_res } } // _outNulls has been updated from within the _ASSIGN function to include @@ -5943,7 +5963,12 @@ func (p projPlusIntervalTimestampConstOp) Next() coldata.Batch { if !colNulls.NullAt(i) { // We only want to perform the projection operation if the value is not null. arg := col.Get(i) - projCol[i] = duration.Add(p.constArg, arg) + t_res := duration.Add(p.constArg, arg) + rounded_res := t_res.Round(time.Microsecond) + if rounded_res.After(tree.MaxSupportedTime) || rounded_res.Before(tree.MinSupportedTime) { + colexecerror.ExpectedError(errors.Newf("timestamp %q exceeds supported timestamp bounds", t_res.Format(time.RFC3339))) + } + projCol[i] = t_res } } } else { @@ -5954,7 +5979,12 @@ func (p projPlusIntervalTimestampConstOp) Next() coldata.Batch { // We only want to perform the projection operation if the value is not null. 
//gcassert:bce arg := col.Get(i) - projCol[i] = duration.Add(p.constArg, arg) + t_res := duration.Add(p.constArg, arg) + rounded_res := t_res.Round(time.Microsecond) + if rounded_res.After(tree.MaxSupportedTime) || rounded_res.Before(tree.MinSupportedTime) { + colexecerror.ExpectedError(errors.Newf("timestamp %q exceeds supported timestamp bounds", t_res.Format(time.RFC3339))) + } + projCol[i] = t_res } } } @@ -5969,7 +5999,12 @@ func (p projPlusIntervalTimestampConstOp) Next() coldata.Batch { sel = sel[:n] for _, i := range sel { arg := col.Get(i) - projCol[i] = duration.Add(p.constArg, arg) + t_res := duration.Add(p.constArg, arg) + rounded_res := t_res.Round(time.Microsecond) + if rounded_res.After(tree.MaxSupportedTime) || rounded_res.Before(tree.MinSupportedTime) { + colexecerror.ExpectedError(errors.Newf("timestamp %q exceeds supported timestamp bounds", t_res.Format(time.RFC3339))) + } + projCol[i] = t_res } } else { _ = projCol.Get(n - 1) @@ -5977,7 +6012,12 @@ func (p projPlusIntervalTimestampConstOp) Next() coldata.Batch { for i := 0; i < n; i++ { //gcassert:bce arg := col.Get(i) - projCol[i] = duration.Add(p.constArg, arg) + t_res := duration.Add(p.constArg, arg) + rounded_res := t_res.Round(time.Microsecond) + if rounded_res.After(tree.MaxSupportedTime) || rounded_res.Before(tree.MinSupportedTime) { + colexecerror.ExpectedError(errors.Newf("timestamp %q exceeds supported timestamp bounds", t_res.Format(time.RFC3339))) + } + projCol[i] = t_res } } // _outNulls has been updated from within the _ASSIGN function to include @@ -9595,7 +9635,12 @@ func (p projMinusTimestampIntervalConstOp) Next() coldata.Batch { if !colNulls.NullAt(i) { // We only want to perform the projection operation if the value is not null. arg := col.Get(i) - projCol[i] = duration.Add(arg, p.constArg.Mul(-1)) + t_res := duration.Add(arg, p.constArg.Mul(-1)) + rounded_res := t_res.Round(time.Microsecond) + if rounded_res.After(tree.MaxSupportedTime) || rounded_res.Before(tree.MinSupportedTime) { + colexecerror.ExpectedError(errors.Newf("timestamp %q exceeds supported timestamp bounds", t_res.Format(time.RFC3339))) + } + projCol[i] = t_res } } } else { @@ -9606,7 +9651,12 @@ func (p projMinusTimestampIntervalConstOp) Next() coldata.Batch { // We only want to perform the projection operation if the value is not null. 
//gcassert:bce arg := col.Get(i) - projCol[i] = duration.Add(arg, p.constArg.Mul(-1)) + t_res := duration.Add(arg, p.constArg.Mul(-1)) + rounded_res := t_res.Round(time.Microsecond) + if rounded_res.After(tree.MaxSupportedTime) || rounded_res.Before(tree.MinSupportedTime) { + colexecerror.ExpectedError(errors.Newf("timestamp %q exceeds supported timestamp bounds", t_res.Format(time.RFC3339))) + } + projCol[i] = t_res } } } @@ -9621,7 +9671,12 @@ func (p projMinusTimestampIntervalConstOp) Next() coldata.Batch { sel = sel[:n] for _, i := range sel { arg := col.Get(i) - projCol[i] = duration.Add(arg, p.constArg.Mul(-1)) + t_res := duration.Add(arg, p.constArg.Mul(-1)) + rounded_res := t_res.Round(time.Microsecond) + if rounded_res.After(tree.MaxSupportedTime) || rounded_res.Before(tree.MinSupportedTime) { + colexecerror.ExpectedError(errors.Newf("timestamp %q exceeds supported timestamp bounds", t_res.Format(time.RFC3339))) + } + projCol[i] = t_res } } else { _ = projCol.Get(n - 1) @@ -9629,7 +9684,12 @@ func (p projMinusTimestampIntervalConstOp) Next() coldata.Batch { for i := 0; i < n; i++ { //gcassert:bce arg := col.Get(i) - projCol[i] = duration.Add(arg, p.constArg.Mul(-1)) + t_res := duration.Add(arg, p.constArg.Mul(-1)) + rounded_res := t_res.Round(time.Microsecond) + if rounded_res.After(tree.MaxSupportedTime) || rounded_res.Before(tree.MinSupportedTime) { + colexecerror.ExpectedError(errors.Newf("timestamp %q exceeds supported timestamp bounds", t_res.Format(time.RFC3339))) + } + projCol[i] = t_res } } // _outNulls has been updated from within the _ASSIGN function to include diff --git a/pkg/sql/colexec/colexecproj/proj_non_const_ops.eg.go b/pkg/sql/colexec/colexecproj/proj_non_const_ops.eg.go index d12bfe6c3c64..c25b6229ad38 100644 --- a/pkg/sql/colexec/colexecproj/proj_non_const_ops.eg.go +++ b/pkg/sql/colexec/colexecproj/proj_non_const_ops.eg.go @@ -12,6 +12,7 @@ package colexecproj import ( "bytes" "math" + "time" "unsafe" "github.com/cockroachdb/cockroach/pkg/col/coldata" @@ -6262,7 +6263,12 @@ func (p projPlusTimestampIntervalOp) Next() coldata.Batch { // null. 
arg1 := col1.Get(i) arg2 := col2.Get(i) - projCol[i] = duration.Add(arg1, arg2) + t_res := duration.Add(arg1, arg2) + rounded_res := t_res.Round(time.Microsecond) + if rounded_res.After(tree.MaxSupportedTime) || rounded_res.Before(tree.MinSupportedTime) { + colexecerror.ExpectedError(errors.Newf("timestamp %q exceeds supported timestamp bounds", t_res.Format(time.RFC3339))) + } + projCol[i] = t_res } } } else { @@ -6277,7 +6283,12 @@ func (p projPlusTimestampIntervalOp) Next() coldata.Batch { arg1 := col1.Get(i) //gcassert:bce arg2 := col2.Get(i) - projCol[i] = duration.Add(arg1, arg2) + t_res := duration.Add(arg1, arg2) + rounded_res := t_res.Round(time.Microsecond) + if rounded_res.After(tree.MaxSupportedTime) || rounded_res.Before(tree.MinSupportedTime) { + colexecerror.ExpectedError(errors.Newf("timestamp %q exceeds supported timestamp bounds", t_res.Format(time.RFC3339))) + } + projCol[i] = t_res } } } @@ -6293,7 +6304,12 @@ func (p projPlusTimestampIntervalOp) Next() coldata.Batch { for _, i := range sel { arg1 := col1.Get(i) arg2 := col2.Get(i) - projCol[i] = duration.Add(arg1, arg2) + t_res := duration.Add(arg1, arg2) + rounded_res := t_res.Round(time.Microsecond) + if rounded_res.After(tree.MaxSupportedTime) || rounded_res.Before(tree.MinSupportedTime) { + colexecerror.ExpectedError(errors.Newf("timestamp %q exceeds supported timestamp bounds", t_res.Format(time.RFC3339))) + } + projCol[i] = t_res } } else { _ = projCol.Get(n - 1) @@ -6304,7 +6320,12 @@ func (p projPlusTimestampIntervalOp) Next() coldata.Batch { arg1 := col1.Get(i) //gcassert:bce arg2 := col2.Get(i) - projCol[i] = duration.Add(arg1, arg2) + t_res := duration.Add(arg1, arg2) + rounded_res := t_res.Round(time.Microsecond) + if rounded_res.After(tree.MaxSupportedTime) || rounded_res.Before(tree.MinSupportedTime) { + colexecerror.ExpectedError(errors.Newf("timestamp %q exceeds supported timestamp bounds", t_res.Format(time.RFC3339))) + } + projCol[i] = t_res } } // _outNulls has been updated from within the _ASSIGN function to include @@ -6364,7 +6385,12 @@ func (p projPlusIntervalTimestampOp) Next() coldata.Batch { // null. 
arg1 := col1.Get(i) arg2 := col2.Get(i) - projCol[i] = duration.Add(arg2, arg1) + t_res := duration.Add(arg2, arg1) + rounded_res := t_res.Round(time.Microsecond) + if rounded_res.After(tree.MaxSupportedTime) || rounded_res.Before(tree.MinSupportedTime) { + colexecerror.ExpectedError(errors.Newf("timestamp %q exceeds supported timestamp bounds", t_res.Format(time.RFC3339))) + } + projCol[i] = t_res } } } else { @@ -6379,7 +6405,12 @@ func (p projPlusIntervalTimestampOp) Next() coldata.Batch { arg1 := col1.Get(i) //gcassert:bce arg2 := col2.Get(i) - projCol[i] = duration.Add(arg2, arg1) + t_res := duration.Add(arg2, arg1) + rounded_res := t_res.Round(time.Microsecond) + if rounded_res.After(tree.MaxSupportedTime) || rounded_res.Before(tree.MinSupportedTime) { + colexecerror.ExpectedError(errors.Newf("timestamp %q exceeds supported timestamp bounds", t_res.Format(time.RFC3339))) + } + projCol[i] = t_res } } } @@ -6395,7 +6426,12 @@ func (p projPlusIntervalTimestampOp) Next() coldata.Batch { for _, i := range sel { arg1 := col1.Get(i) arg2 := col2.Get(i) - projCol[i] = duration.Add(arg2, arg1) + t_res := duration.Add(arg2, arg1) + rounded_res := t_res.Round(time.Microsecond) + if rounded_res.After(tree.MaxSupportedTime) || rounded_res.Before(tree.MinSupportedTime) { + colexecerror.ExpectedError(errors.Newf("timestamp %q exceeds supported timestamp bounds", t_res.Format(time.RFC3339))) + } + projCol[i] = t_res } } else { _ = projCol.Get(n - 1) @@ -6406,7 +6442,12 @@ func (p projPlusIntervalTimestampOp) Next() coldata.Batch { arg1 := col1.Get(i) //gcassert:bce arg2 := col2.Get(i) - projCol[i] = duration.Add(arg2, arg1) + t_res := duration.Add(arg2, arg1) + rounded_res := t_res.Round(time.Microsecond) + if rounded_res.After(tree.MaxSupportedTime) || rounded_res.Before(tree.MinSupportedTime) { + colexecerror.ExpectedError(errors.Newf("timestamp %q exceeds supported timestamp bounds", t_res.Format(time.RFC3339))) + } + projCol[i] = t_res } } // _outNulls has been updated from within the _ASSIGN function to include @@ -10232,7 +10273,12 @@ func (p projMinusTimestampIntervalOp) Next() coldata.Batch { // null. 
arg1 := col1.Get(i) arg2 := col2.Get(i) - projCol[i] = duration.Add(arg1, arg2.Mul(-1)) + t_res := duration.Add(arg1, arg2.Mul(-1)) + rounded_res := t_res.Round(time.Microsecond) + if rounded_res.After(tree.MaxSupportedTime) || rounded_res.Before(tree.MinSupportedTime) { + colexecerror.ExpectedError(errors.Newf("timestamp %q exceeds supported timestamp bounds", t_res.Format(time.RFC3339))) + } + projCol[i] = t_res } } } else { @@ -10247,7 +10293,12 @@ func (p projMinusTimestampIntervalOp) Next() coldata.Batch { arg1 := col1.Get(i) //gcassert:bce arg2 := col2.Get(i) - projCol[i] = duration.Add(arg1, arg2.Mul(-1)) + t_res := duration.Add(arg1, arg2.Mul(-1)) + rounded_res := t_res.Round(time.Microsecond) + if rounded_res.After(tree.MaxSupportedTime) || rounded_res.Before(tree.MinSupportedTime) { + colexecerror.ExpectedError(errors.Newf("timestamp %q exceeds supported timestamp bounds", t_res.Format(time.RFC3339))) + } + projCol[i] = t_res } } } @@ -10263,7 +10314,12 @@ func (p projMinusTimestampIntervalOp) Next() coldata.Batch { for _, i := range sel { arg1 := col1.Get(i) arg2 := col2.Get(i) - projCol[i] = duration.Add(arg1, arg2.Mul(-1)) + t_res := duration.Add(arg1, arg2.Mul(-1)) + rounded_res := t_res.Round(time.Microsecond) + if rounded_res.After(tree.MaxSupportedTime) || rounded_res.Before(tree.MinSupportedTime) { + colexecerror.ExpectedError(errors.Newf("timestamp %q exceeds supported timestamp bounds", t_res.Format(time.RFC3339))) + } + projCol[i] = t_res } } else { _ = projCol.Get(n - 1) @@ -10274,7 +10330,12 @@ func (p projMinusTimestampIntervalOp) Next() coldata.Batch { arg1 := col1.Get(i) //gcassert:bce arg2 := col2.Get(i) - projCol[i] = duration.Add(arg1, arg2.Mul(-1)) + t_res := duration.Add(arg1, arg2.Mul(-1)) + rounded_res := t_res.Round(time.Microsecond) + if rounded_res.After(tree.MaxSupportedTime) || rounded_res.Before(tree.MinSupportedTime) { + colexecerror.ExpectedError(errors.Newf("timestamp %q exceeds supported timestamp bounds", t_res.Format(time.RFC3339))) + } + projCol[i] = t_res } } // _outNulls has been updated from within the _ASSIGN function to include diff --git a/pkg/sql/colexec/colexecwindow/BUILD.bazel b/pkg/sql/colexec/colexecwindow/BUILD.bazel index 5c2a9c820783..d0974ac6f667 100644 --- a/pkg/sql/colexec/colexecwindow/BUILD.bazel +++ b/pkg/sql/colexec/colexecwindow/BUILD.bazel @@ -15,6 +15,7 @@ go_library( "//pkg/col/coldata", # keep "//pkg/col/coldataext", # keep "//pkg/col/typeconv", # keep + "//pkg/sql/catalog/descpb", "//pkg/sql/colcontainer", # keep "//pkg/sql/colconv", # keep "//pkg/sql/colexec/colexecbase", @@ -25,16 +26,19 @@ go_library( "//pkg/sql/colmem", # keep "//pkg/sql/execinfra", # keep "//pkg/sql/execinfrapb", # keep + "//pkg/sql/randgen", "//pkg/sql/rowenc", # keep "//pkg/sql/sem/builtins", # keep "//pkg/sql/sem/tree", # keep "//pkg/sql/types", # keep "//pkg/util/duration", # keep + "//pkg/util/encoding", "//pkg/util/mon", # keep "//pkg/util/timeutil/pgdate", # keep "@com_github_cockroachdb_apd_v2//:apd", # keep "@com_github_cockroachdb_errors//:errors", # keep "@com_github_marusama_semaphore//:semaphore", # keep + "@com_github_stretchr_testify//require", ], ) @@ -64,7 +68,6 @@ go_test( "//pkg/sql/execinfra", "//pkg/sql/execinfrapb", "//pkg/sql/randgen", - "//pkg/sql/rowenc", "//pkg/sql/sem/tree", "//pkg/sql/types", "//pkg/testutils/buildutil", @@ -83,8 +86,11 @@ go_test( # Map between target name and relevant template. 
targets = [ + ("first_value.eg.go", "first_last_nth_value_tmpl.go"), ("lag.eg.go", "lead_lag_tmpl.go"), + ("last_value.eg.go", "first_last_nth_value_tmpl.go"), ("lead.eg.go", "lead_lag_tmpl.go"), + ("nth_value.eg.go", "first_last_nth_value_tmpl.go"), ("ntile.eg.go", "ntile_tmpl.go"), ("range_offset_handler.eg.go", "range_offset_handler_tmpl.go"), ("rank.eg.go", "rank_tmpl.go"), diff --git a/pkg/sql/colexec/colexecwindow/buffered_window.go b/pkg/sql/colexec/colexecwindow/buffered_window.go index 6560b655edf1..333733b086dd 100644 --- a/pkg/sql/colexec/colexecwindow/buffered_window.go +++ b/pkg/sql/colexec/colexecwindow/buffered_window.go @@ -358,3 +358,51 @@ func (b *bufferedWindowOp) Close() error { b.windower.Close() return nil } + +// partitionSeekerBase extracts common fields and methods for buffered windower +// implementations that use the same logic for the seekNextPartition phase. +type partitionSeekerBase struct { + colexecop.InitHelper + partitionColIdx int + partitionSize int + + buffer *colexecutils.SpillingBuffer +} + +func (b *partitionSeekerBase) seekNextPartition( + batch coldata.Batch, startIdx int, isPartitionStart bool, +) (nextPartitionIdx int) { + n := batch.Length() + if b.partitionColIdx == -1 { + // There is only one partition, so it includes the entirety of this batch. + b.partitionSize += n + nextPartitionIdx = n + } else { + i := startIdx + partitionCol := batch.ColVec(b.partitionColIdx).Bool() + _ = partitionCol[n-1] + // Find the location of the start of the next partition (and the end of the + // current one). + if isPartitionStart { + i++ + } + if i < n { + _ = partitionCol[i] + for ; i < n; i++ { + //gcassert:bce + if partitionCol[i] { + break + } + } + } + b.partitionSize += i - startIdx + nextPartitionIdx = i + } + + // Add all tuples from the argument column that fall within the current + // partition to the buffer so that they can be accessed later. + if startIdx < nextPartitionIdx { + b.buffer.AppendTuples(b.Ctx, batch, startIdx, nextPartitionIdx) + } + return nextPartitionIdx +} diff --git a/pkg/sql/colexec/colexecwindow/first_last_nth_value_tmpl.go b/pkg/sql/colexec/colexecwindow/first_last_nth_value_tmpl.go new file mode 100644 index 000000000000..6b8c7600fbf7 --- /dev/null +++ b/pkg/sql/colexec/colexecwindow/first_last_nth_value_tmpl.go @@ -0,0 +1,231 @@ +// Copyright 2021 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +// {{/* +// +build execgen_template +// +// This file is the execgen template for first_value.eg.go, last_value.eg.go, +// and nth_value.eg.go. It's formatted in a special way, so it's both valid Go +// and a valid text/template input. This permits editing this file with editor +// support. 
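The new partitionSeekerBase.seekNextPartition above scans a boolean partition column in which true marks the first row of a new partition. A plain-slice distillation of that loop (the coldata vectors, the bounds-check-elimination hints, and the SpillingBuffer append are omitted):

package main

import "fmt"

// nextPartitionIdx mirrors the scan in seekNextPartition: the current
// partition ends at the next true value at or after startIdx, skipping
// startIdx itself when it is known to be a partition start.
func nextPartitionIdx(partitionCol []bool, startIdx int, isPartitionStart bool) int {
	i := startIdx
	if isPartitionStart {
		i++
	}
	for ; i < len(partitionCol); i++ {
		if partitionCol[i] {
			break
		}
	}
	return i
}

func main() {
	// Three partitions starting at rows 0, 3, and 5.
	col := []bool{true, false, false, true, false, true}
	fmt.Println(nextPartitionIdx(col, 0, true)) // 3: first partition is rows [0, 3)
	fmt.Println(nextPartitionIdx(col, 3, true)) // 5: second partition is rows [3, 5)
}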
+// +// */}} + +package colexecwindow + +import ( + "context" + + "github.com/cockroachdb/cockroach/pkg/col/coldata" + "github.com/cockroachdb/cockroach/pkg/col/typeconv" + "github.com/cockroachdb/cockroach/pkg/sql/colcontainer" + "github.com/cockroachdb/cockroach/pkg/sql/colexec/colexecutils" + "github.com/cockroachdb/cockroach/pkg/sql/colexecerror" + "github.com/cockroachdb/cockroach/pkg/sql/colexecop" + "github.com/cockroachdb/cockroach/pkg/sql/colmem" + "github.com/cockroachdb/cockroach/pkg/sql/execinfrapb" + "github.com/cockroachdb/cockroach/pkg/sql/sem/builtins" + "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" + "github.com/cockroachdb/cockroach/pkg/sql/types" + "github.com/cockroachdb/cockroach/pkg/util/mon" + "github.com/cockroachdb/errors" + "github.com/marusama/semaphore" +) + +// {{/* + +// Declarations to make the template compile properly. + +// _CANONICAL_TYPE_FAMILY is the template variable. +const _CANONICAL_TYPE_FAMILY = types.UnknownFamily + +// _TYPE_WIDTH is the template variable. +const _TYPE_WIDTH = 0 + +// */}} + +// New_UPPERCASE_NAMEOperator creates a new Operator that computes window +// function _OP_NAME. outputColIdx specifies in which coldata.Vec the operator +// should put its output (if there is no such column, a new column is appended). +func New_UPPERCASE_NAMEOperator( + evalCtx *tree.EvalContext, + frame *execinfrapb.WindowerSpec_Frame, + ordering *execinfrapb.Ordering, + unlimitedAllocator *colmem.Allocator, + bufferAllocator *colmem.Allocator, + memoryLimit int64, + diskQueueCfg colcontainer.DiskQueueCfg, + fdSemaphore semaphore.Semaphore, + diskAcc *mon.BoundAccount, + input colexecop.Operator, + inputTypes []*types.T, + outputColIdx int, + partitionColIdx int, + peersColIdx int, + argIdxs []int, +) (colexecop.Operator, error) { + framer := newWindowFramer(evalCtx, frame, ordering, inputTypes, peersColIdx) + colsToStore := []int{argIdxs[0]} + colsToStore = framer.getColsToStore(colsToStore) + + // Allow the direct-access buffer 10% of the available memory. The rest will + // be given to the bufferedWindowOp queue. While it is somewhat more important + // for the direct-access buffer tuples to be kept in-memory, it only has to + // store a single column. TODO(drewk): play around with benchmarks to find a + // good empirically-supported fraction to use. + bufferMemLimit := int64(float64(memoryLimit) * 0.10) + buffer := colexecutils.NewSpillingBuffer( + bufferAllocator, bufferMemLimit, diskQueueCfg, fdSemaphore, inputTypes, diskAcc, colsToStore...) + base := _OP_NAMEBase{ + partitionSeekerBase: partitionSeekerBase{ + buffer: buffer, + partitionColIdx: partitionColIdx, + }, + framer: framer, + outputColIdx: outputColIdx, + bufferArgIdx: 0, // The arg column is the first column in the buffer. 
+ } + argType := inputTypes[argIdxs[0]] + switch typeconv.TypeFamilyToCanonicalTypeFamily(argType.Family()) { + // {{range .}} + case _CANONICAL_TYPE_FAMILY: + switch argType.Width() { + // {{range .WidthOverloads}} + case _TYPE_WIDTH: + windower := &_OP_NAME_TYPEWindow{_OP_NAMEBase: base} + // {{if .IsNthValue}} + windower.nColIdx = argIdxs[1] + // {{end}} + return newBufferedWindowOperator( + windower, unlimitedAllocator, memoryLimit-bufferMemLimit, diskQueueCfg, + fdSemaphore, diskAcc, input, inputTypes, argType, outputColIdx, + ), nil + // {{end}} + } + // {{end}} + } + return nil, errors.Errorf("unsupported _OP_NAME window operator type %s", argType.Name()) +} + +type _OP_NAMEBase struct { + partitionSeekerBase + colexecop.CloserHelper + framer windowFramer + + outputColIdx int + bufferArgIdx int +} + +// {{range .}} +// {{range .WidthOverloads}} + +type _OP_NAME_TYPEWindow struct { + _OP_NAMEBase + // {{if .IsNthValue}} + nColIdx int + // {{end}} +} + +var _ bufferedWindower = &_OP_NAME_TYPEWindow{} + +// processBatch implements the bufferedWindower interface. +func (w *_OP_NAME_TYPEWindow) processBatch(batch coldata.Batch, startIdx, endIdx int) { + if startIdx >= endIdx { + // No processing needs to be done for this portion of the current partition. + return + } + outputVec := batch.ColVec(w.outputColIdx) + outputCol := outputVec.TemplateType() + outputNulls := outputVec.Nulls() + // {{if .Sliceable}} + _, _ = outputCol.Get(startIdx), outputCol.Get(endIdx-1) + // {{end}} + + // {{if .IsNthValue}} + nVec := batch.ColVec(w.nColIdx) + nCol := nVec.Int64() + nNulls := nVec.Nulls() + _, _ = nCol[startIdx], nCol[endIdx-1] + // {{end}} + + for i := startIdx; i < endIdx; i++ { + w.framer.next(w.Ctx) + // {{if .IsFirstValue}} + requestedIdx := w.framer.frameFirstIdx() + // {{else if .IsLastValue}} + requestedIdx := w.framer.frameLastIdx() + // {{else}} + if nNulls.MaybeHasNulls() && nNulls.NullAt(i) { + // TODO(drewk): this could be pulled out of the loop, but for now keep the + // templating simple. + outputNulls.SetNull(i) + continue + } + // gcassert:bce + nVal := int(nCol[i]) + if nVal <= 0 { + colexecerror.ExpectedError(builtins.ErrInvalidArgumentForNthValue) + } + requestedIdx := w.framer.frameNthIdx(nVal) + // {{end}} + if requestedIdx == -1 { + // The requested row does not exist. + outputNulls.SetNull(i) + continue + } + + vec, idx, _ := w.buffer.GetVecWithTuple(w.Ctx, w.bufferArgIdx, requestedIdx) + if vec.Nulls().MaybeHasNulls() && vec.Nulls().NullAt(idx) { + outputNulls.SetNull(i) + continue + } + col := vec.TemplateType() + // {{if .IsBytesLike}} + // We have to use CopySlice here because the column already has a length of + // n elements, and Set cannot set values before the last one. + outputCol.CopySlice(col, i, idx, idx+1) + // {{else}} + val := col.Get(idx) + // {{if .Sliceable}} + //gcassert:bce + // {{end}} + outputCol.Set(i, val) + // {{end}} + } +} + +// {{end}} +// {{end}} + +// transitionToProcessing implements the bufferedWindower interface. +func (b *_OP_NAMEBase) transitionToProcessing() { + b.framer.startPartition(b.Ctx, b.partitionSize, b.buffer) +} + +// startNewPartition implements the bufferedWindower interface. +func (b *_OP_NAMEBase) startNewPartition() { + b.partitionSize = 0 + b.buffer.Reset(b.Ctx) +} + +// Init implements the bufferedWindower interface. +func (b *_OP_NAMEBase) Init(ctx context.Context) { + if !b.InitHelper.Init(ctx) { + return + } +} + +// Close implements the bufferedWindower interface. 
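The template's processBatch branches above reduce the NTH_VALUE argument handling to a small rule: a NULL n yields NULL, n <= 0 raises builtins.ErrInvalidArgumentForNthValue, and a frame with fewer than n rows yields NULL rather than an error. A minimal sketch over an in-memory frame (the real operator reads rows back through the SpillingBuffer, and the error text here paraphrases the builtin's):

package main

import (
	"errors"
	"fmt"
)

// errInvalidN stands in for builtins.ErrInvalidArgumentForNthValue.
var errInvalidN = errors.New("argument of nth_value() must be greater than zero")

// nthValue returns the n-th row of a fully materialized frame; ok=false
// models a NULL result when the requested row does not exist.
func nthValue(frame []int64, n int) (val int64, ok bool, err error) {
	if n <= 0 {
		return 0, false, errInvalidN
	}
	if n > len(frame) {
		return 0, false, nil // the requested row does not exist
	}
	return frame[n-1], true, nil
}

func main() {
	frame := []int64{10, 20, 30}
	v, ok, _ := nthValue(frame, 2)
	fmt.Println(v, ok) // 20 true
	_, ok, _ = nthValue(frame, 5)
	fmt.Println(ok) // false, i.e. NULL
}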
+func (b *_OP_NAMEBase) Close() { + if !b.CloserHelper.Close() { + return + } + b.buffer.Close(b.EnsureCtx()) +} diff --git a/pkg/sql/colexec/colexecwindow/first_value.eg.go b/pkg/sql/colexec/colexecwindow/first_value.eg.go new file mode 100644 index 000000000000..41cc6549aecd --- /dev/null +++ b/pkg/sql/colexec/colexecwindow/first_value.eg.go @@ -0,0 +1,625 @@ +// Code generated by execgen; DO NOT EDIT. +// Copyright 2021 The Cockroach Authors. +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package colexecwindow + +import ( + "context" + + "github.com/cockroachdb/cockroach/pkg/col/coldata" + "github.com/cockroachdb/cockroach/pkg/col/typeconv" + "github.com/cockroachdb/cockroach/pkg/sql/colcontainer" + "github.com/cockroachdb/cockroach/pkg/sql/colexec/colexecutils" + "github.com/cockroachdb/cockroach/pkg/sql/colexecop" + "github.com/cockroachdb/cockroach/pkg/sql/colmem" + "github.com/cockroachdb/cockroach/pkg/sql/execinfrapb" + "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" + "github.com/cockroachdb/cockroach/pkg/sql/types" + "github.com/cockroachdb/cockroach/pkg/util/mon" + "github.com/cockroachdb/errors" + "github.com/marusama/semaphore" +) + +// NewFirstValueOperator creates a new Operator that computes window +// function firstValue. outputColIdx specifies in which coldata.Vec the operator +// should put its output (if there is no such column, a new column is appended). +func NewFirstValueOperator( + evalCtx *tree.EvalContext, + frame *execinfrapb.WindowerSpec_Frame, + ordering *execinfrapb.Ordering, + unlimitedAllocator *colmem.Allocator, + bufferAllocator *colmem.Allocator, + memoryLimit int64, + diskQueueCfg colcontainer.DiskQueueCfg, + fdSemaphore semaphore.Semaphore, + diskAcc *mon.BoundAccount, + input colexecop.Operator, + inputTypes []*types.T, + outputColIdx int, + partitionColIdx int, + peersColIdx int, + argIdxs []int, +) (colexecop.Operator, error) { + framer := newWindowFramer(evalCtx, frame, ordering, inputTypes, peersColIdx) + colsToStore := []int{argIdxs[0]} + colsToStore = framer.getColsToStore(colsToStore) + + // Allow the direct-access buffer 10% of the available memory. The rest will + // be given to the bufferedWindowOp queue. While it is somewhat more important + // for the direct-access buffer tuples to be kept in-memory, it only has to + // store a single column. TODO(drewk): play around with benchmarks to find a + // good empirically-supported fraction to use. + bufferMemLimit := int64(float64(memoryLimit) * 0.10) + buffer := colexecutils.NewSpillingBuffer( + bufferAllocator, bufferMemLimit, diskQueueCfg, fdSemaphore, inputTypes, diskAcc, colsToStore...) + base := firstValueBase{ + partitionSeekerBase: partitionSeekerBase{ + buffer: buffer, + partitionColIdx: partitionColIdx, + }, + framer: framer, + outputColIdx: outputColIdx, + bufferArgIdx: 0, // The arg column is the first column in the buffer. 
+ } + argType := inputTypes[argIdxs[0]] + switch typeconv.TypeFamilyToCanonicalTypeFamily(argType.Family()) { + case types.BoolFamily: + switch argType.Width() { + case -1: + default: + windower := &firstValueBoolWindow{firstValueBase: base} + return newBufferedWindowOperator( + windower, unlimitedAllocator, memoryLimit-bufferMemLimit, diskQueueCfg, + fdSemaphore, diskAcc, input, inputTypes, argType, outputColIdx, + ), nil + } + case types.BytesFamily: + switch argType.Width() { + case -1: + default: + windower := &firstValueBytesWindow{firstValueBase: base} + return newBufferedWindowOperator( + windower, unlimitedAllocator, memoryLimit-bufferMemLimit, diskQueueCfg, + fdSemaphore, diskAcc, input, inputTypes, argType, outputColIdx, + ), nil + } + case types.DecimalFamily: + switch argType.Width() { + case -1: + default: + windower := &firstValueDecimalWindow{firstValueBase: base} + return newBufferedWindowOperator( + windower, unlimitedAllocator, memoryLimit-bufferMemLimit, diskQueueCfg, + fdSemaphore, diskAcc, input, inputTypes, argType, outputColIdx, + ), nil + } + case types.IntFamily: + switch argType.Width() { + case 16: + windower := &firstValueInt16Window{firstValueBase: base} + return newBufferedWindowOperator( + windower, unlimitedAllocator, memoryLimit-bufferMemLimit, diskQueueCfg, + fdSemaphore, diskAcc, input, inputTypes, argType, outputColIdx, + ), nil + case 32: + windower := &firstValueInt32Window{firstValueBase: base} + return newBufferedWindowOperator( + windower, unlimitedAllocator, memoryLimit-bufferMemLimit, diskQueueCfg, + fdSemaphore, diskAcc, input, inputTypes, argType, outputColIdx, + ), nil + case -1: + default: + windower := &firstValueInt64Window{firstValueBase: base} + return newBufferedWindowOperator( + windower, unlimitedAllocator, memoryLimit-bufferMemLimit, diskQueueCfg, + fdSemaphore, diskAcc, input, inputTypes, argType, outputColIdx, + ), nil + } + case types.FloatFamily: + switch argType.Width() { + case -1: + default: + windower := &firstValueFloat64Window{firstValueBase: base} + return newBufferedWindowOperator( + windower, unlimitedAllocator, memoryLimit-bufferMemLimit, diskQueueCfg, + fdSemaphore, diskAcc, input, inputTypes, argType, outputColIdx, + ), nil + } + case types.TimestampTZFamily: + switch argType.Width() { + case -1: + default: + windower := &firstValueTimestampWindow{firstValueBase: base} + return newBufferedWindowOperator( + windower, unlimitedAllocator, memoryLimit-bufferMemLimit, diskQueueCfg, + fdSemaphore, diskAcc, input, inputTypes, argType, outputColIdx, + ), nil + } + case types.IntervalFamily: + switch argType.Width() { + case -1: + default: + windower := &firstValueIntervalWindow{firstValueBase: base} + return newBufferedWindowOperator( + windower, unlimitedAllocator, memoryLimit-bufferMemLimit, diskQueueCfg, + fdSemaphore, diskAcc, input, inputTypes, argType, outputColIdx, + ), nil + } + case types.JsonFamily: + switch argType.Width() { + case -1: + default: + windower := &firstValueJSONWindow{firstValueBase: base} + return newBufferedWindowOperator( + windower, unlimitedAllocator, memoryLimit-bufferMemLimit, diskQueueCfg, + fdSemaphore, diskAcc, input, inputTypes, argType, outputColIdx, + ), nil + } + case typeconv.DatumVecCanonicalTypeFamily: + switch argType.Width() { + case -1: + default: + windower := &firstValueDatumWindow{firstValueBase: base} + return newBufferedWindowOperator( + windower, unlimitedAllocator, memoryLimit-bufferMemLimit, diskQueueCfg, + fdSemaphore, diskAcc, input, inputTypes, argType, outputColIdx, + ), 
nil + } + } + return nil, errors.Errorf("unsupported firstValue window operator type %s", argType.Name()) +} + +type firstValueBase struct { + partitionSeekerBase + colexecop.CloserHelper + framer windowFramer + + outputColIdx int + bufferArgIdx int +} + +type firstValueBoolWindow struct { + firstValueBase +} + +var _ bufferedWindower = &firstValueBoolWindow{} + +// processBatch implements the bufferedWindower interface. +func (w *firstValueBoolWindow) processBatch(batch coldata.Batch, startIdx, endIdx int) { + if startIdx >= endIdx { + // No processing needs to be done for this portion of the current partition. + return + } + outputVec := batch.ColVec(w.outputColIdx) + outputCol := outputVec.Bool() + outputNulls := outputVec.Nulls() + _, _ = outputCol.Get(startIdx), outputCol.Get(endIdx-1) + + for i := startIdx; i < endIdx; i++ { + w.framer.next(w.Ctx) + requestedIdx := w.framer.frameFirstIdx() + if requestedIdx == -1 { + // The requested row does not exist. + outputNulls.SetNull(i) + continue + } + + vec, idx, _ := w.buffer.GetVecWithTuple(w.Ctx, w.bufferArgIdx, requestedIdx) + if vec.Nulls().MaybeHasNulls() && vec.Nulls().NullAt(idx) { + outputNulls.SetNull(i) + continue + } + col := vec.Bool() + val := col.Get(idx) + //gcassert:bce + outputCol.Set(i, val) + } +} + +type firstValueBytesWindow struct { + firstValueBase +} + +var _ bufferedWindower = &firstValueBytesWindow{} + +// processBatch implements the bufferedWindower interface. +func (w *firstValueBytesWindow) processBatch(batch coldata.Batch, startIdx, endIdx int) { + if startIdx >= endIdx { + // No processing needs to be done for this portion of the current partition. + return + } + outputVec := batch.ColVec(w.outputColIdx) + outputCol := outputVec.Bytes() + outputNulls := outputVec.Nulls() + + for i := startIdx; i < endIdx; i++ { + w.framer.next(w.Ctx) + requestedIdx := w.framer.frameFirstIdx() + if requestedIdx == -1 { + // The requested row does not exist. + outputNulls.SetNull(i) + continue + } + + vec, idx, _ := w.buffer.GetVecWithTuple(w.Ctx, w.bufferArgIdx, requestedIdx) + if vec.Nulls().MaybeHasNulls() && vec.Nulls().NullAt(idx) { + outputNulls.SetNull(i) + continue + } + col := vec.Bytes() + // We have to use CopySlice here because the column already has a length of + // n elements, and Set cannot set values before the last one. + outputCol.CopySlice(col, i, idx, idx+1) + } +} + +type firstValueDecimalWindow struct { + firstValueBase +} + +var _ bufferedWindower = &firstValueDecimalWindow{} + +// processBatch implements the bufferedWindower interface. +func (w *firstValueDecimalWindow) processBatch(batch coldata.Batch, startIdx, endIdx int) { + if startIdx >= endIdx { + // No processing needs to be done for this portion of the current partition. + return + } + outputVec := batch.ColVec(w.outputColIdx) + outputCol := outputVec.Decimal() + outputNulls := outputVec.Nulls() + _, _ = outputCol.Get(startIdx), outputCol.Get(endIdx-1) + + for i := startIdx; i < endIdx; i++ { + w.framer.next(w.Ctx) + requestedIdx := w.framer.frameFirstIdx() + if requestedIdx == -1 { + // The requested row does not exist. 
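+ // Per SQL window-function semantics, first_value over an empty frame is NULL.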
+ outputNulls.SetNull(i) + continue + } + + vec, idx, _ := w.buffer.GetVecWithTuple(w.Ctx, w.bufferArgIdx, requestedIdx) + if vec.Nulls().MaybeHasNulls() && vec.Nulls().NullAt(idx) { + outputNulls.SetNull(i) + continue + } + col := vec.Decimal() + val := col.Get(idx) + //gcassert:bce + outputCol.Set(i, val) + } +} + +type firstValueInt16Window struct { + firstValueBase +} + +var _ bufferedWindower = &firstValueInt16Window{} + +// processBatch implements the bufferedWindower interface. +func (w *firstValueInt16Window) processBatch(batch coldata.Batch, startIdx, endIdx int) { + if startIdx >= endIdx { + // No processing needs to be done for this portion of the current partition. + return + } + outputVec := batch.ColVec(w.outputColIdx) + outputCol := outputVec.Int16() + outputNulls := outputVec.Nulls() + _, _ = outputCol.Get(startIdx), outputCol.Get(endIdx-1) + + for i := startIdx; i < endIdx; i++ { + w.framer.next(w.Ctx) + requestedIdx := w.framer.frameFirstIdx() + if requestedIdx == -1 { + // The requested row does not exist. + outputNulls.SetNull(i) + continue + } + + vec, idx, _ := w.buffer.GetVecWithTuple(w.Ctx, w.bufferArgIdx, requestedIdx) + if vec.Nulls().MaybeHasNulls() && vec.Nulls().NullAt(idx) { + outputNulls.SetNull(i) + continue + } + col := vec.Int16() + val := col.Get(idx) + //gcassert:bce + outputCol.Set(i, val) + } +} + +type firstValueInt32Window struct { + firstValueBase +} + +var _ bufferedWindower = &firstValueInt32Window{} + +// processBatch implements the bufferedWindower interface. +func (w *firstValueInt32Window) processBatch(batch coldata.Batch, startIdx, endIdx int) { + if startIdx >= endIdx { + // No processing needs to be done for this portion of the current partition. + return + } + outputVec := batch.ColVec(w.outputColIdx) + outputCol := outputVec.Int32() + outputNulls := outputVec.Nulls() + _, _ = outputCol.Get(startIdx), outputCol.Get(endIdx-1) + + for i := startIdx; i < endIdx; i++ { + w.framer.next(w.Ctx) + requestedIdx := w.framer.frameFirstIdx() + if requestedIdx == -1 { + // The requested row does not exist. + outputNulls.SetNull(i) + continue + } + + vec, idx, _ := w.buffer.GetVecWithTuple(w.Ctx, w.bufferArgIdx, requestedIdx) + if vec.Nulls().MaybeHasNulls() && vec.Nulls().NullAt(idx) { + outputNulls.SetNull(i) + continue + } + col := vec.Int32() + val := col.Get(idx) + //gcassert:bce + outputCol.Set(i, val) + } +} + +type firstValueInt64Window struct { + firstValueBase +} + +var _ bufferedWindower = &firstValueInt64Window{} + +// processBatch implements the bufferedWindower interface. +func (w *firstValueInt64Window) processBatch(batch coldata.Batch, startIdx, endIdx int) { + if startIdx >= endIdx { + // No processing needs to be done for this portion of the current partition. + return + } + outputVec := batch.ColVec(w.outputColIdx) + outputCol := outputVec.Int64() + outputNulls := outputVec.Nulls() + _, _ = outputCol.Get(startIdx), outputCol.Get(endIdx-1) + + for i := startIdx; i < endIdx; i++ { + w.framer.next(w.Ctx) + requestedIdx := w.framer.frameFirstIdx() + if requestedIdx == -1 { + // The requested row does not exist. 
+ outputNulls.SetNull(i) + continue + } + + vec, idx, _ := w.buffer.GetVecWithTuple(w.Ctx, w.bufferArgIdx, requestedIdx) + if vec.Nulls().MaybeHasNulls() && vec.Nulls().NullAt(idx) { + outputNulls.SetNull(i) + continue + } + col := vec.Int64() + val := col.Get(idx) + //gcassert:bce + outputCol.Set(i, val) + } +} + +type firstValueFloat64Window struct { + firstValueBase +} + +var _ bufferedWindower = &firstValueFloat64Window{} + +// processBatch implements the bufferedWindower interface. +func (w *firstValueFloat64Window) processBatch(batch coldata.Batch, startIdx, endIdx int) { + if startIdx >= endIdx { + // No processing needs to be done for this portion of the current partition. + return + } + outputVec := batch.ColVec(w.outputColIdx) + outputCol := outputVec.Float64() + outputNulls := outputVec.Nulls() + _, _ = outputCol.Get(startIdx), outputCol.Get(endIdx-1) + + for i := startIdx; i < endIdx; i++ { + w.framer.next(w.Ctx) + requestedIdx := w.framer.frameFirstIdx() + if requestedIdx == -1 { + // The requested row does not exist. + outputNulls.SetNull(i) + continue + } + + vec, idx, _ := w.buffer.GetVecWithTuple(w.Ctx, w.bufferArgIdx, requestedIdx) + if vec.Nulls().MaybeHasNulls() && vec.Nulls().NullAt(idx) { + outputNulls.SetNull(i) + continue + } + col := vec.Float64() + val := col.Get(idx) + //gcassert:bce + outputCol.Set(i, val) + } +} + +type firstValueTimestampWindow struct { + firstValueBase +} + +var _ bufferedWindower = &firstValueTimestampWindow{} + +// processBatch implements the bufferedWindower interface. +func (w *firstValueTimestampWindow) processBatch(batch coldata.Batch, startIdx, endIdx int) { + if startIdx >= endIdx { + // No processing needs to be done for this portion of the current partition. + return + } + outputVec := batch.ColVec(w.outputColIdx) + outputCol := outputVec.Timestamp() + outputNulls := outputVec.Nulls() + _, _ = outputCol.Get(startIdx), outputCol.Get(endIdx-1) + + for i := startIdx; i < endIdx; i++ { + w.framer.next(w.Ctx) + requestedIdx := w.framer.frameFirstIdx() + if requestedIdx == -1 { + // The requested row does not exist. + outputNulls.SetNull(i) + continue + } + + vec, idx, _ := w.buffer.GetVecWithTuple(w.Ctx, w.bufferArgIdx, requestedIdx) + if vec.Nulls().MaybeHasNulls() && vec.Nulls().NullAt(idx) { + outputNulls.SetNull(i) + continue + } + col := vec.Timestamp() + val := col.Get(idx) + //gcassert:bce + outputCol.Set(i, val) + } +} + +type firstValueIntervalWindow struct { + firstValueBase +} + +var _ bufferedWindower = &firstValueIntervalWindow{} + +// processBatch implements the bufferedWindower interface. +func (w *firstValueIntervalWindow) processBatch(batch coldata.Batch, startIdx, endIdx int) { + if startIdx >= endIdx { + // No processing needs to be done for this portion of the current partition. + return + } + outputVec := batch.ColVec(w.outputColIdx) + outputCol := outputVec.Interval() + outputNulls := outputVec.Nulls() + _, _ = outputCol.Get(startIdx), outputCol.Get(endIdx-1) + + for i := startIdx; i < endIdx; i++ { + w.framer.next(w.Ctx) + requestedIdx := w.framer.frameFirstIdx() + if requestedIdx == -1 { + // The requested row does not exist. 
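+ // frameFirstIdx returned its -1 sentinel, which indicates an empty frame.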
+ outputNulls.SetNull(i) + continue + } + + vec, idx, _ := w.buffer.GetVecWithTuple(w.Ctx, w.bufferArgIdx, requestedIdx) + if vec.Nulls().MaybeHasNulls() && vec.Nulls().NullAt(idx) { + outputNulls.SetNull(i) + continue + } + col := vec.Interval() + val := col.Get(idx) + //gcassert:bce + outputCol.Set(i, val) + } +} + +type firstValueJSONWindow struct { + firstValueBase +} + +var _ bufferedWindower = &firstValueJSONWindow{} + +// processBatch implements the bufferedWindower interface. +func (w *firstValueJSONWindow) processBatch(batch coldata.Batch, startIdx, endIdx int) { + if startIdx >= endIdx { + // No processing needs to be done for this portion of the current partition. + return + } + outputVec := batch.ColVec(w.outputColIdx) + outputCol := outputVec.JSON() + outputNulls := outputVec.Nulls() + + for i := startIdx; i < endIdx; i++ { + w.framer.next(w.Ctx) + requestedIdx := w.framer.frameFirstIdx() + if requestedIdx == -1 { + // The requested row does not exist. + outputNulls.SetNull(i) + continue + } + + vec, idx, _ := w.buffer.GetVecWithTuple(w.Ctx, w.bufferArgIdx, requestedIdx) + if vec.Nulls().MaybeHasNulls() && vec.Nulls().NullAt(idx) { + outputNulls.SetNull(i) + continue + } + col := vec.JSON() + // We have to use CopySlice here because the column already has a length of + // n elements, and Set cannot set values before the last one. + outputCol.CopySlice(col, i, idx, idx+1) + } +} + +type firstValueDatumWindow struct { + firstValueBase +} + +var _ bufferedWindower = &firstValueDatumWindow{} + +// processBatch implements the bufferedWindower interface. +func (w *firstValueDatumWindow) processBatch(batch coldata.Batch, startIdx, endIdx int) { + if startIdx >= endIdx { + // No processing needs to be done for this portion of the current partition. + return + } + outputVec := batch.ColVec(w.outputColIdx) + outputCol := outputVec.Datum() + outputNulls := outputVec.Nulls() + + for i := startIdx; i < endIdx; i++ { + w.framer.next(w.Ctx) + requestedIdx := w.framer.frameFirstIdx() + if requestedIdx == -1 { + // The requested row does not exist. + outputNulls.SetNull(i) + continue + } + + vec, idx, _ := w.buffer.GetVecWithTuple(w.Ctx, w.bufferArgIdx, requestedIdx) + if vec.Nulls().MaybeHasNulls() && vec.Nulls().NullAt(idx) { + outputNulls.SetNull(i) + continue + } + col := vec.Datum() + val := col.Get(idx) + outputCol.Set(i, val) + } +} + +// transitionToProcessing implements the bufferedWindower interface. +func (b *firstValueBase) transitionToProcessing() { + b.framer.startPartition(b.Ctx, b.partitionSize, b.buffer) +} + +// startNewPartition implements the bufferedWindower interface. +func (b *firstValueBase) startNewPartition() { + b.partitionSize = 0 + b.buffer.Reset(b.Ctx) +} + +// Init implements the bufferedWindower interface. +func (b *firstValueBase) Init(ctx context.Context) { + if !b.InitHelper.Init(ctx) { + return + } +} + +// Close implements the bufferedWindower interface. 
+func (b *firstValueBase) Close() { + if !b.CloserHelper.Close() { + return + } + b.buffer.Close(b.EnsureCtx()) +} diff --git a/pkg/sql/colexec/colexecwindow/lag.eg.go b/pkg/sql/colexec/colexecwindow/lag.eg.go index 5384928c4176..4c04d8dd6a2a 100644 --- a/pkg/sql/colexec/colexecwindow/lag.eg.go +++ b/pkg/sql/colexec/colexecwindow/lag.eg.go @@ -51,12 +51,14 @@ func NewLagOperator( buffer := colexecutils.NewSpillingBuffer( bufferAllocator, bufferMemLimit, diskQueueCfg, fdSemaphore, inputTypes, diskAcc, argIdx) base := lagBase{ - buffer: buffer, - outputColIdx: outputColIdx, - partitionColIdx: partitionColIdx, - argIdx: argIdx, - offsetIdx: offsetIdx, - defaultIdx: defaultIdx, + partitionSeekerBase: partitionSeekerBase{ + buffer: buffer, + partitionColIdx: partitionColIdx, + }, + outputColIdx: outputColIdx, + argIdx: argIdx, + offsetIdx: offsetIdx, + defaultIdx: defaultIdx, } argType := inputTypes[argIdx] switch typeconv.TypeFamilyToCanonicalTypeFamily(argType.Family()) { @@ -158,12 +160,10 @@ func NewLagOperator( // lagBase extracts common fields and methods of the lag windower // variations. type lagBase struct { - colexecop.InitHelper + partitionSeekerBase colexecop.CloserHelper lagComputeFields - buffer *colexecutils.SpillingBuffer - outputColIdx int partitionColIdx int argIdx int @@ -174,8 +174,7 @@ type lagBase struct { // lagComputeFields extracts the fields that are used to calculate lag // output values. type lagComputeFields struct { - partitionSize int - idx int + idx int } type lagBoolWindow struct { @@ -184,42 +183,6 @@ type lagBoolWindow struct { var _ bufferedWindower = &lagBoolWindow{} -func (w *lagBoolWindow) seekNextPartition( - batch coldata.Batch, startIdx int, isPartitionStart bool, -) (nextPartitionIdx int) { - n := batch.Length() - if w.partitionColIdx == -1 { - // There is only one partition, so it includes the entirety of this batch. - w.partitionSize += n - nextPartitionIdx = n - } else { - i := startIdx - partitionCol := batch.ColVec(w.partitionColIdx).Bool() - _ = partitionCol[n-1] - _ = partitionCol[i] - // Find the location of the start of the next partition (and the end of the - // current one). - for ; i < n; i++ { - //gcassert:bce - if partitionCol[i] { - // Don't break for the start of the current partition. - if !isPartitionStart || i != startIdx { - break - } - } - } - w.partitionSize += i - startIdx - nextPartitionIdx = i - } - - // Add all tuples from the argument column that fall within the current - // partition to the buffer so that they can be accessed later. - if startIdx < nextPartitionIdx { - w.buffer.AppendTuples(w.Ctx, batch, startIdx, nextPartitionIdx) - } - return nextPartitionIdx -} - func (w *lagBoolWindow) processBatch(batch coldata.Batch, startIdx, endIdx int) { if startIdx >= endIdx { // No processing needs to be done for this portion of the current partition. @@ -352,42 +315,6 @@ type lagBytesWindow struct { var _ bufferedWindower = &lagBytesWindow{} -func (w *lagBytesWindow) seekNextPartition( - batch coldata.Batch, startIdx int, isPartitionStart bool, -) (nextPartitionIdx int) { - n := batch.Length() - if w.partitionColIdx == -1 { - // There is only one partition, so it includes the entirety of this batch. - w.partitionSize += n - nextPartitionIdx = n - } else { - i := startIdx - partitionCol := batch.ColVec(w.partitionColIdx).Bool() - _ = partitionCol[n-1] - _ = partitionCol[i] - // Find the location of the start of the next partition (and the end of the - // current one). 
- for ; i < n; i++ { - //gcassert:bce - if partitionCol[i] { - // Don't break for the start of the current partition. - if !isPartitionStart || i != startIdx { - break - } - } - } - w.partitionSize += i - startIdx - nextPartitionIdx = i - } - - // Add all tuples from the argument column that fall within the current - // partition to the buffer so that they can be accessed later. - if startIdx < nextPartitionIdx { - w.buffer.AppendTuples(w.Ctx, batch, startIdx, nextPartitionIdx) - } - return nextPartitionIdx -} - func (w *lagBytesWindow) processBatch(batch coldata.Batch, startIdx, endIdx int) { if startIdx >= endIdx { // No processing needs to be done for this portion of the current partition. @@ -516,42 +443,6 @@ type lagDecimalWindow struct { var _ bufferedWindower = &lagDecimalWindow{} -func (w *lagDecimalWindow) seekNextPartition( - batch coldata.Batch, startIdx int, isPartitionStart bool, -) (nextPartitionIdx int) { - n := batch.Length() - if w.partitionColIdx == -1 { - // There is only one partition, so it includes the entirety of this batch. - w.partitionSize += n - nextPartitionIdx = n - } else { - i := startIdx - partitionCol := batch.ColVec(w.partitionColIdx).Bool() - _ = partitionCol[n-1] - _ = partitionCol[i] - // Find the location of the start of the next partition (and the end of the - // current one). - for ; i < n; i++ { - //gcassert:bce - if partitionCol[i] { - // Don't break for the start of the current partition. - if !isPartitionStart || i != startIdx { - break - } - } - } - w.partitionSize += i - startIdx - nextPartitionIdx = i - } - - // Add all tuples from the argument column that fall within the current - // partition to the buffer so that they can be accessed later. - if startIdx < nextPartitionIdx { - w.buffer.AppendTuples(w.Ctx, batch, startIdx, nextPartitionIdx) - } - return nextPartitionIdx -} - func (w *lagDecimalWindow) processBatch(batch coldata.Batch, startIdx, endIdx int) { if startIdx >= endIdx { // No processing needs to be done for this portion of the current partition. @@ -684,42 +575,6 @@ type lagInt16Window struct { var _ bufferedWindower = &lagInt16Window{} -func (w *lagInt16Window) seekNextPartition( - batch coldata.Batch, startIdx int, isPartitionStart bool, -) (nextPartitionIdx int) { - n := batch.Length() - if w.partitionColIdx == -1 { - // There is only one partition, so it includes the entirety of this batch. - w.partitionSize += n - nextPartitionIdx = n - } else { - i := startIdx - partitionCol := batch.ColVec(w.partitionColIdx).Bool() - _ = partitionCol[n-1] - _ = partitionCol[i] - // Find the location of the start of the next partition (and the end of the - // current one). - for ; i < n; i++ { - //gcassert:bce - if partitionCol[i] { - // Don't break for the start of the current partition. - if !isPartitionStart || i != startIdx { - break - } - } - } - w.partitionSize += i - startIdx - nextPartitionIdx = i - } - - // Add all tuples from the argument column that fall within the current - // partition to the buffer so that they can be accessed later. - if startIdx < nextPartitionIdx { - w.buffer.AppendTuples(w.Ctx, batch, startIdx, nextPartitionIdx) - } - return nextPartitionIdx -} - func (w *lagInt16Window) processBatch(batch coldata.Batch, startIdx, endIdx int) { if startIdx >= endIdx { // No processing needs to be done for this portion of the current partition. 
@@ -852,42 +707,6 @@ type lagInt32Window struct { var _ bufferedWindower = &lagInt32Window{} -func (w *lagInt32Window) seekNextPartition( - batch coldata.Batch, startIdx int, isPartitionStart bool, -) (nextPartitionIdx int) { - n := batch.Length() - if w.partitionColIdx == -1 { - // There is only one partition, so it includes the entirety of this batch. - w.partitionSize += n - nextPartitionIdx = n - } else { - i := startIdx - partitionCol := batch.ColVec(w.partitionColIdx).Bool() - _ = partitionCol[n-1] - _ = partitionCol[i] - // Find the location of the start of the next partition (and the end of the - // current one). - for ; i < n; i++ { - //gcassert:bce - if partitionCol[i] { - // Don't break for the start of the current partition. - if !isPartitionStart || i != startIdx { - break - } - } - } - w.partitionSize += i - startIdx - nextPartitionIdx = i - } - - // Add all tuples from the argument column that fall within the current - // partition to the buffer so that they can be accessed later. - if startIdx < nextPartitionIdx { - w.buffer.AppendTuples(w.Ctx, batch, startIdx, nextPartitionIdx) - } - return nextPartitionIdx -} - func (w *lagInt32Window) processBatch(batch coldata.Batch, startIdx, endIdx int) { if startIdx >= endIdx { // No processing needs to be done for this portion of the current partition. @@ -1020,42 +839,6 @@ type lagInt64Window struct { var _ bufferedWindower = &lagInt64Window{} -func (w *lagInt64Window) seekNextPartition( - batch coldata.Batch, startIdx int, isPartitionStart bool, -) (nextPartitionIdx int) { - n := batch.Length() - if w.partitionColIdx == -1 { - // There is only one partition, so it includes the entirety of this batch. - w.partitionSize += n - nextPartitionIdx = n - } else { - i := startIdx - partitionCol := batch.ColVec(w.partitionColIdx).Bool() - _ = partitionCol[n-1] - _ = partitionCol[i] - // Find the location of the start of the next partition (and the end of the - // current one). - for ; i < n; i++ { - //gcassert:bce - if partitionCol[i] { - // Don't break for the start of the current partition. - if !isPartitionStart || i != startIdx { - break - } - } - } - w.partitionSize += i - startIdx - nextPartitionIdx = i - } - - // Add all tuples from the argument column that fall within the current - // partition to the buffer so that they can be accessed later. - if startIdx < nextPartitionIdx { - w.buffer.AppendTuples(w.Ctx, batch, startIdx, nextPartitionIdx) - } - return nextPartitionIdx -} - func (w *lagInt64Window) processBatch(batch coldata.Batch, startIdx, endIdx int) { if startIdx >= endIdx { // No processing needs to be done for this portion of the current partition. @@ -1188,42 +971,6 @@ type lagFloat64Window struct { var _ bufferedWindower = &lagFloat64Window{} -func (w *lagFloat64Window) seekNextPartition( - batch coldata.Batch, startIdx int, isPartitionStart bool, -) (nextPartitionIdx int) { - n := batch.Length() - if w.partitionColIdx == -1 { - // There is only one partition, so it includes the entirety of this batch. - w.partitionSize += n - nextPartitionIdx = n - } else { - i := startIdx - partitionCol := batch.ColVec(w.partitionColIdx).Bool() - _ = partitionCol[n-1] - _ = partitionCol[i] - // Find the location of the start of the next partition (and the end of the - // current one). - for ; i < n; i++ { - //gcassert:bce - if partitionCol[i] { - // Don't break for the start of the current partition. 
- if !isPartitionStart || i != startIdx { - break - } - } - } - w.partitionSize += i - startIdx - nextPartitionIdx = i - } - - // Add all tuples from the argument column that fall within the current - // partition to the buffer so that they can be accessed later. - if startIdx < nextPartitionIdx { - w.buffer.AppendTuples(w.Ctx, batch, startIdx, nextPartitionIdx) - } - return nextPartitionIdx -} - func (w *lagFloat64Window) processBatch(batch coldata.Batch, startIdx, endIdx int) { if startIdx >= endIdx { // No processing needs to be done for this portion of the current partition. @@ -1356,42 +1103,6 @@ type lagTimestampWindow struct { var _ bufferedWindower = &lagTimestampWindow{} -func (w *lagTimestampWindow) seekNextPartition( - batch coldata.Batch, startIdx int, isPartitionStart bool, -) (nextPartitionIdx int) { - n := batch.Length() - if w.partitionColIdx == -1 { - // There is only one partition, so it includes the entirety of this batch. - w.partitionSize += n - nextPartitionIdx = n - } else { - i := startIdx - partitionCol := batch.ColVec(w.partitionColIdx).Bool() - _ = partitionCol[n-1] - _ = partitionCol[i] - // Find the location of the start of the next partition (and the end of the - // current one). - for ; i < n; i++ { - //gcassert:bce - if partitionCol[i] { - // Don't break for the start of the current partition. - if !isPartitionStart || i != startIdx { - break - } - } - } - w.partitionSize += i - startIdx - nextPartitionIdx = i - } - - // Add all tuples from the argument column that fall within the current - // partition to the buffer so that they can be accessed later. - if startIdx < nextPartitionIdx { - w.buffer.AppendTuples(w.Ctx, batch, startIdx, nextPartitionIdx) - } - return nextPartitionIdx -} - func (w *lagTimestampWindow) processBatch(batch coldata.Batch, startIdx, endIdx int) { if startIdx >= endIdx { // No processing needs to be done for this portion of the current partition. @@ -1524,42 +1235,6 @@ type lagIntervalWindow struct { var _ bufferedWindower = &lagIntervalWindow{} -func (w *lagIntervalWindow) seekNextPartition( - batch coldata.Batch, startIdx int, isPartitionStart bool, -) (nextPartitionIdx int) { - n := batch.Length() - if w.partitionColIdx == -1 { - // There is only one partition, so it includes the entirety of this batch. - w.partitionSize += n - nextPartitionIdx = n - } else { - i := startIdx - partitionCol := batch.ColVec(w.partitionColIdx).Bool() - _ = partitionCol[n-1] - _ = partitionCol[i] - // Find the location of the start of the next partition (and the end of the - // current one). - for ; i < n; i++ { - //gcassert:bce - if partitionCol[i] { - // Don't break for the start of the current partition. - if !isPartitionStart || i != startIdx { - break - } - } - } - w.partitionSize += i - startIdx - nextPartitionIdx = i - } - - // Add all tuples from the argument column that fall within the current - // partition to the buffer so that they can be accessed later. - if startIdx < nextPartitionIdx { - w.buffer.AppendTuples(w.Ctx, batch, startIdx, nextPartitionIdx) - } - return nextPartitionIdx -} - func (w *lagIntervalWindow) processBatch(batch coldata.Batch, startIdx, endIdx int) { if startIdx >= endIdx { // No processing needs to be done for this portion of the current partition. 
@@ -1692,42 +1367,6 @@ type lagJSONWindow struct { var _ bufferedWindower = &lagJSONWindow{} -func (w *lagJSONWindow) seekNextPartition( - batch coldata.Batch, startIdx int, isPartitionStart bool, -) (nextPartitionIdx int) { - n := batch.Length() - if w.partitionColIdx == -1 { - // There is only one partition, so it includes the entirety of this batch. - w.partitionSize += n - nextPartitionIdx = n - } else { - i := startIdx - partitionCol := batch.ColVec(w.partitionColIdx).Bool() - _ = partitionCol[n-1] - _ = partitionCol[i] - // Find the location of the start of the next partition (and the end of the - // current one). - for ; i < n; i++ { - //gcassert:bce - if partitionCol[i] { - // Don't break for the start of the current partition. - if !isPartitionStart || i != startIdx { - break - } - } - } - w.partitionSize += i - startIdx - nextPartitionIdx = i - } - - // Add all tuples from the argument column that fall within the current - // partition to the buffer so that they can be accessed later. - if startIdx < nextPartitionIdx { - w.buffer.AppendTuples(w.Ctx, batch, startIdx, nextPartitionIdx) - } - return nextPartitionIdx -} - func (w *lagJSONWindow) processBatch(batch coldata.Batch, startIdx, endIdx int) { if startIdx >= endIdx { // No processing needs to be done for this portion of the current partition. @@ -1856,42 +1495,6 @@ type lagDatumWindow struct { var _ bufferedWindower = &lagDatumWindow{} -func (w *lagDatumWindow) seekNextPartition( - batch coldata.Batch, startIdx int, isPartitionStart bool, -) (nextPartitionIdx int) { - n := batch.Length() - if w.partitionColIdx == -1 { - // There is only one partition, so it includes the entirety of this batch. - w.partitionSize += n - nextPartitionIdx = n - } else { - i := startIdx - partitionCol := batch.ColVec(w.partitionColIdx).Bool() - _ = partitionCol[n-1] - _ = partitionCol[i] - // Find the location of the start of the next partition (and the end of the - // current one). - for ; i < n; i++ { - //gcassert:bce - if partitionCol[i] { - // Don't break for the start of the current partition. - if !isPartitionStart || i != startIdx { - break - } - } - } - w.partitionSize += i - startIdx - nextPartitionIdx = i - } - - // Add all tuples from the argument column that fall within the current - // partition to the buffer so that they can be accessed later. - if startIdx < nextPartitionIdx { - w.buffer.AppendTuples(w.Ctx, batch, startIdx, nextPartitionIdx) - } - return nextPartitionIdx -} - func (w *lagDatumWindow) processBatch(batch coldata.Batch, startIdx, endIdx int) { if startIdx >= endIdx { // No processing needs to be done for this portion of the current partition. @@ -2014,9 +1617,7 @@ func (w *lagDatumWindow) processBatch(batch coldata.Batch, startIdx, endIdx int) } } -func (b *lagBase) transitionToProcessing() { - -} +func (b *lagBase) transitionToProcessing() {} func (b *lagBase) startNewPartition() { b.idx = 0 diff --git a/pkg/sql/colexec/colexecwindow/last_value.eg.go b/pkg/sql/colexec/colexecwindow/last_value.eg.go new file mode 100644 index 000000000000..5634be53f3b6 --- /dev/null +++ b/pkg/sql/colexec/colexecwindow/last_value.eg.go @@ -0,0 +1,625 @@ +// Code generated by execgen; DO NOT EDIT. +// Copyright 2021 The Cockroach Authors. +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. 
+// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package colexecwindow + +import ( + "context" + + "github.com/cockroachdb/cockroach/pkg/col/coldata" + "github.com/cockroachdb/cockroach/pkg/col/typeconv" + "github.com/cockroachdb/cockroach/pkg/sql/colcontainer" + "github.com/cockroachdb/cockroach/pkg/sql/colexec/colexecutils" + "github.com/cockroachdb/cockroach/pkg/sql/colexecop" + "github.com/cockroachdb/cockroach/pkg/sql/colmem" + "github.com/cockroachdb/cockroach/pkg/sql/execinfrapb" + "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" + "github.com/cockroachdb/cockroach/pkg/sql/types" + "github.com/cockroachdb/cockroach/pkg/util/mon" + "github.com/cockroachdb/errors" + "github.com/marusama/semaphore" +) + +// NewLastValueOperator creates a new Operator that computes window +// function lastValue. outputColIdx specifies in which coldata.Vec the operator +// should put its output (if there is no such column, a new column is appended). +func NewLastValueOperator( + evalCtx *tree.EvalContext, + frame *execinfrapb.WindowerSpec_Frame, + ordering *execinfrapb.Ordering, + unlimitedAllocator *colmem.Allocator, + bufferAllocator *colmem.Allocator, + memoryLimit int64, + diskQueueCfg colcontainer.DiskQueueCfg, + fdSemaphore semaphore.Semaphore, + diskAcc *mon.BoundAccount, + input colexecop.Operator, + inputTypes []*types.T, + outputColIdx int, + partitionColIdx int, + peersColIdx int, + argIdxs []int, +) (colexecop.Operator, error) { + framer := newWindowFramer(evalCtx, frame, ordering, inputTypes, peersColIdx) + colsToStore := []int{argIdxs[0]} + colsToStore = framer.getColsToStore(colsToStore) + + // Allow the direct-access buffer 10% of the available memory. The rest will + // be given to the bufferedWindowOp queue. While it is somewhat more important + // for the direct-access buffer tuples to be kept in-memory, it only has to + // store a single column. TODO(drewk): play around with benchmarks to find a + // good empirically-supported fraction to use. + bufferMemLimit := int64(float64(memoryLimit) * 0.10) + buffer := colexecutils.NewSpillingBuffer( + bufferAllocator, bufferMemLimit, diskQueueCfg, fdSemaphore, inputTypes, diskAcc, colsToStore...) + base := lastValueBase{ + partitionSeekerBase: partitionSeekerBase{ + buffer: buffer, + partitionColIdx: partitionColIdx, + }, + framer: framer, + outputColIdx: outputColIdx, + bufferArgIdx: 0, // The arg column is the first column in the buffer. 
+ } + argType := inputTypes[argIdxs[0]] + switch typeconv.TypeFamilyToCanonicalTypeFamily(argType.Family()) { + case types.BoolFamily: + switch argType.Width() { + case -1: + default: + windower := &lastValueBoolWindow{lastValueBase: base} + return newBufferedWindowOperator( + windower, unlimitedAllocator, memoryLimit-bufferMemLimit, diskQueueCfg, + fdSemaphore, diskAcc, input, inputTypes, argType, outputColIdx, + ), nil + } + case types.BytesFamily: + switch argType.Width() { + case -1: + default: + windower := &lastValueBytesWindow{lastValueBase: base} + return newBufferedWindowOperator( + windower, unlimitedAllocator, memoryLimit-bufferMemLimit, diskQueueCfg, + fdSemaphore, diskAcc, input, inputTypes, argType, outputColIdx, + ), nil + } + case types.DecimalFamily: + switch argType.Width() { + case -1: + default: + windower := &lastValueDecimalWindow{lastValueBase: base} + return newBufferedWindowOperator( + windower, unlimitedAllocator, memoryLimit-bufferMemLimit, diskQueueCfg, + fdSemaphore, diskAcc, input, inputTypes, argType, outputColIdx, + ), nil + } + case types.IntFamily: + switch argType.Width() { + case 16: + windower := &lastValueInt16Window{lastValueBase: base} + return newBufferedWindowOperator( + windower, unlimitedAllocator, memoryLimit-bufferMemLimit, diskQueueCfg, + fdSemaphore, diskAcc, input, inputTypes, argType, outputColIdx, + ), nil + case 32: + windower := &lastValueInt32Window{lastValueBase: base} + return newBufferedWindowOperator( + windower, unlimitedAllocator, memoryLimit-bufferMemLimit, diskQueueCfg, + fdSemaphore, diskAcc, input, inputTypes, argType, outputColIdx, + ), nil + case -1: + default: + windower := &lastValueInt64Window{lastValueBase: base} + return newBufferedWindowOperator( + windower, unlimitedAllocator, memoryLimit-bufferMemLimit, diskQueueCfg, + fdSemaphore, diskAcc, input, inputTypes, argType, outputColIdx, + ), nil + } + case types.FloatFamily: + switch argType.Width() { + case -1: + default: + windower := &lastValueFloat64Window{lastValueBase: base} + return newBufferedWindowOperator( + windower, unlimitedAllocator, memoryLimit-bufferMemLimit, diskQueueCfg, + fdSemaphore, diskAcc, input, inputTypes, argType, outputColIdx, + ), nil + } + case types.TimestampTZFamily: + switch argType.Width() { + case -1: + default: + windower := &lastValueTimestampWindow{lastValueBase: base} + return newBufferedWindowOperator( + windower, unlimitedAllocator, memoryLimit-bufferMemLimit, diskQueueCfg, + fdSemaphore, diskAcc, input, inputTypes, argType, outputColIdx, + ), nil + } + case types.IntervalFamily: + switch argType.Width() { + case -1: + default: + windower := &lastValueIntervalWindow{lastValueBase: base} + return newBufferedWindowOperator( + windower, unlimitedAllocator, memoryLimit-bufferMemLimit, diskQueueCfg, + fdSemaphore, diskAcc, input, inputTypes, argType, outputColIdx, + ), nil + } + case types.JsonFamily: + switch argType.Width() { + case -1: + default: + windower := &lastValueJSONWindow{lastValueBase: base} + return newBufferedWindowOperator( + windower, unlimitedAllocator, memoryLimit-bufferMemLimit, diskQueueCfg, + fdSemaphore, diskAcc, input, inputTypes, argType, outputColIdx, + ), nil + } + case typeconv.DatumVecCanonicalTypeFamily: + switch argType.Width() { + case -1: + default: + windower := &lastValueDatumWindow{lastValueBase: base} + return newBufferedWindowOperator( + windower, unlimitedAllocator, memoryLimit-bufferMemLimit, diskQueueCfg, + fdSemaphore, diskAcc, input, inputTypes, argType, outputColIdx, + ), nil + } + } + return 
nil, errors.Errorf("unsupported lastValue window operator type %s", argType.Name()) +} + +type lastValueBase struct { + partitionSeekerBase + colexecop.CloserHelper + framer windowFramer + + outputColIdx int + bufferArgIdx int +} + +type lastValueBoolWindow struct { + lastValueBase +} + +var _ bufferedWindower = &lastValueBoolWindow{} + +// processBatch implements the bufferedWindower interface. +func (w *lastValueBoolWindow) processBatch(batch coldata.Batch, startIdx, endIdx int) { + if startIdx >= endIdx { + // No processing needs to be done for this portion of the current partition. + return + } + outputVec := batch.ColVec(w.outputColIdx) + outputCol := outputVec.Bool() + outputNulls := outputVec.Nulls() + _, _ = outputCol.Get(startIdx), outputCol.Get(endIdx-1) + + for i := startIdx; i < endIdx; i++ { + w.framer.next(w.Ctx) + requestedIdx := w.framer.frameLastIdx() + if requestedIdx == -1 { + // The requested row does not exist. + outputNulls.SetNull(i) + continue + } + + vec, idx, _ := w.buffer.GetVecWithTuple(w.Ctx, w.bufferArgIdx, requestedIdx) + if vec.Nulls().MaybeHasNulls() && vec.Nulls().NullAt(idx) { + outputNulls.SetNull(i) + continue + } + col := vec.Bool() + val := col.Get(idx) + //gcassert:bce + outputCol.Set(i, val) + } +} + +type lastValueBytesWindow struct { + lastValueBase +} + +var _ bufferedWindower = &lastValueBytesWindow{} + +// processBatch implements the bufferedWindower interface. +func (w *lastValueBytesWindow) processBatch(batch coldata.Batch, startIdx, endIdx int) { + if startIdx >= endIdx { + // No processing needs to be done for this portion of the current partition. + return + } + outputVec := batch.ColVec(w.outputColIdx) + outputCol := outputVec.Bytes() + outputNulls := outputVec.Nulls() + + for i := startIdx; i < endIdx; i++ { + w.framer.next(w.Ctx) + requestedIdx := w.framer.frameLastIdx() + if requestedIdx == -1 { + // The requested row does not exist. + outputNulls.SetNull(i) + continue + } + + vec, idx, _ := w.buffer.GetVecWithTuple(w.Ctx, w.bufferArgIdx, requestedIdx) + if vec.Nulls().MaybeHasNulls() && vec.Nulls().NullAt(idx) { + outputNulls.SetNull(i) + continue + } + col := vec.Bytes() + // We have to use CopySlice here because the column already has a length of + // n elements, and Set cannot set values before the last one. + outputCol.CopySlice(col, i, idx, idx+1) + } +} + +type lastValueDecimalWindow struct { + lastValueBase +} + +var _ bufferedWindower = &lastValueDecimalWindow{} + +// processBatch implements the bufferedWindower interface. +func (w *lastValueDecimalWindow) processBatch(batch coldata.Batch, startIdx, endIdx int) { + if startIdx >= endIdx { + // No processing needs to be done for this portion of the current partition. + return + } + outputVec := batch.ColVec(w.outputColIdx) + outputCol := outputVec.Decimal() + outputNulls := outputVec.Nulls() + _, _ = outputCol.Get(startIdx), outputCol.Get(endIdx-1) + + for i := startIdx; i < endIdx; i++ { + w.framer.next(w.Ctx) + requestedIdx := w.framer.frameLastIdx() + if requestedIdx == -1 { + // The requested row does not exist. 
+ outputNulls.SetNull(i) + continue + } + + vec, idx, _ := w.buffer.GetVecWithTuple(w.Ctx, w.bufferArgIdx, requestedIdx) + if vec.Nulls().MaybeHasNulls() && vec.Nulls().NullAt(idx) { + outputNulls.SetNull(i) + continue + } + col := vec.Decimal() + val := col.Get(idx) + //gcassert:bce + outputCol.Set(i, val) + } +} + +type lastValueInt16Window struct { + lastValueBase +} + +var _ bufferedWindower = &lastValueInt16Window{} + +// processBatch implements the bufferedWindower interface. +func (w *lastValueInt16Window) processBatch(batch coldata.Batch, startIdx, endIdx int) { + if startIdx >= endIdx { + // No processing needs to be done for this portion of the current partition. + return + } + outputVec := batch.ColVec(w.outputColIdx) + outputCol := outputVec.Int16() + outputNulls := outputVec.Nulls() + _, _ = outputCol.Get(startIdx), outputCol.Get(endIdx-1) + + for i := startIdx; i < endIdx; i++ { + w.framer.next(w.Ctx) + requestedIdx := w.framer.frameLastIdx() + if requestedIdx == -1 { + // The requested row does not exist. + outputNulls.SetNull(i) + continue + } + + vec, idx, _ := w.buffer.GetVecWithTuple(w.Ctx, w.bufferArgIdx, requestedIdx) + if vec.Nulls().MaybeHasNulls() && vec.Nulls().NullAt(idx) { + outputNulls.SetNull(i) + continue + } + col := vec.Int16() + val := col.Get(idx) + //gcassert:bce + outputCol.Set(i, val) + } +} + +type lastValueInt32Window struct { + lastValueBase +} + +var _ bufferedWindower = &lastValueInt32Window{} + +// processBatch implements the bufferedWindower interface. +func (w *lastValueInt32Window) processBatch(batch coldata.Batch, startIdx, endIdx int) { + if startIdx >= endIdx { + // No processing needs to be done for this portion of the current partition. + return + } + outputVec := batch.ColVec(w.outputColIdx) + outputCol := outputVec.Int32() + outputNulls := outputVec.Nulls() + _, _ = outputCol.Get(startIdx), outputCol.Get(endIdx-1) + + for i := startIdx; i < endIdx; i++ { + w.framer.next(w.Ctx) + requestedIdx := w.framer.frameLastIdx() + if requestedIdx == -1 { + // The requested row does not exist. + outputNulls.SetNull(i) + continue + } + + vec, idx, _ := w.buffer.GetVecWithTuple(w.Ctx, w.bufferArgIdx, requestedIdx) + if vec.Nulls().MaybeHasNulls() && vec.Nulls().NullAt(idx) { + outputNulls.SetNull(i) + continue + } + col := vec.Int32() + val := col.Get(idx) + //gcassert:bce + outputCol.Set(i, val) + } +} + +type lastValueInt64Window struct { + lastValueBase +} + +var _ bufferedWindower = &lastValueInt64Window{} + +// processBatch implements the bufferedWindower interface. +func (w *lastValueInt64Window) processBatch(batch coldata.Batch, startIdx, endIdx int) { + if startIdx >= endIdx { + // No processing needs to be done for this portion of the current partition. + return + } + outputVec := batch.ColVec(w.outputColIdx) + outputCol := outputVec.Int64() + outputNulls := outputVec.Nulls() + _, _ = outputCol.Get(startIdx), outputCol.Get(endIdx-1) + + for i := startIdx; i < endIdx; i++ { + w.framer.next(w.Ctx) + requestedIdx := w.framer.frameLastIdx() + if requestedIdx == -1 { + // The requested row does not exist. 
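+ // As with first_value, last_value over an empty frame produces NULL.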
+ outputNulls.SetNull(i) + continue + } + + vec, idx, _ := w.buffer.GetVecWithTuple(w.Ctx, w.bufferArgIdx, requestedIdx) + if vec.Nulls().MaybeHasNulls() && vec.Nulls().NullAt(idx) { + outputNulls.SetNull(i) + continue + } + col := vec.Int64() + val := col.Get(idx) + //gcassert:bce + outputCol.Set(i, val) + } +} + +type lastValueFloat64Window struct { + lastValueBase +} + +var _ bufferedWindower = &lastValueFloat64Window{} + +// processBatch implements the bufferedWindower interface. +func (w *lastValueFloat64Window) processBatch(batch coldata.Batch, startIdx, endIdx int) { + if startIdx >= endIdx { + // No processing needs to be done for this portion of the current partition. + return + } + outputVec := batch.ColVec(w.outputColIdx) + outputCol := outputVec.Float64() + outputNulls := outputVec.Nulls() + _, _ = outputCol.Get(startIdx), outputCol.Get(endIdx-1) + + for i := startIdx; i < endIdx; i++ { + w.framer.next(w.Ctx) + requestedIdx := w.framer.frameLastIdx() + if requestedIdx == -1 { + // The requested row does not exist. + outputNulls.SetNull(i) + continue + } + + vec, idx, _ := w.buffer.GetVecWithTuple(w.Ctx, w.bufferArgIdx, requestedIdx) + if vec.Nulls().MaybeHasNulls() && vec.Nulls().NullAt(idx) { + outputNulls.SetNull(i) + continue + } + col := vec.Float64() + val := col.Get(idx) + //gcassert:bce + outputCol.Set(i, val) + } +} + +type lastValueTimestampWindow struct { + lastValueBase +} + +var _ bufferedWindower = &lastValueTimestampWindow{} + +// processBatch implements the bufferedWindower interface. +func (w *lastValueTimestampWindow) processBatch(batch coldata.Batch, startIdx, endIdx int) { + if startIdx >= endIdx { + // No processing needs to be done for this portion of the current partition. + return + } + outputVec := batch.ColVec(w.outputColIdx) + outputCol := outputVec.Timestamp() + outputNulls := outputVec.Nulls() + _, _ = outputCol.Get(startIdx), outputCol.Get(endIdx-1) + + for i := startIdx; i < endIdx; i++ { + w.framer.next(w.Ctx) + requestedIdx := w.framer.frameLastIdx() + if requestedIdx == -1 { + // The requested row does not exist. + outputNulls.SetNull(i) + continue + } + + vec, idx, _ := w.buffer.GetVecWithTuple(w.Ctx, w.bufferArgIdx, requestedIdx) + if vec.Nulls().MaybeHasNulls() && vec.Nulls().NullAt(idx) { + outputNulls.SetNull(i) + continue + } + col := vec.Timestamp() + val := col.Get(idx) + //gcassert:bce + outputCol.Set(i, val) + } +} + +type lastValueIntervalWindow struct { + lastValueBase +} + +var _ bufferedWindower = &lastValueIntervalWindow{} + +// processBatch implements the bufferedWindower interface. +func (w *lastValueIntervalWindow) processBatch(batch coldata.Batch, startIdx, endIdx int) { + if startIdx >= endIdx { + // No processing needs to be done for this portion of the current partition. + return + } + outputVec := batch.ColVec(w.outputColIdx) + outputCol := outputVec.Interval() + outputNulls := outputVec.Nulls() + _, _ = outputCol.Get(startIdx), outputCol.Get(endIdx-1) + + for i := startIdx; i < endIdx; i++ { + w.framer.next(w.Ctx) + requestedIdx := w.framer.frameLastIdx() + if requestedIdx == -1 { + // The requested row does not exist. 
+ outputNulls.SetNull(i) + continue + } + + vec, idx, _ := w.buffer.GetVecWithTuple(w.Ctx, w.bufferArgIdx, requestedIdx) + if vec.Nulls().MaybeHasNulls() && vec.Nulls().NullAt(idx) { + outputNulls.SetNull(i) + continue + } + col := vec.Interval() + val := col.Get(idx) + //gcassert:bce + outputCol.Set(i, val) + } +} + +type lastValueJSONWindow struct { + lastValueBase +} + +var _ bufferedWindower = &lastValueJSONWindow{} + +// processBatch implements the bufferedWindower interface. +func (w *lastValueJSONWindow) processBatch(batch coldata.Batch, startIdx, endIdx int) { + if startIdx >= endIdx { + // No processing needs to be done for this portion of the current partition. + return + } + outputVec := batch.ColVec(w.outputColIdx) + outputCol := outputVec.JSON() + outputNulls := outputVec.Nulls() + + for i := startIdx; i < endIdx; i++ { + w.framer.next(w.Ctx) + requestedIdx := w.framer.frameLastIdx() + if requestedIdx == -1 { + // The requested row does not exist. + outputNulls.SetNull(i) + continue + } + + vec, idx, _ := w.buffer.GetVecWithTuple(w.Ctx, w.bufferArgIdx, requestedIdx) + if vec.Nulls().MaybeHasNulls() && vec.Nulls().NullAt(idx) { + outputNulls.SetNull(i) + continue + } + col := vec.JSON() + // We have to use CopySlice here because the column already has a length of + // n elements, and Set cannot set values before the last one. + outputCol.CopySlice(col, i, idx, idx+1) + } +} + +type lastValueDatumWindow struct { + lastValueBase +} + +var _ bufferedWindower = &lastValueDatumWindow{} + +// processBatch implements the bufferedWindower interface. +func (w *lastValueDatumWindow) processBatch(batch coldata.Batch, startIdx, endIdx int) { + if startIdx >= endIdx { + // No processing needs to be done for this portion of the current partition. + return + } + outputVec := batch.ColVec(w.outputColIdx) + outputCol := outputVec.Datum() + outputNulls := outputVec.Nulls() + + for i := startIdx; i < endIdx; i++ { + w.framer.next(w.Ctx) + requestedIdx := w.framer.frameLastIdx() + if requestedIdx == -1 { + // The requested row does not exist. + outputNulls.SetNull(i) + continue + } + + vec, idx, _ := w.buffer.GetVecWithTuple(w.Ctx, w.bufferArgIdx, requestedIdx) + if vec.Nulls().MaybeHasNulls() && vec.Nulls().NullAt(idx) { + outputNulls.SetNull(i) + continue + } + col := vec.Datum() + val := col.Get(idx) + outputCol.Set(i, val) + } +} + +// transitionToProcessing implements the bufferedWindower interface. +func (b *lastValueBase) transitionToProcessing() { + b.framer.startPartition(b.Ctx, b.partitionSize, b.buffer) +} + +// startNewPartition implements the bufferedWindower interface. +func (b *lastValueBase) startNewPartition() { + b.partitionSize = 0 + b.buffer.Reset(b.Ctx) +} + +// Init implements the bufferedWindower interface. +func (b *lastValueBase) Init(ctx context.Context) { + if !b.InitHelper.Init(ctx) { + return + } +} + +// Close implements the bufferedWindower interface. 
+func (b *lastValueBase) Close() { + if !b.CloserHelper.Close() { + return + } + b.buffer.Close(b.EnsureCtx()) +} diff --git a/pkg/sql/colexec/colexecwindow/lead.eg.go b/pkg/sql/colexec/colexecwindow/lead.eg.go index b240f0d9ecb0..54e58057aa28 100644 --- a/pkg/sql/colexec/colexecwindow/lead.eg.go +++ b/pkg/sql/colexec/colexecwindow/lead.eg.go @@ -51,12 +51,14 @@ func NewLeadOperator( buffer := colexecutils.NewSpillingBuffer( bufferAllocator, bufferMemLimit, diskQueueCfg, fdSemaphore, inputTypes, diskAcc, argIdx) base := leadBase{ - buffer: buffer, - outputColIdx: outputColIdx, - partitionColIdx: partitionColIdx, - argIdx: argIdx, - offsetIdx: offsetIdx, - defaultIdx: defaultIdx, + partitionSeekerBase: partitionSeekerBase{ + buffer: buffer, + partitionColIdx: partitionColIdx, + }, + outputColIdx: outputColIdx, + argIdx: argIdx, + offsetIdx: offsetIdx, + defaultIdx: defaultIdx, } argType := inputTypes[argIdx] switch typeconv.TypeFamilyToCanonicalTypeFamily(argType.Family()) { @@ -158,12 +160,10 @@ func NewLeadOperator( // leadBase extracts common fields and methods of the lead windower // variations. type leadBase struct { - colexecop.InitHelper + partitionSeekerBase colexecop.CloserHelper leadComputeFields - buffer *colexecutils.SpillingBuffer - outputColIdx int partitionColIdx int argIdx int @@ -174,8 +174,7 @@ type leadBase struct { // leadComputeFields extracts the fields that are used to calculate lead // output values. type leadComputeFields struct { - partitionSize int - idx int + idx int } type leadBoolWindow struct { @@ -184,42 +183,6 @@ type leadBoolWindow struct { var _ bufferedWindower = &leadBoolWindow{} -func (w *leadBoolWindow) seekNextPartition( - batch coldata.Batch, startIdx int, isPartitionStart bool, -) (nextPartitionIdx int) { - n := batch.Length() - if w.partitionColIdx == -1 { - // There is only one partition, so it includes the entirety of this batch. - w.partitionSize += n - nextPartitionIdx = n - } else { - i := startIdx - partitionCol := batch.ColVec(w.partitionColIdx).Bool() - _ = partitionCol[n-1] - _ = partitionCol[i] - // Find the location of the start of the next partition (and the end of the - // current one). - for ; i < n; i++ { - //gcassert:bce - if partitionCol[i] { - // Don't break for the start of the current partition. - if !isPartitionStart || i != startIdx { - break - } - } - } - w.partitionSize += i - startIdx - nextPartitionIdx = i - } - - // Add all tuples from the argument column that fall within the current - // partition to the buffer so that they can be accessed later. - if startIdx < nextPartitionIdx { - w.buffer.AppendTuples(w.Ctx, batch, startIdx, nextPartitionIdx) - } - return nextPartitionIdx -} - func (w *leadBoolWindow) processBatch(batch coldata.Batch, startIdx, endIdx int) { if startIdx >= endIdx { // No processing needs to be done for this portion of the current partition. @@ -352,42 +315,6 @@ type leadBytesWindow struct { var _ bufferedWindower = &leadBytesWindow{} -func (w *leadBytesWindow) seekNextPartition( - batch coldata.Batch, startIdx int, isPartitionStart bool, -) (nextPartitionIdx int) { - n := batch.Length() - if w.partitionColIdx == -1 { - // There is only one partition, so it includes the entirety of this batch. - w.partitionSize += n - nextPartitionIdx = n - } else { - i := startIdx - partitionCol := batch.ColVec(w.partitionColIdx).Bool() - _ = partitionCol[n-1] - _ = partitionCol[i] - // Find the location of the start of the next partition (and the end of the - // current one). 
- for ; i < n; i++ { - //gcassert:bce - if partitionCol[i] { - // Don't break for the start of the current partition. - if !isPartitionStart || i != startIdx { - break - } - } - } - w.partitionSize += i - startIdx - nextPartitionIdx = i - } - - // Add all tuples from the argument column that fall within the current - // partition to the buffer so that they can be accessed later. - if startIdx < nextPartitionIdx { - w.buffer.AppendTuples(w.Ctx, batch, startIdx, nextPartitionIdx) - } - return nextPartitionIdx -} - func (w *leadBytesWindow) processBatch(batch coldata.Batch, startIdx, endIdx int) { if startIdx >= endIdx { // No processing needs to be done for this portion of the current partition. @@ -516,42 +443,6 @@ type leadDecimalWindow struct { var _ bufferedWindower = &leadDecimalWindow{} -func (w *leadDecimalWindow) seekNextPartition( - batch coldata.Batch, startIdx int, isPartitionStart bool, -) (nextPartitionIdx int) { - n := batch.Length() - if w.partitionColIdx == -1 { - // There is only one partition, so it includes the entirety of this batch. - w.partitionSize += n - nextPartitionIdx = n - } else { - i := startIdx - partitionCol := batch.ColVec(w.partitionColIdx).Bool() - _ = partitionCol[n-1] - _ = partitionCol[i] - // Find the location of the start of the next partition (and the end of the - // current one). - for ; i < n; i++ { - //gcassert:bce - if partitionCol[i] { - // Don't break for the start of the current partition. - if !isPartitionStart || i != startIdx { - break - } - } - } - w.partitionSize += i - startIdx - nextPartitionIdx = i - } - - // Add all tuples from the argument column that fall within the current - // partition to the buffer so that they can be accessed later. - if startIdx < nextPartitionIdx { - w.buffer.AppendTuples(w.Ctx, batch, startIdx, nextPartitionIdx) - } - return nextPartitionIdx -} - func (w *leadDecimalWindow) processBatch(batch coldata.Batch, startIdx, endIdx int) { if startIdx >= endIdx { // No processing needs to be done for this portion of the current partition. @@ -684,42 +575,6 @@ type leadInt16Window struct { var _ bufferedWindower = &leadInt16Window{} -func (w *leadInt16Window) seekNextPartition( - batch coldata.Batch, startIdx int, isPartitionStart bool, -) (nextPartitionIdx int) { - n := batch.Length() - if w.partitionColIdx == -1 { - // There is only one partition, so it includes the entirety of this batch. - w.partitionSize += n - nextPartitionIdx = n - } else { - i := startIdx - partitionCol := batch.ColVec(w.partitionColIdx).Bool() - _ = partitionCol[n-1] - _ = partitionCol[i] - // Find the location of the start of the next partition (and the end of the - // current one). - for ; i < n; i++ { - //gcassert:bce - if partitionCol[i] { - // Don't break for the start of the current partition. - if !isPartitionStart || i != startIdx { - break - } - } - } - w.partitionSize += i - startIdx - nextPartitionIdx = i - } - - // Add all tuples from the argument column that fall within the current - // partition to the buffer so that they can be accessed later. - if startIdx < nextPartitionIdx { - w.buffer.AppendTuples(w.Ctx, batch, startIdx, nextPartitionIdx) - } - return nextPartitionIdx -} - func (w *leadInt16Window) processBatch(batch coldata.Batch, startIdx, endIdx int) { if startIdx >= endIdx { // No processing needs to be done for this portion of the current partition. 
@@ -852,42 +707,6 @@ type leadInt32Window struct { var _ bufferedWindower = &leadInt32Window{} -func (w *leadInt32Window) seekNextPartition( - batch coldata.Batch, startIdx int, isPartitionStart bool, -) (nextPartitionIdx int) { - n := batch.Length() - if w.partitionColIdx == -1 { - // There is only one partition, so it includes the entirety of this batch. - w.partitionSize += n - nextPartitionIdx = n - } else { - i := startIdx - partitionCol := batch.ColVec(w.partitionColIdx).Bool() - _ = partitionCol[n-1] - _ = partitionCol[i] - // Find the location of the start of the next partition (and the end of the - // current one). - for ; i < n; i++ { - //gcassert:bce - if partitionCol[i] { - // Don't break for the start of the current partition. - if !isPartitionStart || i != startIdx { - break - } - } - } - w.partitionSize += i - startIdx - nextPartitionIdx = i - } - - // Add all tuples from the argument column that fall within the current - // partition to the buffer so that they can be accessed later. - if startIdx < nextPartitionIdx { - w.buffer.AppendTuples(w.Ctx, batch, startIdx, nextPartitionIdx) - } - return nextPartitionIdx -} - func (w *leadInt32Window) processBatch(batch coldata.Batch, startIdx, endIdx int) { if startIdx >= endIdx { // No processing needs to be done for this portion of the current partition. @@ -1020,42 +839,6 @@ type leadInt64Window struct { var _ bufferedWindower = &leadInt64Window{} -func (w *leadInt64Window) seekNextPartition( - batch coldata.Batch, startIdx int, isPartitionStart bool, -) (nextPartitionIdx int) { - n := batch.Length() - if w.partitionColIdx == -1 { - // There is only one partition, so it includes the entirety of this batch. - w.partitionSize += n - nextPartitionIdx = n - } else { - i := startIdx - partitionCol := batch.ColVec(w.partitionColIdx).Bool() - _ = partitionCol[n-1] - _ = partitionCol[i] - // Find the location of the start of the next partition (and the end of the - // current one). - for ; i < n; i++ { - //gcassert:bce - if partitionCol[i] { - // Don't break for the start of the current partition. - if !isPartitionStart || i != startIdx { - break - } - } - } - w.partitionSize += i - startIdx - nextPartitionIdx = i - } - - // Add all tuples from the argument column that fall within the current - // partition to the buffer so that they can be accessed later. - if startIdx < nextPartitionIdx { - w.buffer.AppendTuples(w.Ctx, batch, startIdx, nextPartitionIdx) - } - return nextPartitionIdx -} - func (w *leadInt64Window) processBatch(batch coldata.Batch, startIdx, endIdx int) { if startIdx >= endIdx { // No processing needs to be done for this portion of the current partition. @@ -1188,42 +971,6 @@ type leadFloat64Window struct { var _ bufferedWindower = &leadFloat64Window{} -func (w *leadFloat64Window) seekNextPartition( - batch coldata.Batch, startIdx int, isPartitionStart bool, -) (nextPartitionIdx int) { - n := batch.Length() - if w.partitionColIdx == -1 { - // There is only one partition, so it includes the entirety of this batch. - w.partitionSize += n - nextPartitionIdx = n - } else { - i := startIdx - partitionCol := batch.ColVec(w.partitionColIdx).Bool() - _ = partitionCol[n-1] - _ = partitionCol[i] - // Find the location of the start of the next partition (and the end of the - // current one). - for ; i < n; i++ { - //gcassert:bce - if partitionCol[i] { - // Don't break for the start of the current partition. 
- if !isPartitionStart || i != startIdx { - break - } - } - } - w.partitionSize += i - startIdx - nextPartitionIdx = i - } - - // Add all tuples from the argument column that fall within the current - // partition to the buffer so that they can be accessed later. - if startIdx < nextPartitionIdx { - w.buffer.AppendTuples(w.Ctx, batch, startIdx, nextPartitionIdx) - } - return nextPartitionIdx -} - func (w *leadFloat64Window) processBatch(batch coldata.Batch, startIdx, endIdx int) { if startIdx >= endIdx { // No processing needs to be done for this portion of the current partition. @@ -1356,42 +1103,6 @@ type leadTimestampWindow struct { var _ bufferedWindower = &leadTimestampWindow{} -func (w *leadTimestampWindow) seekNextPartition( - batch coldata.Batch, startIdx int, isPartitionStart bool, -) (nextPartitionIdx int) { - n := batch.Length() - if w.partitionColIdx == -1 { - // There is only one partition, so it includes the entirety of this batch. - w.partitionSize += n - nextPartitionIdx = n - } else { - i := startIdx - partitionCol := batch.ColVec(w.partitionColIdx).Bool() - _ = partitionCol[n-1] - _ = partitionCol[i] - // Find the location of the start of the next partition (and the end of the - // current one). - for ; i < n; i++ { - //gcassert:bce - if partitionCol[i] { - // Don't break for the start of the current partition. - if !isPartitionStart || i != startIdx { - break - } - } - } - w.partitionSize += i - startIdx - nextPartitionIdx = i - } - - // Add all tuples from the argument column that fall within the current - // partition to the buffer so that they can be accessed later. - if startIdx < nextPartitionIdx { - w.buffer.AppendTuples(w.Ctx, batch, startIdx, nextPartitionIdx) - } - return nextPartitionIdx -} - func (w *leadTimestampWindow) processBatch(batch coldata.Batch, startIdx, endIdx int) { if startIdx >= endIdx { // No processing needs to be done for this portion of the current partition. @@ -1524,42 +1235,6 @@ type leadIntervalWindow struct { var _ bufferedWindower = &leadIntervalWindow{} -func (w *leadIntervalWindow) seekNextPartition( - batch coldata.Batch, startIdx int, isPartitionStart bool, -) (nextPartitionIdx int) { - n := batch.Length() - if w.partitionColIdx == -1 { - // There is only one partition, so it includes the entirety of this batch. - w.partitionSize += n - nextPartitionIdx = n - } else { - i := startIdx - partitionCol := batch.ColVec(w.partitionColIdx).Bool() - _ = partitionCol[n-1] - _ = partitionCol[i] - // Find the location of the start of the next partition (and the end of the - // current one). - for ; i < n; i++ { - //gcassert:bce - if partitionCol[i] { - // Don't break for the start of the current partition. - if !isPartitionStart || i != startIdx { - break - } - } - } - w.partitionSize += i - startIdx - nextPartitionIdx = i - } - - // Add all tuples from the argument column that fall within the current - // partition to the buffer so that they can be accessed later. - if startIdx < nextPartitionIdx { - w.buffer.AppendTuples(w.Ctx, batch, startIdx, nextPartitionIdx) - } - return nextPartitionIdx -} - func (w *leadIntervalWindow) processBatch(batch coldata.Batch, startIdx, endIdx int) { if startIdx >= endIdx { // No processing needs to be done for this portion of the current partition. 
@@ -1692,42 +1367,6 @@ type leadJSONWindow struct { var _ bufferedWindower = &leadJSONWindow{} -func (w *leadJSONWindow) seekNextPartition( - batch coldata.Batch, startIdx int, isPartitionStart bool, -) (nextPartitionIdx int) { - n := batch.Length() - if w.partitionColIdx == -1 { - // There is only one partition, so it includes the entirety of this batch. - w.partitionSize += n - nextPartitionIdx = n - } else { - i := startIdx - partitionCol := batch.ColVec(w.partitionColIdx).Bool() - _ = partitionCol[n-1] - _ = partitionCol[i] - // Find the location of the start of the next partition (and the end of the - // current one). - for ; i < n; i++ { - //gcassert:bce - if partitionCol[i] { - // Don't break for the start of the current partition. - if !isPartitionStart || i != startIdx { - break - } - } - } - w.partitionSize += i - startIdx - nextPartitionIdx = i - } - - // Add all tuples from the argument column that fall within the current - // partition to the buffer so that they can be accessed later. - if startIdx < nextPartitionIdx { - w.buffer.AppendTuples(w.Ctx, batch, startIdx, nextPartitionIdx) - } - return nextPartitionIdx -} - func (w *leadJSONWindow) processBatch(batch coldata.Batch, startIdx, endIdx int) { if startIdx >= endIdx { // No processing needs to be done for this portion of the current partition. @@ -1856,42 +1495,6 @@ type leadDatumWindow struct { var _ bufferedWindower = &leadDatumWindow{} -func (w *leadDatumWindow) seekNextPartition( - batch coldata.Batch, startIdx int, isPartitionStart bool, -) (nextPartitionIdx int) { - n := batch.Length() - if w.partitionColIdx == -1 { - // There is only one partition, so it includes the entirety of this batch. - w.partitionSize += n - nextPartitionIdx = n - } else { - i := startIdx - partitionCol := batch.ColVec(w.partitionColIdx).Bool() - _ = partitionCol[n-1] - _ = partitionCol[i] - // Find the location of the start of the next partition (and the end of the - // current one). - for ; i < n; i++ { - //gcassert:bce - if partitionCol[i] { - // Don't break for the start of the current partition. - if !isPartitionStart || i != startIdx { - break - } - } - } - w.partitionSize += i - startIdx - nextPartitionIdx = i - } - - // Add all tuples from the argument column that fall within the current - // partition to the buffer so that they can be accessed later. - if startIdx < nextPartitionIdx { - w.buffer.AppendTuples(w.Ctx, batch, startIdx, nextPartitionIdx) - } - return nextPartitionIdx -} - func (w *leadDatumWindow) processBatch(batch coldata.Batch, startIdx, endIdx int) { if startIdx >= endIdx { // No processing needs to be done for this portion of the current partition. 
@@ -2014,9 +1617,7 @@ func (w *leadDatumWindow) processBatch(batch coldata.Batch, startIdx, endIdx int } } -func (b *leadBase) transitionToProcessing() { - -} +func (b *leadBase) transitionToProcessing() {} func (b *leadBase) startNewPartition() { b.idx = 0 diff --git a/pkg/sql/colexec/colexecwindow/lead_lag_tmpl.go b/pkg/sql/colexec/colexecwindow/lead_lag_tmpl.go index 3f2744bb3588..03b99b3cd953 100644 --- a/pkg/sql/colexec/colexecwindow/lead_lag_tmpl.go +++ b/pkg/sql/colexec/colexecwindow/lead_lag_tmpl.go @@ -73,12 +73,14 @@ func New_UPPERCASE_NAMEOperator( buffer := colexecutils.NewSpillingBuffer( bufferAllocator, bufferMemLimit, diskQueueCfg, fdSemaphore, inputTypes, diskAcc, argIdx) base := _OP_NAMEBase{ - buffer: buffer, - outputColIdx: outputColIdx, - partitionColIdx: partitionColIdx, - argIdx: argIdx, - offsetIdx: offsetIdx, - defaultIdx: defaultIdx, + partitionSeekerBase: partitionSeekerBase{ + buffer: buffer, + partitionColIdx: partitionColIdx, + }, + outputColIdx: outputColIdx, + argIdx: argIdx, + offsetIdx: offsetIdx, + defaultIdx: defaultIdx, } argType := inputTypes[argIdx] switch typeconv.TypeFamilyToCanonicalTypeFamily(argType.Family()) { @@ -101,12 +103,10 @@ func New_UPPERCASE_NAMEOperator( // _OP_NAMEBase extracts common fields and methods of the _OP_NAME windower // variations. type _OP_NAMEBase struct { - colexecop.InitHelper + partitionSeekerBase colexecop.CloserHelper _OP_NAMEComputeFields - buffer *colexecutils.SpillingBuffer - outputColIdx int partitionColIdx int argIdx int @@ -117,8 +117,7 @@ type _OP_NAMEBase struct { // _OP_NAMEComputeFields extracts the fields that are used to calculate _OP_NAME // output values. type _OP_NAMEComputeFields struct { - partitionSize int - idx int + idx int } // {{range .}} @@ -130,42 +129,6 @@ type _OP_NAME_TYPEWindow struct { var _ bufferedWindower = &_OP_NAME_TYPEWindow{} -func (w *_OP_NAME_TYPEWindow) seekNextPartition( - batch coldata.Batch, startIdx int, isPartitionStart bool, -) (nextPartitionIdx int) { - n := batch.Length() - if w.partitionColIdx == -1 { - // There is only one partition, so it includes the entirety of this batch. - w.partitionSize += n - nextPartitionIdx = n - } else { - i := startIdx - partitionCol := batch.ColVec(w.partitionColIdx).Bool() - _ = partitionCol[n-1] - _ = partitionCol[i] - // Find the location of the start of the next partition (and the end of the - // current one). - for ; i < n; i++ { - //gcassert:bce - if partitionCol[i] { - // Don't break for the start of the current partition. - if !isPartitionStart || i != startIdx { - break - } - } - } - w.partitionSize += i - startIdx - nextPartitionIdx = i - } - - // Add all tuples from the argument column that fall within the current - // partition to the buffer so that they can be accessed later. - if startIdx < nextPartitionIdx { - w.buffer.AppendTuples(w.Ctx, batch, startIdx, nextPartitionIdx) - } - return nextPartitionIdx -} - func (w *_OP_NAME_TYPEWindow) processBatch(batch coldata.Batch, startIdx, endIdx int) { if startIdx >= endIdx { // No processing needs to be done for this portion of the current partition. 
@@ -211,9 +174,7 @@ func (w *_OP_NAME_TYPEWindow) processBatch(batch coldata.Batch, startIdx, endIdx // {{end}} // {{end}} -func (b *_OP_NAMEBase) transitionToProcessing() { - -} +func (b *_OP_NAMEBase) transitionToProcessing() {} func (b *_OP_NAMEBase) startNewPartition() { b.idx = 0 diff --git a/pkg/sql/colexec/colexecwindow/nth_value.eg.go b/pkg/sql/colexec/colexecwindow/nth_value.eg.go new file mode 100644 index 000000000000..5dc8e6373113 --- /dev/null +++ b/pkg/sql/colexec/colexecwindow/nth_value.eg.go @@ -0,0 +1,825 @@ +// Code generated by execgen; DO NOT EDIT. +// Copyright 2021 The Cockroach Authors. +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package colexecwindow + +import ( + "context" + + "github.com/cockroachdb/cockroach/pkg/col/coldata" + "github.com/cockroachdb/cockroach/pkg/col/typeconv" + "github.com/cockroachdb/cockroach/pkg/sql/colcontainer" + "github.com/cockroachdb/cockroach/pkg/sql/colexec/colexecutils" + "github.com/cockroachdb/cockroach/pkg/sql/colexecerror" + "github.com/cockroachdb/cockroach/pkg/sql/colexecop" + "github.com/cockroachdb/cockroach/pkg/sql/colmem" + "github.com/cockroachdb/cockroach/pkg/sql/execinfrapb" + "github.com/cockroachdb/cockroach/pkg/sql/sem/builtins" + "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" + "github.com/cockroachdb/cockroach/pkg/sql/types" + "github.com/cockroachdb/cockroach/pkg/util/mon" + "github.com/cockroachdb/errors" + "github.com/marusama/semaphore" +) + +// NewNthValueOperator creates a new Operator that computes window +// function nthValue. outputColIdx specifies in which coldata.Vec the operator +// should put its output (if there is no such column, a new column is appended). +func NewNthValueOperator( + evalCtx *tree.EvalContext, + frame *execinfrapb.WindowerSpec_Frame, + ordering *execinfrapb.Ordering, + unlimitedAllocator *colmem.Allocator, + bufferAllocator *colmem.Allocator, + memoryLimit int64, + diskQueueCfg colcontainer.DiskQueueCfg, + fdSemaphore semaphore.Semaphore, + diskAcc *mon.BoundAccount, + input colexecop.Operator, + inputTypes []*types.T, + outputColIdx int, + partitionColIdx int, + peersColIdx int, + argIdxs []int, +) (colexecop.Operator, error) { + framer := newWindowFramer(evalCtx, frame, ordering, inputTypes, peersColIdx) + colsToStore := []int{argIdxs[0]} + colsToStore = framer.getColsToStore(colsToStore) + + // Allow the direct-access buffer 10% of the available memory. The rest will + // be given to the bufferedWindowOp queue. While it is somewhat more important + // for the direct-access buffer tuples to be kept in-memory, it only has to + // store a single column. TODO(drewk): play around with benchmarks to find a + // good empirically-supported fraction to use. + bufferMemLimit := int64(float64(memoryLimit) * 0.10) + buffer := colexecutils.NewSpillingBuffer( + bufferAllocator, bufferMemLimit, diskQueueCfg, fdSemaphore, inputTypes, diskAcc, colsToStore...) + base := nthValueBase{ + partitionSeekerBase: partitionSeekerBase{ + buffer: buffer, + partitionColIdx: partitionColIdx, + }, + framer: framer, + outputColIdx: outputColIdx, + bufferArgIdx: 0, // The arg column is the first column in the buffer. 
+ } + argType := inputTypes[argIdxs[0]] + switch typeconv.TypeFamilyToCanonicalTypeFamily(argType.Family()) { + case types.BoolFamily: + switch argType.Width() { + case -1: + default: + windower := &nthValueBoolWindow{nthValueBase: base} + windower.nColIdx = argIdxs[1] + return newBufferedWindowOperator( + windower, unlimitedAllocator, memoryLimit-bufferMemLimit, diskQueueCfg, + fdSemaphore, diskAcc, input, inputTypes, argType, outputColIdx, + ), nil + } + case types.BytesFamily: + switch argType.Width() { + case -1: + default: + windower := &nthValueBytesWindow{nthValueBase: base} + windower.nColIdx = argIdxs[1] + return newBufferedWindowOperator( + windower, unlimitedAllocator, memoryLimit-bufferMemLimit, diskQueueCfg, + fdSemaphore, diskAcc, input, inputTypes, argType, outputColIdx, + ), nil + } + case types.DecimalFamily: + switch argType.Width() { + case -1: + default: + windower := &nthValueDecimalWindow{nthValueBase: base} + windower.nColIdx = argIdxs[1] + return newBufferedWindowOperator( + windower, unlimitedAllocator, memoryLimit-bufferMemLimit, diskQueueCfg, + fdSemaphore, diskAcc, input, inputTypes, argType, outputColIdx, + ), nil + } + case types.IntFamily: + switch argType.Width() { + case 16: + windower := &nthValueInt16Window{nthValueBase: base} + windower.nColIdx = argIdxs[1] + return newBufferedWindowOperator( + windower, unlimitedAllocator, memoryLimit-bufferMemLimit, diskQueueCfg, + fdSemaphore, diskAcc, input, inputTypes, argType, outputColIdx, + ), nil + case 32: + windower := &nthValueInt32Window{nthValueBase: base} + windower.nColIdx = argIdxs[1] + return newBufferedWindowOperator( + windower, unlimitedAllocator, memoryLimit-bufferMemLimit, diskQueueCfg, + fdSemaphore, diskAcc, input, inputTypes, argType, outputColIdx, + ), nil + case -1: + default: + windower := &nthValueInt64Window{nthValueBase: base} + windower.nColIdx = argIdxs[1] + return newBufferedWindowOperator( + windower, unlimitedAllocator, memoryLimit-bufferMemLimit, diskQueueCfg, + fdSemaphore, diskAcc, input, inputTypes, argType, outputColIdx, + ), nil + } + case types.FloatFamily: + switch argType.Width() { + case -1: + default: + windower := &nthValueFloat64Window{nthValueBase: base} + windower.nColIdx = argIdxs[1] + return newBufferedWindowOperator( + windower, unlimitedAllocator, memoryLimit-bufferMemLimit, diskQueueCfg, + fdSemaphore, diskAcc, input, inputTypes, argType, outputColIdx, + ), nil + } + case types.TimestampTZFamily: + switch argType.Width() { + case -1: + default: + windower := &nthValueTimestampWindow{nthValueBase: base} + windower.nColIdx = argIdxs[1] + return newBufferedWindowOperator( + windower, unlimitedAllocator, memoryLimit-bufferMemLimit, diskQueueCfg, + fdSemaphore, diskAcc, input, inputTypes, argType, outputColIdx, + ), nil + } + case types.IntervalFamily: + switch argType.Width() { + case -1: + default: + windower := &nthValueIntervalWindow{nthValueBase: base} + windower.nColIdx = argIdxs[1] + return newBufferedWindowOperator( + windower, unlimitedAllocator, memoryLimit-bufferMemLimit, diskQueueCfg, + fdSemaphore, diskAcc, input, inputTypes, argType, outputColIdx, + ), nil + } + case types.JsonFamily: + switch argType.Width() { + case -1: + default: + windower := &nthValueJSONWindow{nthValueBase: base} + windower.nColIdx = argIdxs[1] + return newBufferedWindowOperator( + windower, unlimitedAllocator, memoryLimit-bufferMemLimit, diskQueueCfg, + fdSemaphore, diskAcc, input, inputTypes, argType, outputColIdx, + ), nil + } + case typeconv.DatumVecCanonicalTypeFamily: + switch 
argType.Width() { + case -1: + default: + windower := &nthValueDatumWindow{nthValueBase: base} + windower.nColIdx = argIdxs[1] + return newBufferedWindowOperator( + windower, unlimitedAllocator, memoryLimit-bufferMemLimit, diskQueueCfg, + fdSemaphore, diskAcc, input, inputTypes, argType, outputColIdx, + ), nil + } + } + return nil, errors.Errorf("unsupported nthValue window operator type %s", argType.Name()) +} + +type nthValueBase struct { + partitionSeekerBase + colexecop.CloserHelper + framer windowFramer + + outputColIdx int + bufferArgIdx int +} + +type nthValueBoolWindow struct { + nthValueBase + nColIdx int +} + +var _ bufferedWindower = &nthValueBoolWindow{} + +// processBatch implements the bufferedWindower interface. +func (w *nthValueBoolWindow) processBatch(batch coldata.Batch, startIdx, endIdx int) { + if startIdx >= endIdx { + // No processing needs to be done for this portion of the current partition. + return + } + outputVec := batch.ColVec(w.outputColIdx) + outputCol := outputVec.Bool() + outputNulls := outputVec.Nulls() + _, _ = outputCol.Get(startIdx), outputCol.Get(endIdx-1) + + nVec := batch.ColVec(w.nColIdx) + nCol := nVec.Int64() + nNulls := nVec.Nulls() + _, _ = nCol[startIdx], nCol[endIdx-1] + + for i := startIdx; i < endIdx; i++ { + w.framer.next(w.Ctx) + if nNulls.MaybeHasNulls() && nNulls.NullAt(i) { + // TODO(drewk): this could be pulled out of the loop, but for now keep the + // templating simple. + outputNulls.SetNull(i) + continue + } + // gcassert:bce + nVal := int(nCol[i]) + if nVal <= 0 { + colexecerror.ExpectedError(builtins.ErrInvalidArgumentForNthValue) + } + requestedIdx := w.framer.frameNthIdx(nVal) + if requestedIdx == -1 { + // The requested row does not exist. + outputNulls.SetNull(i) + continue + } + + vec, idx, _ := w.buffer.GetVecWithTuple(w.Ctx, w.bufferArgIdx, requestedIdx) + if vec.Nulls().MaybeHasNulls() && vec.Nulls().NullAt(idx) { + outputNulls.SetNull(i) + continue + } + col := vec.Bool() + val := col.Get(idx) + //gcassert:bce + outputCol.Set(i, val) + } +} + +type nthValueBytesWindow struct { + nthValueBase + nColIdx int +} + +var _ bufferedWindower = &nthValueBytesWindow{} + +// processBatch implements the bufferedWindower interface. +func (w *nthValueBytesWindow) processBatch(batch coldata.Batch, startIdx, endIdx int) { + if startIdx >= endIdx { + // No processing needs to be done for this portion of the current partition. + return + } + outputVec := batch.ColVec(w.outputColIdx) + outputCol := outputVec.Bytes() + outputNulls := outputVec.Nulls() + + nVec := batch.ColVec(w.nColIdx) + nCol := nVec.Int64() + nNulls := nVec.Nulls() + _, _ = nCol[startIdx], nCol[endIdx-1] + + for i := startIdx; i < endIdx; i++ { + w.framer.next(w.Ctx) + if nNulls.MaybeHasNulls() && nNulls.NullAt(i) { + // TODO(drewk): this could be pulled out of the loop, but for now keep the + // templating simple. + outputNulls.SetNull(i) + continue + } + // gcassert:bce + nVal := int(nCol[i]) + if nVal <= 0 { + colexecerror.ExpectedError(builtins.ErrInvalidArgumentForNthValue) + } + requestedIdx := w.framer.frameNthIdx(nVal) + if requestedIdx == -1 { + // The requested row does not exist. + outputNulls.SetNull(i) + continue + } + + vec, idx, _ := w.buffer.GetVecWithTuple(w.Ctx, w.bufferArgIdx, requestedIdx) + if vec.Nulls().MaybeHasNulls() && vec.Nulls().NullAt(idx) { + outputNulls.SetNull(i) + continue + } + col := vec.Bytes() + // We have to use CopySlice here because the column already has a length of + // n elements, and Set cannot set values before the last one. 
+ outputCol.CopySlice(col, i, idx, idx+1) + } +} + +type nthValueDecimalWindow struct { + nthValueBase + nColIdx int +} + +var _ bufferedWindower = &nthValueDecimalWindow{} + +// processBatch implements the bufferedWindower interface. +func (w *nthValueDecimalWindow) processBatch(batch coldata.Batch, startIdx, endIdx int) { + if startIdx >= endIdx { + // No processing needs to be done for this portion of the current partition. + return + } + outputVec := batch.ColVec(w.outputColIdx) + outputCol := outputVec.Decimal() + outputNulls := outputVec.Nulls() + _, _ = outputCol.Get(startIdx), outputCol.Get(endIdx-1) + + nVec := batch.ColVec(w.nColIdx) + nCol := nVec.Int64() + nNulls := nVec.Nulls() + _, _ = nCol[startIdx], nCol[endIdx-1] + + for i := startIdx; i < endIdx; i++ { + w.framer.next(w.Ctx) + if nNulls.MaybeHasNulls() && nNulls.NullAt(i) { + // TODO(drewk): this could be pulled out of the loop, but for now keep the + // templating simple. + outputNulls.SetNull(i) + continue + } + // gcassert:bce + nVal := int(nCol[i]) + if nVal <= 0 { + colexecerror.ExpectedError(builtins.ErrInvalidArgumentForNthValue) + } + requestedIdx := w.framer.frameNthIdx(nVal) + if requestedIdx == -1 { + // The requested row does not exist. + outputNulls.SetNull(i) + continue + } + + vec, idx, _ := w.buffer.GetVecWithTuple(w.Ctx, w.bufferArgIdx, requestedIdx) + if vec.Nulls().MaybeHasNulls() && vec.Nulls().NullAt(idx) { + outputNulls.SetNull(i) + continue + } + col := vec.Decimal() + val := col.Get(idx) + //gcassert:bce + outputCol.Set(i, val) + } +} + +type nthValueInt16Window struct { + nthValueBase + nColIdx int +} + +var _ bufferedWindower = &nthValueInt16Window{} + +// processBatch implements the bufferedWindower interface. +func (w *nthValueInt16Window) processBatch(batch coldata.Batch, startIdx, endIdx int) { + if startIdx >= endIdx { + // No processing needs to be done for this portion of the current partition. + return + } + outputVec := batch.ColVec(w.outputColIdx) + outputCol := outputVec.Int16() + outputNulls := outputVec.Nulls() + _, _ = outputCol.Get(startIdx), outputCol.Get(endIdx-1) + + nVec := batch.ColVec(w.nColIdx) + nCol := nVec.Int64() + nNulls := nVec.Nulls() + _, _ = nCol[startIdx], nCol[endIdx-1] + + for i := startIdx; i < endIdx; i++ { + w.framer.next(w.Ctx) + if nNulls.MaybeHasNulls() && nNulls.NullAt(i) { + // TODO(drewk): this could be pulled out of the loop, but for now keep the + // templating simple. + outputNulls.SetNull(i) + continue + } + // gcassert:bce + nVal := int(nCol[i]) + if nVal <= 0 { + colexecerror.ExpectedError(builtins.ErrInvalidArgumentForNthValue) + } + requestedIdx := w.framer.frameNthIdx(nVal) + if requestedIdx == -1 { + // The requested row does not exist. + outputNulls.SetNull(i) + continue + } + + vec, idx, _ := w.buffer.GetVecWithTuple(w.Ctx, w.bufferArgIdx, requestedIdx) + if vec.Nulls().MaybeHasNulls() && vec.Nulls().NullAt(idx) { + outputNulls.SetNull(i) + continue + } + col := vec.Int16() + val := col.Get(idx) + //gcassert:bce + outputCol.Set(i, val) + } +} + +type nthValueInt32Window struct { + nthValueBase + nColIdx int +} + +var _ bufferedWindower = &nthValueInt32Window{} + +// processBatch implements the bufferedWindower interface. +func (w *nthValueInt32Window) processBatch(batch coldata.Batch, startIdx, endIdx int) { + if startIdx >= endIdx { + // No processing needs to be done for this portion of the current partition. 
+ return + } + outputVec := batch.ColVec(w.outputColIdx) + outputCol := outputVec.Int32() + outputNulls := outputVec.Nulls() + _, _ = outputCol.Get(startIdx), outputCol.Get(endIdx-1) + + nVec := batch.ColVec(w.nColIdx) + nCol := nVec.Int64() + nNulls := nVec.Nulls() + _, _ = nCol[startIdx], nCol[endIdx-1] + + for i := startIdx; i < endIdx; i++ { + w.framer.next(w.Ctx) + if nNulls.MaybeHasNulls() && nNulls.NullAt(i) { + // TODO(drewk): this could be pulled out of the loop, but for now keep the + // templating simple. + outputNulls.SetNull(i) + continue + } + // gcassert:bce + nVal := int(nCol[i]) + if nVal <= 0 { + colexecerror.ExpectedError(builtins.ErrInvalidArgumentForNthValue) + } + requestedIdx := w.framer.frameNthIdx(nVal) + if requestedIdx == -1 { + // The requested row does not exist. + outputNulls.SetNull(i) + continue + } + + vec, idx, _ := w.buffer.GetVecWithTuple(w.Ctx, w.bufferArgIdx, requestedIdx) + if vec.Nulls().MaybeHasNulls() && vec.Nulls().NullAt(idx) { + outputNulls.SetNull(i) + continue + } + col := vec.Int32() + val := col.Get(idx) + //gcassert:bce + outputCol.Set(i, val) + } +} + +type nthValueInt64Window struct { + nthValueBase + nColIdx int +} + +var _ bufferedWindower = &nthValueInt64Window{} + +// processBatch implements the bufferedWindower interface. +func (w *nthValueInt64Window) processBatch(batch coldata.Batch, startIdx, endIdx int) { + if startIdx >= endIdx { + // No processing needs to be done for this portion of the current partition. + return + } + outputVec := batch.ColVec(w.outputColIdx) + outputCol := outputVec.Int64() + outputNulls := outputVec.Nulls() + _, _ = outputCol.Get(startIdx), outputCol.Get(endIdx-1) + + nVec := batch.ColVec(w.nColIdx) + nCol := nVec.Int64() + nNulls := nVec.Nulls() + _, _ = nCol[startIdx], nCol[endIdx-1] + + for i := startIdx; i < endIdx; i++ { + w.framer.next(w.Ctx) + if nNulls.MaybeHasNulls() && nNulls.NullAt(i) { + // TODO(drewk): this could be pulled out of the loop, but for now keep the + // templating simple. + outputNulls.SetNull(i) + continue + } + // gcassert:bce + nVal := int(nCol[i]) + if nVal <= 0 { + colexecerror.ExpectedError(builtins.ErrInvalidArgumentForNthValue) + } + requestedIdx := w.framer.frameNthIdx(nVal) + if requestedIdx == -1 { + // The requested row does not exist. + outputNulls.SetNull(i) + continue + } + + vec, idx, _ := w.buffer.GetVecWithTuple(w.Ctx, w.bufferArgIdx, requestedIdx) + if vec.Nulls().MaybeHasNulls() && vec.Nulls().NullAt(idx) { + outputNulls.SetNull(i) + continue + } + col := vec.Int64() + val := col.Get(idx) + //gcassert:bce + outputCol.Set(i, val) + } +} + +type nthValueFloat64Window struct { + nthValueBase + nColIdx int +} + +var _ bufferedWindower = &nthValueFloat64Window{} + +// processBatch implements the bufferedWindower interface. +func (w *nthValueFloat64Window) processBatch(batch coldata.Batch, startIdx, endIdx int) { + if startIdx >= endIdx { + // No processing needs to be done for this portion of the current partition. + return + } + outputVec := batch.ColVec(w.outputColIdx) + outputCol := outputVec.Float64() + outputNulls := outputVec.Nulls() + _, _ = outputCol.Get(startIdx), outputCol.Get(endIdx-1) + + nVec := batch.ColVec(w.nColIdx) + nCol := nVec.Int64() + nNulls := nVec.Nulls() + _, _ = nCol[startIdx], nCol[endIdx-1] + + for i := startIdx; i < endIdx; i++ { + w.framer.next(w.Ctx) + if nNulls.MaybeHasNulls() && nNulls.NullAt(i) { + // TODO(drewk): this could be pulled out of the loop, but for now keep the + // templating simple. 
+ outputNulls.SetNull(i) + continue + } + // gcassert:bce + nVal := int(nCol[i]) + if nVal <= 0 { + colexecerror.ExpectedError(builtins.ErrInvalidArgumentForNthValue) + } + requestedIdx := w.framer.frameNthIdx(nVal) + if requestedIdx == -1 { + // The requested row does not exist. + outputNulls.SetNull(i) + continue + } + + vec, idx, _ := w.buffer.GetVecWithTuple(w.Ctx, w.bufferArgIdx, requestedIdx) + if vec.Nulls().MaybeHasNulls() && vec.Nulls().NullAt(idx) { + outputNulls.SetNull(i) + continue + } + col := vec.Float64() + val := col.Get(idx) + //gcassert:bce + outputCol.Set(i, val) + } +} + +type nthValueTimestampWindow struct { + nthValueBase + nColIdx int +} + +var _ bufferedWindower = &nthValueTimestampWindow{} + +// processBatch implements the bufferedWindower interface. +func (w *nthValueTimestampWindow) processBatch(batch coldata.Batch, startIdx, endIdx int) { + if startIdx >= endIdx { + // No processing needs to be done for this portion of the current partition. + return + } + outputVec := batch.ColVec(w.outputColIdx) + outputCol := outputVec.Timestamp() + outputNulls := outputVec.Nulls() + _, _ = outputCol.Get(startIdx), outputCol.Get(endIdx-1) + + nVec := batch.ColVec(w.nColIdx) + nCol := nVec.Int64() + nNulls := nVec.Nulls() + _, _ = nCol[startIdx], nCol[endIdx-1] + + for i := startIdx; i < endIdx; i++ { + w.framer.next(w.Ctx) + if nNulls.MaybeHasNulls() && nNulls.NullAt(i) { + // TODO(drewk): this could be pulled out of the loop, but for now keep the + // templating simple. + outputNulls.SetNull(i) + continue + } + // gcassert:bce + nVal := int(nCol[i]) + if nVal <= 0 { + colexecerror.ExpectedError(builtins.ErrInvalidArgumentForNthValue) + } + requestedIdx := w.framer.frameNthIdx(nVal) + if requestedIdx == -1 { + // The requested row does not exist. + outputNulls.SetNull(i) + continue + } + + vec, idx, _ := w.buffer.GetVecWithTuple(w.Ctx, w.bufferArgIdx, requestedIdx) + if vec.Nulls().MaybeHasNulls() && vec.Nulls().NullAt(idx) { + outputNulls.SetNull(i) + continue + } + col := vec.Timestamp() + val := col.Get(idx) + //gcassert:bce + outputCol.Set(i, val) + } +} + +type nthValueIntervalWindow struct { + nthValueBase + nColIdx int +} + +var _ bufferedWindower = &nthValueIntervalWindow{} + +// processBatch implements the bufferedWindower interface. +func (w *nthValueIntervalWindow) processBatch(batch coldata.Batch, startIdx, endIdx int) { + if startIdx >= endIdx { + // No processing needs to be done for this portion of the current partition. + return + } + outputVec := batch.ColVec(w.outputColIdx) + outputCol := outputVec.Interval() + outputNulls := outputVec.Nulls() + _, _ = outputCol.Get(startIdx), outputCol.Get(endIdx-1) + + nVec := batch.ColVec(w.nColIdx) + nCol := nVec.Int64() + nNulls := nVec.Nulls() + _, _ = nCol[startIdx], nCol[endIdx-1] + + for i := startIdx; i < endIdx; i++ { + w.framer.next(w.Ctx) + if nNulls.MaybeHasNulls() && nNulls.NullAt(i) { + // TODO(drewk): this could be pulled out of the loop, but for now keep the + // templating simple. + outputNulls.SetNull(i) + continue + } + // gcassert:bce + nVal := int(nCol[i]) + if nVal <= 0 { + colexecerror.ExpectedError(builtins.ErrInvalidArgumentForNthValue) + } + requestedIdx := w.framer.frameNthIdx(nVal) + if requestedIdx == -1 { + // The requested row does not exist. 
+ outputNulls.SetNull(i) + continue + } + + vec, idx, _ := w.buffer.GetVecWithTuple(w.Ctx, w.bufferArgIdx, requestedIdx) + if vec.Nulls().MaybeHasNulls() && vec.Nulls().NullAt(idx) { + outputNulls.SetNull(i) + continue + } + col := vec.Interval() + val := col.Get(idx) + //gcassert:bce + outputCol.Set(i, val) + } +} + +type nthValueJSONWindow struct { + nthValueBase + nColIdx int +} + +var _ bufferedWindower = &nthValueJSONWindow{} + +// processBatch implements the bufferedWindower interface. +func (w *nthValueJSONWindow) processBatch(batch coldata.Batch, startIdx, endIdx int) { + if startIdx >= endIdx { + // No processing needs to be done for this portion of the current partition. + return + } + outputVec := batch.ColVec(w.outputColIdx) + outputCol := outputVec.JSON() + outputNulls := outputVec.Nulls() + + nVec := batch.ColVec(w.nColIdx) + nCol := nVec.Int64() + nNulls := nVec.Nulls() + _, _ = nCol[startIdx], nCol[endIdx-1] + + for i := startIdx; i < endIdx; i++ { + w.framer.next(w.Ctx) + if nNulls.MaybeHasNulls() && nNulls.NullAt(i) { + // TODO(drewk): this could be pulled out of the loop, but for now keep the + // templating simple. + outputNulls.SetNull(i) + continue + } + // gcassert:bce + nVal := int(nCol[i]) + if nVal <= 0 { + colexecerror.ExpectedError(builtins.ErrInvalidArgumentForNthValue) + } + requestedIdx := w.framer.frameNthIdx(nVal) + if requestedIdx == -1 { + // The requested row does not exist. + outputNulls.SetNull(i) + continue + } + + vec, idx, _ := w.buffer.GetVecWithTuple(w.Ctx, w.bufferArgIdx, requestedIdx) + if vec.Nulls().MaybeHasNulls() && vec.Nulls().NullAt(idx) { + outputNulls.SetNull(i) + continue + } + col := vec.JSON() + // We have to use CopySlice here because the column already has a length of + // n elements, and Set cannot set values before the last one. + outputCol.CopySlice(col, i, idx, idx+1) + } +} + +type nthValueDatumWindow struct { + nthValueBase + nColIdx int +} + +var _ bufferedWindower = &nthValueDatumWindow{} + +// processBatch implements the bufferedWindower interface. +func (w *nthValueDatumWindow) processBatch(batch coldata.Batch, startIdx, endIdx int) { + if startIdx >= endIdx { + // No processing needs to be done for this portion of the current partition. + return + } + outputVec := batch.ColVec(w.outputColIdx) + outputCol := outputVec.Datum() + outputNulls := outputVec.Nulls() + + nVec := batch.ColVec(w.nColIdx) + nCol := nVec.Int64() + nNulls := nVec.Nulls() + _, _ = nCol[startIdx], nCol[endIdx-1] + + for i := startIdx; i < endIdx; i++ { + w.framer.next(w.Ctx) + if nNulls.MaybeHasNulls() && nNulls.NullAt(i) { + // TODO(drewk): this could be pulled out of the loop, but for now keep the + // templating simple. + outputNulls.SetNull(i) + continue + } + // gcassert:bce + nVal := int(nCol[i]) + if nVal <= 0 { + colexecerror.ExpectedError(builtins.ErrInvalidArgumentForNthValue) + } + requestedIdx := w.framer.frameNthIdx(nVal) + if requestedIdx == -1 { + // The requested row does not exist. + outputNulls.SetNull(i) + continue + } + + vec, idx, _ := w.buffer.GetVecWithTuple(w.Ctx, w.bufferArgIdx, requestedIdx) + if vec.Nulls().MaybeHasNulls() && vec.Nulls().NullAt(idx) { + outputNulls.SetNull(i) + continue + } + col := vec.Datum() + val := col.Get(idx) + outputCol.Set(i, val) + } +} + +// transitionToProcessing implements the bufferedWindower interface. +func (b *nthValueBase) transitionToProcessing() { + b.framer.startPartition(b.Ctx, b.partitionSize, b.buffer) +} + +// startNewPartition implements the bufferedWindower interface. 
+func (b *nthValueBase) startNewPartition() { + b.partitionSize = 0 + b.buffer.Reset(b.Ctx) +} + +// Init implements the bufferedWindower interface. +func (b *nthValueBase) Init(ctx context.Context) { + if !b.InitHelper.Init(ctx) { + return + } +} + +// Close implements the bufferedWindower interface. +func (b *nthValueBase) Close() { + if !b.CloserHelper.Close() { + return + } + b.buffer.Close(b.EnsureCtx()) +} diff --git a/pkg/sql/colexec/colexecwindow/range_offset_handler.eg.go b/pkg/sql/colexec/colexecwindow/range_offset_handler.eg.go index 376f2cd5e99e..8166175d3433 100644 --- a/pkg/sql/colexec/colexecwindow/range_offset_handler.eg.go +++ b/pkg/sql/colexec/colexecwindow/range_offset_handler.eg.go @@ -1852,15 +1852,20 @@ func (h *rangeHandlerOffsetPrecedingStartAscDate) getIdx(ctx context.Context, cu col := vec.Int64() currRowVal := col.Get(vecIdx) - d, err := pgdate.MakeDateFromUnixEpoch(currRowVal) + d_casted, err := pgdate.MakeDateFromUnixEpoch(currRowVal) if err != nil { colexecerror.ExpectedError(err) } - t, err := d.ToTime() + t_casted, err := d_casted.ToTime() if err != nil { colexecerror.ExpectedError(err) } - seekVal = duration.Add(t, h.offset.Mul(-1)) + t_res := duration.Add(t_casted, h.offset.Mul(-1)) + rounded_res := t_res.Round(time.Microsecond) + if rounded_res.After(tree.MaxSupportedTime) || rounded_res.Before(tree.MinSupportedTime) { + colexecerror.ExpectedError(errors.Newf("timestamp %q exceeds supported timestamp bounds", t_res.Format(time.RFC3339))) + } + seekVal = t_res // Pick up where the last index left off, since the start and indexes of each // successive window frame are non-decreasing as we increment the current row. @@ -1917,18 +1922,18 @@ func (h *rangeHandlerOffsetPrecedingStartAscDate) getIdx(ctx context.Context, cu if peersCol[vecIdx] { cmpVal := col.Get(vecIdx) - d, err := pgdate.MakeDateFromUnixEpoch(cmpVal) + d_casted, err := pgdate.MakeDateFromUnixEpoch(cmpVal) if err != nil { - colexecerror.InternalError(err) + colexecerror.ExpectedError(err) } - t, err := d.ToTime() + t_casted, err := d_casted.ToTime() if err != nil { - colexecerror.InternalError(err) + colexecerror.ExpectedError(err) } - if t.Before(seekVal) { + if t_casted.Before(seekVal) { cmpResult = -1 - } else if seekVal.Before(t) { + } else if seekVal.Before(t_casted) { cmpResult = 1 } else { cmpResult = 0 @@ -2007,7 +2012,12 @@ func (h *rangeHandlerOffsetPrecedingStartAscTimestamp) getIdx(ctx context.Contex ) col := vec.Timestamp() currRowVal := col.Get(vecIdx) - seekVal = duration.Add(currRowVal, h.offset.Mul(-1)) + t_res := duration.Add(currRowVal, h.offset.Mul(-1)) + rounded_res := t_res.Round(time.Microsecond) + if rounded_res.After(tree.MaxSupportedTime) || rounded_res.Before(tree.MinSupportedTime) { + colexecerror.ExpectedError(errors.Newf("timestamp %q exceeds supported timestamp bounds", t_res.Format(time.RFC3339))) + } + seekVal = t_res // Pick up where the last index left off, since the start and indexes of each // successive window frame are non-decreasing as we increment the current row. 
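Every `seekVal` hunk in `range_offset_handler.eg.go` applies the same new guard, so it is worth spelling out once. Previously the result of `duration.Add` was assigned to `seekVal` directly, so an extreme frame offset could produce a `time.Time` outside the range CockroachDB supports. The generated code now rounds the result to microsecond precision (the precision at which timestamps are stored) and raises an expected error when the rounded value falls outside `tree.MinSupportedTime`/`tree.MaxSupportedTime`. A standalone sketch of the guard, assuming the imports already present in this file; `checkedSeekVal` is an illustrative name, not part of the patch:

```go
// checkedSeekVal computes the seek target for a RANGE-mode frame bound and
// verifies that its microsecond-rounded form is still a representable
// timestamp before it is used for the seek.
func checkedSeekVal(currRowVal time.Time, offset duration.Duration) time.Time {
	t_res := duration.Add(currRowVal, offset)
	rounded_res := t_res.Round(time.Microsecond)
	if rounded_res.After(tree.MaxSupportedTime) || rounded_res.Before(tree.MinSupportedTime) {
		colexecerror.ExpectedError(errors.Newf(
			"timestamp %q exceeds supported timestamp bounds", t_res.Format(time.RFC3339)))
	}
	return t_res
}
```

Using `ExpectedError` means the condition surfaces to the client as an ordinary SQL error rather than an internal assertion failure.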
@@ -2992,15 +3002,20 @@ func (h *rangeHandlerOffsetPrecedingStartDescDate) getIdx(ctx context.Context, c col := vec.Int64() currRowVal := col.Get(vecIdx) - d, err := pgdate.MakeDateFromUnixEpoch(currRowVal) + d_casted, err := pgdate.MakeDateFromUnixEpoch(currRowVal) if err != nil { colexecerror.ExpectedError(err) } - t, err := d.ToTime() + t_casted, err := d_casted.ToTime() if err != nil { colexecerror.ExpectedError(err) } - seekVal = duration.Add(t, h.offset) + t_res := duration.Add(t_casted, h.offset) + rounded_res := t_res.Round(time.Microsecond) + if rounded_res.After(tree.MaxSupportedTime) || rounded_res.Before(tree.MinSupportedTime) { + colexecerror.ExpectedError(errors.Newf("timestamp %q exceeds supported timestamp bounds", t_res.Format(time.RFC3339))) + } + seekVal = t_res // Pick up where the last index left off, since the start and indexes of each // successive window frame are non-decreasing as we increment the current row. @@ -3030,18 +3045,18 @@ func (h *rangeHandlerOffsetPrecedingStartDescDate) getIdx(ctx context.Context, c } cmpVal := col.Get(vecIdx) - d, err := pgdate.MakeDateFromUnixEpoch(cmpVal) + d_casted, err := pgdate.MakeDateFromUnixEpoch(cmpVal) if err != nil { - colexecerror.InternalError(err) + colexecerror.ExpectedError(err) } - t, err := d.ToTime() + t_casted, err := d_casted.ToTime() if err != nil { - colexecerror.InternalError(err) + colexecerror.ExpectedError(err) } - if t.Before(seekVal) { + if t_casted.Before(seekVal) { cmpResult = -1 - } else if seekVal.Before(t) { + } else if seekVal.Before(t_casted) { cmpResult = 1 } else { cmpResult = 0 @@ -3120,7 +3135,12 @@ func (h *rangeHandlerOffsetPrecedingStartDescTimestamp) getIdx(ctx context.Conte ) col := vec.Timestamp() currRowVal := col.Get(vecIdx) - seekVal = duration.Add(currRowVal, h.offset) + t_res := duration.Add(currRowVal, h.offset) + rounded_res := t_res.Round(time.Microsecond) + if rounded_res.After(tree.MaxSupportedTime) || rounded_res.Before(tree.MinSupportedTime) { + colexecerror.ExpectedError(errors.Newf("timestamp %q exceeds supported timestamp bounds", t_res.Format(time.RFC3339))) + } + seekVal = t_res // Pick up where the last index left off, since the start and indexes of each // successive window frame are non-decreasing as we increment the current row. @@ -4332,15 +4352,20 @@ func (h *rangeHandlerOffsetPrecedingEndAscDate) getIdx(ctx context.Context, curr col := vec.Int64() currRowVal := col.Get(vecIdx) - d, err := pgdate.MakeDateFromUnixEpoch(currRowVal) + d_casted, err := pgdate.MakeDateFromUnixEpoch(currRowVal) if err != nil { colexecerror.ExpectedError(err) } - t, err := d.ToTime() + t_casted, err := d_casted.ToTime() if err != nil { colexecerror.ExpectedError(err) } - seekVal = duration.Add(t, h.offset.Mul(-1)) + t_res := duration.Add(t_casted, h.offset.Mul(-1)) + rounded_res := t_res.Round(time.Microsecond) + if rounded_res.After(tree.MaxSupportedTime) || rounded_res.Before(tree.MinSupportedTime) { + colexecerror.ExpectedError(errors.Newf("timestamp %q exceeds supported timestamp bounds", t_res.Format(time.RFC3339))) + } + seekVal = t_res // Pick up where the last index left off, since the start and indexes of each // successive window frame are non-decreasing as we increment the current row. 
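Stepping back to the `lead.eg.go` and `lead_lag_tmpl.go` changes earlier in the patch: each type-specialized windower previously carried an identical `seekNextPartition` method, and all of those copies are deleted in favor of the embedded `partitionSeekerBase`, which also absorbs the `buffer`, `partitionColIdx`, and `partitionSize` fields. The method never depends on the argument type; the partition column it scans is always `Bool`, true at the first row of each partition, which is why a single shared copy suffices. The patch does not show the shared definition itself, but it can be reconstructed from the deleted copies (bounds-check-elimination hints omitted for brevity):

```go
// seekNextPartition finds the end of the current partition within batch,
// buffers the partition's tuples for later access, and returns the index at
// which the next partition begins (reconstruction; the actual definition on
// partitionSeekerBase is not shown in this diff).
func (b *partitionSeekerBase) seekNextPartition(
	batch coldata.Batch, startIdx int, isPartitionStart bool,
) (nextPartitionIdx int) {
	n := batch.Length()
	if b.partitionColIdx == -1 {
		// There is only one partition, so it includes the entirety of this batch.
		b.partitionSize += n
		nextPartitionIdx = n
	} else {
		i := startIdx
		partitionCol := batch.ColVec(b.partitionColIdx).Bool()
		// Find the location of the start of the next partition (and the end of
		// the current one).
		for ; i < n; i++ {
			if partitionCol[i] {
				// Don't break for the start of the current partition.
				if !isPartitionStart || i != startIdx {
					break
				}
			}
		}
		b.partitionSize += i - startIdx
		nextPartitionIdx = i
	}
	// Add all tuples from the argument column that fall within the current
	// partition to the buffer so that they can be accessed later.
	if startIdx < nextPartitionIdx {
		b.buffer.AppendTuples(b.Ctx, batch, startIdx, nextPartitionIdx)
	}
	return nextPartitionIdx
}
```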
@@ -4397,18 +4422,18 @@ func (h *rangeHandlerOffsetPrecedingEndAscDate) getIdx(ctx context.Context, curr if peersCol[vecIdx] { cmpVal := col.Get(vecIdx) - d, err := pgdate.MakeDateFromUnixEpoch(cmpVal) + d_casted, err := pgdate.MakeDateFromUnixEpoch(cmpVal) if err != nil { - colexecerror.InternalError(err) + colexecerror.ExpectedError(err) } - t, err := d.ToTime() + t_casted, err := d_casted.ToTime() if err != nil { - colexecerror.InternalError(err) + colexecerror.ExpectedError(err) } - if t.Before(seekVal) { + if t_casted.Before(seekVal) { cmpResult = -1 - } else if seekVal.Before(t) { + } else if seekVal.Before(t_casted) { cmpResult = 1 } else { cmpResult = 0 @@ -4504,7 +4529,12 @@ func (h *rangeHandlerOffsetPrecedingEndAscTimestamp) getIdx(ctx context.Context, ) col := vec.Timestamp() currRowVal := col.Get(vecIdx) - seekVal = duration.Add(currRowVal, h.offset.Mul(-1)) + t_res := duration.Add(currRowVal, h.offset.Mul(-1)) + rounded_res := t_res.Round(time.Microsecond) + if rounded_res.After(tree.MaxSupportedTime) || rounded_res.Before(tree.MinSupportedTime) { + colexecerror.ExpectedError(errors.Newf("timestamp %q exceeds supported timestamp bounds", t_res.Format(time.RFC3339))) + } + seekVal = t_res // Pick up where the last index left off, since the start and indexes of each // successive window frame are non-decreasing as we increment the current row. @@ -5625,15 +5655,20 @@ func (h *rangeHandlerOffsetPrecedingEndDescDate) getIdx(ctx context.Context, cur col := vec.Int64() currRowVal := col.Get(vecIdx) - d, err := pgdate.MakeDateFromUnixEpoch(currRowVal) + d_casted, err := pgdate.MakeDateFromUnixEpoch(currRowVal) if err != nil { colexecerror.ExpectedError(err) } - t, err := d.ToTime() + t_casted, err := d_casted.ToTime() if err != nil { colexecerror.ExpectedError(err) } - seekVal = duration.Add(t, h.offset) + t_res := duration.Add(t_casted, h.offset) + rounded_res := t_res.Round(time.Microsecond) + if rounded_res.After(tree.MaxSupportedTime) || rounded_res.Before(tree.MinSupportedTime) { + colexecerror.ExpectedError(errors.Newf("timestamp %q exceeds supported timestamp bounds", t_res.Format(time.RFC3339))) + } + seekVal = t_res // Pick up where the last index left off, since the start and indexes of each // successive window frame are non-decreasing as we increment the current row. 
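The comparison hunks in this file make one semantic change alongside the variable renames (`d` to `d_casted`, `t` to `t_casted`): a failure to convert a stored date during the seek is now reported with `colexecerror.ExpectedError` instead of `colexecerror.InternalError`, classifying a non-convertible date as a query-level error rather than a bug in the vectorized engine. The three-way comparison itself is unchanged; distilled, again assuming this file's imports:

```go
// compareDateToSeekVal orders a stored date (a count of days since the Unix
// epoch) against the seek target, returning -1, 0, or 1.
func compareDateToSeekVal(cmpVal int64, seekVal time.Time) int {
	d_casted, err := pgdate.MakeDateFromUnixEpoch(cmpVal)
	if err != nil {
		colexecerror.ExpectedError(err)
	}
	t_casted, err := d_casted.ToTime()
	if err != nil {
		colexecerror.ExpectedError(err)
	}
	if t_casted.Before(seekVal) {
		return -1
	} else if seekVal.Before(t_casted) {
		return 1
	}
	return 0
}
```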
@@ -5663,18 +5698,18 @@ func (h *rangeHandlerOffsetPrecedingEndDescDate) getIdx(ctx context.Context, cur } cmpVal := col.Get(vecIdx) - d, err := pgdate.MakeDateFromUnixEpoch(cmpVal) + d_casted, err := pgdate.MakeDateFromUnixEpoch(cmpVal) if err != nil { - colexecerror.InternalError(err) + colexecerror.ExpectedError(err) } - t, err := d.ToTime() + t_casted, err := d_casted.ToTime() if err != nil { - colexecerror.InternalError(err) + colexecerror.ExpectedError(err) } - if t.Before(seekVal) { + if t_casted.Before(seekVal) { cmpResult = -1 - } else if seekVal.Before(t) { + } else if seekVal.Before(t_casted) { cmpResult = 1 } else { cmpResult = 0 @@ -5770,7 +5805,12 @@ func (h *rangeHandlerOffsetPrecedingEndDescTimestamp) getIdx(ctx context.Context ) col := vec.Timestamp() currRowVal := col.Get(vecIdx) - seekVal = duration.Add(currRowVal, h.offset) + t_res := duration.Add(currRowVal, h.offset) + rounded_res := t_res.Round(time.Microsecond) + if rounded_res.After(tree.MaxSupportedTime) || rounded_res.Before(tree.MinSupportedTime) { + colexecerror.ExpectedError(errors.Newf("timestamp %q exceeds supported timestamp bounds", t_res.Format(time.RFC3339))) + } + seekVal = t_res // Pick up where the last index left off, since the start and indexes of each // successive window frame are non-decreasing as we increment the current row. @@ -6880,15 +6920,20 @@ func (h *rangeHandlerOffsetFollowingStartAscDate) getIdx(ctx context.Context, cu col := vec.Int64() currRowVal := col.Get(vecIdx) - d, err := pgdate.MakeDateFromUnixEpoch(currRowVal) + d_casted, err := pgdate.MakeDateFromUnixEpoch(currRowVal) if err != nil { colexecerror.ExpectedError(err) } - t, err := d.ToTime() + t_casted, err := d_casted.ToTime() if err != nil { colexecerror.ExpectedError(err) } - seekVal = duration.Add(t, h.offset) + t_res := duration.Add(t_casted, h.offset) + rounded_res := t_res.Round(time.Microsecond) + if rounded_res.After(tree.MaxSupportedTime) || rounded_res.Before(tree.MinSupportedTime) { + colexecerror.ExpectedError(errors.Newf("timestamp %q exceeds supported timestamp bounds", t_res.Format(time.RFC3339))) + } + seekVal = t_res // Pick up where the last index left off, since the start and indexes of each // successive window frame are non-decreasing as we increment the current row. 
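One more pattern worth naming across these variants: with an ascending ordering, a PRECEDING offset is applied negated (`h.offset.Mul(-1)`) because the bound lies at smaller values, while a FOLLOWING offset is applied as-is; a descending ordering flips both signs, which is why the Desc handlers mirror the Asc ones with the offsets swapped. A hypothetical helper, not in the patch, capturing the convention:

```go
// seekOffset negates the frame offset exactly when the bound direction and
// the ordering direction agree that the seek target lies at smaller values.
func seekOffset(offset duration.Duration, preceding, ascending bool) duration.Duration {
	if preceding == ascending {
		return offset.Mul(-1)
	}
	return offset
}
```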
@@ -6945,18 +6990,18 @@ func (h *rangeHandlerOffsetFollowingStartAscDate) getIdx(ctx context.Context, cu if peersCol[vecIdx] { cmpVal := col.Get(vecIdx) - d, err := pgdate.MakeDateFromUnixEpoch(cmpVal) + d_casted, err := pgdate.MakeDateFromUnixEpoch(cmpVal) if err != nil { - colexecerror.InternalError(err) + colexecerror.ExpectedError(err) } - t, err := d.ToTime() + t_casted, err := d_casted.ToTime() if err != nil { - colexecerror.InternalError(err) + colexecerror.ExpectedError(err) } - if t.Before(seekVal) { + if t_casted.Before(seekVal) { cmpResult = -1 - } else if seekVal.Before(t) { + } else if seekVal.Before(t_casted) { cmpResult = 1 } else { cmpResult = 0 @@ -7035,7 +7080,12 @@ func (h *rangeHandlerOffsetFollowingStartAscTimestamp) getIdx(ctx context.Contex ) col := vec.Timestamp() currRowVal := col.Get(vecIdx) - seekVal = duration.Add(currRowVal, h.offset) + t_res := duration.Add(currRowVal, h.offset) + rounded_res := t_res.Round(time.Microsecond) + if rounded_res.After(tree.MaxSupportedTime) || rounded_res.Before(tree.MinSupportedTime) { + colexecerror.ExpectedError(errors.Newf("timestamp %q exceeds supported timestamp bounds", t_res.Format(time.RFC3339))) + } + seekVal = t_res // Pick up where the last index left off, since the start and indexes of each // successive window frame are non-decreasing as we increment the current row. @@ -8020,15 +8070,20 @@ func (h *rangeHandlerOffsetFollowingStartDescDate) getIdx(ctx context.Context, c col := vec.Int64() currRowVal := col.Get(vecIdx) - d, err := pgdate.MakeDateFromUnixEpoch(currRowVal) + d_casted, err := pgdate.MakeDateFromUnixEpoch(currRowVal) if err != nil { colexecerror.ExpectedError(err) } - t, err := d.ToTime() + t_casted, err := d_casted.ToTime() if err != nil { colexecerror.ExpectedError(err) } - seekVal = duration.Add(t, h.offset.Mul(-1)) + t_res := duration.Add(t_casted, h.offset.Mul(-1)) + rounded_res := t_res.Round(time.Microsecond) + if rounded_res.After(tree.MaxSupportedTime) || rounded_res.Before(tree.MinSupportedTime) { + colexecerror.ExpectedError(errors.Newf("timestamp %q exceeds supported timestamp bounds", t_res.Format(time.RFC3339))) + } + seekVal = t_res // Pick up where the last index left off, since the start and indexes of each // successive window frame are non-decreasing as we increment the current row. 
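A note on a detail from the new `last_value` and `nth_value` windowers earlier in the patch: for byte-like columns (`Bytes`, `JSON`) the generated `processBatch` writes output with `CopySlice` rather than `Set`. As the in-code comment explains, the output column already has a length of n elements and `Set` cannot write at a position before the last one set, whereas `CopySlice` can place a value anywhere. A minimal illustration, assuming the `coldata` import from those files:

```go
// copyBufferedBytesValue writes the element at srcIdx of src into position
// destIdx of dst by copying the one-element slice [srcIdx, srcIdx+1). Set
// could not be used here when destIdx precedes an already-set index.
func copyBufferedBytesValue(dst, src *coldata.Bytes, destIdx, srcIdx int) {
	dst.CopySlice(src, destIdx, srcIdx, srcIdx+1)
}
```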
@@ -8058,18 +8113,18 @@ func (h *rangeHandlerOffsetFollowingStartDescDate) getIdx(ctx context.Context, c } cmpVal := col.Get(vecIdx) - d, err := pgdate.MakeDateFromUnixEpoch(cmpVal) + d_casted, err := pgdate.MakeDateFromUnixEpoch(cmpVal) if err != nil { - colexecerror.InternalError(err) + colexecerror.ExpectedError(err) } - t, err := d.ToTime() + t_casted, err := d_casted.ToTime() if err != nil { - colexecerror.InternalError(err) + colexecerror.ExpectedError(err) } - if t.Before(seekVal) { + if t_casted.Before(seekVal) { cmpResult = -1 - } else if seekVal.Before(t) { + } else if seekVal.Before(t_casted) { cmpResult = 1 } else { cmpResult = 0 @@ -8148,7 +8203,12 @@ func (h *rangeHandlerOffsetFollowingStartDescTimestamp) getIdx(ctx context.Conte ) col := vec.Timestamp() currRowVal := col.Get(vecIdx) - seekVal = duration.Add(currRowVal, h.offset.Mul(-1)) + t_res := duration.Add(currRowVal, h.offset.Mul(-1)) + rounded_res := t_res.Round(time.Microsecond) + if rounded_res.After(tree.MaxSupportedTime) || rounded_res.Before(tree.MinSupportedTime) { + colexecerror.ExpectedError(errors.Newf("timestamp %q exceeds supported timestamp bounds", t_res.Format(time.RFC3339))) + } + seekVal = t_res // Pick up where the last index left off, since the start and indexes of each // successive window frame are non-decreasing as we increment the current row. @@ -9360,15 +9420,20 @@ func (h *rangeHandlerOffsetFollowingEndAscDate) getIdx(ctx context.Context, curr col := vec.Int64() currRowVal := col.Get(vecIdx) - d, err := pgdate.MakeDateFromUnixEpoch(currRowVal) + d_casted, err := pgdate.MakeDateFromUnixEpoch(currRowVal) if err != nil { colexecerror.ExpectedError(err) } - t, err := d.ToTime() + t_casted, err := d_casted.ToTime() if err != nil { colexecerror.ExpectedError(err) } - seekVal = duration.Add(t, h.offset) + t_res := duration.Add(t_casted, h.offset) + rounded_res := t_res.Round(time.Microsecond) + if rounded_res.After(tree.MaxSupportedTime) || rounded_res.Before(tree.MinSupportedTime) { + colexecerror.ExpectedError(errors.Newf("timestamp %q exceeds supported timestamp bounds", t_res.Format(time.RFC3339))) + } + seekVal = t_res // Pick up where the last index left off, since the start and indexes of each // successive window frame are non-decreasing as we increment the current row. 
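The per-row branching repeated in every `nth_value` `processBatch` above reduces to three cases: a NULL `n` argument yields a NULL output, a non-positive `n` raises `builtins.ErrInvalidArgumentForNthValue` as an expected error, and a frame holding fewer than n rows (signalled by `frameNthIdx` returning -1) yields NULL. A hypothetical distillation; `nthValueOutputIsNull` is not a function in the patch:

```go
// nthValueOutputIsNull mirrors the generated per-row logic: it reports
// whether the output for the current row should be NULL, raising the
// expected error for an invalid n argument.
func nthValueOutputIsNull(nIsNull bool, nVal int64, frameNthIdx int) bool {
	if nIsNull {
		return true
	}
	if nVal <= 0 {
		colexecerror.ExpectedError(builtins.ErrInvalidArgumentForNthValue)
	}
	// frameNthIdx == -1 means the requested row does not exist in the frame.
	return frameNthIdx == -1
}
```

As the TODO(drewk) comments note, the NULL check on `n` currently runs inside the row loop and could be hoisted out when the whole batch is non-NULL.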
@@ -9425,18 +9490,18 @@ func (h *rangeHandlerOffsetFollowingEndAscDate) getIdx(ctx context.Context, curr if peersCol[vecIdx] { cmpVal := col.Get(vecIdx) - d, err := pgdate.MakeDateFromUnixEpoch(cmpVal) + d_casted, err := pgdate.MakeDateFromUnixEpoch(cmpVal) if err != nil { - colexecerror.InternalError(err) + colexecerror.ExpectedError(err) } - t, err := d.ToTime() + t_casted, err := d_casted.ToTime() if err != nil { - colexecerror.InternalError(err) + colexecerror.ExpectedError(err) } - if t.Before(seekVal) { + if t_casted.Before(seekVal) { cmpResult = -1 - } else if seekVal.Before(t) { + } else if seekVal.Before(t_casted) { cmpResult = 1 } else { cmpResult = 0 @@ -9532,7 +9597,12 @@ func (h *rangeHandlerOffsetFollowingEndAscTimestamp) getIdx(ctx context.Context, ) col := vec.Timestamp() currRowVal := col.Get(vecIdx) - seekVal = duration.Add(currRowVal, h.offset) + t_res := duration.Add(currRowVal, h.offset) + rounded_res := t_res.Round(time.Microsecond) + if rounded_res.After(tree.MaxSupportedTime) || rounded_res.Before(tree.MinSupportedTime) { + colexecerror.ExpectedError(errors.Newf("timestamp %q exceeds supported timestamp bounds", t_res.Format(time.RFC3339))) + } + seekVal = t_res // Pick up where the last index left off, since the start and indexes of each // successive window frame are non-decreasing as we increment the current row. @@ -10653,15 +10723,20 @@ func (h *rangeHandlerOffsetFollowingEndDescDate) getIdx(ctx context.Context, cur col := vec.Int64() currRowVal := col.Get(vecIdx) - d, err := pgdate.MakeDateFromUnixEpoch(currRowVal) + d_casted, err := pgdate.MakeDateFromUnixEpoch(currRowVal) if err != nil { colexecerror.ExpectedError(err) } - t, err := d.ToTime() + t_casted, err := d_casted.ToTime() if err != nil { colexecerror.ExpectedError(err) } - seekVal = duration.Add(t, h.offset.Mul(-1)) + t_res := duration.Add(t_casted, h.offset.Mul(-1)) + rounded_res := t_res.Round(time.Microsecond) + if rounded_res.After(tree.MaxSupportedTime) || rounded_res.Before(tree.MinSupportedTime) { + colexecerror.ExpectedError(errors.Newf("timestamp %q exceeds supported timestamp bounds", t_res.Format(time.RFC3339))) + } + seekVal = t_res // Pick up where the last index left off, since the start and indexes of each // successive window frame are non-decreasing as we increment the current row. 
@@ -10691,18 +10766,18 @@ func (h *rangeHandlerOffsetFollowingEndDescDate) getIdx(ctx context.Context, cur } cmpVal := col.Get(vecIdx) - d, err := pgdate.MakeDateFromUnixEpoch(cmpVal) + d_casted, err := pgdate.MakeDateFromUnixEpoch(cmpVal) if err != nil { - colexecerror.InternalError(err) + colexecerror.ExpectedError(err) } - t, err := d.ToTime() + t_casted, err := d_casted.ToTime() if err != nil { - colexecerror.InternalError(err) + colexecerror.ExpectedError(err) } - if t.Before(seekVal) { + if t_casted.Before(seekVal) { cmpResult = -1 - } else if seekVal.Before(t) { + } else if seekVal.Before(t_casted) { cmpResult = 1 } else { cmpResult = 0 @@ -10798,7 +10873,12 @@ func (h *rangeHandlerOffsetFollowingEndDescTimestamp) getIdx(ctx context.Context ) col := vec.Timestamp() currRowVal := col.Get(vecIdx) - seekVal = duration.Add(currRowVal, h.offset.Mul(-1)) + t_res := duration.Add(currRowVal, h.offset.Mul(-1)) + rounded_res := t_res.Round(time.Microsecond) + if rounded_res.After(tree.MaxSupportedTime) || rounded_res.Before(tree.MinSupportedTime) { + colexecerror.ExpectedError(errors.Newf("timestamp %q exceeds supported timestamp bounds", t_res.Format(time.RFC3339))) + } + seekVal = t_res // Pick up where the last index left off, since the start and indexes of each // successive window frame are non-decreasing as we increment the current row. diff --git a/pkg/sql/colexec/colexecwindow/window_framer.eg.go b/pkg/sql/colexec/colexecwindow/window_framer.eg.go index fd422c43cce2..fc0039027000 100644 --- a/pkg/sql/colexec/colexecwindow/window_framer.eg.go +++ b/pkg/sql/colexec/colexecwindow/window_framer.eg.go @@ -1169,8 +1169,8 @@ func (b *windowFramerBase) incrementPeerGroup(ctx context.Context, index, groups return b.partitionSize } // We have to iterate to the beginning of the next peer group. 
+ index++ for { - index++ if index >= b.partitionSize { return b.partitionSize } diff --git a/pkg/sql/colexec/colexecwindow/window_framer_test.go b/pkg/sql/colexec/colexecwindow/window_framer_test.go index 48b9cb5e5fca..febf13ca7c6f 100644 --- a/pkg/sql/colexec/colexecwindow/window_framer_test.go +++ b/pkg/sql/colexec/colexecwindow/window_framer_test.go @@ -13,7 +13,6 @@ package colexecwindow import ( "context" "fmt" - "math" "math/rand" "sort" "testing" @@ -29,7 +28,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/colmem" "github.com/cockroachdb/cockroach/pkg/sql/execinfrapb" "github.com/cockroachdb/cockroach/pkg/sql/randgen" - "github.com/cockroachdb/cockroach/pkg/sql/rowenc" "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" "github.com/cockroachdb/cockroach/pkg/sql/types" "github.com/cockroachdb/cockroach/pkg/testutils/colcontainerutils" @@ -64,12 +62,15 @@ func TestWindowFramer(t *testing.T) { queueCfg.CacheMode = colcontainer.DiskQueueCacheModeClearAndReuseCache queueCfg.SetDefaultBufferSizeBytesForCacheMode() + var memLimits = []int64{1, 1 << 10, 1 << 20} + testCfg := &testConfig{ rng: rng, evalCtx: evalCtx, factory: factory, allocator: allocator, queueCfg: queueCfg, + memLimit: memLimits[rng.Intn(len(memLimits))], } const randTypeProbability = 0.5 @@ -152,6 +153,7 @@ type testConfig struct { startBound tree.WindowFrameBoundType endBound tree.WindowFrameBoundType exclusion tree.WindowFrameExclusion + memLimit int64 } func testWindowFramer(t *testing.T, testCfg *testConfig) { @@ -283,7 +285,7 @@ func makeSortedPartition(testCfg *testConfig) (tree.Datums, *colexecutils.Spilli sort.Sort(datums) partition := colexecutils.NewSpillingBuffer( - testCfg.allocator, math.MaxInt64, testCfg.queueCfg, + testCfg.allocator, testCfg.memLimit, testCfg.queueCfg, colexecop.NewTestingSemaphore(2), []*types.T{testCfg.typ, types.Bool}, testDiskAcc, ) insertBatch := testCfg.allocator.NewMemBatchWithFixedCapacity( @@ -333,10 +335,10 @@ func initWindowFramers( ) (windowFramer, *tree.WindowFrameRun, *colexecutils.SpillingBuffer) { offsetType := types.Int if testCfg.mode == tree.RANGE { - offsetType = getOffsetType(testCfg.typ) + offsetType = GetOffsetTypeFromOrderColType(t, testCfg.typ) } - startOffset := makeRandOffset(t, testCfg.rng, offsetType) - endOffset := makeRandOffset(t, testCfg.rng, offsetType) + startOffset := MakeRandWindowFrameRangeOffset(t, testCfg.rng, offsetType) + endOffset := MakeRandWindowFrameRangeOffset(t, testCfg.rng, offsetType) peersCol, orderCol := tree.NoColumnIdx, tree.NoColumnIdx if testCfg.ordered { @@ -354,7 +356,7 @@ func initWindowFramers( Bounds: execinfrapb.WindowerSpec_Frame_Bounds{ Start: execinfrapb.WindowerSpec_Frame_Bound{ BoundType: boundToExecinfrapb(testCfg.startBound), - TypedOffset: encodeOffset(t, startOffset), + TypedOffset: EncodeWindowFrameOffset(t, startOffset), OffsetType: execinfrapb.DatumInfo{ Type: testCfg.typ, Encoding: datumEncoding, @@ -362,7 +364,7 @@ func initWindowFramers( }, End: &execinfrapb.WindowerSpec_Frame_Bound{ BoundType: boundToExecinfrapb(testCfg.endBound), - TypedOffset: encodeOffset(t, endOffset), + TypedOffset: EncodeWindowFrameOffset(t, endOffset), OffsetType: execinfrapb.DatumInfo{ Type: testCfg.typ, Encoding: datumEncoding, @@ -519,38 +521,3 @@ func exclusionToExecinfrapb( } return 0 } - -func encodeOffset(t *testing.T, offset tree.Datum) []byte { - var encoded, scratch []byte - encoded, err := rowenc.EncodeTableValue( - encoded, descpb.ColumnID(encoding.NoColumnID), offset, scratch) - require.NoError(t, err) - return encoded 
-} - -func makeRandOffset(t *testing.T, rng *rand.Rand, typ *types.T) tree.Datum { - isNegative := func(val tree.Datum) bool { - switch datumTyp := val.(type) { - case *tree.DInt: - return int64(*datumTyp) < 0 - case *tree.DFloat: - return float64(*datumTyp) < 0 - case *tree.DDecimal: - return datumTyp.Negative - case *tree.DInterval, *tree.DTimestampTZ, *tree.DDate, *tree.DTimeTZ: - return false - default: - t.Errorf("unexpected error: %v", errors.AssertionFailedf("unsupported datum: %v", datumTyp)) - return false - } - } - - for { - val := randgen.RandDatumSimple(rng, typ) - if isNegative(val) { - // Offsets must be non-null and non-negative. - continue - } - return val - } -} diff --git a/pkg/sql/colexec/colexecwindow/window_framer_tmpl.go b/pkg/sql/colexec/colexecwindow/window_framer_tmpl.go index 04f677e3b3ef..d0bbec7c2844 100644 --- a/pkg/sql/colexec/colexecwindow/window_framer_tmpl.go +++ b/pkg/sql/colexec/colexecwindow/window_framer_tmpl.go @@ -276,8 +276,8 @@ func (b *windowFramerBase) incrementPeerGroup(ctx context.Context, index, groups return b.partitionSize } // We have to iterate to the beginning of the next peer group. + index++ for { - index++ if index >= b.partitionSize { return b.partitionSize } diff --git a/pkg/sql/colexec/colexecwindow/window_functions_test.go b/pkg/sql/colexec/colexecwindow/window_functions_test.go index 22ac5b76fce4..4a13350e735b 100644 --- a/pkg/sql/colexec/colexecwindow/window_functions_test.go +++ b/pkg/sql/colexec/colexecwindow/window_functions_test.go @@ -68,9 +68,12 @@ func TestWindowFunctions(t *testing.T) { nTileFn := execinfrapb.WindowerSpec_NTILE lagFn := execinfrapb.WindowerSpec_LAG leadFn := execinfrapb.WindowerSpec_LEAD + firstValueFn := execinfrapb.WindowerSpec_FIRST_VALUE + lastValueFn := execinfrapb.WindowerSpec_LAST_VALUE + nthValueFn := execinfrapb.WindowerSpec_NTH_VALUE accounts := make([]*mon.BoundAccount, 0) monitors := make([]*mon.BytesMonitor, 0) - for _, spillForced := range []bool{false, true} { + for _, spillForced := range []bool{true} { flowCtx.Cfg.TestingKnobs.ForceDiskSpill = spillForced for _, tc := range []windowFnTestCase{ // With PARTITION BY, no ORDER BY. 
@@ -193,6 +196,48 @@ func TestWindowFunctions(t *testing.T) { }, }, }, + { + tuples: colexectestutils.Tuples{{1, 1}, {1, 2}, {1, 3}, {2, 4}, {2, 5}, {3, 6}}, + expected: colexectestutils.Tuples{{1, 1, 1}, {1, 2, 1}, {1, 3, 1}, {2, 4, 4}, {2, 5, 4}, {3, 6, 6}}, + windowerSpec: execinfrapb.WindowerSpec{ + PartitionBy: []uint32{0}, + WindowFns: []execinfrapb.WindowerSpec_WindowFn{ + { + Func: execinfrapb.WindowerSpec_Func{WindowFunc: &firstValueFn}, + ArgsIdxs: []uint32{1}, + OutputColIdx: 2, + }, + }, + }, + }, + { + tuples: colexectestutils.Tuples{{1, 1}, {1, 2}, {1, 3}, {2, 4}, {2, 5}, {3, 6}}, + expected: colexectestutils.Tuples{{1, 1, 3}, {1, 2, 3}, {1, 3, 3}, {2, 4, 5}, {2, 5, 5}, {3, 6, 6}}, + windowerSpec: execinfrapb.WindowerSpec{ + PartitionBy: []uint32{0}, + WindowFns: []execinfrapb.WindowerSpec_WindowFn{ + { + Func: execinfrapb.WindowerSpec_Func{WindowFunc: &lastValueFn}, + ArgsIdxs: []uint32{1}, + OutputColIdx: 2, + }, + }, + }, + }, + { + tuples: colexectestutils.Tuples{{1, 2, 4}, {1, 2, 1}, {1, 2, 2}, {2, 4, 1}, {2, 5, 1}, {3, 6, 1}}, + expected: colexectestutils.Tuples{{1, 2, 4, nil}, {1, 2, 1, 2}, {1, 2, 2, 2}, {2, 4, 1, 4}, {2, 5, 1, 4}, {3, 6, 1, 6}}, + windowerSpec: execinfrapb.WindowerSpec{ + PartitionBy: []uint32{0}, + WindowFns: []execinfrapb.WindowerSpec_WindowFn{ + { + Func: execinfrapb.WindowerSpec_Func{WindowFunc: &nthValueFn}, + ArgsIdxs: []uint32{1, 2}, + OutputColIdx: 3, + }, + }, + }, + }, // No PARTITION BY, with ORDER BY. { @@ -314,6 +359,48 @@ func TestWindowFunctions(t *testing.T) { }, }, }, + { + tuples: colexectestutils.Tuples{{3, 1}, {1, 2}, {2, 3}, {nil, 4}, {1, 5}, {nil, 6}, {3, 7}}, + expected: colexectestutils.Tuples{{nil, 4, 4}, {nil, 6, 4}, {1, 2, 4}, {1, 5, 4}, {2, 3, 4}, {3, 1, 4}, {3, 7, 4}}, + windowerSpec: execinfrapb.WindowerSpec{ + WindowFns: []execinfrapb.WindowerSpec_WindowFn{ + { + Func: execinfrapb.WindowerSpec_Func{WindowFunc: &firstValueFn}, + ArgsIdxs: []uint32{1}, + Ordering: execinfrapb.Ordering{Columns: []execinfrapb.Ordering_Column{{ColIdx: 0}}}, + OutputColIdx: 2, + }, + }, + }, + }, + { + tuples: colexectestutils.Tuples{{3, 1}, {1, 2}, {2, 3}, {nil, 4}, {1, 5}, {nil, 6}, {3, 7}}, + expected: colexectestutils.Tuples{{nil, 4, 6}, {nil, 6, 6}, {1, 2, 5}, {1, 5, 5}, {2, 3, 3}, {3, 1, 7}, {3, 7, 7}}, + windowerSpec: execinfrapb.WindowerSpec{ + WindowFns: []execinfrapb.WindowerSpec_WindowFn{ + { + Func: execinfrapb.WindowerSpec_Func{WindowFunc: &lastValueFn}, + ArgsIdxs: []uint32{1}, + Ordering: execinfrapb.Ordering{Columns: []execinfrapb.Ordering_Column{{ColIdx: 0}}}, + OutputColIdx: 2, + }, + }, + }, + }, + { + tuples: colexectestutils.Tuples{{nil, 4, 1}, {nil, 6, 2}, {1, 2, 2}, {1, 5, 1}, {2, 3, 1}, {3, 1, 8}, {3, 7, 4}}, + expected: colexectestutils.Tuples{{nil, 4, 1, 4}, {nil, 6, 2, 6}, {1, 2, 2, 6}, {1, 5, 1, 4}, {2, 3, 1, 4}, {3, 1, 8, nil}, {3, 7, 4, 5}}, + windowerSpec: execinfrapb.WindowerSpec{ + WindowFns: []execinfrapb.WindowerSpec_WindowFn{ + { + Func: execinfrapb.WindowerSpec_Func{WindowFunc: &nthValueFn}, + ArgsIdxs: []uint32{1, 2}, + Ordering: execinfrapb.Ordering{Columns: []execinfrapb.Ordering_Column{{ColIdx: 0}}}, + OutputColIdx: 3, + }, + }, + }, + }, // With both PARTITION BY and ORDER BY. 
{ @@ -443,6 +530,69 @@ func TestWindowFunctions(t *testing.T) { }, }, }, + { + tuples: colexectestutils.Tuples{ + {3, 2, 1}, {1, nil, 2}, {2, 1, 3}, {nil, nil, 4}, + {1, 2, 5}, {nil, 1, 6}, {nil, nil, 4}, {3, 1, 8}, + }, + expected: colexectestutils.Tuples{ + {nil, nil, 4, 4}, {nil, nil, 4, 4}, {nil, 1, 6, 4}, {1, nil, 2, 2}, + {1, 2, 5, 2}, {2, 1, 3, 3}, {3, 1, 8, 8}, {3, 2, 1, 8}, + }, + windowerSpec: execinfrapb.WindowerSpec{ + PartitionBy: []uint32{0}, + WindowFns: []execinfrapb.WindowerSpec_WindowFn{ + { + Func: execinfrapb.WindowerSpec_Func{WindowFunc: &firstValueFn}, + ArgsIdxs: []uint32{2}, + Ordering: execinfrapb.Ordering{Columns: []execinfrapb.Ordering_Column{{ColIdx: 1}}}, + OutputColIdx: 3, + }, + }, + }, + }, + { + tuples: colexectestutils.Tuples{ + {3, 2, 1}, {1, nil, 2}, {2, 1, 3}, {nil, nil, 4}, + {1, 2, 5}, {nil, 1, 6}, {nil, nil, 4}, {3, 1, 8}, + }, + expected: colexectestutils.Tuples{ + {nil, nil, 4, 4}, {nil, nil, 4, 4}, {nil, 1, 6, 6}, {1, nil, 2, 2}, + {1, 2, 5, 5}, {2, 1, 3, 3}, {3, 1, 8, 8}, {3, 2, 1, 1}, + }, + windowerSpec: execinfrapb.WindowerSpec{ + PartitionBy: []uint32{0}, + WindowFns: []execinfrapb.WindowerSpec_WindowFn{ + { + Func: execinfrapb.WindowerSpec_Func{WindowFunc: &lastValueFn}, + ArgsIdxs: []uint32{2}, + Ordering: execinfrapb.Ordering{Columns: []execinfrapb.Ordering_Column{{ColIdx: 1}}}, + OutputColIdx: 3, + }, + }, + }, + }, + { + tuples: colexectestutils.Tuples{ + {nil, nil, 4, 2}, {nil, nil, 4, 5}, {nil, 1, 6, 1}, {1, nil, 2, 2}, + {1, 2, 5, 1}, {2, 1, 3, 2}, {3, 1, 8, 1}, {3, 2, 1, 2}, + }, + expected: colexectestutils.Tuples{ + {nil, nil, 4, 2, 4}, {nil, nil, 4, 5, nil}, {nil, 1, 6, 1, 4}, {1, nil, 2, 2, nil}, + {1, 2, 5, 1, 2}, {2, 1, 3, 2, nil}, {3, 1, 8, 1, 8}, {3, 2, 1, 2, 1}, + }, + windowerSpec: execinfrapb.WindowerSpec{ + PartitionBy: []uint32{0}, + WindowFns: []execinfrapb.WindowerSpec_WindowFn{ + { + Func: execinfrapb.WindowerSpec_Func{WindowFunc: &nthValueFn}, + ArgsIdxs: []uint32{2, 3}, + Ordering: execinfrapb.Ordering{Columns: []execinfrapb.Ordering_Column{{ColIdx: 1}}}, + OutputColIdx: 4, + }, + }, + }, + }, // With neither PARTITION BY nor ORDER BY. 
{ @@ -556,6 +706,45 @@ func TestWindowFunctions(t *testing.T) { }, }, }, + { + tuples: colexectestutils.Tuples{{1}, {2}, {3}, {4}, {5}, {6}}, + expected: colexectestutils.Tuples{{1, 1}, {2, 1}, {3, 1}, {4, 1}, {5, 1}, {6, 1}}, + windowerSpec: execinfrapb.WindowerSpec{ + WindowFns: []execinfrapb.WindowerSpec_WindowFn{ + { + Func: execinfrapb.WindowerSpec_Func{WindowFunc: &firstValueFn}, + ArgsIdxs: []uint32{0}, + OutputColIdx: 1, + }, + }, + }, + }, + { + tuples: colexectestutils.Tuples{{1}, {2}, {3}, {4}, {5}, {6}}, + expected: colexectestutils.Tuples{{1, 6}, {2, 6}, {3, 6}, {4, 6}, {5, 6}, {6, 6}}, + windowerSpec: execinfrapb.WindowerSpec{ + WindowFns: []execinfrapb.WindowerSpec_WindowFn{ + { + Func: execinfrapb.WindowerSpec_Func{WindowFunc: &lastValueFn}, + ArgsIdxs: []uint32{0}, + OutputColIdx: 1, + }, + }, + }, + }, + { + tuples: colexectestutils.Tuples{{1, 1}, {2, 1}, {3, 5}, {4, 7}, {5, 3}, {6, 2}}, + expected: colexectestutils.Tuples{{1, 1, 1}, {2, 1, 1}, {3, 5, 5}, {4, 7, nil}, {5, 3, 3}, {6, 2, 2}}, + windowerSpec: execinfrapb.WindowerSpec{ + WindowFns: []execinfrapb.WindowerSpec_WindowFn{ + { + Func: execinfrapb.WindowerSpec_Func{WindowFunc: &nthValueFn}, + ArgsIdxs: []uint32{0, 1}, + OutputColIdx: 2, + }, + }, + }, + }, } { log.Infof(ctx, "spillForced=%t/%s", spillForced, tc.windowerSpec.WindowFns[0].Func.String()) var semsToCheck []semaphore.Semaphore @@ -612,6 +801,7 @@ func TestWindowFunctions(t *testing.T) { func BenchmarkWindowFunctions(b *testing.B) { defer log.Scope(b).Close(b) ctx := context.Background() + evalCtx := tree.MakeTestingEvalContext(cluster.MakeTestingClusterSettings()) const ( memLimit = 64 << 20 @@ -691,6 +881,24 @@ func BenchmarkWindowFunctions(b *testing.B) { colexecop.NewTestingSemaphore(fdLimit), testDiskAcc, source, sourceTypes, outputIdx, partitionCol, arg1ColIdx, arg2ColIdx, arg3ColIdx, ) + case execinfrapb.WindowerSpec_FIRST_VALUE: + op, err = NewFirstValueOperator( + &evalCtx, NormalizeWindowFrame(nil), &execinfrapb.Ordering{Columns: orderingCols}, + mainAllocator, bufferAllocator, memLimit, queueCfg, + colexecop.NewTestingSemaphore(fdLimit), testDiskAcc, source, sourceTypes, + outputIdx, partitionColIdx, peersColIdx, []int{arg1ColIdx}) + case execinfrapb.WindowerSpec_LAST_VALUE: + op, err = NewLastValueOperator( + &evalCtx, NormalizeWindowFrame(nil), &execinfrapb.Ordering{Columns: orderingCols}, + mainAllocator, bufferAllocator, memLimit, queueCfg, + colexecop.NewTestingSemaphore(fdLimit), testDiskAcc, source, sourceTypes, + outputIdx, partitionColIdx, peersColIdx, []int{arg1ColIdx}) + case execinfrapb.WindowerSpec_NTH_VALUE: + op, err = NewNthValueOperator( + &evalCtx, NormalizeWindowFrame(nil), &execinfrapb.Ordering{Columns: orderingCols}, + mainAllocator, bufferAllocator, memLimit, queueCfg, + colexecop.NewTestingSemaphore(fdLimit), testDiskAcc, source, sourceTypes, + outputIdx, partitionColIdx, peersColIdx, []int{arg1ColIdx, arg2ColIdx}) } require.NoError(b, err) return op @@ -731,6 +939,9 @@ func BenchmarkWindowFunctions(b *testing.B) { execinfrapb.WindowerSpec_NTILE, execinfrapb.WindowerSpec_LAG, execinfrapb.WindowerSpec_LEAD, + execinfrapb.WindowerSpec_FIRST_VALUE, + execinfrapb.WindowerSpec_LAST_VALUE, + execinfrapb.WindowerSpec_NTH_VALUE, } // The number of rows should be a multiple of coldata.BatchSize(). 
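For the nth_value expectations above (for example the {4, 7, nil} row), the function yields NULL whenever the requested position exceeds the number of rows in the frame. A row-at-a-time sketch of that semantics, assuming the frame has been materialized as a plain slice; the real operator works on columnar batches driven by a window framer:

package main

import "fmt"

// nthValue returns the value at 1-indexed position n within the current
// frame, or null=true when the frame has fewer than n rows. The real engine
// also rejects non-positive n with an expected error (message illustrative).
func nthValue(frame []int, n int) (val int, null bool, err error) {
	if n <= 0 {
		return 0, false, fmt.Errorf("argument of nth_value() must be greater than zero")
	}
	if n > len(frame) {
		return 0, true, nil // NULL, matching the test expectations above
	}
	return frame[n-1], false, nil
}

func main() {
	// Mirrors the {4, 7, nil} case: with neither PARTITION BY nor ORDER BY
	// the frame is the whole 6-row partition, so nth_value(..., 7) is NULL.
	frame := []int{1, 2, 3, 4, 5, 6}
	_, null, _ := nthValue(frame, 7)
	fmt.Println(null) // true
}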
diff --git a/pkg/sql/colexec/colexecwindow/window_functions_util.go b/pkg/sql/colexec/colexecwindow/window_functions_util.go index a539158efaa0..658831c5fe2d 100644 --- a/pkg/sql/colexec/colexecwindow/window_functions_util.go +++ b/pkg/sql/colexec/colexecwindow/window_functions_util.go @@ -11,10 +11,19 @@ package colexecwindow import ( + "math/rand" + "testing" + + "github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb" "github.com/cockroachdb/cockroach/pkg/sql/colexecerror" "github.com/cockroachdb/cockroach/pkg/sql/execinfrapb" + "github.com/cockroachdb/cockroach/pkg/sql/randgen" + "github.com/cockroachdb/cockroach/pkg/sql/rowenc" + "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" "github.com/cockroachdb/cockroach/pkg/sql/types" + "github.com/cockroachdb/cockroach/pkg/util/encoding" "github.com/cockroachdb/errors" + "github.com/stretchr/testify/require" ) // SupportedWindowFns contains all window functions supported by the @@ -28,6 +37,9 @@ var SupportedWindowFns = map[execinfrapb.WindowerSpec_WindowFunc]struct{}{ execinfrapb.WindowerSpec_NTILE: {}, execinfrapb.WindowerSpec_LAG: {}, execinfrapb.WindowerSpec_LEAD: {}, + execinfrapb.WindowerSpec_FIRST_VALUE: {}, + execinfrapb.WindowerSpec_LAST_VALUE: {}, + execinfrapb.WindowerSpec_NTH_VALUE: {}, } // WindowFnNeedsPeersInfo returns whether a window function pays attention to @@ -36,8 +48,8 @@ var SupportedWindowFns = map[execinfrapb.WindowerSpec_WindowFunc]struct{}{ // columns in ORDER BY clause). For most window functions, the result of // computation should be the same for "peers", so most window functions do need // this information. -func WindowFnNeedsPeersInfo(windowFn execinfrapb.WindowerSpec_WindowFunc) bool { - switch windowFn { +func WindowFnNeedsPeersInfo(windowFn *execinfrapb.WindowerSpec_WindowFn) bool { + switch *windowFn.Func.WindowFunc { case execinfrapb.WindowerSpec_ROW_NUMBER, execinfrapb.WindowerSpec_NTILE, @@ -51,6 +63,28 @@ func WindowFnNeedsPeersInfo(windowFn execinfrapb.WindowerSpec_WindowFunc) bool { execinfrapb.WindowerSpec_PERCENT_RANK, execinfrapb.WindowerSpec_CUME_DIST: return true + case + execinfrapb.WindowerSpec_FIRST_VALUE, + execinfrapb.WindowerSpec_LAST_VALUE, + execinfrapb.WindowerSpec_NTH_VALUE: + if len(windowFn.Ordering.Columns) == 0 { + return false + } + windowFrame := windowFn.Frame + switch windowFrame.Mode { + case + execinfrapb.WindowerSpec_Frame_GROUPS, + execinfrapb.WindowerSpec_Frame_RANGE: + if windowFrame.Bounds.Start.BoundType != execinfrapb.WindowerSpec_Frame_UNBOUNDED_PRECEDING || + windowFrame.Bounds.End.BoundType != execinfrapb.WindowerSpec_Frame_UNBOUNDED_FOLLOWING { + return true + } + } + if windowFrame.Exclusion == execinfrapb.WindowerSpec_Frame_EXCLUDE_GROUP || + windowFrame.Exclusion == execinfrapb.WindowerSpec_Frame_EXCLUDE_TIES { + return true + } + return false default: colexecerror.InternalError(errors.AssertionFailedf("window function %s is not supported", windowFn.String())) // This code is unreachable, but the compiler cannot infer that. @@ -83,6 +117,23 @@ func WindowFnArgNeedsCast( return !types.Int.Identical(provided), types.Int } colexecerror.InternalError(errors.AssertionFailedf("lag and lead expect between one and three arguments")) + case + execinfrapb.WindowerSpec_FIRST_VALUE, + execinfrapb.WindowerSpec_LAST_VALUE: + if idx > 0 { + colexecerror.InternalError(errors.AssertionFailedf("first_value and last_value expect exactly one argument")) + } + // These window functions can take any argument type. 
+ return false, provided + case execinfrapb.WindowerSpec_NTH_VALUE: + // The first argument can be any type, but the second must be an integer. + if idx > 1 { + colexecerror.InternalError(errors.AssertionFailedf("nth_value expects exactly two arguments")) + } + if idx == 0 { + return false, provided + } + return !types.Int.Identical(provided), types.Int case execinfrapb.WindowerSpec_ROW_NUMBER, execinfrapb.WindowerSpec_RANK, @@ -97,3 +148,93 @@ func WindowFnArgNeedsCast( // This code is unreachable, but the compiler cannot infer that. return false, nil } + +// NormalizeWindowFrame returns a frame that is identical to the given one +// except that the default values have been explicitly set (where before they +// may have been nil). +func NormalizeWindowFrame(frame *execinfrapb.WindowerSpec_Frame) *execinfrapb.WindowerSpec_Frame { + if frame == nil { + // The default window frame: + // RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW EXCLUDE NO ROWS + return &execinfrapb.WindowerSpec_Frame{ + Mode: execinfrapb.WindowerSpec_Frame_RANGE, + Bounds: execinfrapb.WindowerSpec_Frame_Bounds{ + Start: execinfrapb.WindowerSpec_Frame_Bound{ + BoundType: execinfrapb.WindowerSpec_Frame_UNBOUNDED_PRECEDING, + }, + End: &execinfrapb.WindowerSpec_Frame_Bound{ + BoundType: execinfrapb.WindowerSpec_Frame_CURRENT_ROW, + }, + }, + Exclusion: execinfrapb.WindowerSpec_Frame_NO_EXCLUSION, + } + } + if frame.Bounds.End == nil { + // The default end bound is CURRENT ROW. + return &execinfrapb.WindowerSpec_Frame{ + Mode: frame.Mode, + Bounds: execinfrapb.WindowerSpec_Frame_Bounds{ + Start: frame.Bounds.Start, + End: &execinfrapb.WindowerSpec_Frame_Bound{ + BoundType: execinfrapb.WindowerSpec_Frame_CURRENT_ROW, + }, + }, + Exclusion: frame.Exclusion, + } + } + return frame +} + +// EncodeWindowFrameOffset returns the given datum offset encoded as bytes, for +// use in testing window functions in RANGE mode with offsets. +func EncodeWindowFrameOffset(t *testing.T, offset tree.Datum) []byte { + var encoded, scratch []byte + encoded, err := rowenc.EncodeTableValue( + encoded, descpb.ColumnID(encoding.NoColumnID), offset, scratch) + require.NoError(t, err) + return encoded +} + +// MakeRandWindowFrameRangeOffset returns a valid offset of the given type for +// use in testing window functions in RANGE mode with offsets. +func MakeRandWindowFrameRangeOffset(t *testing.T, rng *rand.Rand, typ *types.T) tree.Datum { + isNegative := func(val tree.Datum) bool { + switch datumTyp := val.(type) { + case *tree.DInt: + return int64(*datumTyp) < 0 + case *tree.DFloat: + return float64(*datumTyp) < 0 + case *tree.DDecimal: + return datumTyp.Negative + case *tree.DInterval: + return false + default: + t.Errorf("unexpected error: %v", errors.AssertionFailedf("unsupported datum: %v", datumTyp)) + return false + } + } + + for { + val := randgen.RandDatumSimple(rng, typ) + if isNegative(val) { + // Offsets must be non-null and non-negative. + continue + } + return val + } +} + +// GetOffsetTypeFromOrderColType returns the correct offset type for the given +// order column type for a window frame in RANGE mode with offsets. For numeric +// columns, the order and offset types are the same. For datetime columns, +// offsets are of type interval. GetOffsetTypeFromOrderColType is intended for +// use in testing window functions. 
+func GetOffsetTypeFromOrderColType(t *testing.T, orderColType *types.T) *types.T { + if !types.IsAdditiveType(orderColType) { + t.Errorf("unexpected order column type: %v", orderColType) + } + if types.IsDateTimeType(orderColType) { + return types.Interval + } + return orderColType +} diff --git a/pkg/sql/colexec/execgen/cmd/execgen/BUILD.bazel b/pkg/sql/colexec/execgen/cmd/execgen/BUILD.bazel index 69c5b1860235..9a6ca3d63c16 100644 --- a/pkg/sql/colexec/execgen/cmd/execgen/BUILD.bazel +++ b/pkg/sql/colexec/execgen/cmd/execgen/BUILD.bazel @@ -21,6 +21,7 @@ go_library( "default_cmp_proj_ops_gen.go", "default_cmp_sel_ops_gen.go", "distinct_gen.go", + "first_last_nth_value_gen.go", "hash_aggregator_gen.go", "hash_utils_gen.go", "hashjoiner_gen.go", diff --git a/pkg/sql/colexec/execgen/cmd/execgen/first_last_nth_value_gen.go b/pkg/sql/colexec/execgen/cmd/execgen/first_last_nth_value_gen.go new file mode 100644 index 000000000000..d4a41b814ede --- /dev/null +++ b/pkg/sql/colexec/execgen/cmd/execgen/first_last_nth_value_gen.go @@ -0,0 +1,63 @@ +// Copyright 2021 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package main + +import ( + "io" + "strings" + "text/template" + + "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" +) + +const firstLastNthTmpl = "pkg/sql/colexec/colexecwindow/first_last_nth_value_tmpl.go" + +func init() { + firstLastNthValueOpsGenerator := func(lowerCaseName, upperCaseName string) generator { + return func(inputFileContents string, wr io.Writer) error { + r := strings.NewReplacer( + "_CANONICAL_TYPE_FAMILY", "{{.CanonicalTypeFamilyStr}}", + "_TYPE_WIDTH", typeWidthReplacement, + "_GOTYPESLICE", "{{.GoTypeSliceName}}", + "_TYPE", "{{.VecMethod}}", + "TemplateType", "{{.VecMethod}}", + ".IsFirstValue", `eq "_OP_NAME" "firstValue"`, + ".IsLastValue", `eq "_OP_NAME" "lastValue"`, + ".IsNthValue", `eq "_OP_NAME" "nthValue"`, + ) + s := r.Replace(inputFileContents) + + r = strings.NewReplacer( + "_OP_NAME", lowerCaseName, + "_UPPERCASE_NAME", upperCaseName, + ) + s = r.Replace(s) + + // Now, generate the op, from the template. + tmpl, err := template.New(lowerCaseName + "Op").Funcs(template.FuncMap{"buildDict": buildDict}).Parse(s) + if err != nil { + return err + } + + return tmpl.Execute(wr, sameTypeComparisonOpToOverloads[tree.EQ]) + } + } + + registerGenerator( + firstLastNthValueOpsGenerator("firstValue", "FirstValue"), + "first_value.eg.go", firstLastNthTmpl) + registerGenerator( + firstLastNthValueOpsGenerator("lastValue", "LastValue"), + "last_value.eg.go", firstLastNthTmpl) + registerGenerator( + firstLastNthValueOpsGenerator("nthValue", "NthValue"), + "nth_value.eg.go", firstLastNthTmpl) +} diff --git a/pkg/sql/colexec/execgen/cmd/execgen/overloads_bin.go b/pkg/sql/colexec/execgen/cmd/execgen/overloads_bin.go index 59dcf909c7b7..040e1a7c6249 100644 --- a/pkg/sql/colexec/execgen/cmd/execgen/overloads_bin.go +++ b/pkg/sql/colexec/execgen/cmd/execgen/overloads_bin.go @@ -803,15 +803,25 @@ if _path == nil { } } +// timestampRangeCheck should be added at the end of operations that modify and +// return timestamps in order to ensure that the vectorized engine returns the +// same errors as the row engine. 
The range check expects the timestamp to be +// stored in a local variable named 't_res'. +const timestampRangeCheck = ` +rounded_res := t_res.Round(time.Microsecond) +if rounded_res.After(tree.MaxSupportedTime) || rounded_res.Before(tree.MinSupportedTime) { + colexecerror.ExpectedError(errors.Newf("timestamp %q exceeds supported timestamp bounds", t_res.Format(time.RFC3339))) +}` + func (c timestampIntervalCustomizer) getBinOpAssignFunc() assignFunc { return func(op *lastArgWidthOverload, targetElem, leftElem, rightElem, targetCol, leftCol, rightCol string) string { switch op.overloadBase.BinOp { case tree.Plus: - return fmt.Sprintf(`%[1]s = duration.Add(%[2]s, %[3]s)`, - targetElem, leftElem, rightElem) + return fmt.Sprintf(`t_res := duration.Add(%[1]s, %[2]s)`, + leftElem, rightElem) + timestampRangeCheck + fmt.Sprintf("\n%s = t_res", targetElem) case tree.Minus: - return fmt.Sprintf(`%[1]s = duration.Add(%[2]s, %[3]s.Mul(-1))`, - targetElem, leftElem, rightElem) + return fmt.Sprintf(`t_res := duration.Add(%[1]s, %[2]s.Mul(-1))`, + leftElem, rightElem) + timestampRangeCheck + fmt.Sprintf("\n%s = t_res", targetElem) default: colexecerror.InternalError(errors.AssertionFailedf("unhandled binary operator %s", op.overloadBase.BinOp.String())) } @@ -824,8 +834,8 @@ func (c intervalTimestampCustomizer) getBinOpAssignFunc() assignFunc { return func(op *lastArgWidthOverload, targetElem, leftElem, rightElem, targetCol, leftCol, rightCol string) string { switch op.overloadBase.BinOp { case tree.Plus: - return fmt.Sprintf(`%[1]s = duration.Add(%[3]s, %[2]s)`, - targetElem, leftElem, rightElem) + return fmt.Sprintf(`t_res := duration.Add(%[2]s, %[1]s)`, + leftElem, rightElem) + timestampRangeCheck + fmt.Sprintf("\n%s = t_res", targetElem) default: colexecerror.InternalError(errors.AssertionFailedf("unhandled binary operator %s", op.overloadBase.BinOp.String())) } diff --git a/pkg/sql/colexec/execgen/cmd/execgen/range_offset_handler_gen.go b/pkg/sql/colexec/execgen/cmd/execgen/range_offset_handler_gen.go index a3880f4d199e..3cc1198e1bff 100644 --- a/pkg/sql/colexec/execgen/cmd/execgen/range_offset_handler_gen.go +++ b/pkg/sql/colexec/execgen/cmd/execgen/range_offset_handler_gen.go @@ -303,23 +303,28 @@ func typeName(typeFamily types.Family, typeWidth int32) string { return toVecMethod(typeconv.TypeFamilyToCanonicalTypeFamily(typeFamily), typeWidth) } -func dateAssignFunc( - op *lastArgWidthOverload, targetElem, leftElem, rightElem, targetCol, leftCol, rightCol string, -) string { - // Date rows are stored as int64s representing the number of days since the - // unix epoch. We have to convert to timestamps before executing the binary - // operator. - const castVarName = "t" - castStr := fmt.Sprintf(` - d, err := pgdate.MakeDateFromUnixEpoch(%s) +// This format string should be used with the left operand of the binary +// operation as the first argument, and the desired result variable name as the +// second argument. +const dateToTimeCastStr = ` + d_casted, err := pgdate.MakeDateFromUnixEpoch(%s) if err != nil { colexecerror.ExpectedError(err) } - %s, err := d.ToTime() + %s, err := d_casted.ToTime() if err != nil { colexecerror.ExpectedError(err) } - `, leftElem, castVarName) +` + +func dateAssignFunc( + op *lastArgWidthOverload, targetElem, leftElem, rightElem, targetCol, leftCol, rightCol string, +) string { + // Date rows are stored as int64s representing the number of days since the + // unix epoch. We have to convert to timestamps before executing the binary + // operator. 
+ const castVarName = "t_casted" + castStr := fmt.Sprintf(dateToTimeCastStr, leftElem, castVarName) var o timestampIntervalCustomizer return castStr + o.getBinOpAssignFunc()( op, targetElem, castVarName, rightElem, targetCol, leftCol, rightCol) @@ -329,17 +334,8 @@ func dateCmpFunc(targetElem, leftElem, rightElem, leftCol, rightCol string) stri // Date rows are stored as int64s representing the number of days since the // unix epoch. We have to convert to timestamps before executing the // comparison operator. - const castVarName = "t" - castStr := fmt.Sprintf(` - d, err := pgdate.MakeDateFromUnixEpoch(%s) - if err != nil { - colexecerror.InternalError(err) - } - %s, err := d.ToTime() - if err != nil { - colexecerror.InternalError(err) - } - `, leftElem, castVarName) + const castVarName = "t_casted" + castStr := fmt.Sprintf(dateToTimeCastStr, leftElem, castVarName) var o timestampCustomizer return castStr + o.getCmpOpCompareFunc()(targetElem, castVarName, rightElem, leftCol, rightCol) } diff --git a/pkg/sql/distsql/columnar_operators_test.go b/pkg/sql/distsql/columnar_operators_test.go index a808ba26ac21..678a43599d6f 100644 --- a/pkg/sql/distsql/columnar_operators_test.go +++ b/pkg/sql/distsql/columnar_operators_test.go @@ -1092,17 +1092,21 @@ func TestWindowFunctionsAgainstProcessor(t *testing.T) { typs[i] = types.Int } for windowFn := range colexecwindow.SupportedWindowFns { - useRandomTypes := rand.Float64() < randTypesProbability var argTypes []*types.T + useRandomTypes := rand.Float64() < randTypesProbability + randArgType := types.Int + if useRandomTypes { + randArgType = generateRandomSupportedTypes(rng, 1 /* nCols */)[0] + } switch windowFn { case execinfrapb.WindowerSpec_NTILE: argTypes = []*types.T{types.Int} case execinfrapb.WindowerSpec_LAG, execinfrapb.WindowerSpec_LEAD: - argType := types.Int - if useRandomTypes { - argType = generateRandomSupportedTypes(rng, 1 /* nCols */)[0] - } - argTypes = []*types.T{argType, types.Int, argType} + argTypes = []*types.T{randArgType, types.Int, randArgType} + case execinfrapb.WindowerSpec_FIRST_VALUE, execinfrapb.WindowerSpec_LAST_VALUE: + argTypes = []*types.T{randArgType} + case execinfrapb.WindowerSpec_NTH_VALUE: + argTypes = []*types.T{randArgType, types.Int} } for _, partitionBy := range [][]uint32{ {}, // No PARTITION BY clause. @@ -1129,11 +1133,7 @@ func TestWindowFunctionsAgainstProcessor(t *testing.T) { argsIdxs = append(argsIdxs, uint32(nCols+i)) } - if useRandomTypes && - (windowFn == execinfrapb.WindowerSpec_LAG || - windowFn == execinfrapb.WindowerSpec_LEAD) { - // Only lag and lead take non-integer arguments. In addition, ntile - // will error if given a non-positive argument. 
+ if useRandomTypes { rows = randgen.RandEncDatumRowsOfTypes(rng, nRows, inputTypes) } else { rows = randgen.MakeRandIntRowsInRange(rng, nRows, len(inputTypes), maxNum, nullProbability) @@ -1142,7 +1142,10 @@ func TestWindowFunctionsAgainstProcessor(t *testing.T) { if windowFn == execinfrapb.WindowerSpec_ROW_NUMBER || windowFn == execinfrapb.WindowerSpec_NTILE || windowFn == execinfrapb.WindowerSpec_LAG || - windowFn == execinfrapb.WindowerSpec_LEAD { + windowFn == execinfrapb.WindowerSpec_LEAD || + windowFn == execinfrapb.WindowerSpec_FIRST_VALUE || + windowFn == execinfrapb.WindowerSpec_LAST_VALUE || + windowFn == execinfrapb.WindowerSpec_NTH_VALUE { // The outputs of these window functions are not deterministic if // there are columns that are not present in either PARTITION BY or // ORDER BY clauses, so we require that all non-partitioning columns @@ -1150,18 +1153,20 @@ func TestWindowFunctionsAgainstProcessor(t *testing.T) { nOrderingCols = len(inputTypes) } + ordering := generateOrderingGivenPartitionBy(rng, len(inputTypes), nOrderingCols, partitionBy) windowerSpec := &execinfrapb.WindowerSpec{ PartitionBy: partitionBy, WindowFns: []execinfrapb.WindowerSpec_WindowFn{ { Func: execinfrapb.WindowerSpec_Func{WindowFunc: &windowFn}, ArgsIdxs: argsIdxs, - Ordering: generateOrderingGivenPartitionBy(rng, len(inputTypes), nOrderingCols, partitionBy), + Ordering: ordering, OutputColIdx: uint32(len(inputTypes)), FilterColIdx: tree.NoColumnIdx, }, }, } + windowerSpec.WindowFns[0].Frame = generateWindowFrame(t, rng, &ordering, inputTypes) _, outputType, err := execinfrapb.GetWindowFunctionInfo(execinfrapb.WindowerSpec_Func{WindowFunc: &windowFn}, argTypes...) require.NoError(t, err) @@ -1177,6 +1182,12 @@ func TestWindowFunctionsAgainstProcessor(t *testing.T) { pspec: pspec, } if err := verifyColOperator(t, args); err != nil { + if strings.Contains(err.Error(), "different errors returned") { + // Columnar and row-based windowers are likely to hit + // different errors, and we will swallow those and move + // on. + continue + } fmt.Printf("window function: %s\n", windowFn.String()) fmt.Printf("seed = %d\n", seed) prettyPrintTypes(inputTypes, "t" /* tableName */) @@ -1278,6 +1289,97 @@ func generateOrderingGivenPartitionBy( return ordering } +func generateWindowFrame( + t *testing.T, rng *rand.Rand, ordering *execinfrapb.Ordering, inputTypes []*types.T, +) *execinfrapb.WindowerSpec_Frame { + var modes = []execinfrapb.WindowerSpec_Frame_Mode{ + execinfrapb.WindowerSpec_Frame_RANGE, + execinfrapb.WindowerSpec_Frame_ROWS, + execinfrapb.WindowerSpec_Frame_GROUPS, + } + var boundTypes = []execinfrapb.WindowerSpec_Frame_BoundType{ + execinfrapb.WindowerSpec_Frame_UNBOUNDED_PRECEDING, + execinfrapb.WindowerSpec_Frame_OFFSET_PRECEDING, + execinfrapb.WindowerSpec_Frame_CURRENT_ROW, + execinfrapb.WindowerSpec_Frame_OFFSET_FOLLOWING, + execinfrapb.WindowerSpec_Frame_UNBOUNDED_FOLLOWING, + } + var exclusionTypes = []execinfrapb.WindowerSpec_Frame_Exclusion{ + execinfrapb.WindowerSpec_Frame_NO_EXCLUSION, + execinfrapb.WindowerSpec_Frame_EXCLUDE_CURRENT_ROW, + execinfrapb.WindowerSpec_Frame_EXCLUDE_GROUP, + execinfrapb.WindowerSpec_Frame_EXCLUDE_TIES, + } + + mode := modes[rng.Intn(len(modes))] + + // Ensure that start and end bound types are syntactically valid. 
+ startBoundIdx := rng.Intn(len(boundTypes) - 1) + var endBoundIdx int + for { + endBoundIdx = rng.Intn(len(boundTypes)-1) + 1 + if endBoundIdx >= startBoundIdx { + break + } + } + startBoundType := boundTypes[startBoundIdx] + endBoundType := boundTypes[endBoundIdx] + + windowFrameBoundIsOffset := func(boundType execinfrapb.WindowerSpec_Frame_BoundType) bool { + return boundType == execinfrapb.WindowerSpec_Frame_OFFSET_PRECEDING || + boundType == execinfrapb.WindowerSpec_Frame_OFFSET_FOLLOWING + } + + if mode == execinfrapb.WindowerSpec_Frame_RANGE && + (len(ordering.Columns) != 1 || !types.IsAdditiveType(inputTypes[ordering.Columns[0].ColIdx])) { + // RANGE mode with OFFSET PRECEDING or OFFSET FOLLOWING requires there to + // be exactly one ordering column that is numeric or datetime. + if windowFrameBoundIsOffset(startBoundType) { + startBoundType = execinfrapb.WindowerSpec_Frame_UNBOUNDED_PRECEDING + } + if windowFrameBoundIsOffset(endBoundType) { + endBoundType = execinfrapb.WindowerSpec_Frame_UNBOUNDED_FOLLOWING + } + } + + exclusion := exclusionTypes[rng.Intn(len(exclusionTypes))] + + frame := &execinfrapb.WindowerSpec_Frame{ + Mode: mode, + Bounds: execinfrapb.WindowerSpec_Frame_Bounds{ + Start: execinfrapb.WindowerSpec_Frame_Bound{BoundType: startBoundType}, + End: &execinfrapb.WindowerSpec_Frame_Bound{BoundType: endBoundType}, + }, + Exclusion: exclusion, + } + + const maxUInt64Offset = 10 + if windowFrameBoundIsOffset(startBoundType) || windowFrameBoundIsOffset(endBoundType) { + if frame.Mode == execinfrapb.WindowerSpec_Frame_ROWS || + frame.Mode == execinfrapb.WindowerSpec_Frame_GROUPS { + frame.Bounds.Start.IntOffset = rng.Uint64() % maxUInt64Offset + frame.Bounds.End.IntOffset = rng.Uint64() % maxUInt64Offset + } else { + // We can assume that there is exactly one ordering column of an additive + // type, since we checked above. + colIdx := ordering.Columns[0].ColIdx + colEncoding := descpb.DatumEncoding_ASCENDING_KEY + if ordering.Columns[0].Direction == execinfrapb.Ordering_Column_DESC { + colEncoding = descpb.DatumEncoding_DESCENDING_KEY + } + offsetType := colexecwindow.GetOffsetTypeFromOrderColType(t, inputTypes[colIdx]) + startOffset := colexecwindow.MakeRandWindowFrameRangeOffset(t, rng, offsetType) + endOffset := colexecwindow.MakeRandWindowFrameRangeOffset(t, rng, offsetType) + frame.Bounds.Start.TypedOffset = colexecwindow.EncodeWindowFrameOffset(t, startOffset) + frame.Bounds.End.TypedOffset = colexecwindow.EncodeWindowFrameOffset(t, endOffset) + frame.Bounds.Start.OffsetType = execinfrapb.DatumInfo{Encoding: colEncoding, Type: offsetType} + frame.Bounds.End.OffsetType = execinfrapb.DatumInfo{Encoding: colEncoding, Type: offsetType} + } + } + + return frame +} + // prettyPrintTypes prints out typs as a CREATE TABLE statement. func prettyPrintTypes(typs []*types.T, tableName string) { fmt.Printf("CREATE TABLE %s(", tableName) diff --git a/pkg/sql/execinfrapb/processors_sql.pb.go b/pkg/sql/execinfrapb/processors_sql.pb.go index 665f04cc73c9..6b1f18893486 100644 --- a/pkg/sql/execinfrapb/processors_sql.pb.go +++ b/pkg/sql/execinfrapb/processors_sql.pb.go @@ -845,12 +845,13 @@ type JoinReaderSpec struct { // more complicated than a simple equality between input columns and index // columns. In this case, LookupExpr specifies the expression that will be // used to construct the spans for each lookup. 
Currently, the only - expressions supported are conjunctions (AND expressions) of equality and - IN expressions, specifically: + expressions supported are conjunctions (AND expressions) of equality, IN + expressions, and simple inequalities, specifically: // 1. equalities between two variables (one from the input and one from the // index) representing the equi-join condition(s), // 2. equalities between an index column and a constant, and // 3. IN expressions between an index column and a tuple of constants. + 4. LT, GT, LE, GE expressions between an index column and a constant. // // Variables in this expression are assigned in the same way as the ON // condition below. Assuming that the left stream has N columns and the right diff --git a/pkg/sql/execinfrapb/processors_sql.proto b/pkg/sql/execinfrapb/processors_sql.proto index c6faafb2ca8d..e348c922bd31 100644 --- a/pkg/sql/execinfrapb/processors_sql.proto +++ b/pkg/sql/execinfrapb/processors_sql.proto @@ -280,12 +280,13 @@ message JoinReaderSpec { // more complicated than a simple equality between input columns and index // columns. In this case, LookupExpr specifies the expression that will be // used to construct the spans for each lookup. Currently, the only - expressions supported are conjunctions (AND expressions) of equality and - IN expressions, specifically: + expressions supported are conjunctions (AND expressions) of equality, IN + expressions, and simple inequalities, specifically: // 1. equalities between two variables (one from the input and one from the // index) representing the equi-join condition(s), // 2. equalities between an index column and a constant, and // 3. IN expressions between an index column and a tuple of constants. + 4. LT, GT, LE, GE expressions between an index column and a constant. // // Variables in this expression are assigned in the same way as the ON // condition below. Assuming that the left stream has N columns and the right diff --git a/pkg/sql/logictest/testdata/logic_test/lookup_join_spans b/pkg/sql/logictest/testdata/logic_test/lookup_join_spans new file mode 100644 index 000000000000..6012d510b17e --- /dev/null +++ b/pkg/sql/logictest/testdata/logic_test/lookup_join_spans @@ -0,0 +1,483 @@ +statement ok +CREATE TABLE metrics ( + id SERIAL PRIMARY KEY, + nullable INT, + name STRING, + INDEX name_index (name) +) + +statement ok +insert into metrics (id,nullable,name) values (1,NULL,'cpu'), (2,1,'cpu'), (3,NULL,'mem'), (4,2,'disk') + +statement ok +CREATE TABLE metric_values ( + metric_id INT8, + time TIMESTAMPTZ, + nullable INT, + value INT8, + PRIMARY KEY (metric_id, time), + INDEX secondary (metric_id, nullable, time) +) + +statement ok +insert into metric_values (metric_id, time, nullable, value) values + (1,'2020-01-01 00:00:00+00:00',NULL,0), + (1,'2020-01-01 00:00:01+00:00',1,1), + (2,'2020-01-01 00:00:00+00:00',NULL,2), + (2,'2020-01-01 00:00:01+00:00',2,3), + (2,'2020-01-01 00:01:01+00:00',-11,4), + (2,'2020-01-01 00:01:02+00:00',-10,5), + (3,'2020-01-01 00:01:00+00:00',NULL,6), + (3,'2020-01-01 00:01:01+00:00',3,7) + +# metric_values_desc is a descending time version of metric_values. +statement ok +CREATE TABLE metric_values_desc ( + metric_id INT8, + time TIMESTAMPTZ, + nullable INT, + value INT8, + PRIMARY KEY (metric_id, time DESC), + INDEX secondary (metric_id, nullable DESC, time DESC) +) + +statement ok +insert into metric_values_desc select * from metric_values + +# The final statements below need some stats to choose the lookup join. 
+statement ok +ALTER TABLE metric_values INJECT STATISTICS +'[ + { + "columns": ["metric_id"], + "created_at": "2018-01-01 1:00:00.00000+00:00", + "row_count": 1000, + "distinct_count": 10 + }, + { + "columns": ["time"], + "created_at": "2018-01-01 1:30:00.00000+00:00", + "row_count": 1000, + "distinct_count": 1000 + }, + { + "columns": ["nullable"], + "created_at": "2018-01-01 1:30:00.00000+00:00", + "row_count": 1000, + "distinct_count": 10, + "histo_buckets": [ + {"num_eq": 0, "num_range": 0, "distinct_range": 0, "upper_bound": "-10"}, + {"num_eq": 0, "num_range": 1000, "distinct_range": 10, "upper_bound": "0"} + ], + "histo_col_type": "INT" + }, + { + "columns": ["value"], + "created_at": "2018-01-01 1:30:00.00000+00:00", + "row_count": 1000, + "distinct_count": 1000 + } +]' + +statement ok +ALTER TABLE metrics INJECT STATISTICS +'[ + { + "columns": ["id"], + "created_at": "2018-01-01 1:00:00.00000+00:00", + "row_count": 10, + "distinct_count": 10 + }, + { + "columns": ["nullable"], + "created_at": "2018-01-01 1:30:00.00000+00:00", + "row_count": 10, + "distinct_count": 10 + }, + { + "columns": ["name"], + "created_at": "2018-01-01 1:30:00.00000+00:00", + "row_count": 10, + "distinct_count": 10 + } +]' + +query ITIIIIT +SELECT * +FROM metric_values as v +INNER JOIN metrics as m +ON metric_id=id +WHERE + time > '2020-01-01 00:00:00+00:00' AND + name='cpu' +ORDER BY value +---- +1 2020-01-01 00:00:01 +0000 UTC 1 1 1 NULL cpu +2 2020-01-01 00:00:01 +0000 UTC 2 3 2 1 cpu +2 2020-01-01 00:01:01 +0000 UTC -11 4 2 1 cpu +2 2020-01-01 00:01:02 +0000 UTC -10 5 2 1 cpu + +query ITIIIIT +SELECT * +FROM metric_values_desc +INNER JOIN metrics +ON metric_id=id +WHERE + time > '2020-01-01 00:00:00+00:00' AND + name='cpu' +ORDER BY value +---- +1 2020-01-01 00:00:01 +0000 UTC 1 1 1 NULL cpu +2 2020-01-01 00:00:01 +0000 UTC 2 3 2 1 cpu +2 2020-01-01 00:01:01 +0000 UTC -11 4 2 1 cpu +2 2020-01-01 00:01:02 +0000 UTC -10 5 2 1 cpu + +query ITIIIIT +SELECT * +FROM metric_values +INNER JOIN metrics +ON metric_id=id +WHERE + time >= '2020-01-01 00:00:00+00:00' AND + name='cpu' +ORDER BY value +---- +1 2020-01-01 00:00:00 +0000 UTC NULL 0 1 NULL cpu +1 2020-01-01 00:00:01 +0000 UTC 1 1 1 NULL cpu +2 2020-01-01 00:00:00 +0000 UTC NULL 2 2 1 cpu +2 2020-01-01 00:00:01 +0000 UTC 2 3 2 1 cpu +2 2020-01-01 00:01:01 +0000 UTC -11 4 2 1 cpu +2 2020-01-01 00:01:02 +0000 UTC -10 5 2 1 cpu + +query ITIIIIT +SELECT * +FROM metric_values_desc +INNER JOIN metrics +ON metric_id=id +WHERE + time >= '2020-01-01 00:00:00+00:00' AND + name='cpu' +ORDER BY value +---- +1 2020-01-01 00:00:00 +0000 UTC NULL 0 1 NULL cpu +1 2020-01-01 00:00:01 +0000 UTC 1 1 1 NULL cpu +2 2020-01-01 00:00:00 +0000 UTC NULL 2 2 1 cpu +2 2020-01-01 00:00:01 +0000 UTC 2 3 2 1 cpu +2 2020-01-01 00:01:01 +0000 UTC -11 4 2 1 cpu +2 2020-01-01 00:01:02 +0000 UTC -10 5 2 1 cpu + +query ITIIIIT +SELECT * +FROM metric_values +INNER JOIN metrics +ON metric_id=id +WHERE + time < '2020-01-01 00:00:00+00:00' AND + name='cpu' +---- + +query ITIIIIT +SELECT * +FROM metric_values_desc +INNER JOIN metrics +ON metric_id=id +WHERE + time < '2020-01-01 00:00:00+00:00' AND + name='cpu' +---- + +query ITIIIIT +SELECT * +FROM metric_values +INNER JOIN metrics +ON metric_id=id +WHERE + time <= '2020-01-01 00:00:00+00:00' AND + name='cpu' +ORDER BY value +---- +1 2020-01-01 00:00:00 +0000 UTC NULL 0 1 NULL cpu +2 2020-01-01 00:00:00 +0000 UTC NULL 2 2 1 cpu + +query ITIIIIT +SELECT * +FROM metric_values_desc +INNER JOIN metrics +ON metric_id=id +WHERE + time <= '2020-01-01 
00:00:00+00:00' AND + name='cpu' +ORDER BY value +---- +1 2020-01-01 00:00:00 +0000 UTC NULL 0 1 NULL cpu +2 2020-01-01 00:00:00 +0000 UTC NULL 2 2 1 cpu + +query ITIIIIT +SELECT * +FROM metric_values +INNER JOIN metrics +ON metric_id=id +WHERE + time < '2020-01-01 00:00:10+00:00' AND + name='cpu' +ORDER BY value +---- +1 2020-01-01 00:00:00 +0000 UTC NULL 0 1 NULL cpu +1 2020-01-01 00:00:01 +0000 UTC 1 1 1 NULL cpu +2 2020-01-01 00:00:00 +0000 UTC NULL 2 2 1 cpu +2 2020-01-01 00:00:01 +0000 UTC 2 3 2 1 cpu + +query ITIIIIT +SELECT * +FROM metric_values_desc +INNER JOIN metrics +ON metric_id=id +WHERE + time < '2020-01-01 00:00:10+00:00' AND + name='cpu' +ORDER BY value +---- +1 2020-01-01 00:00:00 +0000 UTC NULL 0 1 NULL cpu +1 2020-01-01 00:00:01 +0000 UTC 1 1 1 NULL cpu +2 2020-01-01 00:00:00 +0000 UTC NULL 2 2 1 cpu +2 2020-01-01 00:00:01 +0000 UTC 2 3 2 1 cpu + +query ITIIIIT +SELECT * +FROM metric_values +INNER JOIN metrics +ON metric_id=id +WHERE + time BETWEEN '2020-01-01 00:00:00+00:00' AND '2020-01-01 00:10:00+00:00' AND + name='cpu' +ORDER BY value +---- +1 2020-01-01 00:00:00 +0000 UTC NULL 0 1 NULL cpu +1 2020-01-01 00:00:01 +0000 UTC 1 1 1 NULL cpu +2 2020-01-01 00:00:00 +0000 UTC NULL 2 2 1 cpu +2 2020-01-01 00:00:01 +0000 UTC 2 3 2 1 cpu +2 2020-01-01 00:01:01 +0000 UTC -11 4 2 1 cpu +2 2020-01-01 00:01:02 +0000 UTC -10 5 2 1 cpu + +query ITIIIIT +SELECT * +FROM metric_values_desc +INNER JOIN metrics +ON metric_id=id +WHERE + time BETWEEN '2020-01-01 00:00:00+00:00' AND '2020-01-01 00:10:00+00:00' AND + name='cpu' +ORDER BY value +---- +1 2020-01-01 00:00:00 +0000 UTC NULL 0 1 NULL cpu +1 2020-01-01 00:00:01 +0000 UTC 1 1 1 NULL cpu +2 2020-01-01 00:00:00 +0000 UTC NULL 2 2 1 cpu +2 2020-01-01 00:00:01 +0000 UTC 2 3 2 1 cpu +2 2020-01-01 00:01:01 +0000 UTC -11 4 2 1 cpu +2 2020-01-01 00:01:02 +0000 UTC -10 5 2 1 cpu + +# Test lookup conditions w/ a left join. +query IITITII +SELECT * +FROM metrics +LEFT JOIN metric_values +ON metric_id=id +AND time BETWEEN '2020-01-01 00:00:00+00:00' AND '2020-01-01 00:10:00+00:00' +AND name='cpu' +ORDER BY value, id +---- +3 NULL mem NULL NULL NULL NULL +4 2 disk NULL NULL NULL NULL +1 NULL cpu 1 2020-01-01 00:00:00 +0000 UTC NULL 0 +1 NULL cpu 1 2020-01-01 00:00:01 +0000 UTC 1 1 +2 1 cpu 2 2020-01-01 00:00:00 +0000 UTC NULL 2 +2 1 cpu 2 2020-01-01 00:00:01 +0000 UTC 2 3 +2 1 cpu 2 2020-01-01 00:01:01 +0000 UTC -11 4 +2 1 cpu 2 2020-01-01 00:01:02 +0000 UTC -10 5 + +# Test lookup conditions w/ a semi join. +query IIT +SELECT * +FROM metrics m +WHERE EXISTS (SELECT * FROM metric_values mv WHERE mv.metric_id = m.id AND time BETWEEN '2020-01-01 00:00:00+00:00' AND '2020-01-01 00:10:00+00:00') +ORDER BY m.id +---- +1 NULL cpu +2 1 cpu +3 NULL mem + +# Test NULL values in pre-join where conditions. +query ITIIIIT +SELECT * +FROM metric_values as v +INNER JOIN metrics as m +ON metric_id=id +AND v.nullable = m.nullable +WHERE + time > '2020-01-01 00:00:00+00:00' AND + name='cpu' +ORDER BY value +---- + +# Test NULL values in bounded lookup span. +query ITIIIIT +SELECT * +FROM metric_values as v +INNER JOIN metrics as m +ON metric_id=id +WHERE + v.nullable BETWEEN -20 AND -10 AND + name='cpu' +ORDER BY value +---- +2 2020-01-01 00:01:01 +0000 UTC -11 4 2 1 cpu +2 2020-01-01 00:01:02 +0000 UTC -10 5 2 1 cpu + +# Test NULL values in bounded lookup span (descending). 
+query ITIIIIT +SELECT * +FROM metric_values_desc as v +INNER JOIN metrics as m +ON metric_id=id +WHERE + v.nullable BETWEEN -20 AND -10 AND + name='cpu' +ORDER BY value +---- +2 2020-01-01 00:01:01 +0000 UTC -11 4 2 1 cpu +2 2020-01-01 00:01:02 +0000 UTC -10 5 2 1 cpu + +# Test NULL values in > unbounded lookup span. +query ITIIIIT +SELECT * +FROM metric_values as v +INNER JOIN metrics as m +ON metric_id=id +WHERE + v.nullable > 1 AND + name='cpu' +ORDER BY value +---- +2 2020-01-01 00:00:01 +0000 UTC 2 3 2 1 cpu + +# Test NULL values in > unbounded lookup span (descending). +query ITIIIIT +SELECT * +FROM metric_values_desc as v +INNER JOIN metrics as m +ON metric_id=id +WHERE + v.nullable > 1 AND + name='cpu' +ORDER BY value +---- +2 2020-01-01 00:00:01 +0000 UTC 2 3 2 1 cpu + +# Test NULL values in >= unbounded lookup span. +query ITIIIIT +SELECT * +FROM metric_values as v +INNER JOIN metrics as m +ON metric_id=id +WHERE + v.nullable >= 1 AND + name='cpu' +ORDER BY value +---- +1 2020-01-01 00:00:01 +0000 UTC 1 1 1 NULL cpu +2 2020-01-01 00:00:01 +0000 UTC 2 3 2 1 cpu + +# Test NULL values in >= unbounded lookup span (descending). +query ITIIIIT +SELECT * +FROM metric_values_desc as v +INNER JOIN metrics as m +ON metric_id=id +WHERE + v.nullable >= 1 AND + name='cpu' +ORDER BY value +---- +1 2020-01-01 00:00:01 +0000 UTC 1 1 1 NULL cpu +2 2020-01-01 00:00:01 +0000 UTC 2 3 2 1 cpu + +# Test NULL values in < unbounded lookup span. +query ITIIIIT +SELECT * +FROM metric_values as v +INNER JOIN metrics as m +ON metric_id=id +WHERE + v.nullable < -10 AND + name='cpu' +ORDER BY value +---- +2 2020-01-01 00:01:01 +0000 UTC -11 4 2 1 cpu + +# Test NULL values in < unbounded lookup span (descending). +query ITIIIIT +SELECT * +FROM metric_values_desc as v +INNER JOIN metrics as m +ON metric_id=id +WHERE + v.nullable < -10 AND + name='cpu' +ORDER BY value +---- +2 2020-01-01 00:01:01 +0000 UTC -11 4 2 1 cpu + +# Test NULL values in <= unbounded lookup span. +query ITIIIIT +SELECT * +FROM metric_values as v +INNER JOIN metrics as m +ON metric_id=id +WHERE + v.nullable <= -10 AND + name='cpu' +ORDER BY value +---- +2 2020-01-01 00:01:01 +0000 UTC -11 4 2 1 cpu +2 2020-01-01 00:01:02 +0000 UTC -10 5 2 1 cpu + +# Test NULL values in <= unbounded lookup span (descending). +query ITIIIIT +SELECT * +FROM metric_values_desc as v +INNER JOIN metrics as m +ON metric_id=id +WHERE + v.nullable <= -10 AND + name='cpu' +ORDER BY value +---- +2 2020-01-01 00:01:01 +0000 UTC -11 4 2 1 cpu +2 2020-01-01 00:01:02 +0000 UTC -10 5 2 1 cpu + +# Test NULL values in WHERE equality conditions. +query ITIIIIT +SELECT * +FROM metric_values as v +INNER JOIN metrics as m +ON metric_id=id +WHERE + time < '2020-01-01 00:00:10+00:00' AND + name='cpu' AND + v.nullable = m.nullable +ORDER BY value +---- + +# Test NULL values in simple equality condition. +query ITIIIIT +SELECT * +FROM metric_values as v +INNER JOIN metrics as m +ON metric_id=id +WHERE + time < '2020-01-01 00:00:10+00:00' AND + name='cpu' AND + v.nullable = 1 +ORDER BY value +---- +1 2020-01-01 00:00:01 +0000 UTC 1 1 1 NULL cpu diff --git a/pkg/sql/logictest/testdata/logic_test/vectorize_window b/pkg/sql/logictest/testdata/logic_test/vectorize_window new file mode 100644 index 000000000000..4cbcb2db3761 --- /dev/null +++ b/pkg/sql/logictest/testdata/logic_test/vectorize_window @@ -0,0 +1,247 @@ +# LogicTest: local fakedist fakedist-disk 3node-tenant + +# Ensure that all these queries can be executed in the vectorized engine. 
+statement ok +SET vectorize = experimental_always + +statement ok +CREATE TABLE t (a INT, b INT, c INT PRIMARY KEY) + +statement ok +INSERT INTO t VALUES + (0, 1, 0), + (1, 1, 1), + (0, 2, 2), + (1, 2, 3) + +# We sort the output on all queries with row_number window function to get +# deterministic results. +query III +SELECT a, b, row_number() OVER (ORDER BY a, b) FROM t ORDER BY a, b +---- +0 1 1 +0 2 2 +1 1 3 +1 2 4 + +query III +SELECT a, b, row_number() OVER (PARTITION BY a ORDER BY b) FROM t ORDER BY a, b +---- +0 1 1 +0 2 2 +1 1 1 +1 2 2 + +query III +SELECT a, b, row_number() OVER (PARTITION BY a, b) FROM t ORDER BY a, b +---- +0 1 1 +0 2 1 +1 1 1 +1 2 1 + +query III rowsort +SELECT a, b, rank() OVER () FROM t +---- +0 1 1 +0 2 1 +1 1 1 +1 2 1 + +query III rowsort +SELECT a, b, rank() OVER (ORDER BY a) FROM t +---- +0 1 1 +0 2 1 +1 1 3 +1 2 3 + +query IIII rowsort +SELECT a, b, c, rank() OVER (PARTITION BY a ORDER BY c) FROM t +---- +0 1 0 1 +0 2 2 2 +1 1 1 1 +1 2 3 2 + +query III rowsort +SELECT a, b, dense_rank() OVER () FROM t +---- +0 1 1 +0 2 1 +1 1 1 +1 2 1 + +query III rowsort +SELECT a, b, dense_rank() OVER (ORDER BY a) FROM t +---- +0 1 1 +0 2 1 +1 1 2 +1 2 2 + +query IIII rowsort +SELECT a, b, c, dense_rank() OVER (PARTITION BY a ORDER BY c) FROM t +---- +0 1 0 1 +0 2 2 2 +1 1 1 1 +1 2 3 2 + +query IIIIRR rowsort +SELECT a, b, rank() OVER w, dense_rank() OVER w, percent_rank() OVER w, cume_dist() OVER w FROM t WINDOW w AS () +---- +0 1 1 1 0 1 +1 1 1 1 0 1 +0 2 1 1 0 1 +1 2 1 1 0 1 + +query IIIIRR rowsort +SELECT a, b, rank() OVER w, dense_rank() OVER w, percent_rank() OVER w, cume_dist() OVER w FROM t WINDOW w AS (PARTITION BY a) +---- +0 1 1 1 0 1 +0 2 1 1 0 1 +1 1 1 1 0 1 +1 2 1 1 0 1 + + +query IIIIRR rowsort +SELECT a, b, rank() OVER w, dense_rank() OVER w, percent_rank() OVER w, cume_dist() OVER w FROM t WINDOW w AS (ORDER BY a) +---- +0 1 1 1 0 0.5 +0 2 1 1 0 0.5 +1 1 3 2 0.666666666666667 1 +1 2 3 2 0.666666666666667 1 + +query IIIIRR rowsort +SELECT a, b, rank() OVER w, dense_rank() OVER w, percent_rank() OVER w, cume_dist() OVER w FROM t WINDOW w AS (PARTITION BY a ORDER BY b) +---- +0 1 1 1 0 0.5 +0 2 2 2 1 1 +1 1 1 1 0 0.5 +1 2 2 2 1 1 + +query IIR rowsort +SELECT a, b, percent_rank() OVER () FROM t +---- +0 1 0 +1 1 0 +0 2 0 +1 2 0 + +query IIR rowsort +SELECT a, b, percent_rank() OVER (ORDER BY a) FROM t +---- +0 1 0 +0 2 0 +1 1 0.666666666666667 +1 2 0.666666666666667 + +query IIIR rowsort +SELECT a, b, c, percent_rank() OVER (PARTITION BY a ORDER BY c) FROM t +---- +0 1 0 0 +0 2 2 1 +1 1 1 0 +1 2 3 1 + +query IIR rowsort +SELECT a, b, cume_dist() OVER () FROM t +---- +0 1 1 +0 2 1 +1 1 1 +1 2 1 + +query IIR rowsort +SELECT a, b, cume_dist() OVER (ORDER BY a) FROM t +---- +0 1 0.5 +0 2 0.5 +1 1 1 +1 2 1 + +query IIIR rowsort +SELECT a, b, c, cume_dist() OVER (PARTITION BY a ORDER BY c) FROM t +---- +0 1 0 0.5 +0 2 2 1 +1 1 1 0.5 +1 2 3 1 + +query III rowsort +SELECT a, b, ntile(2) OVER (ORDER BY a, b) FROM t +---- +0 1 1 +0 2 1 +1 1 2 +1 2 2 + +query IIII rowsort +SELECT a, b, c, ntile(2) OVER (PARTITION BY a ORDER BY c) FROM t +---- +0 1 0 1 +0 2 2 2 +1 1 1 1 +1 2 3 2 + +query IIII rowsort +SELECT a, b, lag(a, b) OVER w, lead(a, b) OVER w FROM t WINDOW w AS (ORDER BY a, b) +---- +0 1 NULL 0 +0 2 NULL 1 +1 1 0 1 +1 2 0 NULL + +query IIIII rowsort +SELECT a, b, c, lag(a, b) OVER w, lead(a, b) OVER w FROM t WINDOW w AS (PARTITION BY a ORDER BY c) +---- +0 1 0 NULL 0 +0 2 2 NULL NULL +1 1 1 NULL 1 +1 2 3 NULL NULL + +query IIIIII rowsort +SELECT a, b, c, 
first_value(a) OVER w, last_value(a) OVER w, nth_value(a, b) OVER w +FROM t WINDOW w AS (ORDER BY c, b) +---- +0 1 0 0 0 0 +1 1 1 0 1 0 +0 2 2 0 0 1 +1 2 3 0 1 1 + +query IIIIII rowsort +SELECT a, b, c, first_value(a) OVER w, last_value(a) OVER w, nth_value(a, b) OVER w +FROM t WINDOW w AS (PARTITION BY a ORDER BY c) +---- +0 1 0 0 0 0 +0 2 2 0 0 0 +1 1 1 1 1 1 +1 2 3 1 1 1 + +query IIIIII rowsort +SELECT a, b, c, first_value(a) OVER w, last_value(a) OVER w, nth_value(a, b) OVER w +FROM t WINDOW w AS (ORDER BY c, b ROWS BETWEEN 5 PRECEDING AND CURRENT ROW) +---- +0 1 0 0 0 0 +1 1 1 0 1 0 +0 2 2 0 0 1 +1 2 3 0 1 1 + +query IIIIII rowsort +SELECT a, b, c, first_value(a) OVER w, last_value(a) OVER w, nth_value(a, b) OVER w +FROM t WINDOW w AS (ORDER BY c, b GROUPS BETWEEN 5 PRECEDING AND CURRENT ROW) +---- +0 1 0 0 0 0 +1 1 1 0 1 0 +0 2 2 0 0 1 +1 2 3 0 1 1 + +query IIII rowsort +SELECT c, first_value(c) OVER w, last_value(c) OVER w, nth_value(c, 2) OVER w +FROM t WINDOW w AS (ORDER BY c RANGE BETWEEN 5 PRECEDING AND CURRENT ROW) +---- +0 0 0 NULL +1 0 1 1 +2 0 2 1 +3 0 3 1 diff --git a/pkg/sql/logictest/testdata/logic_test/window b/pkg/sql/logictest/testdata/logic_test/window index 0234e1c8b85a..d6ff31d0bda8 100644 --- a/pkg/sql/logictest/testdata/logic_test/window +++ b/pkg/sql/logictest/testdata/logic_test/window @@ -3778,123 +3778,6 @@ SELECT count(*) >= 26 FROM crdb_internal.feature_usage WHERE feature_name LIKE ' ---- true -statement ok -DROP TABLE t; CREATE TABLE t (a INT, b INT, c INT PRIMARY KEY) - -statement ok -INSERT INTO t VALUES - (0, 1, 0), - (1, 1, 1), - (0, 2, 2), - (1, 2, 3) - -# We sort the output on all queries with row_number window function to get -# deterministic results. -query III -SELECT a, b, row_number() OVER (ORDER BY a, b) FROM t ORDER BY a, b ----- -0 1 1 -0 2 2 -1 1 3 -1 2 4 - -query III -SELECT a, b, row_number() OVER (PARTITION BY a ORDER BY b) FROM t ORDER BY a, b ----- -0 1 1 -0 2 2 -1 1 1 -1 2 2 - -query III -SELECT a, b, row_number() OVER (PARTITION BY a, b) FROM t ORDER BY a, b ----- -0 1 1 -0 2 1 -1 1 1 -1 2 1 - -query III rowsort -SELECT a, b, rank() OVER () FROM t ----- -0 1 1 -0 2 1 -1 1 1 -1 2 1 - -query III rowsort -SELECT a, b, rank() OVER (ORDER BY a) FROM t ----- -0 1 1 -0 2 1 -1 1 3 -1 2 3 - -query IIII rowsort -SELECT a, b, c, rank() OVER (PARTITION BY a ORDER BY c) FROM t ----- -0 1 0 1 -0 2 2 2 -1 1 1 1 -1 2 3 2 - -query III rowsort -SELECT a, b, dense_rank() OVER () FROM t ----- -0 1 1 -0 2 1 -1 1 1 -1 2 1 - -query III rowsort -SELECT a, b, dense_rank() OVER (ORDER BY a) FROM t ----- -0 1 1 -0 2 1 -1 1 2 -1 2 2 - -query IIII rowsort -SELECT a, b, c, dense_rank() OVER (PARTITION BY a ORDER BY c) FROM t ----- -0 1 0 1 -0 2 2 2 -1 1 1 1 -1 2 3 2 - -query IIIIRR rowsort -SELECT a, b, rank() OVER w, dense_rank() OVER w, percent_rank() OVER w, cume_dist() OVER w FROM t WINDOW w AS () ----- -0 1 1 1 0 1 -1 1 1 1 0 1 -0 2 1 1 0 1 -1 2 1 1 0 1 - -query IIIIRR rowsort -SELECT a, b, rank() OVER w, dense_rank() OVER w, percent_rank() OVER w, cume_dist() OVER w FROM t WINDOW w AS (PARTITION BY a) ----- -0 1 1 1 0 1 -0 2 1 1 0 1 -1 1 1 1 0 1 -1 2 1 1 0 1 - - -query IIIIRR rowsort -SELECT a, b, rank() OVER w, dense_rank() OVER w, percent_rank() OVER w, cume_dist() OVER w FROM t WINDOW w AS (ORDER BY a) ----- -0 1 1 1 0 0.5 -0 2 1 1 0 0.5 -1 1 3 2 0.666666666666667 1 -1 2 3 2 0.666666666666667 1 - -query IIIIRR rowsort -SELECT a, b, rank() OVER w, dense_rank() OVER w, percent_rank() OVER w, cume_dist() OVER w FROM t WINDOW w AS (PARTITION BY a ORDER BY b) 
----
-0 1 1 1 0 0.5
-0 2 2 2 1 1
-1 1 1 1 0 0.5
-1 2 2 2 1 1
-
 # Regression test for peer group number computation overflow (#53654).
 query II rowsort
 SELECT
diff --git a/pkg/sql/opt/exec/execbuilder/testdata/lookup_join b/pkg/sql/opt/exec/execbuilder/testdata/lookup_join
index 2b683114a0ba..160f05ce5d77 100644
--- a/pkg/sql/opt/exec/execbuilder/testdata/lookup_join
+++ b/pkg/sql/opt/exec/execbuilder/testdata/lookup_join
@@ -75,8 +75,7 @@ vectorized: true
 │ columns: (a, b, c, d, e, f)
 │ estimated row count: 33
 │ table: def@primary
-│ equality: (b) = (f)
-│ pred: e > 1
+│ lookup condition: (f = b) AND (e > 1)
 │
 └── • scan
       columns: (a, b, c)
diff --git a/pkg/sql/opt/exec/execbuilder/testdata/lookup_join_spans b/pkg/sql/opt/exec/execbuilder/testdata/lookup_join_spans
new file mode 100644
index 000000000000..5acfa1590fa7
--- /dev/null
+++ b/pkg/sql/opt/exec/execbuilder/testdata/lookup_join_spans
@@ -0,0 +1,849 @@
+# LogicTest: local
+
+statement ok
+CREATE TABLE metrics (
+  id SERIAL PRIMARY KEY,
+  nullable INT,
+  name STRING,
+  INDEX name_index (name)
+)
+
+statement ok
+insert into metrics (id,nullable,name) values (1,NULL,'cpu'), (2,1,'cpu'), (3,NULL,'mem'), (4,2,'disk')
+
+statement ok
+CREATE TABLE metric_values (
+  metric_id INT8,
+  time TIMESTAMPTZ,
+  nullable INT,
+  value INT8,
+  PRIMARY KEY (metric_id, time),
+  INDEX secondary (metric_id, nullable, time)
+)
+
+statement ok
+insert into metric_values (metric_id, time, nullable, value) values
+  (1,'2020-01-01 00:00:00+00:00',NULL,0),
+  (1,'2020-01-01 00:00:01+00:00',1,1),
+  (2,'2020-01-01 00:00:00+00:00',NULL,2),
+  (2,'2020-01-01 00:00:01+00:00',2,3),
+  (2,'2020-01-01 00:01:01+00:00',-11,4),
+  (2,'2020-01-01 00:01:02+00:00',-10,5),
+  (3,'2020-01-01 00:01:00+00:00',NULL,6),
+  (3,'2020-01-01 00:01:01+00:00',3,7)
+
+# metric_values_desc is a descending time version of metric_values.
+statement ok
+CREATE TABLE metric_values_desc (
+  metric_id INT8,
+  time TIMESTAMPTZ,
+  nullable INT,
+  value INT8,
+  PRIMARY KEY (metric_id, time DESC),
+  INDEX secondary (metric_id, nullable, time DESC)
+)
+
+statement ok
+insert into metric_values_desc select * from metric_values
+
+# The final statements below need some stats to choose the lookup join.
+statement ok +ALTER TABLE metric_values INJECT STATISTICS +'[ + { + "columns": ["metric_id"], + "created_at": "2018-01-01 1:00:00.00000+00:00", + "row_count": 1000, + "distinct_count": 10 + }, + { + "columns": ["time"], + "created_at": "2018-01-01 1:30:00.00000+00:00", + "row_count": 1000, + "distinct_count": 1000 + }, + { + "columns": ["nullable"], + "created_at": "2018-01-01 1:30:00.00000+00:00", + "row_count": 1000, + "distinct_count": 10, + "histo_buckets": [ + {"num_eq": 0, "num_range": 0, "distinct_range": 0, "upper_bound": "-10"}, + {"num_eq": 0, "num_range": 1000, "distinct_range": 10, "upper_bound": "0"} + ], + "histo_col_type": "INT" + }, + { + "columns": ["value"], + "created_at": "2018-01-01 1:30:00.00000+00:00", + "row_count": 1000, + "distinct_count": 1000 + } +]' + +statement ok +ALTER TABLE metrics INJECT STATISTICS +'[ + { + "columns": ["id"], + "created_at": "2018-01-01 1:00:00.00000+00:00", + "row_count": 10, + "distinct_count": 10 + }, + { + "columns": ["nullable"], + "created_at": "2018-01-01 1:30:00.00000+00:00", + "row_count": 10, + "distinct_count": 10 + }, + { + "columns": ["name"], + "created_at": "2018-01-01 1:30:00.00000+00:00", + "row_count": 10, + "distinct_count": 10 + } +]' + +query T +EXPLAIN +SELECT * +FROM metric_values as v +INNER JOIN metrics as m +ON metric_id=id +WHERE + time > '2020-01-01 00:00:00+00:00' AND + name='cpu' +ORDER BY value +---- +distribution: local +vectorized: true +· +• sort +│ estimated row count: 33 +│ order: +value +│ +└── • lookup join + │ estimated row count: 33 + │ table: metric_values@primary + │ lookup condition: (metric_id = id) AND ("time" > '2020-01-01 00:00:00+00:00') + │ + └── • index join + │ estimated row count: 1 + │ table: metrics@primary + │ + └── • scan + estimated row count: 1 (10% of the table; stats collected ago) + table: metrics@name_index + spans: [/'cpu' - /'cpu'] + +query T +EXPLAIN +SELECT * +FROM metric_values_desc +INNER JOIN metrics +ON metric_id=id +WHERE + time > '2020-01-01 00:00:00+00:00' AND + name='cpu' +ORDER BY value +---- +distribution: local +vectorized: true +· +• sort +│ order: +value +│ +└── • lookup join + │ table: metric_values_desc@primary + │ lookup condition: (metric_id = id) AND ("time" > '2020-01-01 00:00:00+00:00') + │ + └── • index join + │ estimated row count: 1 + │ table: metrics@primary + │ + └── • scan + estimated row count: 1 (10% of the table; stats collected ago) + table: metrics@name_index + spans: [/'cpu' - /'cpu'] + +query T +EXPLAIN +SELECT * +FROM metric_values +INNER JOIN metrics +ON metric_id=id +WHERE + time >= '2020-01-01 00:00:00+00:00' AND + name='cpu' +ORDER BY value +---- +distribution: local +vectorized: true +· +• sort +│ estimated row count: 33 +│ order: +value +│ +└── • lookup join + │ estimated row count: 33 + │ table: metric_values@primary + │ lookup condition: (metric_id = id) AND ("time" >= '2020-01-01 00:00:00+00:00') + │ + └── • index join + │ estimated row count: 1 + │ table: metrics@primary + │ + └── • scan + estimated row count: 1 (10% of the table; stats collected ago) + table: metrics@name_index + spans: [/'cpu' - /'cpu'] + +query T +EXPLAIN +SELECT * +FROM metric_values_desc +INNER JOIN metrics +ON metric_id=id +WHERE + time >= '2020-01-01 00:00:00+00:00' AND + name='cpu' +ORDER BY value +---- +distribution: local +vectorized: true +· +• sort +│ order: +value +│ +└── • lookup join + │ table: metric_values_desc@primary + │ lookup condition: (metric_id = id) AND ("time" >= '2020-01-01 00:00:00+00:00') + │ + └── • index join + │ estimated row 
count: 1 + │ table: metrics@primary + │ + └── • scan + estimated row count: 1 (10% of the table; stats collected ago) + table: metrics@name_index + spans: [/'cpu' - /'cpu'] + +query T +EXPLAIN +SELECT * +FROM metric_values +INNER JOIN metrics +ON metric_id=id +WHERE + time < '2020-01-01 00:00:00+00:00' AND + name='cpu' +---- +distribution: local +vectorized: true +· +• lookup join +│ estimated row count: 33 +│ table: metric_values@primary +│ lookup condition: (metric_id = id) AND ("time" < '2020-01-01 00:00:00+00:00') +│ +└── • index join + │ estimated row count: 1 + │ table: metrics@primary + │ + └── • scan + estimated row count: 1 (10% of the table; stats collected ago) + table: metrics@name_index + spans: [/'cpu' - /'cpu'] + +query T +EXPLAIN +SELECT * +FROM metric_values_desc +INNER JOIN metrics +ON metric_id=id +WHERE + time < '2020-01-01 00:00:00+00:00' AND + name='cpu' +---- +distribution: local +vectorized: true +· +• lookup join +│ table: metric_values_desc@primary +│ lookup condition: (metric_id = id) AND ("time" < '2020-01-01 00:00:00+00:00') +│ +└── • index join + │ estimated row count: 1 + │ table: metrics@primary + │ + └── • scan + estimated row count: 1 (10% of the table; stats collected ago) + table: metrics@name_index + spans: [/'cpu' - /'cpu'] + +query T +EXPLAIN +SELECT * +FROM metric_values +INNER JOIN metrics +ON metric_id=id +WHERE + time <= '2020-01-01 00:00:00+00:00' AND + name='cpu' +ORDER BY value +---- +distribution: local +vectorized: true +· +• sort +│ estimated row count: 33 +│ order: +value +│ +└── • lookup join + │ estimated row count: 33 + │ table: metric_values@primary + │ lookup condition: (metric_id = id) AND ("time" <= '2020-01-01 00:00:00+00:00') + │ + └── • index join + │ estimated row count: 1 + │ table: metrics@primary + │ + └── • scan + estimated row count: 1 (10% of the table; stats collected ago) + table: metrics@name_index + spans: [/'cpu' - /'cpu'] + +query T +EXPLAIN +SELECT * +FROM metric_values_desc +INNER JOIN metrics +ON metric_id=id +WHERE + time <= '2020-01-01 00:00:00+00:00' AND + name='cpu' +ORDER BY value +---- +distribution: local +vectorized: true +· +• sort +│ order: +value +│ +└── • lookup join + │ table: metric_values_desc@primary + │ lookup condition: (metric_id = id) AND ("time" <= '2020-01-01 00:00:00+00:00') + │ + └── • index join + │ estimated row count: 1 + │ table: metrics@primary + │ + └── • scan + estimated row count: 1 (10% of the table; stats collected ago) + table: metrics@name_index + spans: [/'cpu' - /'cpu'] + +query T +EXPLAIN +SELECT * +FROM metric_values +INNER JOIN metrics +ON metric_id=id +WHERE + time < '2020-01-01 00:00:10+00:00' AND + name='cpu' +ORDER BY value +---- +distribution: local +vectorized: true +· +• sort +│ estimated row count: 33 +│ order: +value +│ +└── • lookup join + │ estimated row count: 33 + │ table: metric_values@primary + │ lookup condition: (metric_id = id) AND ("time" < '2020-01-01 00:00:10+00:00') + │ + └── • index join + │ estimated row count: 1 + │ table: metrics@primary + │ + └── • scan + estimated row count: 1 (10% of the table; stats collected ago) + table: metrics@name_index + spans: [/'cpu' - /'cpu'] + +query T +EXPLAIN +SELECT * +FROM metric_values_desc +INNER JOIN metrics +ON metric_id=id +WHERE + time < '2020-01-01 00:00:10+00:00' AND + name='cpu' +ORDER BY value +---- +distribution: local +vectorized: true +· +• sort +│ order: +value +│ +└── • lookup join + │ table: metric_values_desc@primary + │ lookup condition: (metric_id = id) AND ("time" < '2020-01-01 00:00:10+00:00') 
+ │ + └── • index join + │ estimated row count: 1 + │ table: metrics@primary + │ + └── • scan + estimated row count: 1 (10% of the table; stats collected ago) + table: metrics@name_index + spans: [/'cpu' - /'cpu'] + +query T +EXPLAIN +SELECT * +FROM metric_values +INNER JOIN metrics +ON metric_id=id +WHERE + time BETWEEN '2020-01-01 00:00:00+00:00' AND '2020-01-01 00:10:00+00:00' AND + name='cpu' +ORDER BY value +---- +distribution: local +vectorized: true +· +• sort +│ estimated row count: 11 +│ order: +value +│ +└── • lookup join + │ estimated row count: 11 + │ table: metric_values@primary + │ lookup condition: (metric_id = id) AND (("time" >= '2020-01-01 00:00:00+00:00') AND ("time" <= '2020-01-01 00:10:00+00:00')) + │ + └── • index join + │ estimated row count: 1 + │ table: metrics@primary + │ + └── • scan + estimated row count: 1 (10% of the table; stats collected ago) + table: metrics@name_index + spans: [/'cpu' - /'cpu'] + +query T +EXPLAIN +SELECT * +FROM metric_values_desc +INNER JOIN metrics +ON metric_id=id +WHERE + time BETWEEN '2020-01-01 00:00:00+00:00' AND '2020-01-01 00:10:00+00:00' AND + name='cpu' +ORDER BY value +---- +distribution: local +vectorized: true +· +• sort +│ order: +value +│ +└── • lookup join + │ table: metric_values_desc@primary + │ lookup condition: (metric_id = id) AND (("time" >= '2020-01-01 00:00:00+00:00') AND ("time" <= '2020-01-01 00:10:00+00:00')) + │ + └── • index join + │ estimated row count: 1 + │ table: metrics@primary + │ + └── • scan + estimated row count: 1 (10% of the table; stats collected ago) + table: metrics@name_index + spans: [/'cpu' - /'cpu'] + +# Test lookup conditions w/ a left join. +query T +EXPLAIN +SELECT * +FROM metrics +LEFT JOIN metric_values +ON metric_id=id +AND time BETWEEN '2020-01-01 00:00:00+00:00' AND '2020-01-01 00:10:00+00:00' +AND name='cpu' +ORDER BY value, id +---- +distribution: local +vectorized: true +· +• sort +│ estimated row count: 11 +│ order: +value,+id +│ +└── • lookup join (left outer) + │ estimated row count: 11 + │ table: metric_values@primary + │ lookup condition: (metric_id = id) AND (("time" >= '2020-01-01 00:00:00+00:00') AND ("time" <= '2020-01-01 00:10:00+00:00')) + │ pred: name = 'cpu' + │ + └── • scan + estimated row count: 10 (100% of the table; stats collected ago) + table: metrics@primary + spans: FULL SCAN + +# Test lookup conditions w/ a semi join. +query T +EXPLAIN +SELECT * +FROM metrics m +WHERE EXISTS (SELECT * FROM metric_values mv WHERE mv.metric_id = m.id AND time BETWEEN '2020-01-01 00:00:00+00:00' AND '2020-01-01 00:10:00+00:00') +ORDER BY m.id +---- +distribution: local +vectorized: true +· +• lookup join (semi) +│ estimated row count: 10 +│ table: metric_values@primary +│ lookup condition: (metric_id = id) AND (("time" >= '2020-01-01 00:00:00+00:00') AND ("time" <= '2020-01-01 00:10:00+00:00')) +│ +└── • scan + estimated row count: 10 (100% of the table; stats collected ago) + table: metrics@primary + spans: FULL SCAN + +# Test NULL values in pre-join where conditions. 
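+# (The lookup join implements logic equivalent to simple equality between
+# columns, and NULL never equals anything, so rows with NULL in v.nullable or
+# m.nullable must not produce matches.)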
+query T +EXPLAIN +SELECT * +FROM metric_values as v +INNER JOIN metrics as m +ON metric_id=id +AND v.nullable = m.nullable +WHERE + time > '2020-01-01 00:00:00+00:00' AND + name='cpu' +ORDER BY value +---- +distribution: local +vectorized: true +· +• sort +│ estimated row count: 3 +│ order: +value +│ +└── • lookup join + │ estimated row count: 3 + │ table: metric_values@primary + │ equality: (metric_id, time) = (metric_id,time) + │ equality cols are key + │ + └── • lookup join + │ estimated row count: 3 + │ table: metric_values@secondary + │ lookup condition: ((metric_id = id) AND (nullable = nullable)) AND ("time" > '2020-01-01 00:00:00+00:00') + │ + └── • index join + │ estimated row count: 1 + │ table: metrics@primary + │ + └── • scan + estimated row count: 1 (10% of the table; stats collected ago) + table: metrics@name_index + spans: [/'cpu' - /'cpu'] + +# Test NULL values in bounded lookup span. +query T +EXPLAIN +SELECT * +FROM metric_values as v +INNER JOIN metrics as m +ON metric_id=id +WHERE + v.nullable BETWEEN -20 AND -10 AND + name='cpu' +ORDER BY value +---- +distribution: local +vectorized: true +· +• lookup join +│ estimated row count: 0 +│ table: metrics@primary +│ equality: (id) = (id) +│ equality cols are key +│ +└── • sort + │ estimated row count: 0 + │ order: +value + │ + └── • lookup join + │ estimated row count: 0 + │ table: metric_values@primary + │ equality: (metric_id, time) = (metric_id,time) + │ equality cols are key + │ + └── • lookup join + │ estimated row count: 0 + │ table: metric_values@secondary + │ lookup condition: (metric_id = id) AND ((nullable >= -20) AND (nullable <= -10)) + │ + └── • scan + estimated row count: 1 (10% of the table; stats collected ago) + table: metrics@name_index + spans: [/'cpu' - /'cpu'] + +# Test NULL values in > unbounded lookup span. +query T +EXPLAIN +SELECT * +FROM metric_values as v +INNER JOIN metrics as m +ON metric_id=id +WHERE + v.nullable > 1 AND + name='cpu' +ORDER BY value +---- +distribution: local +vectorized: true +· +• lookup join +│ estimated row count: 0 +│ table: metrics@primary +│ equality: (id) = (id) +│ equality cols are key +│ +└── • sort + │ estimated row count: 0 + │ order: +value + │ + └── • lookup join + │ estimated row count: 0 + │ table: metric_values@primary + │ equality: (metric_id, time) = (metric_id,time) + │ equality cols are key + │ + └── • lookup join + │ estimated row count: 0 + │ table: metric_values@secondary + │ lookup condition: (metric_id = id) AND (nullable > 1) + │ + └── • scan + estimated row count: 1 (10% of the table; stats collected ago) + table: metrics@name_index + spans: [/'cpu' - /'cpu'] + +# Test NULL values in >= unbounded lookup span. 
+query T +EXPLAIN +SELECT * +FROM metric_values as v +INNER JOIN metrics as m +ON metric_id=id +WHERE + v.nullable >= 1 AND + name='cpu' +ORDER BY value +---- +distribution: local +vectorized: true +· +• lookup join +│ estimated row count: 0 +│ table: metrics@primary +│ equality: (id) = (id) +│ equality cols are key +│ +└── • sort + │ estimated row count: 0 + │ order: +value + │ + └── • lookup join + │ estimated row count: 0 + │ table: metric_values@primary + │ equality: (metric_id, time) = (metric_id,time) + │ equality cols are key + │ + └── • lookup join + │ estimated row count: 0 + │ table: metric_values@secondary + │ lookup condition: (metric_id = id) AND (nullable >= 1) + │ + └── • scan + estimated row count: 1 (10% of the table; stats collected ago) + table: metrics@name_index + spans: [/'cpu' - /'cpu'] + + +# Test NULL values in < unbounded lookup span. +query T +EXPLAIN +SELECT * +FROM metric_values as v +INNER JOIN metrics as m +ON metric_id=id +WHERE + v.nullable < -10 AND + name='cpu' +ORDER BY value +---- +distribution: local +vectorized: true +· +• lookup join +│ estimated row count: 0 +│ table: metrics@primary +│ equality: (id) = (id) +│ equality cols are key +│ +└── • sort + │ estimated row count: 0 + │ order: +value + │ + └── • lookup join + │ estimated row count: 0 + │ table: metric_values@primary + │ equality: (metric_id, time) = (metric_id,time) + │ equality cols are key + │ + └── • lookup join + │ estimated row count: 0 + │ table: metric_values@secondary + │ lookup condition: (metric_id = id) AND (nullable < -10) + │ + └── • scan + estimated row count: 1 (10% of the table; stats collected ago) + table: metrics@name_index + spans: [/'cpu' - /'cpu'] + +# Test NULL values in <= unbounded lookup span. +query T +EXPLAIN +SELECT * +FROM metric_values as v +INNER JOIN metrics as m +ON metric_id=id +WHERE + v.nullable <= -10 AND + name='cpu' +ORDER BY value +---- +distribution: local +vectorized: true +· +• lookup join +│ estimated row count: 0 +│ table: metrics@primary +│ equality: (id) = (id) +│ equality cols are key +│ +└── • sort + │ estimated row count: 0 + │ order: +value + │ + └── • lookup join + │ estimated row count: 0 + │ table: metric_values@primary + │ equality: (metric_id, time) = (metric_id,time) + │ equality cols are key + │ + └── • lookup join + │ estimated row count: 0 + │ table: metric_values@secondary + │ lookup condition: (metric_id = id) AND (nullable <= -10) + │ + └── • scan + estimated row count: 1 (10% of the table; stats collected ago) + table: metrics@name_index + spans: [/'cpu' - /'cpu'] + +# Test NULL values in WHERE equality conditions. +query T +EXPLAIN +SELECT * +FROM metric_values as v +INNER JOIN metrics as m +ON metric_id=id +WHERE + time < '2020-01-01 00:00:10+00:00' AND + name='cpu' AND + v.nullable = m.nullable +ORDER BY value +---- +distribution: local +vectorized: true +· +• sort +│ estimated row count: 3 +│ order: +value +│ +└── • lookup join + │ estimated row count: 3 + │ table: metric_values@primary + │ equality: (metric_id, time) = (metric_id,time) + │ equality cols are key + │ + └── • lookup join + │ estimated row count: 3 + │ table: metric_values@secondary + │ lookup condition: ((metric_id = id) AND (nullable = nullable)) AND ("time" < '2020-01-01 00:00:10+00:00') + │ + └── • index join + │ estimated row count: 1 + │ table: metrics@primary + │ + └── • scan + estimated row count: 1 (10% of the table; stats collected ago) + table: metrics@name_index + spans: [/'cpu' - /'cpu'] + + +# Test NULL values in simple equality condition. 
+query T +EXPLAIN +SELECT * +FROM metric_values as v +INNER JOIN metrics as m +ON metric_id=id +WHERE + time < '2020-01-01 00:00:10+00:00' AND + name='cpu' AND + v.nullable = 1 +ORDER BY value +---- +distribution: local +vectorized: true +· +• lookup join +│ estimated row count: 0 +│ table: metrics@primary +│ equality: (id) = (id) +│ equality cols are key +│ +└── • sort + │ estimated row count: 0 + │ order: +value + │ + └── • lookup join + │ estimated row count: 0 + │ table: metric_values@primary + │ equality: (metric_id, time) = (metric_id,time) + │ equality cols are key + │ + └── • lookup join + │ table: metric_values@secondary + │ lookup condition: ((metric_id = id) AND (nullable = 1)) AND ("time" < '2020-01-01 00:00:10+00:00') + │ + └── • render + │ estimated row count: 1 + │ + └── • scan + estimated row count: 1 (10% of the table; stats collected ago) + table: metrics@name_index + spans: [/'cpu' - /'cpu'] diff --git a/pkg/sql/opt/memo/testdata/logprops/lookup-join b/pkg/sql/opt/memo/testdata/logprops/lookup-join index 61df384a6fe0..c152872db68c 100644 --- a/pkg/sql/opt/memo/testdata/logprops/lookup-join +++ b/pkg/sql/opt/memo/testdata/logprops/lookup-join @@ -85,16 +85,20 @@ inner-join (lookup abcd) ├── interesting orderings: (+6,+7) ├── inner-join (lookup abcd@secondary) │ ├── columns: m:1(int!null) n:2(int) a:6(int!null) b:7(int!null) abcd.rowid:9(int!null) - │ ├── key columns: [1] = [6] + │ ├── lookup expression + │ │ └── filters + │ │ ├── eq [type=bool, outer=(1,6), constraints=(/1: (/NULL - ]; /6: (/NULL - ]), fd=(1)==(6), (6)==(1)] + │ │ │ ├── variable: a:6 [type=int] + │ │ │ └── variable: m:1 [type=int] + │ │ └── gt [type=bool, outer=(7), constraints=(/7: [/3 - ]; tight)] + │ │ ├── variable: b:7 [type=int] + │ │ └── const: 2 [type=int] │ ├── fd: (9)-->(6,7), (1)==(6), (6)==(1) │ ├── scan small │ │ ├── columns: m:1(int) n:2(int) │ │ ├── prune: (1,2) │ │ └── unfiltered-cols: (1-5) - │ └── filters - │ └── gt [type=bool, outer=(7), constraints=(/7: [/3 - ]; tight)] - │ ├── variable: b:7 [type=int] - │ └── const: 2 [type=int] + │ └── filters (true) └── filters (true) # Filter that can only be applied after the primary index join. diff --git a/pkg/sql/opt/memo/testdata/stats/lookup-join b/pkg/sql/opt/memo/testdata/stats/lookup-join index 189677947c32..2e344e5d008d 100644 --- a/pkg/sql/opt/memo/testdata/stats/lookup-join +++ b/pkg/sql/opt/memo/testdata/stats/lookup-join @@ -81,14 +81,16 @@ inner-join (lookup abcd) ├── fd: (1)==(6), (6)==(1) ├── inner-join (lookup abcd@secondary) │ ├── columns: m:1(int!null) n:2(int) a:6(int!null) b:7(int!null) abcd.rowid:9(int!null) - │ ├── key columns: [1] = [6] + │ ├── lookup expression + │ │ └── filters + │ │ ├── a:6 = m:1 [type=bool, outer=(1,6), constraints=(/1: (/NULL - ]; /6: (/NULL - ]), fd=(1)==(6), (6)==(1)] + │ │ └── b:7 > 2 [type=bool, outer=(7), constraints=(/7: [/3 - ]; tight)] │ ├── stats: [rows=33, distinct(1)=10, null(1)=0, distinct(6)=10, null(6)=0, distinct(7)=33, null(7)=0] │ ├── fd: (9)-->(6,7), (1)==(6), (6)==(1) │ ├── scan small │ │ ├── columns: m:1(int) n:2(int) │ │ └── stats: [rows=10, distinct(1)=10, null(1)=0] - │ └── filters - │ └── b:7 > 2 [type=bool, outer=(7), constraints=(/7: [/3 - ]; tight)] + │ └── filters (true) └── filters (true) # Filter that can only be applied after the primary index join. 
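The lookup conditions in the plans above come from single-column range filters that the optimizer captures as a one-span constraint and then rebuilds as inequalities. A minimal sketch of that correspondence, assuming only the constraint and tree APIs that the new unit tests further below exercise (the exampleSpan helper is illustrative and not part of this change):

package xform_test

import (
	"github.com/cockroachdb/cockroach/pkg/sql/opt/constraint"
	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
)

// exampleSpan builds the span corresponding to the filter "@1 > 0 AND @1 <= 1":
// the start key /0 is excluded and the end key /1 is included. This is the
// shape that makeRangeFilterFromSpan (added below) maps back into a Gt/Le pair
// of inequalities.
func exampleSpan() constraint.Span {
	var sp constraint.Span
	sp.Init(
		constraint.MakeKey(tree.NewDInt(0)), constraint.ExcludeBoundary,
		constraint.MakeKey(tree.NewDInt(1)), constraint.IncludeBoundary,
	)
	return sp
}

TestCustomFuncs_makeRangeFilter below verifies exactly this round trip, from span back to filter, for each inequality shape.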
diff --git a/pkg/sql/opt/xform/BUILD.bazel b/pkg/sql/opt/xform/BUILD.bazel
index b4b488a433cb..d8b16139a6fb 100644
--- a/pkg/sql/opt/xform/BUILD.bazel
+++ b/pkg/sql/opt/xform/BUILD.bazel
@@ -56,6 +56,8 @@ go_test(
     srcs = [
         "coster_test.go",
         "general_funcs_test.go",
+        "join_funcs_export_test.go",
+        "join_funcs_test.go",
         "join_order_builder_test.go",
         "main_test.go",
         "optimizer_test.go",
@@ -73,6 +75,7 @@ go_test(
         "//pkg/security/securitytest",
         "//pkg/settings/cluster",
         "//pkg/sql/opt",
+        "//pkg/sql/opt/constraint",
        "//pkg/sql/opt/memo",
        "//pkg/sql/opt/norm",
        "//pkg/sql/opt/testutils",
diff --git a/pkg/sql/opt/xform/coster.go b/pkg/sql/opt/xform/coster.go
index 6344ae9c8d33..084d02337e53 100644
--- a/pkg/sql/opt/xform/coster.go
+++ b/pkg/sql/opt/xform/coster.go
@@ -1474,7 +1474,11 @@ func lookupJoinInputLimitHint(inputRowCount, outputRowCount, outputLimitHint flo
 func lookupExprCost(join memo.RelExpr) memo.Cost {
 	lookupExpr, ok := join.(*memo.LookupJoinExpr)
 	if ok {
-		return cpuCostFactor * memo.Cost(len(lookupExpr.LookupExpr))
+		// 1.1 is a fudge factor that pushes some plans over the edge when choosing
+		// between a partial index vs a full index plus lookup expr in the
+		// regional_by_row tests.
+		// TODO(treilly): do some empirical analysis and model this better
+		return cpuCostFactor * memo.Cost(len(lookupExpr.LookupExpr)) * 1.1
 	}
 	return 0
 }
diff --git a/pkg/sql/opt/xform/join_funcs.go b/pkg/sql/opt/xform/join_funcs.go
index 43dbf2a3efca..671442c014d1 100644
--- a/pkg/sql/opt/xform/join_funcs.go
+++ b/pkg/sql/opt/xform/join_funcs.go
@@ -15,6 +15,7 @@ import (
 	"github.com/cockroachdb/cockroach/pkg/sql/opt"
 	"github.com/cockroachdb/cockroach/pkg/sql/opt/cat"
+	"github.com/cockroachdb/cockroach/pkg/sql/opt/constraint"
 	"github.com/cockroachdb/cockroach/pkg/sql/opt/invertedidx"
 	"github.com/cockroachdb/cockroach/pkg/sql/opt/memo"
 	"github.com/cockroachdb/cockroach/pkg/sql/opt/ordering"
@@ -297,8 +298,13 @@ func (c *CustomFuncs) GenerateLookupJoins(
 			// join implements logic equivalent to simple equality between
 			// columns (where NULL never equals anything).
 			foundVals, allIdx, ok := c.findJoinFilterConstants(allFilters, idxCol)
+			var foundRange bool
 			if !ok {
-				break
+				// Also allow a limited form of range condition filters.
+				allIdx, foundRange = c.findJoinFilterRange(allFilters, idxCol)
+				if !foundRange {
+					break
+				}
 			}

 			if len(foundVals) > 1 {
@@ -321,6 +327,11 @@ func (c *CustomFuncs) GenerateLookupJoins(
 				}
 			}

+			if foundRange {
+				shouldBuildMultiSpanLookupJoin = true
+				break
+			}
+
 			// We will join these constant values with the input to make
 			// equality columns for the lookup join.
 			if constFilters == nil {
@@ -343,11 +354,12 @@ func (c *CustomFuncs) GenerateLookupJoins(
 		}

 		if shouldBuildMultiSpanLookupJoin {
-			// Some of the index columns were constrained to multiple constant values,
-			// and we did not use the method constructJoinWithConstants to create a
-			// cross join as the input (either because it would have been incorrect or
-			// because it would have eliminated the opportunity to apply other
-			// optimizations such as locality optimized search; see above).
+			// Some of the index columns were constrained to multiple constant values
+			// or a range expression, and we did not use the method
+			// constructJoinWithConstants to create a cross join as the input (either
+			// because it would have been incorrect or because it would have
+			// eliminated the opportunity to apply other optimizations such as
+			// locality optimized search; see above).
 		//
 		// As an alternative, we store all the filters needed for the lookup in
 		// LookupExpr, which will be used to construct spans at execution time.
@@ -566,27 +578,41 @@ func (c *CustomFuncs) findFiltersForIndexLookup(
 			continue
 		}

+		var foundRange bool
 		// Try to find a filter that constrains this column to non-NULL
 		// constant values. We cannot use a NULL value because the lookup
 		// join implements logic equivalent to simple equality between
 		// columns (where NULL never equals anything).
 		values, allIdx, ok := c.findJoinFilterConstants(filters, idxCol)
 		if !ok {
-			break
+			// If there are no const filters, look for an inequality range.
+			allIdx, foundRange = c.findJoinFilterRange(filters, idxCol)
+			if !foundRange {
+				break
+			}
 		}

 		if constFilters == nil {
 			constFilters = make(memo.FiltersExpr, 0, numIndexKeyCols-j)
 		}

-		// Ensure that the constant filter is either an equality or an IN expression.
-		// These are the only two types of expressions currently supported by the
-		// lookupJoiner for building lookup spans.
+		// Ensure that the constant filter is an equality, IN, or inequality
+		// expression. These are the only types of expressions currently supported
+		// by the lookupJoiner for building lookup spans.
 		constFilter := filters[allIdx]
-		if !c.isCanonicalConstFilter(constFilter) {
-			constFilter = c.makeConstFilter(idxCol, values)
+		if !c.isCanonicalLookupJoinFilter(constFilter) {
+			if len(values) > 0 {
+				constFilter = c.makeConstFilter(idxCol, values)
+			} else if foundRange {
+				constFilter = c.makeRangeFilter(idxCol, constFilter)
+			}
 		}
 		constFilters = append(constFilters, constFilter)
+
+		// Generating additional columns after a range isn't helpful, so stop here.
+		if foundRange {
+			break
+		}
 	}

 	if len(eqFilters) == 0 {
@@ -597,24 +623,34 @@ func (c *CustomFuncs) findFiltersForIndexLookup(
 	return eqFilters, constFilters, rightSideCols
 }

-// isCanonicalConstFilter checks that the given filter is a constant filter in
-// one of two possible canonical formats:
-//   1. It is an equality between a variable and a constant.
-//   2. It is an IN expression between a variable and a tuple of constants.
-// Returns true if the filter matches one of these two formats. Otherwise
-// returns false.
-func (c *CustomFuncs) isCanonicalConstFilter(filter memo.FiltersItem) bool {
-	switch t := filter.Condition.(type) {
-	case *memo.EqExpr:
-		if t.Left.Op() == opt.VariableOp && opt.IsConstValueOp(t.Right) {
-			return true
-		}
-	case *memo.InExpr:
-		if t.Left.Op() == opt.VariableOp && memo.CanExtractConstTuple(t.Right) {
+// isCanonicalLookupJoinFilter returns true for the limited set of expressions
+// that are supported by the lookup joiner at execution time.
+func (c *CustomFuncs) isCanonicalLookupJoinFilter(filter memo.FiltersItem) bool {
+	var checkExpr func(expr opt.Expr) bool
+	checkExpr = func(expr opt.Expr) bool {
+		switch t := expr.(type) {
+		case *memo.RangeExpr:
+			return checkExpr(t.And)
+		case *memo.AndExpr:
+			return checkExpr(t.Left) && checkExpr(t.Right)
+		case *memo.GeExpr:
+			return checkExpr(t.Left) && checkExpr(t.Right)
+		case *memo.GtExpr:
+			return checkExpr(t.Left) && checkExpr(t.Right)
+		case *memo.LeExpr:
+			return checkExpr(t.Left) && checkExpr(t.Right)
+		case *memo.LtExpr:
+			return checkExpr(t.Left) && checkExpr(t.Right)
+		case *memo.VariableExpr:
 			return true
+		case *memo.EqExpr:
+			return checkExpr(t.Left) && checkExpr(t.Right)
+		case *memo.InExpr:
+			return checkExpr(t.Left) && memo.CanExtractConstTuple(t.Right)
 		}
+		return opt.IsConstValueOp(expr)
 	}
-	return false
+	return checkExpr(filter.Condition)
 }

 // makeConstFilter builds a filter that constrains the given column to the given
@@ -640,6 +676,59 @@ func (c *CustomFuncs) makeConstFilter(col opt.ColumnID, values tree.Datums) memo
 	))
 }

+// makeRangeFilter builds a filter from a constrained column. We assume the
+// column is constrained by at least one tight constraint. This code doesn't
+// handle descending columns.
+func (c *CustomFuncs) makeRangeFilter(col opt.ColumnID, filter memo.FiltersItem) memo.FiltersItem {
+	props := filter.ScalarProps()
+	if props.Constraints.Length() == 0 ||
+		props.Constraints.Constraint(0).Spans.Count() != 1 ||
+		props.Constraints.Constraint(0).Columns.Get(0).Descending() {
+		panic(errors.AssertionFailedf("makeRangeFilter needs at least one ascending constraint with one span"))
+	}
+	span := props.Constraints.Constraint(0).Spans.Get(0)
+	return c.makeRangeFilterFromSpan(col, span)
+}
+
+// makeRangeFilterFromSpan constructs a filter from a constraint.Span.
+func (c *CustomFuncs) makeRangeFilterFromSpan(
+	col opt.ColumnID, span *constraint.Span,
+) memo.FiltersItem {
+	variable := c.e.f.ConstructVariable(col)
+	var left, right opt.ScalarExpr
+
+	// Here and below we need to check for IsEmpty and IsNull because sometimes
+	// Null is used for unbounded spans. Found empirically by forcing
+	// findFiltersForIndexLookup to always wrap the filters with makeRangeFilter.
+ if !span.StartKey().IsEmpty() && !span.StartKey().IsNull() { + val := span.StartKey().Value(0) + if span.StartBoundary() == constraint.IncludeBoundary { + left = c.e.f.ConstructGe(variable, c.e.f.ConstructConstVal(val, val.ResolvedType())) + } else { + left = c.e.f.ConstructGt(variable, c.e.f.ConstructConstVal(val, val.ResolvedType())) + } + } + + if !span.EndKey().IsEmpty() && !span.EndKey().IsNull() { + val := span.EndKey().Value(0) + if span.EndBoundary() == constraint.IncludeBoundary { + right = c.e.f.ConstructLe(variable, c.e.f.ConstructConstVal(val, val.ResolvedType())) + } else { + right = c.e.f.ConstructLt(variable, c.e.f.ConstructConstVal(val, val.ResolvedType())) + } + } + + if left != nil && right != nil { + return c.e.f.ConstructFiltersItem(c.e.f.ConstructRange(c.e.f.ConstructAnd(right, left))) + } else if left != nil { + return c.e.f.ConstructFiltersItem(left) + } else if right != nil { + return c.e.f.ConstructFiltersItem(right) + } + + panic(errors.AssertionFailedf("Constraint needs a valid start or end key")) +} + // constructContinuationColumnForPairedJoin constructs a continuation column // ID for the paired-joiners used for left outer/semi/anti joins when the // first join generates false positives (due to an inverted index or @@ -975,6 +1064,28 @@ func (c *CustomFuncs) findJoinFilterConstants( return bestValues, bestFilterIdx, true } +// findJoinFilterRange tries to find an inequality range for this column. +func (c *CustomFuncs) findJoinFilterRange( + filters memo.FiltersExpr, col opt.ColumnID, +) (filterIdx int, ok bool) { + for filterIdx := range filters { + props := filters[filterIdx].ScalarProps() + if props.TightConstraints && !props.Constraints.IsUnconstrained() { + constraint := props.Constraints.Constraint(0) + constraintCol := constraint.Columns.Get(0).ID() + // See comment in findFiltersForIndexLookup for why we check filter here. + // We only support 1 span in the execution engine so check that. + if constraintCol != col || + constraint.Spans.Count() != 1 || + !c.isCanonicalLookupJoinFilter(filters[filterIdx]) { + continue + } + return filterIdx, true + } + } + return 0, false +} + // constructJoinWithConstants constructs a cross join that joins every row in // the input with every value in vals. The cross join will be converted into a // projection by inlining normalization rules if vals contains only a single diff --git a/pkg/sql/opt/xform/join_funcs_export_test.go b/pkg/sql/opt/xform/join_funcs_export_test.go new file mode 100644 index 000000000000..5abd356b36b8 --- /dev/null +++ b/pkg/sql/opt/xform/join_funcs_export_test.go @@ -0,0 +1,14 @@ +// Copyright 2021 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package xform + +var TestingMakeRangeFilterFromSpan = (*CustomFuncs).makeRangeFilterFromSpan +var TestingIsCanonicalLookupJoinFilter = (*CustomFuncs).isCanonicalLookupJoinFilter diff --git a/pkg/sql/opt/xform/join_funcs_test.go b/pkg/sql/opt/xform/join_funcs_test.go new file mode 100644 index 000000000000..fccace1b1d38 --- /dev/null +++ b/pkg/sql/opt/xform/join_funcs_test.go @@ -0,0 +1,191 @@ +// Copyright 2021 The Cockroach Authors. 
+// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package xform_test + +import ( + "reflect" + "testing" + + "github.com/cockroachdb/cockroach/pkg/settings/cluster" + "github.com/cockroachdb/cockroach/pkg/sql/opt" + "github.com/cockroachdb/cockroach/pkg/sql/opt/constraint" + "github.com/cockroachdb/cockroach/pkg/sql/opt/memo" + "github.com/cockroachdb/cockroach/pkg/sql/opt/norm" + "github.com/cockroachdb/cockroach/pkg/sql/opt/testutils" + "github.com/cockroachdb/cockroach/pkg/sql/opt/testutils/testcat" + "github.com/cockroachdb/cockroach/pkg/sql/opt/xform" + "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" + "github.com/cockroachdb/cockroach/pkg/util/leaktest" +) + +func TestCustomFuncs_makeRangeFilter(t *testing.T) { + defer leaktest.AfterTest(t)() + fb := makeFilterBuilder(t) + col := fb.tbl.ColumnID(0) + intLow := tree.NewDInt(0) + intHigh := tree.NewDInt(1) + nullKey := constraint.MakeKey(tree.DNull) + + tests := []struct { + name string + filter string + start constraint.Key + startBoundary constraint.SpanBoundary + end constraint.Key + endBoundary constraint.SpanBoundary + }{ + {"lt", "@1 < 1", + constraint.EmptyKey, constraint.IncludeBoundary, + constraint.MakeKey(intHigh), constraint.ExcludeBoundary, + }, + {"le", "@1 <= 1", + constraint.EmptyKey, constraint.IncludeBoundary, + constraint.MakeKey(intHigh), constraint.IncludeBoundary, + }, + {"gt", "@1 > 0", + constraint.MakeKey(intLow), constraint.ExcludeBoundary, + constraint.EmptyKey, constraint.IncludeBoundary, + }, + {"ge", "@1 >= 0", + constraint.MakeKey(intLow), constraint.IncludeBoundary, + constraint.EmptyKey, constraint.IncludeBoundary, + }, + {"lt-null", "@1 < 1", + nullKey, constraint.ExcludeBoundary, + constraint.MakeKey(intHigh), constraint.ExcludeBoundary, + }, + {"le-null", "@1 <= 1", + nullKey, constraint.ExcludeBoundary, + constraint.MakeKey(intHigh), constraint.IncludeBoundary, + }, + {"gt-null", "@1 > 0", + constraint.MakeKey(intLow), constraint.ExcludeBoundary, + nullKey, constraint.IncludeBoundary, + }, + {"ge-null", "@1 >= 0", + constraint.MakeKey(intLow), constraint.IncludeBoundary, + nullKey, constraint.IncludeBoundary, + }, + {"ge<", "@1 >= 0 AND @1 < 1", + constraint.MakeKey(intLow), constraint.IncludeBoundary, + constraint.MakeKey(intHigh), constraint.ExcludeBoundary, + }, + {"ge&le", "@1 >= 0 AND @1 <= 1", + constraint.MakeKey(intLow), constraint.IncludeBoundary, + constraint.MakeKey(intHigh), constraint.IncludeBoundary, + }, + {"gt<", "@1 > 0 AND @1 < 1", + constraint.MakeKey(intLow), constraint.ExcludeBoundary, + constraint.MakeKey(intHigh), constraint.ExcludeBoundary, + }, + {"gt&le", "@1 > 0 AND @1 <= 1", + constraint.MakeKey(intLow), constraint.ExcludeBoundary, + constraint.MakeKey(intHigh), constraint.IncludeBoundary, + }, + } + fut := xform.TestingMakeRangeFilterFromSpan + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + c := fb.o.CustomFuncs() + var sp constraint.Span + sp.Init(tt.start, tt.startBoundary, tt.end, tt.endBoundary) + want := fb.buildFilter(tt.filter) + if got := fut(c, col, &sp); !reflect.DeepEqual(got, want) { + t.Errorf("makeRangeFilter() = %v, want %v", got, want) + } + }) + } +} + +type testFilterBuilder struct { + t *testing.T + semaCtx *tree.SemaContext + ctx 
*tree.EvalContext + o *xform.Optimizer + f *norm.Factory + tbl opt.TableID +} + +func makeFilterBuilder(t *testing.T) testFilterBuilder { + var o xform.Optimizer + ctx := tree.MakeTestingEvalContext(cluster.MakeTestingClusterSettings()) + o.Init(&ctx, nil) + f := o.Factory() + cat := testcat.New() + if _, err := cat.ExecuteDDL("CREATE TABLE a (i INT PRIMARY KEY, b BOOL)"); err != nil { + t.Fatal(err) + } + tn := tree.NewTableNameWithSchema("t", tree.PublicSchemaName, "a") + tbl := f.Metadata().AddTable(cat.Table(tn), tn) + return testFilterBuilder{ + t: t, + semaCtx: &tree.SemaContext{}, + ctx: &ctx, + o: &o, + f: f, + tbl: tbl, + } +} + +func (fb *testFilterBuilder) buildFilter(str string) memo.FiltersItem { + return testutils.BuildFilters(fb.t, fb.f, fb.semaCtx, fb.ctx, str)[0] +} + +func TestCustomFuncs_isCanonicalFilter(t *testing.T) { + defer leaktest.AfterTest(t)() + fb := makeFilterBuilder(t) + + tests := []struct { + name string + filter string + want bool + }{ + // Test that True, False, Null values are hit as const. + {name: "eq-int", + filter: "i = 10", + want: true, + }, + {name: "neq-int", + filter: "i != 10", + want: false, + }, + {name: "eq-null", + filter: "i = NULL", + want: true, + }, + {name: "eq-true", + filter: "b = TRUE", + want: true, + }, + {name: "in-tuple", + filter: "i IN (1,2)", + want: true, + }, + {name: "and-eq-lt", + filter: "i = 10 AND i < 10", + want: true, + }, + {name: "or-eq-lt", + filter: "i = 10 OR i < 10", + want: false, + }, + } + fut := xform.TestingIsCanonicalLookupJoinFilter + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + c := fb.o.CustomFuncs() + filter := fb.buildFilter(tt.filter) + if got := fut(c, filter); got != tt.want { + t.Errorf("isCanonicalLookupJoinFilter() = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/pkg/sql/opt/xform/optimizer.go b/pkg/sql/opt/xform/optimizer.go index 72c3ce2a0bc4..74f4f9c8f50d 100644 --- a/pkg/sql/opt/xform/optimizer.go +++ b/pkg/sql/opt/xform/optimizer.go @@ -977,3 +977,8 @@ func (o *Optimizer) recomputeCostImpl( func (o *Optimizer) FormatExpr(e opt.Expr, flags memo.ExprFmtFlags) string { return memo.FormatExpr(e, flags, o.mem, o.catalog) } + +// CustomFuncs exports the xform.CustomFuncs for testing purposes. 
+func (o *Optimizer) CustomFuncs() *CustomFuncs { + return &o.explorer.funcs +} diff --git a/pkg/sql/opt/xform/testdata/coster/zone b/pkg/sql/opt/xform/testdata/coster/zone index ab3a37c64783..88d18eb1000e 100644 --- a/pkg/sql/opt/xform/testdata/coster/zone +++ b/pkg/sql/opt/xform/testdata/coster/zone @@ -752,7 +752,7 @@ anti-join (lookup abc_part@bc_idx [as=a2]) │ └── a2.r:7 = 'west' [outer=(7), constraints=(/7: [/'west' - /'west']; tight), fd=()-->(7)] ├── cardinality: [0 - 1] ├── stats: [rows=1e-10] - ├── cost: 18.153533 + ├── cost: 18.1549817 ├── key: () ├── fd: ()-->(1-4) ├── anti-join (lookup abc_part@bc_idx [as=a2]) @@ -763,7 +763,7 @@ anti-join (lookup abc_part@bc_idx [as=a2]) │ │ └── a2.r:7 = 'east' [outer=(7), constraints=(/7: [/'east' - /'east']; tight), fd=()-->(7)] │ ├── cardinality: [0 - 1] │ ├── stats: [rows=0.900900001, distinct(1)=0.89738934, null(1)=0, distinct(2)=0.900900001, null(2)=0, distinct(3)=0.900900001, null(3)=0, distinct(4)=0.900900001, null(4)=0] - │ ├── cost: 10.8531367 + │ ├── cost: 10.8538647 │ ├── key: () │ ├── fd: ()-->(1-4) │ ├── locality-optimized-search diff --git a/pkg/sql/opt/xform/testdata/external/tpce b/pkg/sql/opt/xform/testdata/external/tpce index 8e19660c2ff5..dace24b450df 100644 --- a/pkg/sql/opt/xform/testdata/external/tpce +++ b/pkg/sql/opt/xform/testdata/external/tpce @@ -4113,7 +4113,10 @@ limit │ │ ├── ordering: +20 opt(24) [actual: +20] │ │ └── inner-join (lookup watch_item) │ │ ├── columns: wi_wl_id:19!null wi_s_symb:20!null wl_id:23!null wl_c_id:24!null - │ │ ├── key columns: [23] = [19] + │ │ ├── lookup expression + │ │ │ └── filters + │ │ │ ├── wi_wl_id:19 = wl_id:23 [outer=(19,23), constraints=(/19: (/NULL - ]; /23: (/NULL - ]), fd=(19)==(23), (23)==(19)] + │ │ │ └── wi_s_symb:20 > 'SYMB' [outer=(20), constraints=(/20: [/e'SYMB\x00' - ]; tight)] │ │ ├── key: (20,23) │ │ ├── fd: ()-->(24), (19)==(23), (23)==(19) │ │ ├── select @@ -4126,8 +4129,7 @@ limit │ │ │ │ └── fd: (23)-->(24) │ │ │ └── filters │ │ │ └── wl_c_id:24 = 0 [outer=(24), constraints=(/24: [/0 - /0]; tight), fd=()-->(24)] - │ │ └── filters - │ │ └── wi_s_symb:20 > 'SYMB' [outer=(20), constraints=(/20: [/e'SYMB\x00' - ]; tight)] + │ │ └── filters (true) │ └── filters (true) └── 1 diff --git a/pkg/sql/opt/xform/testdata/external/tpce-no-stats b/pkg/sql/opt/xform/testdata/external/tpce-no-stats index b53395de1c80..bbd0fc1b1eb2 100644 --- a/pkg/sql/opt/xform/testdata/external/tpce-no-stats +++ b/pkg/sql/opt/xform/testdata/external/tpce-no-stats @@ -4133,7 +4133,10 @@ limit │ │ ├── ordering: +20 opt(24) [actual: +20] │ │ └── inner-join (lookup watch_item) │ │ ├── columns: wi_wl_id:19!null wi_s_symb:20!null wl_id:23!null wl_c_id:24!null - │ │ ├── key columns: [23] = [19] + │ │ ├── lookup expression + │ │ │ └── filters + │ │ │ ├── wi_wl_id:19 = wl_id:23 [outer=(19,23), constraints=(/19: (/NULL - ]; /23: (/NULL - ]), fd=(19)==(23), (23)==(19)] + │ │ │ └── wi_s_symb:20 > 'SYMB' [outer=(20), constraints=(/20: [/e'SYMB\x00' - ]; tight)] │ │ ├── key: (20,23) │ │ ├── fd: ()-->(24), (19)==(23), (23)==(19) │ │ ├── select @@ -4146,8 +4149,7 @@ limit │ │ │ │ └── fd: (23)-->(24) │ │ │ └── filters │ │ │ └── wl_c_id:24 = 0 [outer=(24), constraints=(/24: [/0 - /0]; tight), fd=()-->(24)] - │ │ └── filters - │ │ └── wi_s_symb:20 > 'SYMB' [outer=(20), constraints=(/20: [/e'SYMB\x00' - ]; tight)] + │ │ └── filters (true) │ └── filters (true) └── 1 diff --git a/pkg/sql/opt/xform/testdata/external/trading b/pkg/sql/opt/xform/testdata/external/trading index 47823f8b24b0..0076de7353d5 100644 --- 
a/pkg/sql/opt/xform/testdata/external/trading +++ b/pkg/sql/opt/xform/testdata/external/trading @@ -845,7 +845,12 @@ project │ │ │ │ └── ordering: +10 opt(9) [actual: +10] │ │ │ ├── left-join (lookup transactiondetails@detailscardidindex) │ │ │ │ ├── columns: id:1!null name:2!null rarity:3 setname:4 number:5!null isfoil:6!null transactiondetails.dealerid:20 isbuy:21 transactiondate:22 transactiondetails.cardid:23 quantity:24 - │ │ │ │ ├── key columns: [42 43 1] = [20 21 23] + │ │ │ │ ├── lookup expression + │ │ │ │ │ └── filters + │ │ │ │ │ ├── transactiondetails.cardid:23 = id:1 [outer=(1,23), constraints=(/1: (/NULL - ]; /23: (/NULL - ]), fd=(1)==(23), (23)==(1)] + │ │ │ │ │ ├── transactiondetails.dealerid:20 = 1 [outer=(20), constraints=(/20: [/1 - /1]; tight), fd=()-->(20)] + │ │ │ │ │ ├── NOT isbuy:21 [outer=(21), constraints=(/21: [/false - /false]; tight), fd=()-->(21)] + │ │ │ │ │ └── (transactiondate:22 >= '2020-02-28 00:00:00+00:00') AND (transactiondate:22 <= '2020-03-01 00:00:00+00:00') [outer=(22), constraints=(/22: [/'2020-02-28 00:00:00+00:00' - /'2020-03-01 00:00:00+00:00']; tight)] │ │ │ │ ├── immutable │ │ │ │ ├── stats: [rows=3543333.33, distinct(1)=19000, null(1)=0, distinct(2)=13000, null(2)=0, distinct(5)=829, null(5)=0, distinct(6)=5601.15328, null(6)=0, distinct(23)=19000, null(23)=0] │ │ │ │ ├── key: (1,22-24) @@ -854,7 +859,7 @@ project │ │ │ │ ├── project │ │ │ │ │ ├── columns: "lookup_join_const_col_@21":43!null "lookup_join_const_col_@20":42!null id:1!null name:2!null rarity:3 setname:4 number:5!null isfoil:6!null │ │ │ │ │ ├── immutable - │ │ │ │ │ ├── stats: [rows=19000, distinct(1)=19000, null(1)=0, distinct(2)=13000, null(2)=0, distinct(5)=829, null(5)=0, distinct(6)=5601.15328, null(6)=0, distinct(42)=1, null(42)=0, distinct(43)=1, null(43)=0] + │ │ │ │ │ ├── stats: [rows=19000] │ │ │ │ │ ├── key: (1) │ │ │ │ │ ├── fd: ()-->(42,43), (1)-->(2-6), (2,4,5)~~>(1,3,6) │ │ │ │ │ ├── ordering: +1 @@ -876,8 +881,7 @@ project │ │ │ │ │ └── projections │ │ │ │ │ ├── false [as="lookup_join_const_col_@21":43] │ │ │ │ │ └── 1 [as="lookup_join_const_col_@20":42] - │ │ │ │ └── filters - │ │ │ │ └── (transactiondate:22 >= '2020-02-28 00:00:00+00:00') AND (transactiondate:22 <= '2020-03-01 00:00:00+00:00') [outer=(22), constraints=(/22: [/'2020-02-28 00:00:00+00:00' - /'2020-03-01 00:00:00+00:00']; tight)] + │ │ │ │ └── filters (true) │ │ │ └── filters (true) │ │ └── aggregations │ │ ├── sum [as=sum:30, outer=(24)] diff --git a/pkg/sql/opt/xform/testdata/external/trading-mutation b/pkg/sql/opt/xform/testdata/external/trading-mutation index 264a3cd8fcd2..bba7a2fb1290 100644 --- a/pkg/sql/opt/xform/testdata/external/trading-mutation +++ b/pkg/sql/opt/xform/testdata/external/trading-mutation @@ -849,7 +849,12 @@ project │ │ │ │ └── ordering: +10 opt(9) [actual: +10] │ │ │ ├── left-join (lookup transactiondetails@detailscardidindex) │ │ │ │ ├── columns: id:1!null name:2!null rarity:3 setname:4 number:5!null isfoil:6!null transactiondetails.dealerid:24 isbuy:25 transactiondate:26 transactiondetails.cardid:27 quantity:28 - │ │ │ │ ├── key columns: [48 49 1] = [24 25 27] + │ │ │ │ ├── lookup expression + │ │ │ │ │ └── filters + │ │ │ │ │ ├── transactiondetails.cardid:27 = id:1 [outer=(1,27), constraints=(/1: (/NULL - ]; /27: (/NULL - ]), fd=(1)==(27), (27)==(1)] + │ │ │ │ │ ├── transactiondetails.dealerid:24 = 1 [outer=(24), constraints=(/24: [/1 - /1]; tight), fd=()-->(24)] + │ │ │ │ │ ├── NOT isbuy:25 [outer=(25), constraints=(/25: [/false - /false]; tight), fd=()-->(25)] + │ │ 
│ │ │ └── (transactiondate:26 >= '2020-02-28 00:00:00+00:00') AND (transactiondate:26 <= '2020-03-01 00:00:00+00:00') [outer=(26), constraints=(/26: [/'2020-02-28 00:00:00+00:00' - /'2020-03-01 00:00:00+00:00']; tight)] │ │ │ │ ├── immutable │ │ │ │ ├── stats: [rows=3543333.33, distinct(1)=19000, null(1)=0, distinct(2)=13000, null(2)=0, distinct(5)=829, null(5)=0, distinct(6)=5601.15328, null(6)=0, distinct(27)=19000, null(27)=0] │ │ │ │ ├── key: (1,26-28) @@ -858,7 +863,7 @@ project │ │ │ │ ├── project │ │ │ │ │ ├── columns: "lookup_join_const_col_@25":49!null "lookup_join_const_col_@24":48!null id:1!null name:2!null rarity:3 setname:4 number:5!null isfoil:6!null │ │ │ │ │ ├── immutable - │ │ │ │ │ ├── stats: [rows=19000, distinct(1)=19000, null(1)=0, distinct(2)=13000, null(2)=0, distinct(5)=829, null(5)=0, distinct(6)=5601.15328, null(6)=0, distinct(48)=1, null(48)=0, distinct(49)=1, null(49)=0] + │ │ │ │ │ ├── stats: [rows=19000] │ │ │ │ │ ├── key: (1) │ │ │ │ │ ├── fd: ()-->(48,49), (1)-->(2-6), (2,4,5)~~>(1,3,6) │ │ │ │ │ ├── ordering: +1 @@ -880,8 +885,7 @@ project │ │ │ │ │ └── projections │ │ │ │ │ ├── false [as="lookup_join_const_col_@25":49] │ │ │ │ │ └── 1 [as="lookup_join_const_col_@24":48] - │ │ │ │ └── filters - │ │ │ │ └── (transactiondate:26 >= '2020-02-28 00:00:00+00:00') AND (transactiondate:26 <= '2020-03-01 00:00:00+00:00') [outer=(26), constraints=(/26: [/'2020-02-28 00:00:00+00:00' - /'2020-03-01 00:00:00+00:00']; tight)] + │ │ │ │ └── filters (true) │ │ │ └── filters (true) │ │ └── aggregations │ │ ├── sum [as=sum:36, outer=(28)] diff --git a/pkg/sql/opt/xform/testdata/rules/join b/pkg/sql/opt/xform/testdata/rules/join index 18706790196e..17aa2b23c551 100644 --- a/pkg/sql/opt/xform/testdata/rules/join +++ b/pkg/sql/opt/xform/testdata/rules/join @@ -3078,12 +3078,14 @@ SELECT a,b,n,m FROM small JOIN abcd ON a=m AND b>1 ---- inner-join (lookup abcd@secondary) ├── columns: a:6!null b:7!null n:2 m:1!null - ├── key columns: [1] = [6] + ├── lookup expression + │ └── filters + │ ├── a:6 = m:1 [outer=(1,6), constraints=(/1: (/NULL - ]; /6: (/NULL - ]), fd=(1)==(6), (6)==(1)] + │ └── b:7 > 1 [outer=(7), constraints=(/7: [/2 - ]; tight)] ├── fd: (1)==(6), (6)==(1) ├── scan small │ └── columns: m:1 n:2 - └── filters - └── b:7 > 1 [outer=(7), constraints=(/7: [/2 - ]; tight)] + └── filters (true) # Covering case, left-join. opt expect=GenerateLookupJoinsWithFilter @@ -3091,11 +3093,13 @@ SELECT a,b,n,m FROM small LEFT JOIN abcd ON a=m AND b>1 ---- left-join (lookup abcd@secondary) ├── columns: a:6 b:7 n:2 m:1 - ├── key columns: [1] = [6] + ├── lookup expression + │ └── filters + │ ├── a:6 = m:1 [outer=(1,6), constraints=(/1: (/NULL - ]; /6: (/NULL - ]), fd=(1)==(6), (6)==(1)] + │ └── b:7 > 1 [outer=(7), constraints=(/7: [/2 - ]; tight)] ├── scan small │ └── columns: m:1 n:2 - └── filters - └── b:7 > 1 [outer=(7), constraints=(/7: [/2 - ]; tight)] + └── filters (true) # Non-covering case. 
opt expect=GenerateLookupJoinsWithFilter @@ -3108,12 +3112,14 @@ inner-join (lookup abcd) ├── fd: (1)==(6), (6)==(1) ├── inner-join (lookup abcd@secondary) │ ├── columns: m:1!null n:2 a:6!null b:7!null abcd.rowid:9!null - │ ├── key columns: [1] = [6] + │ ├── lookup expression + │ │ └── filters + │ │ ├── a:6 = m:1 [outer=(1,6), constraints=(/1: (/NULL - ]; /6: (/NULL - ]), fd=(1)==(6), (6)==(1)] + │ │ └── b:7 > 1 [outer=(7), constraints=(/7: [/2 - ]; tight)] │ ├── fd: (9)-->(6,7), (1)==(6), (6)==(1) │ ├── scan small │ │ └── columns: m:1 n:2 - │ └── filters - │ └── b:7 > 1 [outer=(7), constraints=(/7: [/2 - ]; tight)] + │ └── filters (true) └── filters (true) # Non-covering case, left join. @@ -3126,12 +3132,14 @@ left-join (lookup abcd) ├── lookup columns are key ├── left-join (lookup abcd@secondary) │ ├── columns: m:1 n:2 a:6 b:7 abcd.rowid:9 - │ ├── key columns: [1] = [6] + │ ├── lookup expression + │ │ └── filters + │ │ ├── a:6 = m:1 [outer=(1,6), constraints=(/1: (/NULL - ]; /6: (/NULL - ]), fd=(1)==(6), (6)==(1)] + │ │ └── b:7 > 1 [outer=(7), constraints=(/7: [/2 - ]; tight)] │ ├── fd: (9)-->(6,7) │ ├── scan small │ │ └── columns: m:1 n:2 - │ └── filters - │ └── b:7 > 1 [outer=(7), constraints=(/7: [/2 - ]; tight)] + │ └── filters (true) └── filters (true) # Non-covering case, extra filter bound by index. @@ -3145,13 +3153,15 @@ inner-join (lookup abcd) ├── fd: (1)==(6), (6)==(1) ├── inner-join (lookup abcd@secondary) │ ├── columns: m:1!null n:2!null a:6!null b:7!null abcd.rowid:9!null - │ ├── key columns: [1] = [6] + │ ├── lookup expression + │ │ └── filters + │ │ ├── a:6 = m:1 [outer=(1,6), constraints=(/1: (/NULL - ]; /6: (/NULL - ]), fd=(1)==(6), (6)==(1)] + │ │ └── b:7 > 1 [outer=(7), constraints=(/7: [/2 - ]; tight)] │ ├── fd: (9)-->(6,7), (1)==(6), (6)==(1) │ ├── scan small │ │ └── columns: m:1 n:2 │ └── filters - │ ├── b:7 > n:2 [outer=(2,7), constraints=(/2: (/NULL - ]; /7: (/NULL - ])] - │ └── b:7 > 1 [outer=(7), constraints=(/7: [/2 - ]; tight)] + │ └── b:7 > n:2 [outer=(2,7), constraints=(/2: (/NULL - ]; /7: (/NULL - ])] └── filters (true) # Non-covering case, extra filter bound by index, left join. @@ -3164,13 +3174,15 @@ left-join (lookup abcd) ├── lookup columns are key ├── left-join (lookup abcd@secondary) │ ├── columns: m:1 n:2 a:6 b:7 abcd.rowid:9 - │ ├── key columns: [1] = [6] + │ ├── lookup expression + │ │ └── filters + │ │ ├── a:6 = m:1 [outer=(1,6), constraints=(/1: (/NULL - ]; /6: (/NULL - ]), fd=(1)==(6), (6)==(1)] + │ │ └── b:7 > 1 [outer=(7), constraints=(/7: [/2 - ]; tight)] │ ├── fd: (9)-->(6,7) │ ├── scan small │ │ └── columns: m:1 n:2 │ └── filters - │ ├── b:7 > n:2 [outer=(2,7), constraints=(/2: (/NULL - ]; /7: (/NULL - ])] - │ └── b:7 > 1 [outer=(7), constraints=(/7: [/2 - ]; tight)] + │ └── b:7 > n:2 [outer=(2,7), constraints=(/2: (/NULL - ]; /7: (/NULL - ])] └── filters (true) # Non-covering case, extra filter not bound by index. 
@@ -3184,12 +3196,14 @@ inner-join (lookup abcd) ├── fd: (1)==(6), (6)==(1) ├── inner-join (lookup abcd@secondary) │ ├── columns: m:1!null n:2 a:6!null b:7!null abcd.rowid:9!null - │ ├── key columns: [1] = [6] + │ ├── lookup expression + │ │ └── filters + │ │ ├── a:6 = m:1 [outer=(1,6), constraints=(/1: (/NULL - ]; /6: (/NULL - ]), fd=(1)==(6), (6)==(1)] + │ │ └── b:7 > 1 [outer=(7), constraints=(/7: [/2 - ]; tight)] │ ├── fd: (9)-->(6,7), (1)==(6), (6)==(1) │ ├── scan small │ │ └── columns: m:1 n:2 - │ └── filters - │ └── b:7 > 1 [outer=(7), constraints=(/7: [/2 - ]; tight)] + │ └── filters (true) └── filters └── c:8 > n:2 [outer=(2,8), constraints=(/2: (/NULL - ]; /8: (/NULL - ])] @@ -3444,12 +3458,14 @@ inner-join (lookup abcde) ├── fd: (1)==(6), (6)==(1) ├── inner-join (lookup abcde@secondary) │ ├── columns: m:1!null n:2 a:6!null b:7!null c:8 abcde.rowid:11!null - │ ├── key columns: [1] = [6] + │ ├── lookup expression + │ │ └── filters + │ │ ├── a:6 = m:1 [outer=(1,6), constraints=(/1: (/NULL - ]; /6: (/NULL - ]), fd=(1)==(6), (6)==(1)] + │ │ └── b:7 < 10 [outer=(7), constraints=(/7: (/NULL - /9]; tight)] │ ├── fd: (11)-->(6-8), (1)==(6), (6)==(1) │ ├── scan small │ │ └── columns: m:1 n:2 - │ └── filters - │ └── b:7 < 10 [outer=(7), constraints=(/7: (/NULL - /9]; tight)] + │ └── filters (true) └── filters (true) # Lookup Joiner uses the constant equality columns at the same time as the explicit @@ -8466,3 +8482,162 @@ anti-join (lookup abc_part) │ │ └── fd: ()-->(19-22) │ └── filters (true) └── filters (true) + +# illustrative examples from GH #51576 +exec-ddl +CREATE TABLE metrics ( + id SERIAL PRIMARY KEY, + name STRING, + INDEX name_index (name) +) +---- + +exec-ddl +CREATE TABLE metric_values ( + metric_id INT8, + time TIMESTAMPTZ, + value INT8, + PRIMARY KEY (metric_id, time) +) +---- + +# Add some metrics to force lookup join to be chosen. 
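+# Without statistics the optimizer may prefer a hash or merge join here; the
+# injected row counts and distinct counts make the lookup join the cheapest
+# plan.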
+exec-ddl
+ALTER TABLE metric_values INJECT STATISTICS
+'[
+  {
+    "columns": ["metric_id"],
+    "created_at": "2018-01-01 1:00:00.00000+00:00",
+    "row_count": 1000,
+    "distinct_count": 10
+  },
+  {
+    "columns": ["time"],
+    "created_at": "2018-01-01 1:30:00.00000+00:00",
+    "row_count": 1000,
+    "distinct_count": 1000
+  },
+  {
+    "columns": ["value"],
+    "created_at": "2018-01-01 1:30:00.00000+00:00",
+    "row_count": 1000,
+    "distinct_count": 1000
+  }
+]'
+----
+
+exec-ddl
+ALTER TABLE metrics INJECT STATISTICS
+'[
+  {
+    "columns": ["id"],
+    "created_at": "2018-01-01 1:00:00.00000+00:00",
+    "row_count": 10,
+    "distinct_count": 10
+  },
+  {
+    "columns": ["name"],
+    "created_at": "2018-01-01 1:30:00.00000+00:00",
+    "row_count": 10,
+    "distinct_count": 10
+  }
+]'
+----
+
+opt expect=GenerateLookupJoinsWithFilter
+SELECT *
+FROM metric_values
+INNER JOIN metrics
+ON metric_id=id
+WHERE
+  time BETWEEN '2020-01-01 00:00:00+00:00' AND '2020-01-01 00:10:00+00:00' AND
+  name='cpu'
+----
+inner-join (lookup metric_values)
+ ├── columns: metric_id:1!null time:2!null value:3 id:6!null name:7!null
+ ├── lookup expression
+ │    └── filters
+ │         ├── metric_id:1 = id:6 [outer=(1,6), constraints=(/1: (/NULL - ]; /6: (/NULL - ]), fd=(1)==(6), (6)==(1)]
+ │         └── (time:2 >= '2020-01-01 00:00:00+00:00') AND (time:2 <= '2020-01-01 00:10:00+00:00') [outer=(2), constraints=(/2: [/'2020-01-01 00:00:00+00:00' - /'2020-01-01 00:10:00+00:00']; tight)]
+ ├── key: (2,6)
+ ├── fd: ()-->(7), (1,2)-->(3), (1)==(6), (6)==(1)
+ ├── scan metrics@name_index
+ │    ├── columns: id:6!null name:7!null
+ │    ├── constraint: /7/6: [/'cpu' - /'cpu']
+ │    ├── key: (6)
+ │    └── fd: ()-->(7)
+ └── filters (true)
+
+opt expect=GenerateLookupJoinsWithFilter
+SELECT *
+FROM metric_values
+INNER JOIN metrics
+ON metric_id=id
+WHERE
+  time BETWEEN '2020-01-01 00:00:00+00:00' AND '2020-01-01 00:10:00+00:00' AND
+  name IN ('cpu','mem')
+----
+inner-join (lookup metric_values)
+ ├── columns: metric_id:1!null time:2!null value:3 id:6!null name:7!null
+ ├── lookup expression
+ │    └── filters
+ │         ├── metric_id:1 = id:6 [outer=(1,6), constraints=(/1: (/NULL - ]; /6: (/NULL - ]), fd=(1)==(6), (6)==(1)]
+ │         └── (time:2 >= '2020-01-01 00:00:00+00:00') AND (time:2 <= '2020-01-01 00:10:00+00:00') [outer=(2), constraints=(/2: [/'2020-01-01 00:00:00+00:00' - /'2020-01-01 00:10:00+00:00']; tight)]
+ ├── key: (2,6)
+ ├── fd: (1,2)-->(3), (6)-->(7), (1)==(6), (6)==(1)
+ ├── scan metrics@name_index
+ │    ├── columns: id:6!null name:7!null
+ │    ├── constraint: /7/6
+ │    │    ├── [/'cpu' - /'cpu']
+ │    │    └── [/'mem' - /'mem']
+ │    ├── key: (6)
+ │    └── fd: (6)-->(7)
+ └── filters (true)
+
+# We don't support turning LIKE into scans yet; test that we fall back to a
+# filter.
+opt expect-not=GenerateLookupJoins
+SELECT *
+FROM metric_values
+INNER JOIN metrics
+ON metric_id=id
+WHERE
+  time::STRING LIKE '202%' AND
+  name='cpu'
+----
+inner-join (lookup metric_values)
+ ├── columns: metric_id:1!null time:2!null value:3 id:6!null name:7!null
+ ├── key columns: [6] = [1]
+ ├── stable
+ ├── key: (2,6)
+ ├── fd: ()-->(7), (1,2)-->(3), (1)==(6), (6)==(1)
+ ├── scan metrics@name_index
+ │    ├── columns: id:6!null name:7!null
+ │    ├── constraint: /7/6: [/'cpu' - /'cpu']
+ │    ├── key: (6)
+ │    └── fd: ()-->(7)
+ └── filters
+      └── time:2::STRING LIKE '202%' [outer=(2), stable]
+
+opt expect=GenerateLookupJoinsWithFilter
+SELECT *
+FROM metrics
+LEFT JOIN metric_values
+ON metric_id=id
+AND time BETWEEN '2020-01-01 00:00:00+00:00' AND '2020-01-01 00:10:00+00:00'
+AND name='cpu'
+----
+left-join (lookup metric_values)
+ ├── columns: id:1!null name:2 metric_id:5 time:6 value:7
+ ├── lookup expression
+ │    └── filters
+ │         ├── metric_id:5 = id:1 [outer=(1,5), constraints=(/1: (/NULL - ]; /5: (/NULL - ]), fd=(1)==(5), (5)==(1)]
+ │         └── (time:6 >= '2020-01-01 00:00:00+00:00') AND (time:6 <= '2020-01-01 00:10:00+00:00') [outer=(6), constraints=(/6: [/'2020-01-01 00:00:00+00:00' - /'2020-01-01 00:10:00+00:00']; tight)]
+ ├── key: (1,5,6)
+ ├── fd: (1)-->(2), (5,6)-->(7)
+ ├── scan metrics
+ │    ├── columns: id:1!null name:2
+ │    ├── key: (1)
+ │    └── fd: (1)-->(2)
+ └── filters
+      └── name:2 = 'cpu' [outer=(2), constraints=(/2: [/'cpu' - /'cpu']; tight), fd=()-->(2)]
diff --git a/pkg/sql/rowexec/joinreader.go b/pkg/sql/rowexec/joinreader.go
index f03d52086e83..c4714b3fbe8f 100644
--- a/pkg/sql/rowexec/joinreader.go
+++ b/pkg/sql/rowexec/joinreader.go
@@ -382,14 +382,13 @@ func (jr *joinReader) initJoinReaderStrategy(
 	spanBuilder.SetNeededColumns(neededRightCols)
 
 	var generator joinReaderSpanGenerator
-	var keyToInputRowIndices map[string][]int
-	if readerType != indexJoinReaderType {
-		keyToInputRowIndices = make(map[string][]int)
-	}
-	// Else: see the comment in defaultSpanGenerator on why we don't need
-	// this map for index joins.
-
 	if jr.lookupExpr.Expr == nil {
+		var keyToInputRowIndices map[string][]int
+		// See the comment in defaultSpanGenerator on why we don't need
+		// this map for index joins.
+		if readerType != indexJoinReaderType {
+			keyToInputRowIndices = make(map[string][]int)
+		}
 		generator = &defaultSpanGenerator{
 			spanBuilder:          spanBuilder,
 			keyToInputRowIndices: keyToInputRowIndices,
@@ -415,7 +414,6 @@ func (jr *joinReader) initJoinReaderStrategy(
 			spanBuilder,
 			numKeyCols,
 			len(jr.input.OutputTypes()),
-			keyToInputRowIndices,
 			&jr.lookupExpr,
 			tableOrdToIndexOrd,
 		); err != nil {
@@ -428,7 +426,6 @@ func (jr *joinReader) initJoinReaderStrategy(
 			spanBuilder,
 			numKeyCols,
 			len(jr.input.OutputTypes()),
-			keyToInputRowIndices,
 			&jr.lookupExpr,
 			&jr.remoteLookupExpr,
 			tableOrdToIndexOrd,
diff --git a/pkg/sql/rowexec/joinreader_span_generator.go b/pkg/sql/rowexec/joinreader_span_generator.go
index f019c8b8a3b1..ba5db29c0aa2 100644
--- a/pkg/sql/rowexec/joinreader_span_generator.go
+++ b/pkg/sql/rowexec/joinreader_span_generator.go
@@ -12,6 +12,7 @@ package rowexec
 
 import (
 	"fmt"
+	"sort"
 
 	"github.com/cockroachdb/cockroach/pkg/roachpb"
 	"github.com/cockroachdb/cockroach/pkg/sql/execinfrapb"
@@ -137,6 +138,19 @@ func (g *defaultSpanGenerator) maxLookupCols() int {
 	return len(g.lookupCols)
 }
 
+type spanRowIndex struct {
+	span       roachpb.Span
+	rowIndices []int
+}
+
+type spanRowIndices []spanRowIndex
+
+func (s spanRowIndices) Len() int           { return len(s) }
+func (s spanRowIndices) Swap(i, j int)      { s[i], s[j] = s[j], s[i] }
+func (s spanRowIndices) Less(i, j int) bool { return s[i].span.Key.Compare(s[j].span.Key) < 0 }
+
+var _ sort.Interface = &spanRowIndices{}
+
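
Why a sorted span set is needed at all: once a lookup condition contains an inequality, the generated spans are ranges, and a fetched row's key lands strictly inside its span, so an exact-key map lookup misses. A standalone Go sketch of the failure mode (not part of this patch; plain bytes stand in for encoded roachpb keys):

    package main

    import (
    	"bytes"
    	"fmt"
    )

    func main() {
    	// Lookup span generated for input row 0, e.g. metric_id=1 plus a
    	// range over time: ["1/2020-01-01", "1/2020-01-02").
    	start := []byte("1/2020-01-01")
    	end := []byte("1/2020-01-02")

    	// A point-span map keyed on the exact span key...
    	byKey := map[string][]int{string(start): {0}}

    	// ...misses a fetched row whose key lands inside the range.
    	fetched := []byte("1/2020-01-01T06:00")
    	fmt.Println(byKey[string(fetched)]) // []: exact-key lookup fails

    	// A containment check against the span succeeds.
    	inSpan := bytes.Compare(start, fetched) <= 0 && bytes.Compare(fetched, end) < 0
    	fmt.Println(inSpan) // true
    }

Sorting the spans by start key, as the Less above does, is what lets that containment check run as a binary search instead of a linear scan.
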
 // multiSpanGenerator is the joinReaderSpanGenerator used when each lookup will
 // scan multiple spans in the index. This is the case when some of the index
 // columns can take on multiple constant values. For example, the
@@ -152,8 +166,8 @@ type multiSpanGenerator struct {
 
 	// indexColInfos stores info about the values that each index column can
 	// take on in the spans produced by the multiSpanGenerator. See the comment
-	// above multiSpanGeneratorIndexColInfo for more details.
-	indexColInfos []multiSpanGeneratorIndexColInfo
+	// above multiSpanGeneratorColInfo for more details.
+	indexColInfos []multiSpanGeneratorColInfo
 
 	// indexKeyRows and indexKeySpans are used to generate the spans for a single
 	// input row. They are allocated once in init(), and then reused for every row.
@@ -162,9 +176,18 @@ type multiSpanGenerator struct {
 
 	// keyToInputRowIndices maps a lookup span key to the input row indices that
 	// desire that span. This is used for de-duping spans, and to map the fetched
-	// rows to the input rows that need to join with them.
+	// rows to the input rows that need to join with them. If we have inequality
+	// exprs, we can't use this map from getMatchingRowIndices, because the spans
+	// are ranges rather than point spans; instead we build the map using the
+	// span start keys and then convert it into spanToInputRowIndices.
 	keyToInputRowIndices map[string][]int
 
+	// spanToInputRowIndices maps a lookup span to the input row indices that
+	// desire that span. It is the range-based equivalent of
+	// keyToInputRowIndices, used only when there are range-based (i.e.
+	// inequality) conditions. It is kept sorted so that we can binary search it.
+	spanToInputRowIndices spanRowIndices
+
 	// spansCount is the number of spans generated for each input row.
 	spansCount int
 
@@ -180,32 +203,74 @@ type multiSpanGenerator struct {
 	// numInputCols is the number of columns in the input to the joinReader.
 	numInputCols int
 
+	// inequalityColIdx is the index of the inequality colinfo (there can be at
+	// most one); -1 if there is none.
+	inequalityColIdx int
+
 	scratchSpans roachpb.Spans
 }
 
-// multiSpanGeneratorIndexColInfo contains info about the values that a specific
+// multiSpanGeneratorColInfo contains info about the values that a specific
 // index column can take on in the spans produced by the multiSpanGenerator. The
 // column ordinal is not contained in this struct, but depends on the location
 // of this struct in the indexColInfos slice; the position in the slice
 // corresponds to the position in the index.
-// - If len(constVals) > 0, the index column can equal any of the given
-//   constant values. This is the case when there is a join filter such as
-//   c IN ('a', 'b', 'c'), where c is a key column in the index.
-// - If constVals is empty, then inputRowIdx corresponds to an index into the
-//   input row. This is the case for join filters such as c = a, where c is a
-//   column in the index and a is a column in the input.
-type multiSpanGeneratorIndexColInfo struct {
-	constVals tree.Datums
+type multiSpanGeneratorColInfo interface {
+	String() string
+}
+
+// multiSpanGeneratorValuesColInfo is used to represent a column constrained
+// by a set of constants (i.e. '=' or 'IN' expressions).
+type multiSpanGeneratorValuesColInfo struct {
+	constVals tree.Datums
+}
+
+func (i multiSpanGeneratorValuesColInfo) String() string {
+	return fmt.Sprintf("[constVals: %s]", i.constVals.String())
+}
+
+// multiSpanGeneratorIndexVarColInfo represents a column that matches a column
+// in the input row. inputRowIdx corresponds to an index into the input row.
+// This is the case for join filters such as c = a, where c is a column in the
+// index and a is a column in the input.
+type multiSpanGeneratorIndexVarColInfo struct {
 	inputRowIdx int
 }
 
-func (i multiSpanGeneratorIndexColInfo) String() string {
-	if len(i.constVals) > 0 {
-		return fmt.Sprintf("[constVals: %s]", i.constVals.String())
-	}
+func (i multiSpanGeneratorIndexVarColInfo) String() string {
 	return fmt.Sprintf("[inputRowIdx: %d]", i.inputRowIdx)
 }
 
+// multiSpanGeneratorInequalityColInfo represents a column that is bound by a
+// range expression. If there are <, >, >= or <= inequalities, we distill them
+// into a start and end datum.
+type multiSpanGeneratorInequalityColInfo struct {
+	start          tree.Datum
+	startInclusive bool
+	end            tree.Datum
+	endInclusive   bool
+}
+
+func (i multiSpanGeneratorInequalityColInfo) String() string {
+	var startBoundary byte
+	if i.startInclusive {
+		startBoundary = '['
+	} else {
+		startBoundary = '('
+	}
+	var endBoundary rune
+	if i.endInclusive {
+		endBoundary = ']'
+	} else {
+		endBoundary = ')'
+	}
+	return fmt.Sprintf("%c%v - %v%c", startBoundary, i.start, i.end, endBoundary)
+}
+
+var _ multiSpanGeneratorColInfo = &multiSpanGeneratorValuesColInfo{}
+var _ multiSpanGeneratorColInfo = &multiSpanGeneratorIndexVarColInfo{}
+var _ multiSpanGeneratorColInfo = &multiSpanGeneratorInequalityColInfo{}
+
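
To make the three kinds concrete: for a lookup condition such as ON a = m AND b IN (1, 2) AND c > 5 AND c <= 9 against an index on (a, b, c), the per-column infos decompose as sketched below. This is a standalone illustration with hypothetical mirror types, ints standing in for tree.Datum; note the single inequality column sits last, matching inequalityColIdx above, and the IN column doubles spansCount:

    package main

    import "fmt"

    type colInfo interface{ fmt.Stringer }

    // Mirrors multiSpanGeneratorValuesColInfo: a set of constants.
    type valuesColInfo struct{ constVals []int }

    // Mirrors multiSpanGeneratorIndexVarColInfo: an input-row column.
    type indexVarColInfo struct{ inputRowIdx int }

    // Mirrors multiSpanGeneratorInequalityColInfo: start/end bounds.
    type inequalityColInfo struct {
    	start, end                   int
    	startInclusive, endInclusive bool
    }

    func (i valuesColInfo) String() string   { return fmt.Sprintf("constVals=%v", i.constVals) }
    func (i indexVarColInfo) String() string { return fmt.Sprintf("inputRowIdx=%d", i.inputRowIdx) }
    func (i inequalityColInfo) String() string {
    	b, e := byte('('), byte(')')
    	if i.startInclusive {
    		b = '['
    	}
    	if i.endInclusive {
    		e = ']'
    	}
    	return fmt.Sprintf("%c%d - %d%c", b, i.start, i.end, e)
    }

    func main() {
    	// ON a = m AND b IN (1, 2) AND c > 5 AND c <= 9, index on (a, b, c),
    	// with m at input ordinal 0.
    	infos := []colInfo{
    		indexVarColInfo{inputRowIdx: 0},                         // a = m
    		valuesColInfo{constVals: []int{1, 2}},                   // b IN (1, 2)
    		inequalityColInfo{start: 5, end: 9, endInclusive: true}, // 5 < c <= 9
    	}
    	for idx, info := range infos {
    		fmt.Printf("index col %d: %s\n", idx, info)
    	}
    }
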
 // maxLookupCols is part of the joinReaderSpanGenerator interface.
 func (g *multiSpanGenerator) maxLookupCols() int {
 	return len(g.indexColInfos)
@@ -217,14 +282,14 @@ func (g *multiSpanGenerator) init(
 	spanBuilder *span.Builder,
 	numKeyCols int,
 	numInputCols int,
-	keyToInputRowIndices map[string][]int,
 	exprHelper *execinfrapb.ExprHelper,
 	tableOrdToIndexOrd util.FastIntMap,
 ) error {
 	g.spanBuilder = spanBuilder
 	g.numInputCols = numInputCols
-	g.keyToInputRowIndices = keyToInputRowIndices
+	g.keyToInputRowIndices = make(map[string][]int)
 	g.tableOrdToIndexOrd = tableOrdToIndexOrd
+	g.inequalityColIdx = -1
 
 	// Initialize the spansCount to 1, since we'll always have at least one span.
 	// This number may increase when we call fillInIndexColInfos() below.
@@ -232,7 +297,7 @@ func (g *multiSpanGenerator) init(
 
 	// Process the given expression to fill in g.indexColInfos with info from the
 	// join conditions. This info will be used later to generate the spans.
-	g.indexColInfos = make([]multiSpanGeneratorIndexColInfo, 0, numKeyCols)
+	g.indexColInfos = make([]multiSpanGeneratorColInfo, 0, numKeyCols)
 	if err := g.fillInIndexColInfos(exprHelper.Expr); err != nil {
 		return err
 	}
@@ -272,19 +337,21 @@ func (g *multiSpanGenerator) init(
 	//   [ 'east' - 2 - ]
 	//   [ 'west' - 2 - ]
 	//
+
+	// Make a first pass, fleshing out the structure with the constant values.
 	g.indexKeyRows = make([]rowenc.EncDatumRow, 1, g.spansCount)
 	g.indexKeyRows[0] = make(rowenc.EncDatumRow, 0, lookupColsCount)
 	for _, info := range g.indexColInfos {
-		if len(info.constVals) > 0 {
+		if valuesInfo, ok := info.(multiSpanGeneratorValuesColInfo); ok {
 			for i, n := 0, len(g.indexKeyRows); i < n; i++ {
 				indexKeyRow := g.indexKeyRows[i]
-				for j := 1; j < len(info.constVals); j++ {
+				for j := 1; j < len(valuesInfo.constVals); j++ {
 					newIndexKeyRow := make(rowenc.EncDatumRow, len(indexKeyRow), lookupColsCount)
 					copy(newIndexKeyRow, indexKeyRow)
-					newIndexKeyRow = append(newIndexKeyRow, rowenc.EncDatum{Datum: info.constVals[j]})
+					newIndexKeyRow = append(newIndexKeyRow, rowenc.EncDatum{Datum: valuesInfo.constVals[j]})
 					g.indexKeyRows = append(g.indexKeyRows, newIndexKeyRow)
 				}
-				g.indexKeyRows[i] = append(indexKeyRow, rowenc.EncDatum{Datum: info.constVals[0]})
+				g.indexKeyRows[i] = append(indexKeyRow, rowenc.EncDatum{Datum: valuesInfo.constVals[0]})
 			}
 		} else {
 			for i := 0; i < len(g.indexKeyRows); i++ {
@@ -307,8 +374,11 @@ func (g *multiSpanGenerator) init(
 // 1. Equalities between input columns and index columns, such as c1 = c2.
 // 2. Equalities or IN conditions between index columns and constants, such
 //    as c = 5 or c IN ('a', 'b', 'c').
+// 3. Inequalities from (possibly AND'd) <, >, <=, >= exprs.
+//
 // The optimizer should have ensured that all conditions fall into one of
-// these two categories. Any other expression types will return an error.
+// these categories. Any other expression types will return an error.
+// TODO(treilly): We should probably be doing this at compile time, see #65773
 func (g *multiSpanGenerator) fillInIndexColInfos(expr tree.TypedExpr) error {
 	switch t := expr.(type) {
 	case *tree.AndExpr:
@@ -318,16 +388,26 @@ func (g *multiSpanGenerator) fillInIndexColInfos(expr tree.TypedExpr) error {
 		return g.fillInIndexColInfos(t.Right.(tree.TypedExpr))
 
 	case *tree.ComparisonExpr:
-		if t.Operator.Symbol != tree.EQ && t.Operator.Symbol != tree.In {
-			return errors.AssertionFailedf("comparison operator must be EQ or In. Found %s", t.Operator)
+		setOfVals := false
+		inequality := false
+		switch t.Operator.Symbol {
+		case tree.EQ, tree.In:
+			setOfVals = true
+		case tree.GE, tree.LE, tree.GT, tree.LT:
+			inequality = true
+		default:
+			// This should never happen because of enforcement at opt time.
+			return errors.AssertionFailedf("comparison operator not supported. Found %s", t.Operator)
 		}
 
 		tabOrd := -1
-		info := multiSpanGeneratorIndexColInfo{inputRowIdx: -1}
-		// Since we only support EQ and In, we don't need to check anything other
-		// than the types of the arguments in order to extract the info.
-		getInfo := func(typedExpr tree.TypedExpr) error {
+		var info multiSpanGeneratorColInfo
+
+		// For EQ and In, we just need to check the types of the arguments in
+		// order to extract the info. For inequalities, we return the const
+		// datums that will form the span boundaries.
+		getInfo := func(typedExpr tree.TypedExpr) (tree.Datum, error) {
 			switch t := typedExpr.(type) {
 			case *tree.IndexedVar:
 				// IndexedVars can either be from the input or the index. If the
@@ -336,38 +416,71 @@ func (g *multiSpanGenerator) fillInIndexColInfos(expr tree.TypedExpr) error {
 				if t.Idx >= g.numInputCols {
 					tabOrd = t.Idx - g.numInputCols
 				} else {
-					info.inputRowIdx = t.Idx
+					info = multiSpanGeneratorIndexVarColInfo{inputRowIdx: t.Idx}
 				}
 
 			case tree.Datum:
-				switch t.ResolvedType().Family() {
-				case types.TupleFamily:
-					info.constVals = t.(*tree.DTuple).D
-				default:
-					info.constVals = tree.Datums{t}
+				if setOfVals {
+					var values tree.Datums
+					switch t.ResolvedType().Family() {
+					case types.TupleFamily:
+						values = t.(*tree.DTuple).D
+					default:
+						values = tree.Datums{t}
+					}
+					// Every time there are multiple possible values, we multiply the
+					// spansCount by the number of possibilities. We will need to create
+					// spans representing the cartesian product of possible values for
+					// each column.
+					info = multiSpanGeneratorValuesColInfo{constVals: values}
+					g.spansCount *= len(values)
+				} else {
+					return t, nil
 				}
-				// Every time there are multiple possible values, we multiply the
-				// spansCount by the number of possibilities. We will need to create
-				// spans representing the cartesian product of possible values for
-				// each column.
-				g.spansCount *= len(info.constVals)
 			default:
-				return errors.AssertionFailedf("unhandled comparison argument type %T", t)
+				return nil, errors.AssertionFailedf("unhandled comparison argument type %T", t)
 			}
-			return nil
+			return nil, nil
 		}
-		if err := getInfo(t.Left.(tree.TypedExpr)); err != nil {
+
+		// NB: we make no attempt to deal with column direction here; that is
+		// sorted out later in the span builder.
+		var inequalityInfo multiSpanGeneratorInequalityColInfo
+		if lval, err := getInfo(t.Left.(tree.TypedExpr)); err != nil {
 			return err
+		} else if lval != nil {
+			if t.Operator.Symbol == tree.LT || t.Operator.Symbol == tree.LE {
+				inequalityInfo.start = lval
+				inequalityInfo.startInclusive = t.Operator.Symbol == tree.LE
+			} else {
+				inequalityInfo.end = lval
+				inequalityInfo.endInclusive = t.Operator.Symbol == tree.GE
+			}
 		}
-		if err := getInfo(t.Right.(tree.TypedExpr)); err != nil {
+
+		if rval, err := getInfo(t.Right.(tree.TypedExpr)); err != nil {
 			return err
+		} else if rval != nil {
+			if t.Operator.Symbol == tree.LT || t.Operator.Symbol == tree.LE {
+				inequalityInfo.end = rval
+				inequalityInfo.endInclusive = t.Operator.Symbol == tree.LE
+			} else {
+				inequalityInfo.start = rval
+				inequalityInfo.startInclusive = t.Operator.Symbol == tree.GE
+			}
 		}
 
 		idxOrd, ok := g.tableOrdToIndexOrd.Get(tabOrd)
 		if !ok {
 			return errors.AssertionFailedf("table column %d not found in index", tabOrd)
 		}
+
+		if inequality {
+			info = inequalityInfo
+			g.inequalityColIdx = idxOrd
+		}
+
 		if len(g.indexColInfos) <= idxOrd {
 			g.indexColInfos = g.indexColInfos[:idxOrd+1]
 		}
@@ -383,36 +496,76 @@ func (g *multiSpanGenerator) fillInIndexColInfos(expr tree.TypedExpr) error {
 
 // generateNonNullSpans generates spans for a given row. It does not include
 // null values, since those values would not match the lookup condition anyway.
 func (g *multiSpanGenerator) generateNonNullSpans(row rowenc.EncDatumRow) (roachpb.Spans, error) {
-	// Fill in the holes in g.indexKeyRows that correspond to input row
-	// values.
-	for j, info := range g.indexColInfos {
-		if len(info.constVals) == 0 {
-			for i := 0; i < len(g.indexKeyRows); i++ {
-				g.indexKeyRows[i][j] = row[info.inputRowIdx]
+	// Fill in the holes in g.indexKeyRows that correspond to input row values.
+	for i := 0; i < len(g.indexKeyRows); i++ {
+		for j, info := range g.indexColInfos {
+			if inf, ok := info.(multiSpanGeneratorIndexVarColInfo); ok {
+				g.indexKeyRows[i][j] = row[inf.inputRowIdx]
 			}
 		}
 	}
 
 	// Convert the index key rows to spans.
 	g.indexKeySpans = g.indexKeySpans[:0]
+
+	// Hoist the inequality lookup out of the loop if we have one.
+	var inequalityInfo multiSpanGeneratorInequalityColInfo
+	if g.inequalityColIdx != -1 {
+		inequalityInfo = g.indexColInfos[g.inequalityColIdx].(multiSpanGeneratorInequalityColInfo)
+	}
+
+	// Build spans for each row.
 	for _, indexKeyRow := range g.indexKeyRows {
-		span, containsNull, err := g.spanBuilder.SpanFromEncDatums(indexKeyRow, len(g.indexColInfos))
+		var s roachpb.Span
+		var err error
+		var containsNull bool
+		if g.inequalityColIdx == -1 {
+			s, containsNull, err = g.spanBuilder.SpanFromEncDatums(indexKeyRow, len(g.indexColInfos))
+		} else {
+			s, containsNull, err = g.spanBuilder.SpanFromEncDatumsWithRange(indexKeyRow, len(g.indexColInfos),
+				inequalityInfo.start, inequalityInfo.startInclusive, inequalityInfo.end, inequalityInfo.endInclusive)
+		}
+
 		if err != nil {
 			return roachpb.Spans{}, err
 		}
+
 		if !containsNull {
-			g.indexKeySpans = append(g.indexKeySpans, span)
+			g.indexKeySpans = append(g.indexKeySpans, s)
 		}
 	}
+
 	return g.indexKeySpans, nil
 }
 
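
The function added next assumes the spans are sorted by start key and non-overlapping, so an ordinary binary search applies once the comparator answers "key before span", "key inside span", or "key past span". A standalone sketch of that three-way search (not part of this patch; bytes stand in for roachpb keys, and compareKey mirrors the contract the real code relies on from roachpb.Span.CompareKey):

    package main

    import (
    	"bytes"
    	"fmt"
    )

    type span struct{ key, endKey []byte }

    // compareKey returns 0 if k lies in [key, endKey), -1 if k sorts before
    // the span, and 1 if k sorts at or after endKey.
    func (s span) compareKey(k []byte) int {
    	if bytes.Compare(k, s.key) < 0 {
    		return -1
    	}
    	if bytes.Compare(k, s.endKey) >= 0 {
    		return 1
    	}
    	return 0
    }

    func find(spans []span, k []byte) int {
    	i, j := 0, len(spans)
    	for i < j {
    		h := (i + j) >> 1
    		switch spans[h].compareKey(k) {
    		case 0:
    			return h
    		case -1:
    			j = h // k is before span h: search the left half
    		case 1:
    			i = h + 1 // k is at or past span h's end: search the right half
    		}
    	}
    	return -1
    }

    func main() {
    	spans := []span{ // sorted by start key, non-overlapping
    		{[]byte("a"), []byte("c")},
    		{[]byte("m"), []byte("p")},
    	}
    	fmt.Println(find(spans, []byte("n"))) // 1: inside ["m", "p")
    	fmt.Println(find(spans, []byte("x"))) // -1: no span contains it
    }
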
+// findInputRowIndicesByKey does a binary search to find the span that contains
+// the given key, returning the input row indices for that span, or nil if no
+// span contains the key.
+func (s *spanRowIndices) findInputRowIndicesByKey(key roachpb.Key) []int {
+	i, j := 0, s.Len()
+	for i < j {
+		h := (i + j) >> 1
+		sp := (*s)[h]
+		switch sp.span.CompareKey(key) {
+		case 0:
+			return sp.rowIndices
+		case -1:
+			j = h
+		case 1:
+			i = h + 1
+		}
+	}
+
+	return nil
+}
+
 // generateSpans is part of the joinReaderSpanGenerator interface.
 func (g *multiSpanGenerator) generateSpans(rows []rowenc.EncDatumRow) (roachpb.Spans, error) {
 	// This loop gets optimized to a runtime.mapclear call.
 	for k := range g.keyToInputRowIndices {
 		delete(g.keyToInputRowIndices, k)
 	}
+	g.spanToInputRowIndices = g.spanToInputRowIndices[:0]
+
 	// We maintain a map from index key to the corresponding input rows so we can
 	// join the index results to the inputs.
 	g.scratchSpans = g.scratchSpans[:0]
@@ -425,18 +578,43 @@ func (g *multiSpanGenerator) generateSpans(rows []rowenc.EncDatumRow) (roachpb.S
 			generatedSpan := &generatedSpans[j]
 			inputRowIndices := g.keyToInputRowIndices[string(generatedSpan.Key)]
 			if inputRowIndices == nil {
-				g.scratchSpans = g.spanBuilder.MaybeSplitSpanIntoSeparateFamilies(
-					g.scratchSpans, *generatedSpan, len(g.indexColInfos), false /* containsNull */)
+				// MaybeSplitSpanIntoSeparateFamilies is an optimization for doing more
+				// efficient point lookups when the span hits multiple column families.
+				// It doesn't work with inequality ranges, because the prefixLen we pass
+				// in here would be wrong, among other reasons.
+				if g.inequalityColIdx != -1 {
+					g.scratchSpans = append(g.scratchSpans, *generatedSpan)
+				} else {
+					g.scratchSpans = g.spanBuilder.MaybeSplitSpanIntoSeparateFamilies(
+						g.scratchSpans, *generatedSpan, len(g.indexColInfos), false /* containsNull */)
+				}
 			}
+
 			g.keyToInputRowIndices[string(generatedSpan.Key)] = append(inputRowIndices, i)
 		}
 	}
 
+	// If we need to map against range spans instead of point spans, convert the
+	// map into a sorted set of spans that we can binary search against.
+	if g.inequalityColIdx != -1 {
+		for _, s := range g.scratchSpans {
+			g.spanToInputRowIndices = append(g.spanToInputRowIndices, spanRowIndex{span: s, rowIndices: g.keyToInputRowIndices[string(s.Key)]})
+		}
+		sort.Sort(g.spanToInputRowIndices)
+		// We don't need the map anymore.
+		for k := range g.keyToInputRowIndices {
+			delete(g.keyToInputRowIndices, k)
+		}
+	}
+
 	return g.scratchSpans, nil
 }
 
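
A standalone sketch of the conversion just performed (not part of this patch): range spans are first de-duplicated through a map keyed on the span's start key, then flattened into a slice sorted by start key so that getMatchingRowIndices can binary search it:

    package main

    import (
    	"fmt"
    	"sort"
    )

    type span struct{ key, endKey string }

    type spanRowIndex struct {
    	span       span
    	rowIndices []int
    }

    func main() {
    	// One range span per input row; rows 0 and 2 happen to share a span.
    	rowSpans := []span{
    		{"m", "p"}, // row 0
    		{"a", "c"}, // row 1
    		{"m", "p"}, // row 2
    	}

    	// De-dupe on the start key, accumulating the interested row indices.
    	byStartKey := map[string][]int{}
    	var deduped []span
    	for rowIdx, s := range rowSpans {
    		if byStartKey[s.key] == nil {
    			deduped = append(deduped, s)
    		}
    		byStartKey[s.key] = append(byStartKey[s.key], rowIdx)
    	}

    	// Flatten into a slice sorted by start key; the map is then discarded.
    	sorted := make([]spanRowIndex, 0, len(deduped))
    	for _, s := range deduped {
    		sorted = append(sorted, spanRowIndex{span: s, rowIndices: byStartKey[s.key]})
    	}
    	sort.Slice(sorted, func(i, j int) bool { return sorted[i].span.key < sorted[j].span.key })

    	fmt.Println(sorted) // [{{a c} [1]} {{m p} [0 2]}]
    }
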
 // getMatchingRowIndices is part of the joinReaderSpanGenerator interface.
 func (g *multiSpanGenerator) getMatchingRowIndices(key roachpb.Key) []int {
+	if g.inequalityColIdx != -1 {
+		return g.spanToInputRowIndices.findInputRowIndicesByKey(key)
+	}
 	return g.keyToInputRowIndices[string(key)]
 }
 
@@ -455,18 +633,17 @@ func (g *localityOptimizedSpanGenerator) init(
 	spanBuilder *span.Builder,
 	numKeyCols int,
 	numInputCols int,
-	keyToInputRowIndices map[string][]int,
 	localExprHelper *execinfrapb.ExprHelper,
 	remoteExprHelper *execinfrapb.ExprHelper,
 	tableOrdToIndexOrd util.FastIntMap,
 ) error {
 	if err := g.localSpanGen.init(
-		spanBuilder, numKeyCols, numInputCols, keyToInputRowIndices, localExprHelper, tableOrdToIndexOrd,
+		spanBuilder, numKeyCols, numInputCols, localExprHelper, tableOrdToIndexOrd,
 	); err != nil {
 		return err
 	}
 	if err := g.remoteSpanGen.init(
-		spanBuilder, numKeyCols, numInputCols, keyToInputRowIndices, remoteExprHelper, tableOrdToIndexOrd,
+		spanBuilder, numKeyCols, numInputCols, remoteExprHelper, tableOrdToIndexOrd,
 	); err != nil {
 		return err
 	}
diff --git a/pkg/sql/sem/builtins/window_builtins.go b/pkg/sql/sem/builtins/window_builtins.go
index 2d32fd290215..3975b67dd5dc 100644
--- a/pkg/sql/sem/builtins/window_builtins.go
+++ b/pkg/sql/sem/builtins/window_builtins.go
@@ -805,7 +805,9 @@ func newNthValueWindow([]*types.T, *tree.EvalContext) tree.WindowFunc {
 	return &nthValueWindow{}
 }
 
-var errInvalidArgumentForNthValue = pgerror.Newf(
+// ErrInvalidArgumentForNthValue is returned when the nth_value window
+// function is given a value of 'n' that is not greater than zero.
+var ErrInvalidArgumentForNthValue = pgerror.Newf(
 	pgcode.InvalidParameterValue, "argument of nth_value() must be greater than zero")
 
 func (nthValueWindow) Compute(
@@ -822,7 +824,7 @@ func (nthValueWindow) Compute(
 
 	nth := int(tree.MustBeDInt(arg))
 	if nth <= 0 {
-		return nil, errInvalidArgumentForNthValue
+		return nil, ErrInvalidArgumentForNthValue
 	}
 
 	frameStartIdx, err := wfr.FrameStartIdx(ctx, evalCtx)
diff --git a/pkg/sql/span/span_builder.go b/pkg/sql/span/span_builder.go
index e4619bd3e4b4..bc97605fecc4 100644
--- a/pkg/sql/span/span_builder.go
+++ b/pkg/sql/span/span_builder.go
@@ -145,6 +145,76 @@ func (s *Builder) SpanFromEncDatums(
 		values[:prefixLen], s.indexColTypes[:prefixLen], s.indexColDirs[:prefixLen], s.table, s.index, &s.alloc, s.KeyPrefix)
 }
 
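
The function added below has to translate inclusive/exclusive datum bounds into a half-open [start, end) KV span. A standalone sketch of the two key adjustments it leans on (not part of this patch; next and prefixEnd mirror roachpb.Key.Next and roachpb.Key.PrefixEnd, assuming their usual append-zero-byte and increment-last-byte behavior):

    package main

    import "fmt"

    // next returns the smallest key strictly greater than k: append a zero
    // byte (mirrors roachpb.Key.Next).
    func next(k []byte) []byte {
    	return append(append([]byte(nil), k...), 0)
    }

    // prefixEnd returns the first key that no longer has k as a prefix:
    // increment the last non-0xff byte (mirrors roachpb.Key.PrefixEnd).
    func prefixEnd(k []byte) []byte {
    	out := append([]byte(nil), k...)
    	for i := len(out) - 1; i >= 0; i-- {
    		if out[i] != 0xff {
    			out[i]++
    			return out[:i+1]
    		}
    	}
    	return out // all 0xff; not reachable for valid encoded keys
    }

    func main() {
    	// KV spans are half-open [start, end). For an ascending column with an
    	// exclusive start and an inclusive end (e.g. x > A AND x <= B):
    	fmt.Printf("%q\n", next([]byte("A")))      // "A\x00": steps past the key A itself
    	fmt.Printf("%q\n", prefixEnd([]byte("B"))) // "C": past every key prefixed by B
    }
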
+// SpanFromEncDatumsWithRange encodes a range span. The inequality column is
+// assumed to be the last one in the span, and the start/end keys are
+// generated by placing the given start/end datums in the values row at
+// position prefixLen - 1. Only one of start or end needs to be non-nil;
+// omitting one causes an open-ended range span to be generated. Since the
+// exec code knows nothing about index column sort direction, it assumes
+// ascending; if the column is actually descending, we compensate here by
+// swapping the start and end bounds.
+func (s *Builder) SpanFromEncDatumsWithRange(
+	values rowenc.EncDatumRow,
+	prefixLen int,
+	startDatum tree.Datum,
+	startInclusive bool,
+	endDatum tree.Datum,
+	endInclusive bool,
+) (_ roachpb.Span, containsNull bool, err error) {
+
+	if s.indexColDirs[prefixLen-1] == descpb.IndexDescriptor_DESC {
+		startDatum, endDatum = endDatum, startDatum
+		startInclusive, endInclusive = endInclusive, startInclusive
+	}
+
+	makeKeyFromRow := func(r rowenc.EncDatumRow, l int) (k roachpb.Key, cn bool, e error) {
+		k, _, cn, e = rowenc.MakeKeyFromEncDatums(r[:l], s.indexColTypes[:l], s.indexColDirs[:l],
+			s.table, s.index, &s.alloc, s.KeyPrefix)
+		return
+	}
+
+	var startKey, endKey roachpb.Key
+	var startContainsNull, endContainsNull bool
+	if startDatum != nil {
+		values[prefixLen-1] = rowenc.EncDatum{Datum: startDatum}
+		startKey, startContainsNull, err = makeKeyFromRow(values, prefixLen)
+		if !startInclusive {
+			startKey = startKey.Next()
+		}
+	} else {
+		startKey, startContainsNull, err = makeKeyFromRow(values, prefixLen-1)
+		// If we have an ascending index, make sure not to include NULLs.
+		if s.indexColDirs[prefixLen-1] == descpb.IndexDescriptor_ASC {
+			startKey = encoding.EncodeNullAscending(startKey)
+		}
+		startKey = startKey.Next()
+	}
+
+	if err != nil {
+		return roachpb.Span{}, false, err
+	}
+
+	if endDatum != nil {
+		values[prefixLen-1] = rowenc.EncDatum{Datum: endDatum}
+		endKey, endContainsNull, err = makeKeyFromRow(values, prefixLen)
+		if endInclusive {
+			endKey = endKey.PrefixEnd()
+		}
+	} else {
+		endKey, endContainsNull, err = makeKeyFromRow(values, prefixLen-1)
+		// If we have a descending index, make sure not to include NULLs.
+		if s.indexColDirs[prefixLen-1] == descpb.IndexDescriptor_DESC {
+			endKey = encoding.EncodeNullDescending(endKey)
+		} else {
+			endKey = endKey.PrefixEnd()
+		}
+	}
+
+	if err != nil {
+		return roachpb.Span{}, false, err
+	}
+
+	return roachpb.Span{Key: startKey, EndKey: endKey}, startContainsNull || endContainsNull, nil
+}
+
 // SpanFromDatumRow generates an index span with prefixLen constraint columns from the index.
 // SpanFromDatumRow assumes that values is a valid table row for the Builder's table.
 // It also returns whether or not the input values contain a null value or not, which can be
diff --git a/vendor b/vendor
index dbbaf01b648e..2d407144afe7 160000
--- a/vendor
+++ b/vendor
@@ -1 +1 @@
-Subproject commit dbbaf01b648ec01a5f772cbf18b3078b3e3c2f07
+Subproject commit 2d407144afe7e1fd2e9c04fe8af0509dcb007e4f