Skip to content

Commit

Permalink
sql: introduce array_cat_agg aggregate builtin
Browse files Browse the repository at this point in the history
This commit introduces a new `array_cat_agg` aggregate builtin function
that takes in an array type as its input, and then unnests each array
and appends all its elements into a single result array. In other
words, it behaves similarly to `array_agg(unnest(array_column))`. This
function doesn't have an analogue in Postgres. However, some of our SQL
observability tools need this functionality, and the current workaround
of using a LATERAL JOIN often results in slow apply joins, so this new
builtin should speed things up significantly. In particular, the
`crdb_internal.statement_statistics` view is now refactored to use the
new builtin, which removes an apply join from it. The choice of this
particular name comes from the fact that we have the `array_cat` builtin
which concatenates two arrays.

Release note (sql change): New aggregate builtin function
`array_cat_agg` is introduced. It behaves similarly to how
`array_agg(unnest(array_column))` would - namely, it takes arrays as its
input, unnests them into the array elements which are then aggregated
into a single result array (i.e. it's similar to concatenating all input
arrays into a single one).
  • Loading branch information
yuzefovich committed Mar 7, 2023
1 parent ce79347 commit 02fb46e
Show file tree
Hide file tree
Showing 19 changed files with 653 additions and 359 deletions.
44 changes: 44 additions & 0 deletions docs/generated/sql/aggregates.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,50 @@
</span></td><td>Immutable</td></tr>
<tr><td><a name="array_agg"></a><code>array_agg(arg1: varbit) &rarr; varbit[]</code></td><td><span class="funcdesc"><p>Aggregates the selected values into an array.</p>
</span></td><td>Immutable</td></tr>
<tr><td><a name="array_cat_agg"></a><code>array_cat_agg(arg1: <a href="bool.html">bool</a>[]) &rarr; <a href="bool.html">bool</a>[]</code></td><td><span class="funcdesc"><p>Unnests the selected arrays into elements that are then aggregated into a single array.</p>
</span></td><td>Immutable</td></tr>
<tr><td><a name="array_cat_agg"></a><code>array_cat_agg(arg1: <a href="bytes.html">bytes</a>[]) &rarr; <a href="bytes.html">bytes</a>[]</code></td><td><span class="funcdesc"><p>Unnests the selected arrays into elements that are then aggregated into a single array.</p>
</span></td><td>Immutable</td></tr>
<tr><td><a name="array_cat_agg"></a><code>array_cat_agg(arg1: <a href="date.html">date</a>[]) &rarr; <a href="date.html">date</a>[]</code></td><td><span class="funcdesc"><p>Unnests the selected arrays into elements that are then aggregated into a single array.</p>
</span></td><td>Immutable</td></tr>
<tr><td><a name="array_cat_agg"></a><code>array_cat_agg(arg1: <a href="decimal.html">decimal</a>[]) &rarr; <a href="decimal.html">decimal</a>[]</code></td><td><span class="funcdesc"><p>Unnests the selected arrays into elements that are then aggregated into a single array.</p>
</span></td><td>Immutable</td></tr>
<tr><td><a name="array_cat_agg"></a><code>array_cat_agg(arg1: <a href="float.html">float</a>[]) &rarr; <a href="float.html">float</a>[]</code></td><td><span class="funcdesc"><p>Unnests the selected arrays into elements that are then aggregated into a single array.</p>
</span></td><td>Immutable</td></tr>
<tr><td><a name="array_cat_agg"></a><code>array_cat_agg(arg1: <a href="inet.html">inet</a>[]) &rarr; <a href="inet.html">inet</a>[]</code></td><td><span class="funcdesc"><p>Unnests the selected arrays into elements that are then aggregated into a single array.</p>
</span></td><td>Immutable</td></tr>
<tr><td><a name="array_cat_agg"></a><code>array_cat_agg(arg1: <a href="int.html">int</a>[]) &rarr; <a href="int.html">int</a>[]</code></td><td><span class="funcdesc"><p>Unnests the selected arrays into elements that are then aggregated into a single array.</p>
</span></td><td>Immutable</td></tr>
<tr><td><a name="array_cat_agg"></a><code>array_cat_agg(arg1: <a href="interval.html">interval</a>[]) &rarr; <a href="interval.html">interval</a>[]</code></td><td><span class="funcdesc"><p>Unnests the selected arrays into elements that are then aggregated into a single array.</p>
</span></td><td>Immutable</td></tr>
<tr><td><a name="array_cat_agg"></a><code>array_cat_agg(arg1: <a href="string.html">string</a>[]) &rarr; <a href="string.html">string</a>[]</code></td><td><span class="funcdesc"><p>Unnests the selected arrays into elements that are then aggregated into a single array.</p>
</span></td><td>Immutable</td></tr>
<tr><td><a name="array_cat_agg"></a><code>array_cat_agg(arg1: <a href="time.html">time</a>[]) &rarr; <a href="time.html">time</a>[]</code></td><td><span class="funcdesc"><p>Unnests the selected arrays into elements that are then aggregated into a single array.</p>
</span></td><td>Immutable</td></tr>
<tr><td><a name="array_cat_agg"></a><code>array_cat_agg(arg1: <a href="timestamp.html">timestamp</a>[]) &rarr; <a href="timestamp.html">timestamp</a>[]</code></td><td><span class="funcdesc"><p>Unnests the selected arrays into elements that are then aggregated into a single array.</p>
</span></td><td>Immutable</td></tr>
<tr><td><a name="array_cat_agg"></a><code>array_cat_agg(arg1: <a href="timestamp.html">timestamptz</a>[]) &rarr; <a href="timestamp.html">timestamptz</a>[]</code></td><td><span class="funcdesc"><p>Unnests the selected arrays into elements that are then aggregated into a single array.</p>
</span></td><td>Immutable</td></tr>
<tr><td><a name="array_cat_agg"></a><code>array_cat_agg(arg1: <a href="uuid.html">uuid</a>[]) &rarr; <a href="uuid.html">uuid</a>[]</code></td><td><span class="funcdesc"><p>Unnests the selected arrays into elements that are then aggregated into a single array.</p>
</span></td><td>Immutable</td></tr>
<tr><td><a name="array_cat_agg"></a><code>array_cat_agg(arg1: anyenum[]) &rarr; anyenum[]</code></td><td><span class="funcdesc"><p>Unnests the selected arrays into elements that are then aggregated into a single array.</p>
</span></td><td>Immutable</td></tr>
<tr><td><a name="array_cat_agg"></a><code>array_cat_agg(arg1: box2d[]) &rarr; box2d[]</code></td><td><span class="funcdesc"><p>Unnests the selected arrays into elements that are then aggregated into a single array.</p>
</span></td><td>Immutable</td></tr>
<tr><td><a name="array_cat_agg"></a><code>array_cat_agg(arg1: geography[]) &rarr; geography[]</code></td><td><span class="funcdesc"><p>Unnests the selected arrays into elements that are then aggregated into a single array.</p>
</span></td><td>Immutable</td></tr>
<tr><td><a name="array_cat_agg"></a><code>array_cat_agg(arg1: geometry[]) &rarr; geometry[]</code></td><td><span class="funcdesc"><p>Unnests the selected arrays into elements that are then aggregated into a single array.</p>
</span></td><td>Immutable</td></tr>
<tr><td><a name="array_cat_agg"></a><code>array_cat_agg(arg1: jsonb[]) &rarr; jsonb[]</code></td><td><span class="funcdesc"><p>Unnests the selected arrays into elements that are then aggregated into a single array.</p>
</span></td><td>Immutable</td></tr>
<tr><td><a name="array_cat_agg"></a><code>array_cat_agg(arg1: oid[]) &rarr; oid[]</code></td><td><span class="funcdesc"><p>Unnests the selected arrays into elements that are then aggregated into a single array.</p>
</span></td><td>Immutable</td></tr>
<tr><td><a name="array_cat_agg"></a><code>array_cat_agg(arg1: timetz[]) &rarr; timetz[]</code></td><td><span class="funcdesc"><p>Unnests the selected arrays into elements that are then aggregated into a single array.</p>
</span></td><td>Immutable</td></tr>
<tr><td><a name="array_cat_agg"></a><code>array_cat_agg(arg1: tuple[]) &rarr; tuple[]</code></td><td><span class="funcdesc"><p>Unnests the selected arrays into elements that are then aggregated into a single array.</p>
</span></td><td>Immutable</td></tr>
<tr><td><a name="array_cat_agg"></a><code>array_cat_agg(arg1: varbit[]) &rarr; varbit[]</code></td><td><span class="funcdesc"><p>Unnests the selected arrays into elements that are then aggregated into a single array.</p>
</span></td><td>Immutable</td></tr>
<tr><td><a name="avg"></a><code>avg(arg1: <a href="decimal.html">decimal</a>) &rarr; <a href="decimal.html">decimal</a></code></td><td><span class="funcdesc"><p>Calculates the average of the selected values.</p>
</span></td><td>Immutable</td></tr>
<tr><td><a name="avg"></a><code>avg(arg1: <a href="float.html">float</a>) &rarr; <a href="float.html">float</a></code></td><td><span class="funcdesc"><p>Calculates the average of the selected values.</p>
Expand Down
3 changes: 1 addition & 2 deletions pkg/sql/crdb_internal.go
Original file line number Diff line number Diff line change
Expand Up @@ -5723,7 +5723,7 @@ SELECT
crdb_internal.merge_statement_stats(array_agg(DISTINCT statistics)),
max(sampled_plan),
aggregation_interval,
array_remove(array_agg(index_rec), NULL) AS index_recommendations
array_remove(array_cat_agg(index_recommendations), NULL) AS index_recommendations
FROM (
SELECT
aggregated_ts,
Expand Down Expand Up @@ -5753,7 +5753,6 @@ FROM (
FROM
system.statement_statistics
)
LEFT JOIN LATERAL unnest(index_recommendations) AS index_rec ON true
GROUP BY
aggregated_ts,
fingerprint_id,
Expand Down
4 changes: 3 additions & 1 deletion pkg/sql/distsql/columnar_operators_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ var aggregateFuncToNumArguments = map[execinfrapb.AggregatorSpec_Func]int{
execinfrapb.FinalCovarSamp: 1,
execinfrapb.FinalCorr: 1,
execinfrapb.FinalSqrdiff: 3,
execinfrapb.ArrayCatAgg: 1,
}

// TestAggregateFuncToNumArguments ensures that all aggregate functions are
Expand Down Expand Up @@ -261,7 +262,8 @@ func TestAggregatorAgainstProcessor(t *testing.T) {
execinfrapb.StExtent,
execinfrapb.StUnion,
execinfrapb.StCollect,
execinfrapb.ArrayAgg:
execinfrapb.ArrayAgg,
execinfrapb.ArrayCatAgg:
for _, typ := range aggFnInputTypes {
if typ.Family() == types.TupleFamily || (typ.Family() == types.ArrayFamily && typ.ArrayContents().Family() == types.TupleFamily) {
invalid = true
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/execinfrapb/aggregate_funcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,4 +73,5 @@ const (
FinalCovarSamp = AggregatorSpec_FINAL_COVAR_SAMP
FinalCorr = AggregatorSpec_FINAL_CORR
FinalSqrdiff = AggregatorSpec_FINAL_SQRDIFF
ArrayCatAgg = AggregatorSpec_ARRAY_CAT_AGG
)
1 change: 1 addition & 0 deletions pkg/sql/execinfrapb/processors_sql.proto
Original file line number Diff line number Diff line change
Expand Up @@ -852,6 +852,7 @@ message AggregatorSpec {
FINAL_COVAR_SAMP = 58;
FINAL_CORR = 59;
FINAL_SQRDIFF = 60;
ARRAY_CAT_AGG = 61;
}

enum Type {
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/function_resolver_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -367,7 +367,7 @@ CREATE FUNCTION sc1.lower(a STRING) RETURNS STRING IMMUTABLE LANGUAGE SQL AS $$
exprStr: "lower('HI')",
searchPath: []string{"sc1", "sc2"},
expectedFuncBody: "",
expectedFuncOID: 831,
expectedFuncOID: 853,
desiredType: types.String,
},
{
Expand Down
82 changes: 69 additions & 13 deletions pkg/sql/logictest/testdata/logic_test/aggregate
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@ subtest other

statement ok
CREATE TABLE kv (
k INT PRIMARY KEY,
v INT,
w INT,
s STRING,
i INTERVAL
k INT PRIMARY KEY,
v INT,
w INT,
s STRING,
i INTERVAL,
arr INT[]
)

# Aggregate functions return NULL if there are no rows.
Expand All @@ -31,6 +32,11 @@ SELECT array_agg(1) FROM kv
----
NULL

query T
SELECT array_cat_agg('{1}'::INT[]) FROM kv
----
NULL

query T
SELECT json_agg(1) FROM kv
----
Expand Down Expand Up @@ -60,6 +66,11 @@ SELECT array_agg(v) FROM kv
----
NULL

query T
SELECT array_cat_agg(arr) FROM kv
----
NULL

query T
SELECT json_agg(v) FROM kv
----
Expand All @@ -85,6 +96,17 @@ SELECT array_agg(1)
----
{1}

query T
SELECT array_cat_agg('{1}'::INT[])
----
{1}

# Array-concatenating empty arrays results in an empty array.
query T
SELECT array_cat_agg(arr) FROM [SELECT ('{}'::INT[]) AS arr FROM generate_series(1, 3)];
----
{}

query T
SELECT json_agg(1)
----
Expand Down Expand Up @@ -164,12 +186,12 @@ SELECT (SELECT COALESCE(max(1), 0) FROM generate_series(1,0))

statement OK
INSERT INTO kv VALUES
(1, 2, 3, 'a', '1min'),
(3, 4, 5, 'a', '2sec'),
(5, NULL, 5, NULL, NULL),
(6, 2, 3, 'b', '1ms'),
(7, 2, 2, 'b', '4 days'),
(8, 4, 2, 'A', '3 years')
(1, 2, 3, 'a', '1min', '{1, 2, NULL}'),
(3, 4, 5, 'a', '2sec', '{3, 4, 5}'),
(5, NULL, 5, NULL, NULL, NULL),
(6, 2, 3, 'b', '1ms', '{6, 2, 3}'),
(7, 2, 2, 'b', '4 days', '{7, 2, 2}'),
(8, 4, 2, 'A', '3 years', '{NULL, 4, 2}')

# Aggregate functions trigger aggregation and computation for every row even when applied to a constant.
# NB: The XOR result is 00 because \x01 is XOR'd an even number of times.
Expand All @@ -186,6 +208,11 @@ SELECT array_agg(1) FROM kv
----
{1,1,1,1,1,1}

query T
SELECT array_cat_agg('{1, 2}'::INT[]) FROM kv
----
{1,2,1,2,1,2,1,2,1,2,1,2}

query T
SELECT json_agg(1) FROM kv
----
Expand Down Expand Up @@ -594,11 +621,26 @@ SELECT array_agg(k) || 1 FROM (SELECT k FROM kv ORDER BY k)
----
{1,3,5,6,7,8,1}

query T
SELECT array_cat_agg(arr) FROM (SELECT arr FROM kv ORDER BY k)
----
{1,2,NULL,3,4,5,6,2,3,7,2,2,NULL,4,2}

query T
SELECT array_agg(s) FROM kv WHERE s IS NULL
----
{NULL}

query T
SELECT array_cat_agg(arr) FROM kv WHERE arr IS NULL
----
NULL

query TTT
SELECT array_cat_agg(arr ORDER BY k), array_cat_agg(NULL::INT[]), array_cat_agg('{NULL, NULL}'::INT[]) FROM kv WHERE arr IS NOT NULL
----
{1,2,NULL,3,4,5,6,2,3,7,2,2,NULL,4,2} NULL {NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL}

query T
SELECT json_agg(s) FROM kv WHERE s IS NULL
----
Expand Down Expand Up @@ -2866,7 +2908,8 @@ statement ok
CREATE TABLE tab (
col1 INT PRIMARY KEY,
col2 INT,
col3 STRING
col3 STRING,
arr INT[]
)

# Ordered aggregations when there are no rows.
Expand All @@ -2876,20 +2919,33 @@ SELECT array_agg(col1 ORDER BY col2) FROM TAB
NULL

statement ok
INSERT INTO tab VALUES (-3, 7, 'a'), (-2, 6, 'a'), (-1, 5, 'a'), (0, 7, 'b'), (1, 5, 'b'), (2, 6, 'b')
INSERT INTO tab VALUES (-3, 7, 'a', '{-3, 7}'), (-2, 6, 'a', '{-2, 6}'), (-1, 5, 'a', '{-1, 5}'),
(0, 7, 'b', '{0, 7}'), (1, 5, 'b', '{1, 5}'), (2, 6, 'b', '{2, 6}')

query T colnames
SELECT array_agg(col1 ORDER BY col1) FROM tab
----
array_agg
{-3,-2,-1,0,1,2}

query T colnames
SELECT array_cat_agg(arr ORDER BY col1) FROM tab
----
array_cat_agg
{-3,7,-2,6,-1,5,0,7,1,5,2,6}

query T colnames
SELECT array_agg(col1 ORDER BY col2*100+col1) FROM tab
----
array_agg
{-1,1,-2,2,-3,0}

query T colnames
SELECT array_cat_agg(arr ORDER BY col2*100+col1) FROM tab
----
array_cat_agg
{-1,5,1,5,-2,6,2,6,-3,7,0,7}

query T colnames
SELECT json_agg(col1 ORDER BY col1) FROM tab
----
Expand Down
Loading

0 comments on commit 02fb46e

Please sign in to comment.