Skip to content

Commit

Permalink
Merge pull request #99323 from jordanlewis/backport23.1-97685-97697
Browse files Browse the repository at this point in the history
release-23.1: sql,tsearch: add default_text_search_config and ts_rank
  • Loading branch information
jordanlewis authored Mar 23, 2023
2 parents 6f9c186 + da1f1ef commit 39a47ac
Show file tree
Hide file tree
Showing 19 changed files with 700 additions and 26 deletions.
26 changes: 21 additions & 5 deletions docs/generated/sql/functions.md
Original file line number Diff line number Diff line change
Expand Up @@ -888,16 +888,32 @@ available replica will error.</p>
<table>
<thead><tr><th>Function &rarr; Returns</th><th>Description</th><th>Volatility</th></tr></thead>
<tbody>
<tr><td><a name="phraseto_tsquery"></a><code>phraseto_tsquery(config: <a href="string.html">string</a>, text: <a href="string.html">string</a>) &rarr; tsquery</code></td><td><span class="funcdesc"><p>Converts text to a tsquery, normalizing words according to the specified or default configuration. The &lt;-&gt; operator is inserted between each token in the input.</p>
<tr><td><a name="phraseto_tsquery"></a><code>phraseto_tsquery(config: <a href="string.html">string</a>, text: <a href="string.html">string</a>) &rarr; tsquery</code></td><td><span class="funcdesc"><p>Converts text to a tsquery, normalizing words according to the specified configuration. The &lt;-&gt; operator is inserted between each token in the input.</p>
</span></td><td>Immutable</td></tr>
<tr><td><a name="plainto_tsquery"></a><code>plainto_tsquery(config: <a href="string.html">string</a>, text: <a href="string.html">string</a>) &rarr; tsquery</code></td><td><span class="funcdesc"><p>Converts text to a tsquery, normalizing words according to the specified or default configuration. The &amp; operator is inserted between each token in the input.</p>
<tr><td><a name="phraseto_tsquery"></a><code>phraseto_tsquery(text: <a href="string.html">string</a>) &rarr; tsquery</code></td><td><span class="funcdesc"><p>Converts text to a tsquery, normalizing words according to the default configuration. The &lt;-&gt; operator is inserted between each token in the input.</p>
</span></td><td>Stable</td></tr>
<tr><td><a name="plainto_tsquery"></a><code>plainto_tsquery(config: <a href="string.html">string</a>, text: <a href="string.html">string</a>) &rarr; tsquery</code></td><td><span class="funcdesc"><p>Converts text to a tsquery, normalizing words according to the specified configuration. The &amp; operator is inserted between each token in the input.</p>
</span></td><td>Immutable</td></tr>
<tr><td><a name="to_tsquery"></a><code>to_tsquery(config: <a href="string.html">string</a>, text: <a href="string.html">string</a>) &rarr; tsquery</code></td><td><span class="funcdesc"><p>Converts the input text into a tsquery by normalizing each word in the input according to the specified or default configuration. The input must already be formatted like a tsquery, in other words, subsequent tokens must be connected by a tsquery operator (&amp;, |, &lt;-&gt;, !).</p>
<tr><td><a name="plainto_tsquery"></a><code>plainto_tsquery(text: <a href="string.html">string</a>) &rarr; tsquery</code></td><td><span class="funcdesc"><p>Converts text to a tsquery, normalizing words according to the default configuration. The &amp; operator is inserted between each token in the input.</p>
</span></td><td>Stable</td></tr>
<tr><td><a name="to_tsquery"></a><code>to_tsquery(config: <a href="string.html">string</a>, text: <a href="string.html">string</a>) &rarr; tsquery</code></td><td><span class="funcdesc"><p>Converts the input text into a tsquery by normalizing each word in the input according to the specified configuration. The input must already be formatted like a tsquery, in other words, subsequent tokens must be connected by a tsquery operator (&amp;, |, &lt;-&gt;, !).</p>
</span></td><td>Immutable</td></tr>
<tr><td><a name="to_tsvector"></a><code>to_tsvector(config: <a href="string.html">string</a>, text: <a href="string.html">string</a>) &rarr; tsvector</code></td><td><span class="funcdesc"><p>Converts text to a tsvector, normalizing words according to the specified or default configuration. Position information is included in the result.</p>
<tr><td><a name="to_tsquery"></a><code>to_tsquery(text: <a href="string.html">string</a>) &rarr; tsquery</code></td><td><span class="funcdesc"><p>Converts the input text into a tsquery by normalizing each word in the input according to the default configuration. The input must already be formatted like a tsquery, in other words, subsequent tokens must be connected by a tsquery operator (&amp;, |, &lt;-&gt;, !).</p>
</span></td><td>Stable</td></tr>
<tr><td><a name="to_tsvector"></a><code>to_tsvector(config: <a href="string.html">string</a>, text: <a href="string.html">string</a>) &rarr; tsvector</code></td><td><span class="funcdesc"><p>Converts text to a tsvector, normalizing words according to the specified configuration. Position information is included in the result.</p>
</span></td><td>Immutable</td></tr>
<tr><td><a name="to_tsvector"></a><code>to_tsvector(text: <a href="string.html">string</a>) &rarr; tsvector</code></td><td><span class="funcdesc"><p>Converts text to a tsvector, normalizing words according to the default configuration. Position information is included in the result.</p>
</span></td><td>Stable</td></tr>
<tr><td><a name="ts_parse"></a><code>ts_parse(parser_name: <a href="string.html">string</a>, document: <a href="string.html">string</a>) &rarr; tuple{int AS tokid, string AS token}</code></td><td><span class="funcdesc"><p>ts_parse parses the given document and returns a series of records, one for each token produced by parsing. Each record includes a tokid showing the assigned token type and a token which is the text of the token.</p>
</span></td><td>Stable</td></tr></tbody>
</span></td><td>Stable</td></tr>
<tr><td><a name="ts_rank"></a><code>ts_rank(vector: tsvector, query: tsquery) &rarr; float4</code></td><td><span class="funcdesc"><p>Ranks vectors based on the frequency of their matching lexemes.</p>
</span></td><td>Immutable</td></tr>
<tr><td><a name="ts_rank"></a><code>ts_rank(vector: tsvector, query: tsquery, normalization: <a href="int.html">int</a>) &rarr; float4</code></td><td><span class="funcdesc"><p>Ranks vectors based on the frequency of their matching lexemes.</p>
</span></td><td>Immutable</td></tr>
<tr><td><a name="ts_rank"></a><code>ts_rank(weights: <a href="float.html">float</a>[], vector: tsvector, query: tsquery) &rarr; float4</code></td><td><span class="funcdesc"><p>Ranks vectors based on the frequency of their matching lexemes.</p>
</span></td><td>Immutable</td></tr>
<tr><td><a name="ts_rank"></a><code>ts_rank(weights: <a href="float.html">float</a>[], vector: tsvector, query: tsquery, normalization: <a href="int.html">int</a>) &rarr; float4</code></td><td><span class="funcdesc"><p>Ranks vectors based on the frequency of their matching lexemes.</p>
</span></td><td>Immutable</td></tr></tbody>
</table>

### Fuzzy String Matching functions
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -546,6 +546,7 @@ go_library(
"//pkg/util/tracing",
"//pkg/util/tracing/collector",
"//pkg/util/tracing/tracingpb",
"//pkg/util/tsearch",
"//pkg/util/uint128",
"//pkg/util/uuid",
"@com_github_cockroachdb_apd_v3//:apd",
Expand Down
4 changes: 4 additions & 0 deletions pkg/sql/exec_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -3490,6 +3490,10 @@ func (m *sessionDataMutator) SetAllowRoleMembershipsToChangeDuringTransaction(va
m.data.AllowRoleMembershipsToChangeDuringTransaction = val
}

// SetDefaultTextSearchConfig stores val in the DefaultTextSearchConfig field
// of the session data held by this mutator. No validation is performed here;
// the value is assigned as given.
// NOTE(review): presumably this backs the default_text_search_config session
// variable and callers validate the configuration name first — confirm
// against the session variable definition.
func (m *sessionDataMutator) SetDefaultTextSearchConfig(val string) {
m.data.DefaultTextSearchConfig = val
}

// Utility functions related to scrubbing sensitive information on SQL Stats.

// quantizeCounts ensures that the Count field in the
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/logictest/testdata/logic_test/information_schema
Original file line number Diff line number Diff line change
Expand Up @@ -5196,6 +5196,7 @@ declare_cursor_statement_timeout_enabled on
default_int_size 8
default_table_access_method heap
default_tablespace ·
default_text_search_config pg_catalog.english
default_transaction_isolation serializable
default_transaction_priority normal
default_transaction_quality_of_service regular
Expand Down
3 changes: 3 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/pg_catalog
Original file line number Diff line number Diff line change
Expand Up @@ -2685,6 +2685,7 @@ declare_cursor_statement_timeout_enabled on NULL
default_int_size 8 NULL NULL NULL string
default_table_access_method heap NULL NULL NULL string
default_tablespace · NULL NULL NULL string
default_text_search_config pg_catalog.english NULL NULL NULL string
default_transaction_isolation serializable NULL NULL NULL string
default_transaction_priority normal NULL NULL NULL string
default_transaction_quality_of_service regular NULL NULL NULL string
Expand Down Expand Up @@ -2835,6 +2836,7 @@ declare_cursor_statement_timeout_enabled on NULL
default_int_size 8 NULL user NULL 8 8
default_table_access_method heap NULL user NULL heap heap
default_tablespace · NULL user NULL · ·
default_text_search_config pg_catalog.english NULL user NULL pg_catalog.english pg_catalog.english
default_transaction_isolation serializable NULL user NULL default default
default_transaction_priority normal NULL user NULL normal normal
default_transaction_quality_of_service regular NULL user NULL regular regular
Expand Down Expand Up @@ -2981,6 +2983,7 @@ declare_cursor_statement_timeout_enabled NULL NULL NULL
default_int_size NULL NULL NULL NULL NULL
default_table_access_method NULL NULL NULL NULL NULL
default_tablespace NULL NULL NULL NULL NULL
default_text_search_config NULL NULL NULL NULL NULL
default_transaction_isolation NULL NULL NULL NULL NULL
default_transaction_priority NULL NULL NULL NULL NULL
default_transaction_quality_of_service NULL NULL NULL NULL NULL
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/logictest/testdata/logic_test/show_source
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ declare_cursor_statement_timeout_enabled on
default_int_size 8
default_table_access_method heap
default_tablespace ·
default_text_search_config pg_catalog.english
default_transaction_isolation serializable
default_transaction_priority normal
default_transaction_quality_of_service regular
Expand Down
59 changes: 59 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/tsvector
Original file line number Diff line number Diff line change
Expand Up @@ -275,3 +275,62 @@ SELECT * FROM to_tsquery('simple', 'a | b & c <-> d')

query error syntax
SELECT * FROM to_tsquery('simple', 'Hello this is a parsi-ng t.est 1.234 4 case324')

# Test default variants of the to_ts* functions.

query T
SHOW default_text_search_config
----
pg_catalog.english

query T
SELECT to_tsvector('Hello I am a potato')
----
'hello':1 'potato':5

statement error text search configuration \"blah\" does not exist
SET default_text_search_config = 'blah'

statement ok
SET default_text_search_config = 'spanish'

query T
SELECT to_tsvector('Hello I am a potato')
----
'am':3 'hell':1 'i':2 'potat':5

query TT
SELECT to_tsvector('english', ''), to_tsvector('english', 'and the')
----
· ·

statement error doesn't contain lexemes
SELECT to_tsquery('english', 'the')

statement ok
CREATE TABLE sentences (sentence text, v TSVECTOR AS (to_tsvector('english', sentence)) STORED, INVERTED INDEX (v));
INSERT INTO sentences VALUES
('Future users of large data banks must be protected from having to know how the data is organized in the machine (the internal representation).'),
('A prompting service which supplies such information is not a satisfactory solution.'),
('Activities of users at terminals and most application programs should remain unaffected when the internal representation of data is changed and even when some aspects of the external representation
are changed.'),
('Changes in data representation will often be needed as a result of changes in query, update, and report traffic and natural growth in the types of stored information.'),
('Existing noninferential, formatted data systems provide users with tree-structured files or slightly more general network models of the data.'),
('In Section 1, inadequacies of these models are discussed.'),
('A model based on n-ary relations, a normal form for data base relations, and the concept of a universal data sublanguage are introduced.'),
('In Section 2, certain operations on relations (other than logical inference) are discussed and applied to the problems of redundancy and consistency in the user’s model.')

query FFFFT
SELECT
ts_rank(v, query) AS rank,
ts_rank(ARRAY[0.2, 0.3, 0.5, 0.9]:::FLOAT[], v, query) AS wrank,
ts_rank(v, query, 2|8) AS nrank,
ts_rank(ARRAY[0.3, 0.4, 0.6, 0.95]:::FLOAT[], v, query, 1|2|4|8|16|32) AS wnrank,
v
FROM sentences, to_tsquery('english', 'relation') query
WHERE query @@ v
ORDER BY rank DESC
LIMIT 10
----
0.075990885 0.15198177 0.00042217158 8.555783e-05 'ari':6 'base':3,13 'concept':17 'data':12,21 'form':10 'introduc':24 'model':2 'n':5 'normal':9 'relat':7,14 'sublanguag':22 'univers':20
0.06079271 0.12158542 0.0003101669 6.095758e-05 '2':3 'appli':15 'certain':4 'consist':22 'discuss':13 'infer':11 'logic':10 'model':27 'oper':5 'problem':18 'redund':20 'relat':7 'section':2 'user':25
1 change: 0 additions & 1 deletion pkg/sql/sem/builtins/builtins.go
Original file line number Diff line number Diff line change
Expand Up @@ -3765,7 +3765,6 @@ value if you rely on the HLC for accuracy.`,
"jsonb_to_tsvector": makeBuiltin(tree.FunctionProperties{UnsupportedWithIssue: 7821, Category: builtinconstants.CategoryFullTextSearch}),
"ts_delete": makeBuiltin(tree.FunctionProperties{UnsupportedWithIssue: 7821, Category: builtinconstants.CategoryFullTextSearch}),
"ts_filter": makeBuiltin(tree.FunctionProperties{UnsupportedWithIssue: 7821, Category: builtinconstants.CategoryFullTextSearch}),
"ts_rank": makeBuiltin(tree.FunctionProperties{UnsupportedWithIssue: 7821, Category: builtinconstants.CategoryFullTextSearch}),
"ts_rank_cd": makeBuiltin(tree.FunctionProperties{UnsupportedWithIssue: 7821, Category: builtinconstants.CategoryFullTextSearch}),
"ts_rewrite": makeBuiltin(tree.FunctionProperties{UnsupportedWithIssue: 7821, Category: builtinconstants.CategoryFullTextSearch}),
"tsquery_phrase": makeBuiltin(tree.FunctionProperties{UnsupportedWithIssue: 7821, Category: builtinconstants.CategoryFullTextSearch}),
Expand Down
8 changes: 8 additions & 0 deletions pkg/sql/sem/builtins/fixed_oids.go
Original file line number Diff line number Diff line change
Expand Up @@ -2369,6 +2369,14 @@ var builtinOidsArray = []string{
2395: `array_cat_agg(arg1: anyenum[]) -> anyenum[]`,
2396: `array_cat_agg(arg1: tuple[]) -> tuple[]`,
2397: `crdb_internal.update_tenant_resource_limits(tenant_name: string, available_request_units: float, refill_rate: float, max_burst_request_units: float, as_of: timestamp, as_of_consumed_request_units: float) -> int`,
2398: `to_tsquery(text: string) -> tsquery`,
2399: `to_tsvector(text: string) -> tsvector`,
2400: `phraseto_tsquery(text: string) -> tsquery`,
2401: `plainto_tsquery(text: string) -> tsquery`,
2402: `ts_rank(weights: float[], vector: tsvector, query: tsquery, normalization: int) -> float4`,
2403: `ts_rank(vector: tsvector, query: tsquery, normalization: int) -> float4`,
2404: `ts_rank(vector: tsvector, query: tsquery) -> float4`,
2405: `ts_rank(weights: float[], vector: tsvector, query: tsquery) -> float4`,
}

var builtinOidsBySignature map[string]oid.Oid
Expand Down
Loading

0 comments on commit 39a47ac

Please sign in to comment.