Skip to content

Commit 4f9801c

Browse files
committed
Support missing and empty values in search (#3231)
Add support for indexing and searching missing and empty values. Currently there are some limitations on the server side; for example, empty values are supported only for TEXT and TAG fields.
1 parent 9738105 commit 4f9801c

File tree

3 files changed

+205
-44
lines changed

3 files changed

+205
-44
lines changed

redis/commands/search/commands.py

+34-33
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
from ._util import to_string
1010
from .aggregation import AggregateRequest, AggregateResult, Cursor
1111
from .document import Document
12+
from .field import Field
13+
from .indexDefinition import IndexDefinition
1214
from .query import Query
1315
from .result import Result
1416
from .suggestion import SuggestionParser
@@ -151,44 +153,43 @@ def batch_indexer(self, chunk_size=100):
151153

152154
def create_index(
153155
self,
154-
fields,
155-
no_term_offsets=False,
156-
no_field_flags=False,
157-
stopwords=None,
158-
definition=None,
156+
fields: List[Field],
157+
no_term_offsets: bool = False,
158+
no_field_flags: bool = False,
159+
stopwords: Optional[List[str]] = None,
160+
definition: Optional[IndexDefinition] = None,
159161
max_text_fields=False,
160162
temporary=None,
161-
no_highlight=False,
162-
no_term_frequencies=False,
163-
skip_initial_scan=False,
163+
no_highlight: bool = False,
164+
no_term_frequencies: bool = False,
165+
skip_initial_scan: bool = False,
164166
):
165167
"""
166-
Create the search index. The index must not already exist.
167-
168-
### Parameters:
169-
170-
- **fields**: a list of TextField or NumericField objects
171-
- **no_term_offsets**: If true, we will not save term offsets in
172-
the index
173-
- **no_field_flags**: If true, we will not save field flags that
174-
allow searching in specific fields
175-
- **stopwords**: If not None, we create the index with this custom
176-
stopword list. The list can be empty
177-
- **max_text_fields**: If true, we will encode indexes as if there
178-
were more than 32 text fields which allows you to add additional
179-
fields (beyond 32).
180-
- **temporary**: Create a lightweight temporary index which will
181-
expire after the specified period of inactivity (in seconds). The
182-
internal idle timer is reset whenever the index is searched or added to.
183-
- **no_highlight**: If true, disabling highlighting support.
184-
Also implied by no_term_offsets.
185-
- **no_term_frequencies**: If true, we avoid saving the term frequencies
186-
in the index.
187-
- **skip_initial_scan**: If true, we do not scan and index.
188-
189-
For more information see `FT.CREATE <https://redis.io/commands/ft.create>`_.
190-
""" # noqa
168+
Creates the search index. The index must not already exist.
169+
170+
For more information, see https://redis.io/commands/ft.create/
171+
172+
Args:
173+
fields: A list of Field objects.
174+
no_term_offsets: If `true`, term offsets will not be saved in the index.
175+
no_field_flags: If true, field flags that allow searching in specific fields
176+
will not be saved.
177+
stopwords: If provided, the index will be created with this custom stopword
178+
list. The list can be empty.
179+
definition: If provided, the index will be created with this custom index
180+
definition.
181+
max_text_fields: If true, indexes will be encoded as if there were more than
182+
32 text fields, allowing for additional fields beyond 32.
183+
temporary: Creates a lightweight temporary index which will expire after the
184+
specified period of inactivity. The internal idle timer is reset
185+
whenever the index is searched or added to.
186+
no_highlight: If true, disables highlighting support. Also implied by
187+
`no_term_offsets`.
188+
no_term_frequencies: If true, term frequencies will not be saved in the
189+
index.
190+
skip_initial_scan: If true, the initial scan and indexing will be skipped.
191191
192+
"""
192193
args = [CREATE_CMD, self.index_name]
193194
if definition is not None:
194195
args += definition.args

redis/commands/search/field.py

+26
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@
44

55

66
class Field:
7+
"""
8+
A class representing a field in a document.
9+
"""
10+
711
NUMERIC = "NUMERIC"
812
TEXT = "TEXT"
913
WEIGHT = "WEIGHT"
@@ -14,15 +18,33 @@ class Field:
1418
NOINDEX = "NOINDEX"
1519
AS = "AS"
1620
GEOSHAPE = "GEOSHAPE"
21+
INDEX_MISSING = "INDEXMISSING"
22+
INDEX_EMPTY = "INDEXEMPTY"
1723

1824
def __init__(
1925
self,
2026
name: str,
2127
args: List[str] = None,
2228
sortable: bool = False,
2329
no_index: bool = False,
30+
index_missing: bool = False,
31+
index_empty: bool = False,
2432
as_name: str = None,
2533
):
34+
"""
35+
Create a new field object.
36+
37+
Args:
38+
name: The name of the field.
39+
args:
40+
sortable: If `True`, the field will be sortable.
41+
no_index: If `True`, the field will not be indexed.
42+
index_missing: If `True`, it will be possible to search for documents that
43+
have this field missing.
44+
index_empty: If `True`, it will be possible to search for documents that
45+
have this field empty.
46+
as_name: If provided, this alias will be used for the field.
47+
"""
2648
if args is None:
2749
args = []
2850
self.name = name
@@ -34,6 +56,10 @@ def __init__(
3456
self.args_suffix.append(Field.SORTABLE)
3557
if no_index:
3658
self.args_suffix.append(Field.NOINDEX)
59+
if index_missing:
60+
self.args_suffix.append(Field.INDEX_MISSING)
61+
if index_empty:
62+
self.args_suffix.append(Field.INDEX_EMPTY)
3763

3864
if no_index and not sortable:
3965
raise ValueError("Non-Sortable non-Indexable fields are ignored")

tests/test_search.py

+145-11
Original file line numberDiff line numberDiff line change
@@ -2107,7 +2107,7 @@ def test_geo_params(client):
21072107
params_dict = {"lat": "34.95126", "lon": "29.69465", "radius": 1000, "units": "km"}
21082108
q = Query("@g:[$lon $lat $radius $units]").dialect(2)
21092109
res = client.ft().search(q, query_params=params_dict)
2110-
_assert_geosearch_result(client, res, ["doc1", "doc2", "doc3"])
2110+
_assert_search_result(client, res, ["doc1", "doc2", "doc3"])
21112111

21122112

21132113
@pytest.mark.redismod
@@ -2124,13 +2124,13 @@ def test_geoshapes_query_intersects_and_disjoint(client):
21242124
Query("@g:[intersects $shape]").dialect(3),
21252125
query_params={"shape": "POLYGON((15 15, 75 15, 50 70, 20 40, 15 15))"},
21262126
)
2127-
_assert_geosearch_result(client, intersection, ["doc_point2", "doc_polygon1"])
2127+
_assert_search_result(client, intersection, ["doc_point2", "doc_polygon1"])
21282128

21292129
disjunction = client.ft().search(
21302130
Query("@g:[disjoint $shape]").dialect(3),
21312131
query_params={"shape": "POLYGON((15 15, 75 15, 50 70, 20 40, 15 15))"},
21322132
)
2133-
_assert_geosearch_result(client, disjunction, ["doc_point1", "doc_polygon2"])
2133+
_assert_search_result(client, disjunction, ["doc_point1", "doc_polygon2"])
21342134

21352135

21362136
@pytest.mark.redismod
@@ -2148,19 +2148,19 @@ def test_geoshapes_query_contains_and_within(client):
21482148
Query("@g:[contains $shape]").dialect(3),
21492149
query_params={"shape": "POINT(25 25)"},
21502150
)
2151-
_assert_geosearch_result(client, contains_a, ["doc_polygon1"])
2151+
_assert_search_result(client, contains_a, ["doc_polygon1"])
21522152

21532153
contains_b = client.ft().search(
21542154
Query("@g:[contains $shape]").dialect(3),
21552155
query_params={"shape": "POLYGON((24 24, 24 26, 25 25, 24 24))"},
21562156
)
2157-
_assert_geosearch_result(client, contains_b, ["doc_polygon1"])
2157+
_assert_search_result(client, contains_b, ["doc_polygon1"])
21582158

21592159
within = client.ft().search(
21602160
Query("@g:[within $shape]").dialect(3),
21612161
query_params={"shape": "POLYGON((15 15, 75 15, 50 70, 20 40, 15 15))"},
21622162
)
2163-
_assert_geosearch_result(client, within, ["doc_point2", "doc_polygon1"])
2163+
_assert_search_result(client, within, ["doc_point2", "doc_polygon1"])
21642164

21652165

21662166
@pytest.mark.redismod
@@ -2324,19 +2324,153 @@ def test_geoshape(client: redis.Redis):
23242324
q2 = Query("@geom:[CONTAINS $poly]").dialect(3)
23252325
qp2 = {"poly": "POLYGON((2 2, 2 50, 50 50, 50 2, 2 2))"}
23262326
result = client.ft().search(q1, query_params=qp1)
2327-
_assert_geosearch_result(client, result, ["small"])
2327+
_assert_search_result(client, result, ["small"])
23282328
result = client.ft().search(q2, query_params=qp2)
2329-
_assert_geosearch_result(client, result, ["small", "large"])
2329+
_assert_search_result(client, result, ["small", "large"])
23302330

23312331

2332-
def _assert_geosearch_result(client, result, expected_doc_ids):
2332+
@pytest.mark.redismod
2333+
def test_search_missing_fields(client):
2334+
definition = IndexDefinition(prefix=["property:"], index_type=IndexType.HASH)
2335+
2336+
fields = [
2337+
TextField("title", sortable=True),
2338+
TagField("features", index_missing=True),
2339+
TextField("description", index_missing=True),
2340+
]
2341+
2342+
client.ft().create_index(fields, definition=definition)
2343+
2344+
# All fields present
2345+
client.hset(
2346+
"property:1",
2347+
mapping={
2348+
"title": "Luxury Villa in Malibu",
2349+
"features": "pool,sea view,modern",
2350+
"description": "A stunning modern villa overlooking the Pacific Ocean.",
2351+
},
2352+
)
2353+
2354+
# Missing features
2355+
client.hset(
2356+
"property:2",
2357+
mapping={
2358+
"title": "Downtown Flat",
2359+
"description": "Modern flat in central Paris with easy access to metro.",
2360+
},
2361+
)
2362+
2363+
# Missing description
2364+
client.hset(
2365+
"property:3",
2366+
mapping={
2367+
"title": "Beachfront Bungalow",
2368+
"features": "beachfront,sun deck",
2369+
},
2370+
)
2371+
2372+
with pytest.raises(redis.exceptions.ResponseError) as e:
2373+
client.ft().search(
2374+
Query("ismissing(@title)").dialect(5).return_field("id").no_content()
2375+
)
2376+
assert "to be defined with 'INDEXMISSING'" in e.value.args[0]
2377+
2378+
res = client.ft().search(
2379+
Query("ismissing(@features)").dialect(5).return_field("id").no_content()
2380+
)
2381+
_assert_search_result(client, res, ["property:2"])
2382+
2383+
res = client.ft().search(
2384+
Query("-ismissing(@features)").dialect(5).return_field("id").no_content()
2385+
)
2386+
_assert_search_result(client, res, ["property:1", "property:3"])
2387+
2388+
res = client.ft().search(
2389+
Query("ismissing(@description)").dialect(5).return_field("id").no_content()
2390+
)
2391+
_assert_search_result(client, res, ["property:3"])
2392+
2393+
res = client.ft().search(
2394+
Query("-ismissing(@description)").dialect(5).return_field("id").no_content()
2395+
)
2396+
_assert_search_result(client, res, ["property:1", "property:2"])
2397+
2398+
2399+
@pytest.mark.redismod
2400+
def test_search_empty_fields(client):
2401+
definition = IndexDefinition(prefix=["property:"], index_type=IndexType.HASH)
2402+
2403+
fields = [
2404+
TextField("title", sortable=True),
2405+
TagField("features", index_empty=True),
2406+
TextField("description", index_empty=True),
2407+
]
2408+
2409+
client.ft().create_index(fields, definition=definition)
2410+
2411+
# All fields present
2412+
client.hset(
2413+
"property:1",
2414+
mapping={
2415+
"title": "Luxury Villa in Malibu",
2416+
"features": "pool,sea view,modern",
2417+
"description": "A stunning modern villa overlooking the Pacific Ocean.",
2418+
},
2419+
)
2420+
2421+
# Empty features
2422+
client.hset(
2423+
"property:2",
2424+
mapping={
2425+
"title": "Downtown Flat",
2426+
"features": "",
2427+
"description": "Modern flat in central Paris with easy access to metro.",
2428+
},
2429+
)
2430+
2431+
# Empty description
2432+
client.hset(
2433+
"property:3",
2434+
mapping={
2435+
"title": "Beachfront Bungalow",
2436+
"features": "beachfront,sun deck",
2437+
"description": "",
2438+
},
2439+
)
2440+
2441+
with pytest.raises(redis.exceptions.ResponseError) as e:
2442+
client.ft().search(
2443+
Query("@title:''").dialect(5).return_field("id").no_content()
2444+
)
2445+
assert "to be defined with `INDEXEMPTY`" in e.value.args[0]
2446+
2447+
res = client.ft().search(
2448+
Query("@features:{ }").dialect(5).return_field("id").no_content()
2449+
)
2450+
_assert_search_result(client, res, ["property:2"])
2451+
2452+
res = client.ft().search(
2453+
Query("-@features:{ }").dialect(5).return_field("id").no_content()
2454+
)
2455+
_assert_search_result(client, res, ["property:1", "property:3"])
2456+
2457+
res = client.ft().search(
2458+
Query("@description:''").dialect(5).return_field("id").no_content()
2459+
)
2460+
_assert_search_result(client, res, ["property:3"])
2461+
2462+
res = client.ft().search(
2463+
Query("-@description:''").dialect(5).return_field("id").no_content()
2464+
)
2465+
_assert_search_result(client, res, ["property:1", "property:2"])
2466+
2467+
2468+
def _assert_search_result(client, result, expected_doc_ids):
23332469
"""
23342470
Make sure the result of a geo search is as expected, taking into account the RESP
23352471
version being used.
23362472
"""
23372473
if is_resp2_connection(client):
23382474
assert set([doc.id for doc in result.docs]) == set(expected_doc_ids)
2339-
assert result.total == len(expected_doc_ids)
23402475
else:
23412476
assert set([doc["id"] for doc in result["results"]]) == set(expected_doc_ids)
2342-
assert result["total_results"] == len(expected_doc_ids)

0 commit comments

Comments
 (0)