Skip to content

Add support for full text queries and hybrid search queries #303

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 27 commits into from
Apr 4, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
9500853
wip: adding Text and Hybrid queries
justin-cechmanek Mar 24, 2025
3fa93ff
tokenizer helper function
rbs333 Mar 24, 2025
3b8e2b6
adds TextQuery class
justin-cechmanek Mar 26, 2025
7e0f24d
adds nltk requirement
justin-cechmanek Mar 27, 2025
49b2aba
makes stopwords user defined in TextQuery
justin-cechmanek Mar 27, 2025
f7a4b9e
adds hybrid aggregation query and tests. modifies search index to acc…
justin-cechmanek Apr 1, 2025
6e007f7
Validate passed-in Redis clients (#296)
abrookins Mar 21, 2025
298d055
Add batch_search to sync Index (#305)
abrookins Mar 29, 2025
94eea52
Support client-side schema validation using Pydantic (#304)
tylerhutcherson Mar 31, 2025
123ee22
Run API tests once (#306)
abrookins Mar 31, 2025
9025bfe
Add option to normalize vector distances on query (#298)
rbs333 Mar 31, 2025
ae69ae9
adds TextQuery class
justin-cechmanek Mar 26, 2025
e403934
makes stopwords user defined in TextQuery
justin-cechmanek Mar 27, 2025
9348583
adds hybrid aggregation query and tests. modifies search index to acc…
justin-cechmanek Apr 1, 2025
10f4474
cleans text and hybrid tests
justin-cechmanek Apr 2, 2025
018fe9f
merge conflicts
justin-cechmanek Apr 2, 2025
3518121
updates lock file
justin-cechmanek Apr 2, 2025
091148c
mypy cannot find defined methods
justin-cechmanek Apr 2, 2025
9069dd5
updates nltk requirement
justin-cechmanek Apr 2, 2025
c5ad696
I swear I have changed this 4 times now
justin-cechmanek Apr 2, 2025
ea5d087
wip: debugging aggregations and filters
justin-cechmanek Apr 2, 2025
1672ea3
fixes query string parsing. adds more tests
justin-cechmanek Apr 3, 2025
f32067a
test now checks default dialect is 2
justin-cechmanek Apr 3, 2025
9b1dc18
makes methods private
justin-cechmanek Apr 3, 2025
ff44041
abstracts AggregationQuery to follow BaseQuery calls in search index
justin-cechmanek Apr 3, 2025
c0be24f
updates docstrings
justin-cechmanek Apr 3, 2025
aae3949
fixes docstring
justin-cechmanek Apr 4, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
555 changes: 242 additions & 313 deletions poetry.lock

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ tenacity = ">=8.2.2"
tabulate = "^0.9.0"
ml-dtypes = "^0.4.0"
python-ulid = "^3.0.0"
nltk = { version = "^3.8.1", optional = true }
jsonpath-ng = "^1.5.0"

openai = { version = "^1.13.0", optional = true }
sentence-transformers = { version = "^3.4.0", optional = true }
scipy = [
Expand All @@ -58,6 +58,7 @@ mistralai = ["mistralai"]
voyageai = ["voyageai"]
ranx = ["ranx"]
bedrock = ["boto3"]
nltk = ["nltk"]

[tool.poetry.group.dev.dependencies]
black = "^25.1.0"
Expand Down
88 changes: 78 additions & 10 deletions redisvl/index/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
Union,
)

from redisvl.redis.utils import convert_bytes, make_dict
from redisvl.utils.utils import deprecated_argument, deprecated_function, sync_wrapper

if TYPE_CHECKING:
Expand All @@ -39,7 +40,14 @@
SchemaValidationError,
)
from redisvl.index.storage import BaseStorage, HashStorage, JsonStorage
from redisvl.query import BaseQuery, BaseVectorQuery, CountQuery, FilterQuery
from redisvl.query import (
AggregationQuery,
BaseQuery,
BaseVectorQuery,
CountQuery,
FilterQuery,
HybridQuery,
)
from redisvl.query.filter import FilterExpression
from redisvl.redis.connection import (
RedisConnectionFactory,
Expand Down Expand Up @@ -138,6 +146,34 @@ def _process(doc: "Document") -> Dict[str, Any]:
return [_process(doc) for doc in results.docs]


def process_aggregate_results(
results: "AggregateResult", query: AggregationQuery, storage_type: StorageType
) -> List[Dict[str, Any]]:
"""Convert an aggregate reslt object into a list of document dictionaries.

This function processes results from Redis, handling different storage
types and query types. For JSON storage with empty return fields, it
unpacks the JSON object while retaining the document ID. The 'payload'
field is also removed from all resulting documents for consistency.

Args:
results (AggregarteResult): The aggregart results from Redis.
query (AggregationQuery): The aggregation query object used for the aggregation.
storage_type (StorageType): The storage type of the search
index (json or hash).

Returns:
List[Dict[str, Any]]: A list of processed document dictionaries.
"""

def _process(row):
result = make_dict(convert_bytes(row))
result.pop("__score", None)
return result

return [_process(r) for r in results.rows]


class BaseSearchIndex:
"""Base search engine class"""

Expand Down Expand Up @@ -650,6 +686,17 @@ def fetch(self, id: str) -> Optional[Dict[str, Any]]:
return convert_bytes(obj[0])
return None

def _aggregate(self, aggregation_query: AggregationQuery) -> List[Dict[str, Any]]:
"""Execute an aggretation query and processes the results."""
results = self.aggregate(
aggregation_query, query_params=aggregation_query.params # type: ignore[attr-defined]
)
return process_aggregate_results(
results,
query=aggregation_query,
storage_type=self.schema.index.storage_type,
)

def aggregate(self, *args, **kwargs) -> "AggregateResult":
"""Perform an aggregation operation against the index.

Expand Down Expand Up @@ -772,14 +819,14 @@ def _query(self, query: BaseQuery) -> List[Dict[str, Any]]:
results = self.search(query.query, query_params=query.params)
return process_results(results, query=query, schema=self.schema)

def query(self, query: BaseQuery) -> List[Dict[str, Any]]:
def query(self, query: Union[BaseQuery, AggregationQuery]) -> List[Dict[str, Any]]:
"""Execute a query on the index.

This method takes a BaseQuery object directly, runs the search, and
This method takes a BaseQuery or AggregationQuery object directly, and
handles post-processing of the search.

Args:
query (BaseQuery): The query to run.
query (Union[BaseQuery, AggregateQuery]): The query to run.

Returns:
List[Result]: A list of search results.
Expand All @@ -797,7 +844,10 @@ def query(self, query: BaseQuery) -> List[Dict[str, Any]]:
results = index.query(query)

"""
return self._query(query)
if isinstance(query, AggregationQuery):
return self._aggregate(query)
else:
return self._query(query)

def paginate(self, query: BaseQuery, page_size: int = 30) -> Generator:
"""Execute a given query against the index and return results in
Expand Down Expand Up @@ -1303,6 +1353,19 @@ async def fetch(self, id: str) -> Optional[Dict[str, Any]]:
return convert_bytes(obj[0])
return None

async def _aggregate(
self, aggregation_query: AggregationQuery
) -> List[Dict[str, Any]]:
"""Execute an aggretation query and processes the results."""
results = await self.aggregate(
aggregation_query, query_params=aggregation_query.params # type: ignore[attr-defined]
)
return process_aggregate_results(
results,
query=aggregation_query,
storage_type=self.schema.index.storage_type,
)

async def aggregate(self, *args, **kwargs) -> "AggregateResult":
"""Perform an aggregation operation against the index.

Expand Down Expand Up @@ -1426,14 +1489,16 @@ async def _query(self, query: BaseQuery) -> List[Dict[str, Any]]:
results = await self.search(query.query, query_params=query.params)
return process_results(results, query=query, schema=self.schema)

async def query(self, query: BaseQuery) -> List[Dict[str, Any]]:
async def query(
self, query: Union[BaseQuery, AggregationQuery]
) -> List[Dict[str, Any]]:
"""Asynchronously execute a query on the index.

This method takes a BaseQuery object directly, runs the search, and
handles post-processing of the search.
This method takes a BaseQuery or AggregationQuery object directly, runs
the search, and handles post-processing of the search.

Args:
query (BaseQuery): The query to run.
query (Union[BaseQuery, AggregateQuery]): The query to run.

Returns:
List[Result]: A list of search results.
Expand All @@ -1450,7 +1515,10 @@ async def query(self, query: BaseQuery) -> List[Dict[str, Any]]:

results = await index.query(query)
"""
return await self._query(query)
if isinstance(query, AggregationQuery):
return await self._aggregate(query)
else:
return await self._query(query)

async def paginate(self, query: BaseQuery, page_size: int = 30) -> AsyncGenerator:
"""Execute a given query against the index and return results in
Expand Down
5 changes: 5 additions & 0 deletions redisvl/query/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
from redisvl.query.aggregate import AggregationQuery, HybridQuery
from redisvl.query.query import (
BaseQuery,
BaseVectorQuery,
CountQuery,
FilterQuery,
RangeQuery,
TextQuery,
VectorQuery,
VectorRangeQuery,
)
Expand All @@ -16,4 +18,7 @@
"RangeQuery",
"VectorRangeQuery",
"CountQuery",
"TextQuery",
"AggregationQuery",
"HybridQuery",
]
Loading