Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added support for search (search_after parameter) #859

Merged
merged 3 commits into from
Dec 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
## [Unreleased]
### Added
- Added option to pass custom headers to 'AWSV4SignerAsyncAuth' ([863](https://github.com/opensearch-project/opensearch-py/pull/863))
- Added sync and async samples that use the `search_after` parameter ([859](https://github.com/opensearch-project/opensearch-py/pull/859))
### Updated APIs
### Changed
### Deprecated
Expand Down
112 changes: 112 additions & 0 deletions samples/search/search_after_async.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
#!/usr/bin/env python

# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.

import asyncio
import os

from opensearchpy import AsyncOpenSearch


async def main() -> None:
    """
    Paginate through search results with search_after using AsyncOpenSearch.

    This sample uses asyncio and AsyncOpenSearch to asynchronously connect to
    a local OpenSearch cluster, creates a temporary "movies" index, indexes
    110 sample documents, and then pages through the documents matching
    "dark knight" ten at a time. The sort values of the last hit on each page
    are passed as the search_after parameter of the next request.

    The index is deleted once it has been created, and the client is always
    closed, even if one of the steps raises.
    """

    index_name = "movies"

    # connect to OpenSearch
    host = "localhost"
    port = 9200
    auth = (
        "admin",
        os.getenv("OPENSEARCH_PASSWORD", "admin"),
    )  # For testing only. Don't store credentials in code.

    client = AsyncOpenSearch(
        hosts=[{"host": host, "port": port}],
        http_auth=auth,
        use_ssl=True,
        verify_certs=False,
        ssl_show_warn=False,
    )

    # The outer try/finally guarantees the client is closed even when index
    # creation or deletion fails.
    try:
        # create an index
        await client.indices.create(index=index_name)

        # The inner try/finally only runs once the index was created by this
        # sample, so a failed create never deletes a pre-existing index.
        try:
            # add a dataset of 110 movies: 15 that match the query below ...
            for i in range(15):
                await client.index(
                    index=index_name,
                    id=i,
                    body={
                        "title": f"The Dark Knight {i}",
                        "director": "Christopher Nolan",
                        "year": 2008 + i,
                    },
                )

            # ... and 95 that do not
            for i in range(95):
                await client.index(
                    index=index_name,
                    id=i + 15,
                    body={
                        "title": f"Movie Title {i + 15}",
                        "director": f"Director {i + 15}",
                        "year": 1950 + i + 15,
                    },
                )

            # refresh the index to make the documents searchable
            await client.indices.refresh(index=index_name)

            # define the search query with sorting and pagination options;
            # search_after requires a sort, and the matching documents here
            # all have distinct "year" values, so "year" alone orders them
            # unambiguously
            search_body = {
                "query": {"match": {"title": "dark knight"}},
                "sort": [{"year": {"order": "asc"}}],
                "size": 10,
            }

            page = 1
            total_hits = 0
            while True:
                # execute the search
                response = await client.search(index=index_name, body=search_body)
                hits = response["hits"]["hits"]

                # break if no more results
                if not hits:
                    break

                print(f"\nPage {page}:")

                for hit in hits:
                    print(hit)
                    total_hits += 1

                # get the sort values of the last document for the next page
                last_sort = hits[-1]["sort"]
                search_body["search_after"] = last_sort
                page += 1

            print("\nPagination Summary:")
            print(f"Total pages: {page - 1}")
            print(f"Total hits: {total_hits}")
            print(f"Results per page: {search_body['size']}")
        finally:
            # delete the index
            await client.indices.delete(index=index_name)
    finally:
        await client.close()


if __name__ == "__main__":
    asyncio.run(main())
109 changes: 109 additions & 0 deletions samples/search/search_after_sync.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
#!/usr/bin/env python

# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#
# Modifications Copyright OpenSearch Contributors. See
# GitHub history for details.

import os

from opensearchpy import OpenSearch


def main() -> None:
    """
    Paginate through search results with search_after using OpenSearch.

    This sample connects to a local OpenSearch cluster, creates a temporary
    "movies" index, indexes 110 sample documents, and then pages through the
    documents matching "dark knight" ten at a time. The sort values of the
    last hit on each page are passed as the search_after parameter of the
    next request.

    The index is deleted once it has been created, and the client is always
    closed, even if one of the steps raises.
    """

    index_name = "movies"

    # connect to OpenSearch
    host = "localhost"
    port = 9200
    auth = (
        "admin",
        os.getenv("OPENSEARCH_PASSWORD", "admin"),
    )  # For testing only. Don't store credentials in code.

    client = OpenSearch(
        hosts=[{"host": host, "port": port}],
        http_auth=auth,
        use_ssl=True,
        verify_certs=False,
        ssl_show_warn=False,
    )

    # The outer try/finally guarantees the client's connections are released
    # even when index creation or deletion fails.
    try:
        # create an index
        client.indices.create(index=index_name)

        # The inner try/finally only runs once the index was created by this
        # sample, so a failed create never deletes a pre-existing index.
        try:
            # add a dataset of 110 movies: 15 that match the query below ...
            for i in range(15):
                client.index(
                    index=index_name,
                    id=i,
                    body={
                        "title": f"The Dark Knight {i}",
                        "director": "Christopher Nolan",
                        "year": 2008 + i,
                    },
                )

            # ... and 95 that do not
            for i in range(95):
                client.index(
                    index=index_name,
                    id=i + 15,
                    body={
                        "title": f"Movie Title {i + 15}",
                        "director": f"Director {i + 15}",
                        "year": 1950 + i + 15,
                    },
                )

            # refresh the index to make the documents searchable
            client.indices.refresh(index=index_name)

            # define the search query with sorting and pagination options;
            # search_after requires a sort, and the matching documents here
            # all have distinct "year" values, so "year" alone orders them
            # unambiguously
            search_body = {
                "query": {"match": {"title": "dark knight"}},
                "sort": [{"year": {"order": "asc"}}],
                "size": 10,
            }

            page = 1
            total_hits = 0
            while True:
                # execute the search
                response = client.search(index=index_name, body=search_body)
                hits = response["hits"]["hits"]

                # break if no more results
                if not hits:
                    break

                print(f"\nPage {page}:")

                for hit in hits:
                    print(hit)
                    total_hits += 1

                # get the sort values of the last document for the next page
                last_sort = hits[-1]["sort"]
                search_body["search_after"] = last_sort
                page += 1

            print("\nPagination Summary:")
            print(f"Total pages: {page - 1}")
            print(f"Total hits: {total_hits}")
            print(f"Results per page: {search_body['size']}")
        finally:
            # delete the index
            client.indices.delete(index=index_name)
    finally:
        client.close()


if __name__ == "__main__":
    main()
Loading