diff --git a/examples/async/composite_agg.py b/examples/async/composite_agg.py index 0b27c10b..af9a01aa 100644 --- a/examples/async/composite_agg.py +++ b/examples/async/composite_agg.py @@ -19,7 +19,10 @@ import os from typing import Any, AsyncIterator, Dict, List, Optional, Union +from elasticsearch.helpers import async_bulk + from elasticsearch_dsl import A, Agg, AsyncSearch, Response, async_connections +from tests.test_integration.test_data import DATA, GIT_INDEX async def scan_aggs( @@ -56,8 +59,17 @@ async def run_search(**kwargs: Any) -> Response: async def main() -> None: # initiate the default connection to elasticsearch - async_connections.create_connection(hosts=[os.environ["ELASTICSEARCH_URL"]]) + client = async_connections.create_connection( + hosts=[os.environ["ELASTICSEARCH_URL"]] + ) + + # create the index and populate it with some data + # note that the dataset is imported from the library's test suite + await client.indices.delete(index="git", ignore_unavailable=True) + await client.indices.create(index="git", **GIT_INDEX) + await async_bulk(client, DATA, raise_on_error=True, refresh=True) + # run some aggregations on the data async for b in scan_aggs( AsyncSearch(index="git"), {"files": A("terms", field="files")}, diff --git a/examples/composite_agg.py b/examples/composite_agg.py index a0b992dd..e35103f9 100644 --- a/examples/composite_agg.py +++ b/examples/composite_agg.py @@ -18,7 +18,10 @@ import os from typing import Any, Dict, Iterator, List, Optional, Union +from elasticsearch.helpers import bulk + from elasticsearch_dsl import A, Agg, Response, Search, connections +from tests.test_integration.test_data import DATA, GIT_INDEX def scan_aggs( @@ -55,8 +58,15 @@ def run_search(**kwargs: Any) -> Response: def main() -> None: # initiate the default connection to elasticsearch - connections.create_connection(hosts=[os.environ["ELASTICSEARCH_URL"]]) + client = connections.create_connection(hosts=[os.environ["ELASTICSEARCH_URL"]]) + + # create the index and populate it with some data + # note that the dataset is imported from the library's test suite + client.indices.delete(index="git", ignore_unavailable=True) + client.indices.create(index="git", **GIT_INDEX) + bulk(client, DATA, raise_on_error=True, refresh=True) + # run some aggregations on the data for b in scan_aggs( Search(index="git"), {"files": A("terms", field="files")}, diff --git a/tests/test_integration/test_data.py b/tests/test_integration/test_data.py index 6cccf91b..1e80896a 100644 --- a/tests/test_integration/test_data.py +++ b/tests/test_integration/test_data.py @@ -19,99 +19,85 @@ from elasticsearch import Elasticsearch +user_mapping = { + "properties": {"name": {"type": "text", "fields": {"raw": {"type": "keyword"}}}} +} -def create_flat_git_index(client: Elasticsearch, index: str) -> None: - # we will use user on several places - user_mapping = { - "properties": {"name": {"type": "text", "fields": {"raw": {"type": "keyword"}}}} - } - - client.indices.create( - index=index, - body={ - "settings": { - # just one shard, no replicas for testing - "number_of_shards": 1, - "number_of_replicas": 0, - # custom analyzer for analyzing file paths - "analysis": { - "analyzer": { - "file_path": { - "type": "custom", - "tokenizer": "path_hierarchy", - "filter": ["lowercase"], - } - } - }, - }, - "mappings": { - "properties": { - "description": {"type": "text", "analyzer": "snowball"}, - "author": user_mapping, - "authored_date": {"type": "date"}, - "committer": user_mapping, - "committed_date": {"type": "date"}, - "parent_shas": {"type": "keyword"}, - "files": { - "type": "text", - "analyzer": "file_path", - "fielddata": True, - }, +FLAT_GIT_INDEX: Dict[str, Any] = { + "settings": { + # custom analyzer for analyzing file paths + "analysis": { + "analyzer": { + "file_path": { + "type": "custom", + "tokenizer": "path_hierarchy", + "filter": ["lowercase"], } + } + }, + }, + "mappings": { + "properties": { + "description": {"type": "text", "analyzer": "snowball"}, + "author": user_mapping, + "authored_date": {"type": "date"}, + "committer": user_mapping, + "committed_date": {"type": "date"}, + "parent_shas": {"type": "keyword"}, + "files": { + "type": "text", + "analyzer": "file_path", + "fielddata": True, }, + } + }, +} + +GIT_INDEX: Dict[str, Any] = { + "settings": { + # custom analyzer for analyzing file paths + "analysis": { + "analyzer": { + "file_path": { + "type": "custom", + "tokenizer": "path_hierarchy", + "filter": ["lowercase"], + } + } }, - ) + }, + "mappings": { + "properties": { + # common fields + "description": {"type": "text", "analyzer": "snowball"}, + "commit_repo": {"type": "join", "relations": {"repo": "commit"}}, + # COMMIT mappings + "author": user_mapping, + "authored_date": {"type": "date"}, + "committer": user_mapping, + "committed_date": {"type": "date"}, + "parent_shas": {"type": "keyword"}, + "files": { + "type": "text", + "analyzer": "file_path", + "fielddata": True, + }, + # REPO mappings + "is_public": {"type": "boolean"}, + "owner": user_mapping, + "created_at": {"type": "date"}, + "tags": {"type": "keyword"}, + } + }, +} -def create_git_index(client: Elasticsearch, index: str) -> None: - # we will use user on several places - user_mapping = { - "properties": {"name": {"type": "text", "fields": {"raw": {"type": "keyword"}}}} - } +def create_flat_git_index(client: Elasticsearch, index: str) -> None: + client.indices.create(index=index, body=FLAT_GIT_INDEX) - client.indices.create( - index=index, - body={ - "settings": { - # just one shard, no replicas for testing - "number_of_shards": 1, - "number_of_replicas": 0, - # custom analyzer for analyzing file paths - "analysis": { - "analyzer": { - "file_path": { - "type": "custom", - "tokenizer": "path_hierarchy", - "filter": ["lowercase"], - } - } - }, - }, - "mappings": { - "properties": { - # common fields - "description": {"type": "text", "analyzer": "snowball"}, - "commit_repo": {"type": "join", "relations": {"repo": "commit"}}, - # COMMIT mappings - "author": user_mapping, - "authored_date": {"type": "date"}, - "committer": user_mapping, - "committed_date": {"type": "date"}, - "parent_shas": {"type": "keyword"}, - "files": { - "type": "text", - "analyzer": "file_path", - "fielddata": True, - }, - # REPO mappings - "is_public": {"type": "boolean"}, - "owner": user_mapping, - "created_at": {"type": "date"}, - "tags": {"type": "keyword"}, - } - }, - }, - ) + +def create_git_index(client: Elasticsearch, index: str) -> None: + client.indices.create(index=index, body=GIT_INDEX) DATA = [ diff --git a/utils/run-unasync.py b/utils/run-unasync.py index 64149eb0..bae0c7a6 100644 --- a/utils/run-unasync.py +++ b/utils/run-unasync.py @@ -64,6 +64,7 @@ def main(check=False): "async_connections": "connections", "async_scan": "scan", "async_simulate": "simulate", + "async_bulk": "bulk", "async_mock_client": "mock_client", "async_client": "client", "async_data_client": "data_client",