From 7f749cfd9379c7acdfb0f0e198cc35a57d9e35c3 Mon Sep 17 00:00:00 2001 From: Quentin Pradet Date: Mon, 4 Nov 2024 14:56:16 +0400 Subject: [PATCH] Switch from Elasticsearch YAML tests to client tests (#2667) (cherry picked from commit 596e7cacc1c449f15745497ea67255c9e5ce0f91) --- .buildkite/pipeline.yml | 2 +- .buildkite/run-repository.sh | 1 + noxfile.py | 2 +- .../test_async/test_server/conftest.py | 16 +- .../test_server/test_rest_api_spec.py | 14 +- .../test_server/test_rest_api_spec.py | 194 ++++++------------ .../test_server/test_vectorstore/__init__.py | 2 +- .../test_vectorstore/test_vectorstore.py | 11 +- test_elasticsearch/utils.py | 104 ++++++---- 9 files changed, 157 insertions(+), 189 deletions(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index ff911719e..16bf81360 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -5,7 +5,7 @@ steps: env: PYTHON_VERSION: "{{ matrix.python }}" TEST_SUITE: "platinum" - STACK_VERSION: "8.11.0-SNAPSHOT" + STACK_VERSION: "8.16.0-SNAPSHOT" PYTHON_CONNECTION_CLASS: "{{ matrix.connection }}" NOX_SESSION: "{{ matrix.nox_session }}" matrix: diff --git a/.buildkite/run-repository.sh b/.buildkite/run-repository.sh index 2f1f3c263..ce9344e8d 100755 --- a/.buildkite/run-repository.sh +++ b/.buildkite/run-repository.sh @@ -43,6 +43,7 @@ docker run \ --env "TEST_SUITE=${TEST_SUITE}" \ --env "PYTHON_CONNECTION_CLASS=${PYTHON_CONNECTION_CLASS}" \ --env "TEST_TYPE=server" \ + --env "FORCE_COLOR=1" \ --name elasticsearch-py \ --rm \ elastic/elasticsearch-py \ diff --git a/noxfile.py b/noxfile.py index 600120bb3..2f9bc3322 100644 --- a/noxfile.py +++ b/noxfile.py @@ -49,7 +49,7 @@ def pytest_argv(): def test(session): session.install(".[dev]", env=INSTALL_ENV, silent=False) - session.run(*pytest_argv()) + session.run(*pytest_argv(), *session.posargs) @nox.session(python=["3.8", "3.12"]) diff --git a/test_elasticsearch/test_async/test_server/conftest.py b/test_elasticsearch/test_async/test_server/conftest.py index fc475bc75..623646e7d 100644 --- a/test_elasticsearch/test_async/test_server/conftest.py +++ b/test_elasticsearch/test_async/test_server/conftest.py @@ -26,7 +26,7 @@ @pytest_asyncio.fixture(scope="function") -async def async_client(elasticsearch_url): +async def async_client_factory(elasticsearch_url): if not hasattr(elasticsearch, "AsyncElasticsearch"): pytest.skip("test requires 'AsyncElasticsearch' and aiohttp to be installed") @@ -36,11 +36,17 @@ async def async_client(elasticsearch_url): # event loops (one per test!) client = None try: - client = elasticsearch.AsyncElasticsearch( - elasticsearch_url, request_timeout=3, ca_certs=CA_CERTS - ) + client = elasticsearch.AsyncElasticsearch(elasticsearch_url, ca_certs=CA_CERTS) yield client finally: if client: - wipe_cluster(client) await client.close() + + +@pytest.fixture(scope="function") +def async_client(async_client_factory): + try: + yield async_client_factory + finally: + # Wipe the cluster clean after every test execution. + wipe_cluster(async_client_factory) diff --git a/test_elasticsearch/test_async/test_server/test_rest_api_spec.py b/test_elasticsearch/test_async/test_server/test_rest_api_spec.py index eee2364f6..c48262b61 100644 --- a/test_elasticsearch/test_async/test_server/test_rest_api_spec.py +++ b/test_elasticsearch/test_async/test_server/test_rest_api_spec.py @@ -130,7 +130,9 @@ async def run_do(self, action): headers.pop("Authorization") method, args = list(action.items())[0] - args["headers"] = headers + + if headers: + args["headers"] = headers # locate api endpoint for m in method.split("."): @@ -239,15 +241,17 @@ async def _feature_enabled(self, name): @pytest_asyncio.fixture(scope="function") -def async_runner(async_client): - return AsyncYamlRunner(async_client) +def async_runner(async_client_factory): + return AsyncYamlRunner(async_client_factory) if RUN_ASYNC_REST_API_TESTS: @pytest.mark.parametrize("test_spec", YAML_TEST_SPECS) async def test_rest_api_spec(test_spec, async_runner): - if test_spec.get("skip", False): - pytest.skip("Manually skipped in 'SKIP_TESTS'") + if test_spec.get("fail", False): + pytest.xfail("Manually marked as failing in 'FAILING_TESTS'") + elif test_spec.get("skip", False): + pytest.xfail("Manually skipped") async_runner.use_spec(test_spec) await async_runner.run() diff --git a/test_elasticsearch/test_server/test_rest_api_spec.py b/test_elasticsearch/test_server/test_rest_api_spec.py index 6ede3b753..058daa121 100644 --- a/test_elasticsearch/test_server/test_rest_api_spec.py +++ b/test_elasticsearch/test_server/test_rest_api_spec.py @@ -32,12 +32,10 @@ import urllib3 import yaml -from elasticsearch import ApiError, Elasticsearch, ElasticsearchWarning, RequestError +from elasticsearch import ApiError, ElasticsearchWarning, RequestError from elasticsearch._sync.client.utils import _base64_auth_header from elasticsearch.compat import string_types -from ..utils import CA_CERTS, es_url, parse_version - # some params had to be changed in python, keep track of them so we can rename # those in the tests accordingly PARAMS_RENAMES = {"from": "from_"} @@ -70,66 +68,37 @@ } # broken YAML tests on some releases -SKIP_TESTS = { - # Warning about date_histogram.interval deprecation is raised randomly - "search/aggregation/250_moving_fn[1]", - # body: null - "indices/simulate_index_template/10_basic[2]", - # No ML node with sufficient capacity / random ML failing - "ml/start_stop_datafeed", - "ml/post_data", - "ml/jobs_crud", - "ml/datafeeds_crud", - "ml/set_upgrade_mode", - "ml/reset_job[2]", - "ml/jobs_get_stats", - "ml/get_datafeed_stats", - "ml/get_trained_model_stats", - "ml/delete_job_force", - "ml/jobs_get_result_overall_buckets", - "ml/bucket_correlation_agg[0]", - "ml/job_groups", - "transform/transforms_stats_continuous[0]", - # Fails bad request instead of 404? - "ml/inference_crud", - # rollup/security_tests time out? - "rollup/security_tests", - # Our TLS certs are custom - "ssl/10_basic[0]", - # Our user is custom - "users/10_basic[3]", - # License warning not sent? - "license/30_enterprise_license[0]", - # Shards/snapshots aren't right? - "searchable_snapshots/10_usage[1]", - # flaky data streams? - "data_stream/10_basic[1]", - "data_stream/80_resolve_index_data_streams[1]", - # bad formatting? - "cat/allocation/10_basic", - "runtime_fields/10_keyword[8]", - # service account number not right? - "service_accounts/10_basic[1]", - # doesn't use 'contains' properly? - "xpack/10_basic[0]", - "privileges/40_get_user_privs[0]", - "privileges/40_get_user_privs[1]", - "features/get_features/10_basic[0]", - "features/reset_features/10_basic[0]", - # bad use of 'is_false'? - "indices/get_alias/10_basic[22]", - # unique usage of 'set' - "indices/stats/50_disk_usage[0]", - "indices/stats/60_field_usage[0]", - # actual Elasticsearch failure? - "transform/transforms_stats", - "transform/transforms_cat_apis", - "transform/transforms_update", +FAILING_TESTS = { + # ping has a custom implementation in Python and returns a boolean + "ping/ping", + # Not investigated yet + "cat/aliases", + "cat/fielddata", + "cluster/delete_voting_config_exclusions", + "cluster/voting_config_exclusions", + "entsearch/10_basic", + "indices/clone", + "indices/resolve_cluster", + "indices/settings", + "indices/split", + "indices/simulate_template_stack", + "logstash/10_basic", + "machine_learning/30_trained_model_stack", + "machine_learning/jobs_crud", + "scroll/10_basic", + "security/10_api_key_basic", + "transform/10_basic", +} +SKIPPED_TESTS = { + # Timeouts + # https://github.com/elastic/elasticsearch-serverless-python/issues/63 + "cluster/cluster_info[0]", + "inference/10_basic[0]", + "machine_learning/20_trained_model[0]", } XPACK_FEATURES = None -ES_VERSION = None RUN_ASYNC_REST_API_TESTS = os.environ.get("PYTHON_CONNECTION_CLASS") == "requests" FALSEY_VALUES = ("", None, False, 0, 0.0) @@ -173,16 +142,6 @@ def teardown(self): self.section("teardown") self.run_code(self._teardown_code) - def es_version(self): - global ES_VERSION - if ES_VERSION is None: - version_string = (self.client.info())["version"]["number"] - if "." not in version_string: - return () - version = version_string.strip().split(".") - ES_VERSION = tuple(int(v) if v.isdigit() else 999 for v in version) - return ES_VERSION - def section(self, name): print(("=" * 10) + " " + name + " " + ("=" * 10)) @@ -331,16 +290,6 @@ def run_skip(self, skip): continue pytest.skip(f"feature '{feature}' is not supported") - if "version" in skip: - version, reason = skip["version"], skip["reason"] - if version == "all": - pytest.skip(reason) - min_version, _, max_version = version.partition("-") - min_version = parse_version(min_version.strip()) or (0,) - max_version = parse_version(max_version.strip()) or (999,) - if min_version <= (self.es_version()) <= max_version: - pytest.skip(reason) - def run_gt(self, action): for key, value in action.items(): value = self._resolve(value) @@ -516,8 +465,9 @@ def _skip_intentional_type_errors(self, e: Exception): @pytest.fixture(scope="function") -def sync_runner(sync_client): - return YamlRunner(sync_client) +def sync_runner(sync_client_factory): + # sync_client_factory does not wipe the cluster between tests + return YamlRunner(sync_client_factory) # Source: https://stackoverflow.com/a/37958106/5763213 @@ -546,62 +496,36 @@ def remove_implicit_resolver(cls, tag_to_remove): try: # Construct the HTTP and Elasticsearch client http = urllib3.PoolManager(retries=10) - client = Elasticsearch(es_url(), request_timeout=3, ca_certs=CA_CERTS) - - # Make a request to Elasticsearch for the build hash, we'll be looking for - # an artifact with this same hash to download test specs for. - client_info = client.info() - version_number = client_info["version"]["number"] - build_hash = client_info["version"]["build_hash"] - - # Now talk to the artifacts API with the 'STACK_VERSION' environment variable - resp = http.request( - "GET", - f"https://artifacts-api.elastic.co/v1/versions/{version_number}", + + yaml_tests_url = ( + "https://api.github.com/repos/elastic/elasticsearch-clients-tests/zipball/main" ) - resp = json.loads(resp.data.decode("utf-8")) - - # Look through every build and see if one matches the commit hash - # we're looking for. If not it's okay, we'll just use the latest and - # hope for the best! - builds = resp["version"]["builds"] - for build in builds: - if build["projects"]["elasticsearch"]["commit_hash"] == build_hash: - break - else: - build = builds[0] # Use the latest - - # Now we're looking for the 'rest-api-spec--sources.jar' file - # to download and extract in-memory. - packages = build["projects"]["elasticsearch"]["packages"] - for package in packages: - if re.match(r"rest-resources-zip-.*\.zip", package): - package_url = packages[package]["url"] - break - else: - raise RuntimeError( - f"Could not find the package 'rest-resources-zip-*.zip' in build {build!r}" - ) # Download the zip and start reading YAML from the files in memory - package_zip = zipfile.ZipFile(io.BytesIO(http.request("GET", package_url).data)) + package_zip = zipfile.ZipFile(io.BytesIO(http.request("GET", yaml_tests_url).data)) + for yaml_file in package_zip.namelist(): - if not re.match(r"^rest-api-spec/test/.*\.ya?ml$", yaml_file): + if not re.match(r"^.*\/tests\/.*\.ya?ml$", yaml_file): continue yaml_tests = list( yaml.load_all(package_zip.read(yaml_file), Loader=NoDatesSafeLoader) ) - # Each file may have a "test" named 'setup' or 'teardown', - # these sets of steps should be run at the beginning and end - # of every other test within the file so we do one pass to capture those. - setup_steps = teardown_steps = None + # Each file has a `requires` section with `serverless` and `stack` + # boolean entries indicating whether the test should run with + # serverless, stack or both. Additionally, each file may have a section + # named 'setup' or 'teardown', these sets of steps should be run at the + # beginning and end of every other test within the file so we do one + # pass to capture those. + requires = setup_steps = teardown_steps = None test_numbers_and_steps = [] test_number = 0 for yaml_test in yaml_tests: test_name, test_step = yaml_test.popitem() - if test_name == "setup": + if test_name == "requires": + requires = test_step + elif test_name == "setup": setup_steps = test_step elif test_name == "teardown": teardown_steps = test_step @@ -609,14 +533,17 @@ def remove_implicit_resolver(cls, tag_to_remove): test_numbers_and_steps.append((test_number, test_step)) test_number += 1 + if not requires["stack"]: + continue + # Now we combine setup, teardown, and test_steps into # a set of pytest.param() instances for test_number, test_step in test_numbers_and_steps: - # Build the id from the name of the YAML file and - # the number within that file. Most important step - # is to remove most of the file path prefixes and - # the .yml suffix. - pytest_test_name = yaml_file.rpartition(".")[0].replace(".", "/") + # Build the id from the name of the YAML file and the number within + # that file. Most important step is to remove most of the file path + # prefixes and the .yml suffix. + test_path = "/".join(yaml_file.split("/")[2:]) + pytest_test_name = test_path.rpartition(".")[0].replace(".", "/") for prefix in ("rest-api-spec/", "test/", "free/", "platinum/"): if pytest_test_name.startswith(prefix): pytest_test_name = pytest_test_name[len(prefix) :] @@ -628,7 +555,9 @@ def remove_implicit_resolver(cls, tag_to_remove): "teardown": teardown_steps, } # Skip either 'test_name' or 'test_name[x]' - if pytest_test_name in SKIP_TESTS or pytest_param_id in SKIP_TESTS: + if pytest_test_name in FAILING_TESTS or pytest_param_id in FAILING_TESTS: + pytest_param["fail"] = True + elif pytest_test_name in SKIPPED_TESTS or pytest_param_id in SKIPPED_TESTS: pytest_param["skip"] = True YAML_TEST_SPECS.append(pytest.param(pytest_param, id=pytest_param_id)) @@ -645,12 +574,13 @@ def _pytest_param_sort_key(param: pytest.param) -> Tuple[Union[str, int], ...]: # Sort the tests by ID so they're grouped together nicely. YAML_TEST_SPECS = sorted(YAML_TEST_SPECS, key=_pytest_param_sort_key) - if not RUN_ASYNC_REST_API_TESTS: @pytest.mark.parametrize("test_spec", YAML_TEST_SPECS) def test_rest_api_spec(test_spec, sync_runner): - if test_spec.get("skip", False): - pytest.skip("Manually skipped in 'SKIP_TESTS'") + if test_spec.get("fail", False): + pytest.xfail("Manually marked as failing in 'FAILING_TESTS'") + elif test_spec.get("skip", False): + pytest.skip("Manually marked as skipped") sync_runner.use_spec(test_spec) sync_runner.run() diff --git a/test_elasticsearch/test_server/test_vectorstore/__init__.py b/test_elasticsearch/test_server/test_vectorstore/__init__.py index 87710976a..1b480978c 100644 --- a/test_elasticsearch/test_server/test_vectorstore/__init__.py +++ b/test_elasticsearch/test_server/test_vectorstore/__init__.py @@ -69,7 +69,7 @@ def embed_documents(self, texts: List[str]) -> List[List[float]]: if text not in self.known_texts: self.known_texts.append(text) vector = [float(1.0)] * (self.dimensionality - 1) + [ - float(self.known_texts.index(text)) + float(self.known_texts.index(text) + 1) ] out_vectors.append(vector) return out_vectors diff --git a/test_elasticsearch/test_server/test_vectorstore/test_vectorstore.py b/test_elasticsearch/test_server/test_vectorstore/test_vectorstore.py index 820746acd..6e07e9887 100644 --- a/test_elasticsearch/test_server/test_vectorstore/test_vectorstore.py +++ b/test_elasticsearch/test_server/test_vectorstore/test_vectorstore.py @@ -72,7 +72,7 @@ def assert_query(query_body: dict, query: Optional[str]) -> dict: "filter": [], "k": 1, "num_candidates": 50, - "query_vector": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0], + "query_vector": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0], } } return query_body @@ -80,7 +80,7 @@ def assert_query(query_body: dict, query: Optional[str]) -> dict: store = VectorStore( index=index, retrieval_strategy=DenseVectorStrategy(), - embedding_service=FakeEmbeddings(), + embedding_service=ConsistentFakeEmbeddings(), client=sync_client, ) @@ -97,7 +97,7 @@ def test_search_without_metadata_async( store = VectorStore( index=index, retrieval_strategy=DenseVectorStrategy(), - embedding_service=FakeEmbeddings(), + embedding_service=ConsistentFakeEmbeddings(), client=sync_client, ) @@ -950,6 +950,11 @@ def test_metadata_mapping(self, sync_client: Elasticsearch, index: str) -> None: "type": "dense_vector", "dims": 10, "index": True, + "index_options": { + "ef_construction": 100, + "m": 16, + "type": "int8_hnsw", + }, "similarity": "cosine", } diff --git a/test_elasticsearch/utils.py b/test_elasticsearch/utils.py index abff98a55..4a26aa4c0 100644 --- a/test_elasticsearch/utils.py +++ b/test_elasticsearch/utils.py @@ -26,7 +26,6 @@ ConnectionError, Elasticsearch, NotFoundError, - RequestError, ) SOURCE_DIR = Path(__file__).absolute().parent.parent @@ -235,6 +234,7 @@ def wipe_searchable_snapshot_indices(client): def wipe_xpack_templates(client): + # Delete index templates (including legacy) templates = [ x.strip() for x in client.cat.templates(h="name").split("\n") if x.strip() ] @@ -247,26 +247,15 @@ def wipe_xpack_templates(client): if f"index_template [{template}] missing" in str(e): client.indices.delete_index_template(name=template) - # Delete component templates, need to retry because sometimes - # indices aren't cleaned up in time before we issue the delete. + # Delete component templates templates = client.cluster.get_component_template()["component_templates"] templates_to_delete = [ - template for template in templates if not is_xpack_template(template["name"]) + template["name"] + for template in templates + if not is_xpack_template(template["name"]) ] - for _ in range(3): - for template in list(templates_to_delete): - try: - client.cluster.delete_component_template( - name=template["name"], - ) - except RequestError: - pass - else: - templates_to_delete.remove(template) - - if not templates_to_delete: - break - time.sleep(0.01) + if templates_to_delete: + client.cluster.delete_component_template(name=",".join(templates_to_delete)) def wipe_ilm_policies(client): @@ -292,6 +281,9 @@ def wipe_ilm_policies(client): ".monitoring-8-ilm-policy", } and "-history-ilm-polcy" not in policy + and "-meta-ilm-policy" not in policy + and "-data-ilm-policy" not in policy + and "@lifecycle" not in policy ): client.ilm.delete_lifecycle(name=policy) @@ -419,38 +411,68 @@ def wait_for_cluster_state_updates_to_finish(client, timeout=30): def is_xpack_template(name): - if name.startswith(".monitoring-"): + if name.startswith("."): return True - elif name.startswith(".watch") or name.startswith(".triggered_watches"): + elif name.startswith("behavioral_analytics-events"): return True - elif name.startswith(".data-frame-"): + elif name.startswith("elastic-connectors-"): return True - elif name.startswith(".ml-"): + elif name.startswith("entities_v1_"): return True - elif name.startswith(".transform-"): + elif name.endswith("@ilm"): return True - elif name.startswith(".deprecation-"): + elif name.endswith("@template"): return True - if name in { - ".watches", - "security_audit_log", - ".slm-history", - ".async-search", - "saml-service-provider", + + return name in { + "apm-10d@lifecycle", + "apm-180d@lifecycle", + "apm-390d@lifecycle", + "apm-90d@lifecycle", + "apm@mappings", + "apm@settings", + "data-streams-mappings", + "data-streams@mappings", + "elastic-connectors", + "ecs@dynamic_templates", + "ecs@mappings", + "ilm-history-7", + "kibana-reporting@settings", "logs", - "logs-settings", + "logs-apm.error@mappings", + "logs-apm@settings", "logs-mappings", + "logs@mappings", + "logs-settings", + "logs@settings", "metrics", - "metrics-settings", + "metrics-apm@mappings", + "metrics-apm.service_destination@mappings", + "metrics-apm.service_summary@mappings", + "metrics-apm.service_transaction@mappings", + "metrics-apm@settings", + "metrics-apm.transaction@mappings", "metrics-mappings", + "metrics@mappings", + "metrics-settings", + "metrics@settings", + "metrics-tsdb-settings", + "metrics@tsdb-settings", + "search-acl-filter", "synthetics", - "synthetics-settings", "synthetics-mappings", - ".snapshot-blob-cache", - "ilm-history", - "logstash-index-template", - "security-index-template", - "data-streams-mappings", - }: - return True - return False + "synthetics@mappings", + "synthetics-settings", + "synthetics@settings", + "traces-apm@mappings", + "traces-apm.rum@mappings", + "traces@mappings", + "traces@settings", + # otel + "metrics-otel@mappings", + "semconv-resource-to-ecs@mappings", + "traces-otel@mappings", + "ecs-tsdb@mappings", + "logs-otel@mappings", + "otel@mappings", + }