From 99ade4318ca330ad170870560cc631a59fb5f7da Mon Sep 17 00:00:00 2001 From: Martin Gaievski Date: Wed, 14 Dec 2022 09:12:11 -0800 Subject: [PATCH] Adding release configs for lucene filtering (#663) * Adding release configs for lucene filtering Signed-off-by: Martin Gaievski --- .../filtering/relaxed-filter/index.json | 26 +++++++++++ .../relaxed-filter/relaxed-filter-spec.json | 42 ++++++++++++++++++ .../relaxed-filter/relaxed-filter-test.yml | 33 ++++++++++++++ .../filtering/restrictive-filter/index.json | 26 +++++++++++ .../restrictive-filter-spec.json | 44 +++++++++++++++++++ .../restrictive-filter-test.yml | 33 ++++++++++++++ 6 files changed, 204 insertions(+) create mode 100644 benchmarks/perf-tool/release-configs/lucene-hnsw/filtering/relaxed-filter/index.json create mode 100644 benchmarks/perf-tool/release-configs/lucene-hnsw/filtering/relaxed-filter/relaxed-filter-spec.json create mode 100644 benchmarks/perf-tool/release-configs/lucene-hnsw/filtering/relaxed-filter/relaxed-filter-test.yml create mode 100644 benchmarks/perf-tool/release-configs/lucene-hnsw/filtering/restrictive-filter/index.json create mode 100644 benchmarks/perf-tool/release-configs/lucene-hnsw/filtering/restrictive-filter/restrictive-filter-spec.json create mode 100644 benchmarks/perf-tool/release-configs/lucene-hnsw/filtering/restrictive-filter/restrictive-filter-test.yml diff --git a/benchmarks/perf-tool/release-configs/lucene-hnsw/filtering/relaxed-filter/index.json b/benchmarks/perf-tool/release-configs/lucene-hnsw/filtering/relaxed-filter/index.json new file mode 100644 index 000000000..7a9ff2890 --- /dev/null +++ b/benchmarks/perf-tool/release-configs/lucene-hnsw/filtering/relaxed-filter/index.json @@ -0,0 +1,26 @@ +{ + "settings": { + "index": { + "knn": true, + "number_of_shards": 24, + "number_of_replicas": 1 + } + }, + "mappings": { + "properties": { + "target_field": { + "type": "knn_vector", + "dimension": 128, + "method": { + "name": "hnsw", + "space_type": "l2", + "engine": 
"lucene", + "parameters": { + "ef_construction": 256, + "m": 16 + } + } + } + } + } +} diff --git a/benchmarks/perf-tool/release-configs/lucene-hnsw/filtering/relaxed-filter/relaxed-filter-spec.json b/benchmarks/perf-tool/release-configs/lucene-hnsw/filtering/relaxed-filter/relaxed-filter-spec.json new file mode 100644 index 000000000..fecde0392 --- /dev/null +++ b/benchmarks/perf-tool/release-configs/lucene-hnsw/filtering/relaxed-filter/relaxed-filter-spec.json @@ -0,0 +1,42 @@ +{ + "bool": + { + "should": + [ + { + "range": + { + "age": + { + "gte": 30, + "lte": 70 + } + } + }, + { + "term": + { + "color": "green" + } + }, + { + "term": + { + "color": "blue" + } + }, + { + "term": + { + "color": "yellow" + } + }, + { + "term": + { + "taste": "sweet" + } + } + ] + } +} \ No newline at end of file diff --git a/benchmarks/perf-tool/release-configs/lucene-hnsw/filtering/relaxed-filter/relaxed-filter-test.yml b/benchmarks/perf-tool/release-configs/lucene-hnsw/filtering/relaxed-filter/relaxed-filter-test.yml new file mode 100644 index 000000000..a47782649 --- /dev/null +++ b/benchmarks/perf-tool/release-configs/lucene-hnsw/filtering/relaxed-filter/relaxed-filter-test.yml @@ -0,0 +1,33 @@ +endpoint: [ENDPOINT] +test_name: "index-workflow" +test_id: "Index workflow" +num_runs: 10 +show_runs: false +steps: + - name: delete_index + index_name: target_index + - name: create_index + index_name: target_index + index_spec: [INDEX_SPEC_PATH]/index.json + - name: ingest_multi_field + index_name: target_index + field_name: target_field + bulk_size: 500 + dataset_format: hdf5 + dataset_path: [DATASET_PATH]/sift-128-euclidean-with-attr.hdf5 + attributes_dataset_name: attributes + attribute_spec: [ { name: 'color', type: 'str' }, { name: 'taste', type: 'str' }, { name: 'age', type: 'int' } ] + - name: refresh_index + index_name: target_index + - name: query_with_filter + k: 100 + r: 1 + calculate_recall: true + index_name: target_index + field_name: target_field + dataset_format: 
hdf5 + dataset_path: [DATASET_PATH]/sift-128-euclidean-with-attr.hdf5 + neighbors_format: hdf5 + neighbors_path: [DATASET_PATH]/sift-128-euclidean-with-filters.hdf5 + neighbors_dataset: neighbors_filter_5 + filter_spec: [INDEX_SPEC_PATH]/relaxed-filter-spec.json diff --git a/benchmarks/perf-tool/release-configs/lucene-hnsw/filtering/restrictive-filter/index.json b/benchmarks/perf-tool/release-configs/lucene-hnsw/filtering/restrictive-filter/index.json new file mode 100644 index 000000000..7a9ff2890 --- /dev/null +++ b/benchmarks/perf-tool/release-configs/lucene-hnsw/filtering/restrictive-filter/index.json @@ -0,0 +1,26 @@ +{ + "settings": { + "index": { + "knn": true, + "number_of_shards": 24, + "number_of_replicas": 1 + } + }, + "mappings": { + "properties": { + "target_field": { + "type": "knn_vector", + "dimension": 128, + "method": { + "name": "hnsw", + "space_type": "l2", + "engine": "lucene", + "parameters": { + "ef_construction": 256, + "m": 16 + } + } + } + } + } +} diff --git a/benchmarks/perf-tool/release-configs/lucene-hnsw/filtering/restrictive-filter/restrictive-filter-spec.json b/benchmarks/perf-tool/release-configs/lucene-hnsw/filtering/restrictive-filter/restrictive-filter-spec.json new file mode 100644 index 000000000..9e6356f1c --- /dev/null +++ b/benchmarks/perf-tool/release-configs/lucene-hnsw/filtering/restrictive-filter/restrictive-filter-spec.json @@ -0,0 +1,44 @@ +{ + "bool": + { + "must": + [ + { + "range": + { + "age": + { + "gte": 30, + "lte": 60 + } + } + }, + { + "term": + { + "taste": "bitter" + } + }, + { + "bool": + { + "should": + [ + { + "term": + { + "color": "blue" + } + }, + { + "term": + { + "color": "green" + } + } + ] + } + } + ] + } +} \ No newline at end of file diff --git a/benchmarks/perf-tool/release-configs/lucene-hnsw/filtering/restrictive-filter/restrictive-filter-test.yml b/benchmarks/perf-tool/release-configs/lucene-hnsw/filtering/restrictive-filter/restrictive-filter-test.yml new file mode 100644 index 
000000000..61e55f113 --- /dev/null +++ b/benchmarks/perf-tool/release-configs/lucene-hnsw/filtering/restrictive-filter/restrictive-filter-test.yml @@ -0,0 +1,33 @@ +endpoint: [ENDPOINT] +test_name: "index-workflow" +test_id: "Index workflow" +num_runs: 10 +show_runs: false +steps: + - name: delete_index + index_name: target_index + - name: create_index + index_name: target_index + index_spec: [INDEX_SPEC_PATH]/index.json + - name: ingest_multi_field + index_name: target_index + field_name: target_field + bulk_size: 500 + dataset_format: hdf5 + dataset_path: [DATASET_PATH]/sift-128-euclidean-with-attr.hdf5 + attributes_dataset_name: attributes + attribute_spec: [ { name: 'color', type: 'str' }, { name: 'taste', type: 'str' }, { name: 'age', type: 'int' } ] + - name: refresh_index + index_name: target_index + - name: query_with_filter + k: 100 + r: 1 + calculate_recall: true + index_name: target_index + field_name: target_field + dataset_format: hdf5 + dataset_path: [DATASET_PATH]/sift-128-euclidean-with-attr.hdf5 + neighbors_format: hdf5 + neighbors_path: [DATASET_PATH]/sift-128-euclidean-with-filters.hdf5 + neighbors_dataset: neighbors_filter_4 + filter_spec: [INDEX_SPEC_PATH]/restrictive-filter-spec.json