From 3b0035bb2996a0e0a486a3dd33eac2959b4af0c4 Mon Sep 17 00:00:00 2001 From: bowenlan-amzn Date: Thu, 1 Aug 2024 22:01:09 -0700 Subject: [PATCH] bitmap filtering Signed-off-by: bowenlan-amzn --- .../test/search/370_bitmap_filtering.yml | 184 ++++++++++++++++ server/build.gradle | 3 + server/licenses/RoaringBitmap-1.1.0.jar.sha1 | 1 + server/licenses/RoaringBitmap-LICENSE.txt | 202 ++++++++++++++++++ server/licenses/RoaringBitmap-NOTICE.txt | 0 .../index/mapper/NumberFieldMapper.java | 54 +++++ .../index/query/TermsQueryBuilder.java | 104 ++++++++- .../org/opensearch/indices/TermsLookup.java | 26 ++- .../search/query/BitmapDocValuesQuery.java | 138 ++++++++++++ .../index/mapper/NumberFieldTypeTests.java | 20 ++ .../index/query/TermsQueryBuilderTests.java | 67 +++++- .../opensearch/indices/TermsLookupTests.java | 5 +- 12 files changed, 790 insertions(+), 14 deletions(-) create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/test/search/370_bitmap_filtering.yml create mode 100644 server/licenses/RoaringBitmap-1.1.0.jar.sha1 create mode 100644 server/licenses/RoaringBitmap-LICENSE.txt create mode 100644 server/licenses/RoaringBitmap-NOTICE.txt create mode 100644 server/src/main/java/org/opensearch/search/query/BitmapDocValuesQuery.java diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/370_bitmap_filtering.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/370_bitmap_filtering.yml new file mode 100644 index 0000000000000..34a439bdf4099 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/370_bitmap_filtering.yml @@ -0,0 +1,184 @@ +--- +setup: + - skip: + version: " - 2.16.99" + reason: bitmap filtering was added in 2.17 + - do: + indices.create: + index: students + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + student_id: + type: integer + - do: + bulk: + refresh: true + body: + - { "index": { "_index": "students", "_id": "1" } } + - { "name": "Jane Doe", "student_id": 111 } + - { "index": { "_index": "students", "_id": "2" } } + - { "name": "Mary Major", "student_id": 222 } + - { "index": { "_index": "students", "_id": "3" } } + - { "name": "John Doe", "student_id": 333 } + - do: + indices.create: + index: classes + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + enrolled: + type: binary + store: true + - do: + bulk: + refresh: true + body: + - { "index": { "_index": "classes", "_id": "101" } } + - { "enrolled": "OjAAAAEAAAAAAAEAEAAAAG8A3gA=" } # 111,222 + - { "index": { "_index": "classes", "_id": "102" } } + - { "enrolled": "OjAAAAEAAAAAAAAAEAAAAG8A" } # 111 + - { "index": { "_index": "classes", "_id": "103" } } + - { "enrolled": "OjAAAAEAAAAAAAAAEAAAAE0B" } # 333 + - { "index": { "_index": "classes", "_id": "104" } } + - { "enrolled": "OjAAAAEAAAAAAAEAEAAAAN4ATQE=" } # 222,333 + - do: + cluster.health: + wait_for_status: green + +--- +"Terms lookup on a binary field with bitmap": + - do: + search: + rest_total_hits_as_int: true + index: students + body: { + "query": { + "terms": { + "student_id": { + "index": "classes", + "id": "101", + "path": "enrolled", + "store": true + }, + "value_type": "bitmap" + } + } + } + - match: { hits.total: 2 } + - match: { hits.hits.0._source.name: Jane Doe } + - match: { hits.hits.0._source.student_id: 111 } + - match: { hits.hits.1._source.name: Mary Major } + - match: { hits.hits.1._source.student_id: 222 } + +--- +"Terms query accepting bitmap as value": + - do: + search: + rest_total_hits_as_int: true + index: students + body: { + "query": { + "terms": { + "student_id": ["OjAAAAEAAAAAAAEAEAAAAG8A3gA="], + "value_type": "bitmap" + } + } + } + - match: { hits.total: 2 } + - match: { hits.hits.0._source.name: Jane Doe } + - match: { hits.hits.0._source.student_id: 111 } + - match: { hits.hits.1._source.name: Mary Major } + - match: { hits.hits.1._source.student_id: 222 } + +--- +"Boolean must bitmap filtering": + - do: + search: + rest_total_hits_as_int: true + index: students + body: { + "query": { + "bool": { + "must": [ + { + "terms": { + "student_id": { + "index": "classes", + "id": "101", + "path": "enrolled", + "store": true + }, + "value_type": "bitmap" + } + } + ], + "must_not": [ + { + "terms": { + "student_id": { + "index": "classes", + "id": "104", + "path": "enrolled", + "store": true + }, + "value_type": "bitmap" + } + } + ] + } + } + } + - match: { hits.total: 1 } + - match: { hits.hits.0._source.name: Jane Doe } + - match: { hits.hits.0._source.student_id: 111 } + +--- +"Boolean should bitmap filtering": + - do: + search: + rest_total_hits_as_int: true + index: students + body: { + "query": { + "bool": { + "should": [ + { + "terms": { + "student_id": { + "index": "classes", + "id": "101", + "path": "enrolled", + "store": true + }, + "value_type": "bitmap" + } + }, + { + "terms": { + "student_id": { + "index": "classes", + "id": "104", + "path": "enrolled", + "store": true + }, + "value_type": "bitmap" + } + } + ] + } + } + } + - match: { hits.total: 3 } + - match: { hits.hits.0._source.name: Mary Major } + - match: { hits.hits.0._source.student_id: 222 } + - match: { hits.hits.1._source.name: Jane Doe } + - match: { hits.hits.1._source.student_id: 111 } + - match: { hits.hits.2._source.name: John Doe } + - match: { hits.hits.2._source.student_id: 333 } diff --git a/server/build.gradle b/server/build.gradle index 10e0e35dd2bef..1634d91346e71 100644 --- a/server/build.gradle +++ b/server/build.gradle @@ -125,6 +125,9 @@ dependencies { api "com.google.protobuf:protobuf-java:${versions.protobuf}" api "jakarta.annotation:jakarta.annotation-api:${versions.jakarta_annotation}" + // https://mvnrepository.com/artifact/org.roaringbitmap/RoaringBitmap + implementation 'org.roaringbitmap:RoaringBitmap:1.1.0' + testImplementation(project(":test:framework")) { // tests use the locally compiled version of server exclude group: 'org.opensearch', module: 'server' diff --git a/server/licenses/RoaringBitmap-1.1.0.jar.sha1 b/server/licenses/RoaringBitmap-1.1.0.jar.sha1 new file mode 100644 index 0000000000000..bf34e11b92710 --- /dev/null +++ b/server/licenses/RoaringBitmap-1.1.0.jar.sha1 @@ -0,0 +1 @@ +9607213861158ae7060234d93ee9c9cb19f494d1 \ No newline at end of file diff --git a/server/licenses/RoaringBitmap-LICENSE.txt b/server/licenses/RoaringBitmap-LICENSE.txt new file mode 100644 index 0000000000000..3bdd0036295a6 --- /dev/null +++ b/server/licenses/RoaringBitmap-LICENSE.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2013-2016 the RoaringBitmap authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/server/licenses/RoaringBitmap-NOTICE.txt b/server/licenses/RoaringBitmap-NOTICE.txt new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java index 27e62c3746a8e..3fc0ce085d63e 100644 --- a/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java @@ -48,6 +48,7 @@ import org.apache.lucene.search.IndexOrDocValuesQuery; import org.apache.lucene.search.IndexSortSortedNumericDocValuesRangeQuery; import org.apache.lucene.search.MatchNoDocsQuery; +import org.apache.lucene.search.PointInSetQuery; import org.apache.lucene.search.Query; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.NumericUtils; @@ -58,6 +59,7 @@ import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Setting.Property; import org.opensearch.common.settings.Settings; +import org.opensearch.core.common.bytes.BytesArray; import org.opensearch.core.xcontent.XContentParser; import org.opensearch.core.xcontent.XContentParser.Token; import org.opensearch.index.document.SortedUnsignedLongDocValuesRangeQuery; @@ -68,9 +70,11 @@ import org.opensearch.index.query.QueryShardContext; import org.opensearch.search.DocValueFormat; import org.opensearch.search.lookup.SearchLookup; +import org.opensearch.search.query.BitmapDocValuesQuery; import java.io.IOException; import java.math.BigInteger; +import java.nio.ByteBuffer; import java.time.ZoneId; import java.util.ArrayList; import java.util.Arrays; @@ -82,6 +86,8 @@ import java.util.function.Function; import java.util.function.Supplier; +import org.roaringbitmap.RoaringBitmap; + /** * A {@link FieldMapper} for numeric types: byte, short, int, long, float and double. * @@ -822,6 +828,21 @@ public Query termsQuery(String field, List values, boolean hasDocValues, return IntPoint.newSetQuery(field, v); } + @Override + public Query bitmapQuery(String field, BytesArray bitmapArray, boolean isSearchable) { + RoaringBitmap bitmap = new RoaringBitmap(); + try { + bitmap.deserialize(ByteBuffer.wrap(bitmapArray.array())); + } catch (Exception e) { + throw new IllegalArgumentException("Failed to deserialize the bitmap.", e); + } + + if (isSearchable) { + return new IndexOrDocValuesQuery(bitmapIndexQuery(field, bitmap), new BitmapDocValuesQuery(field, bitmap)); + } + return new BitmapDocValuesQuery(field, bitmap); + } + @Override public Query rangeQuery( String field, @@ -1174,6 +1195,10 @@ public final TypeParser parser() { public abstract Query termsQuery(String field, List values, boolean hasDocValues, boolean isSearchable); + public Query bitmapQuery(String field, BytesArray bitmap, boolean isSearchable) { + throw new IllegalArgumentException("Field [" + name + "] of type [" + typeName() + "] does not support bitmap queries"); + } + public abstract Query rangeQuery( String field, Object lowerTerm, @@ -1415,6 +1440,31 @@ public static Query unsignedLongRangeQuery( } return builder.apply(l, u); } + + static PointInSetQuery bitmapIndexQuery(String field, RoaringBitmap bitmap) { + final BytesRef encoded = new BytesRef(new byte[Integer.BYTES]); + return new PointInSetQuery(field, 1, Integer.BYTES, new PointInSetQuery.Stream() { + + long upto; + + @Override + public BytesRef next() { + upto = bitmap.nextValue((int) upto); + if (upto == -1) { + return null; + } + IntPoint.encodeDimension((int) upto, encoded.bytes, 0); + upto++; + return encoded; + } + }) { + @Override + protected String toString(byte[] value) { + assert value.length == Integer.BYTES; + return Integer.toString(IntPoint.decodeDimension(value, 0)); + } + }; + } } /** @@ -1495,6 +1545,10 @@ public Query termsQuery(List values, QueryShardContext context) { return query; } + public Query bitmapQuery(BytesArray bitmap) { + return type.bitmapQuery(name(), bitmap, isSearchable()); + } + @Override public Query rangeQuery(Object lowerTerm, Object upperTerm, boolean includeLower, boolean includeUpper, QueryShardContext context) { failIfNotIndexedAndNoDocValues(); diff --git a/server/src/main/java/org/opensearch/index/query/TermsQueryBuilder.java b/server/src/main/java/org/opensearch/index/query/TermsQueryBuilder.java index ac0ca3919ea38..ccd654a069f9b 100644 --- a/server/src/main/java/org/opensearch/index/query/TermsQueryBuilder.java +++ b/server/src/main/java/org/opensearch/index/query/TermsQueryBuilder.java @@ -37,14 +37,17 @@ import org.apache.lucene.search.Query; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefBuilder; +import org.opensearch.Version; import org.opensearch.action.get.GetRequest; import org.opensearch.client.Client; import org.opensearch.common.SetOnce; import org.opensearch.common.io.stream.BytesStreamOutput; import org.opensearch.common.xcontent.support.XContentMapValues; +import org.opensearch.core.ParseField; import org.opensearch.core.action.ActionListener; import org.opensearch.core.common.ParsingException; import org.opensearch.core.common.Strings; +import org.opensearch.core.common.bytes.BytesArray; import org.opensearch.core.common.bytes.BytesReference; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; @@ -53,6 +56,7 @@ import org.opensearch.index.IndexSettings; import org.opensearch.index.mapper.ConstantFieldType; import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.NumberFieldMapper; import org.opensearch.indices.TermsLookup; import java.io.IOException; @@ -60,6 +64,7 @@ import java.util.AbstractList; import java.util.ArrayList; import java.util.Arrays; +import java.util.Base64; import java.util.Collections; import java.util.HashSet; import java.util.List; @@ -82,6 +87,34 @@ public class TermsQueryBuilder extends AbstractQueryBuilder { private final TermsLookup termsLookup; private final Supplier> supplier; + private static final ParseField VALUE_TYPE_FIELD = new ParseField("value_type"); + private ValueType valueType = ValueType.DEFAULT; + + enum ValueType { + DEFAULT("default"), + BITMAP("bitmap"); + + private final String type; + + ValueType(String type) { + this.type = type; + } + + static ValueType fromString(String type) { + for (ValueType valueType : ValueType.values()) { + if (valueType.type.equalsIgnoreCase(type)) { + return valueType; + } + } + throw new IllegalArgumentException(type + " is not valid " + VALUE_TYPE_FIELD); + } + } + + TermsQueryBuilder valueType(ValueType valueType) { + this.valueType = valueType; + return this; + } + public TermsQueryBuilder(String fieldName, TermsLookup termsLookup) { this(fieldName, null, termsLookup); } @@ -187,6 +220,11 @@ public TermsQueryBuilder(String fieldName, Iterable values) { this.supplier = null; } + private TermsQueryBuilder(String fieldName, Iterable values, ValueType valueType) { + this(fieldName, values); + this.valueType = valueType; + } + private TermsQueryBuilder(String fieldName, Supplier> supplier) { this.fieldName = fieldName; this.values = null; @@ -194,6 +232,11 @@ private TermsQueryBuilder(String fieldName, Supplier> supplier) { this.supplier = supplier; } + private TermsQueryBuilder(String fieldName, Supplier> supplier, ValueType valueType) { + this(fieldName, supplier); + this.valueType = valueType; + } + /** * Read from a stream. */ @@ -203,6 +246,9 @@ public TermsQueryBuilder(StreamInput in) throws IOException { termsLookup = in.readOptionalWriteable(TermsLookup::new); values = (List) in.readGenericValue(); this.supplier = null; + if (in.getVersion().onOrAfter(Version.V_2_17_0)) { + valueType = in.readEnum(ValueType.class); + } } @Override @@ -213,6 +259,9 @@ protected void doWriteTo(StreamOutput out) throws IOException { out.writeString(fieldName); out.writeOptionalWriteable(termsLookup); out.writeGenericValue(values); + if (out.getVersion().onOrAfter(Version.V_2_17_0)) { + out.writeEnum(valueType); + } } public String fieldName() { @@ -360,6 +409,9 @@ protected void doXContent(XContentBuilder builder, Params params) throws IOExcep builder.field(fieldName, convertBack(values)); } printBoostAndQueryName(builder); + if (valueType != ValueType.DEFAULT) { + builder.field(VALUE_TYPE_FIELD.getPreferredName(), valueType.type); + } builder.endObject(); } @@ -371,6 +423,8 @@ public static TermsQueryBuilder fromXContent(XContentParser parser) throws IOExc String queryName = null; float boost = AbstractQueryBuilder.DEFAULT_BOOST; + String valueTypeStr = ValueType.DEFAULT.type; + XContentParser.Token token; String currentFieldName = null; while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { @@ -406,6 +460,8 @@ public static TermsQueryBuilder fromXContent(XContentParser parser) throws IOExc boost = parser.floatValue(); } else if (AbstractQueryBuilder.NAME_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { queryName = parser.text(); + } else if (VALUE_TYPE_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { + valueTypeStr = parser.text(); } else { throw new ParsingException( parser.getTokenLocation(), @@ -430,7 +486,18 @@ public static TermsQueryBuilder fromXContent(XContentParser parser) throws IOExc ); } - return new TermsQueryBuilder(fieldName, values, termsLookup).boost(boost).queryName(queryName); + ValueType valueType = ValueType.fromString(valueTypeStr); + if (valueType == ValueType.BITMAP) { + if (values != null && values.size() == 1 && values.get(0) instanceof BytesRef) { + values.set(0, new BytesArray(Base64.getDecoder().decode(((BytesRef) values.get(0)).utf8ToString()))); + } else if (termsLookup == null) { + throw new IllegalArgumentException( + "Invalid value for bitmap type: Expected a single-element array with a base64 encoded serialized bitmap." + ); + } + } + + return new TermsQueryBuilder(fieldName, values, termsLookup).boost(boost).queryName(queryName).valueType(valueType); } static List parseValues(XContentParser parser) throws IOException { @@ -473,17 +540,37 @@ protected Query doToQuery(QueryShardContext context) throws IOException { if (fieldType == null) { throw new IllegalStateException("Rewrite first"); } + if (valueType == ValueType.BITMAP) { + if (values.size() == 1 && values.get(0) instanceof BytesArray) { + if (fieldType instanceof NumberFieldMapper.NumberFieldType) { + return ((NumberFieldMapper.NumberFieldType) fieldType).bitmapQuery((BytesArray) values.get(0)); + } + } + } return fieldType.termsQuery(values, context); } private void fetch(TermsLookup termsLookup, Client client, ActionListener> actionListener) { GetRequest getRequest = new GetRequest(termsLookup.index(), termsLookup.id()); getRequest.preference("_local").routing(termsLookup.routing()); + if (termsLookup.store()) { + getRequest.storedFields(termsLookup.path()); + } client.get(getRequest, ActionListener.delegateFailure(actionListener, (delegatedListener, getResponse) -> { List terms = new ArrayList<>(); - if (getResponse.isSourceEmpty() == false) { // extract terms only if the doc source exists - List extractedValues = XContentMapValues.extractRawValues(termsLookup.path(), getResponse.getSourceAsMap()); - terms.addAll(extractedValues); + if (termsLookup.store()) { + List values = getResponse.getField(termsLookup.path()).getValues(); + if (values.size() != 1 && valueType == ValueType.BITMAP) { + throw new IllegalArgumentException( + "Invalid value for bitmap type: Expected a single base64 encoded serialized bitmap." + ); + } + terms.addAll(values); + } else { + if (getResponse.isSourceEmpty() == false) { // extract terms only if the doc source exists + List extractedValues = XContentMapValues.extractRawValues(termsLookup.path(), getResponse.getSourceAsMap()); + terms.addAll(extractedValues); + } } delegatedListener.onResponse(terms); })); @@ -491,7 +578,7 @@ private void fetch(TermsLookup termsLookup, Client client, ActionListener> supplier = new SetOnce<>(); queryRewriteContext.registerAsyncAction((client, listener) -> fetch(termsLookup, client, ActionListener.map(listener, list -> { supplier.set(list); return null; }))); - return new TermsQueryBuilder(this.fieldName, supplier::get); + return new TermsQueryBuilder(this.fieldName, supplier::get, valueType); } if (values == null || values.isEmpty()) { diff --git a/server/src/main/java/org/opensearch/indices/TermsLookup.java b/server/src/main/java/org/opensearch/indices/TermsLookup.java index 37533c0809d7a..7337ed31ce41e 100644 --- a/server/src/main/java/org/opensearch/indices/TermsLookup.java +++ b/server/src/main/java/org/opensearch/indices/TermsLookup.java @@ -87,6 +87,9 @@ public TermsLookup(StreamInput in) throws IOException { path = in.readString(); index = in.readString(); routing = in.readOptionalString(); + if (in.getVersion().onOrAfter(Version.V_2_17_0)) { + store = in.readBoolean(); + } } @Override @@ -98,6 +101,9 @@ public void writeTo(StreamOutput out) throws IOException { out.writeString(path); out.writeString(index); out.writeOptionalString(routing); + if (out.getVersion().onOrAfter(Version.V_2_17_0)) { + out.writeBoolean(store); + } } public String index() { @@ -121,6 +127,17 @@ public TermsLookup routing(String routing) { return this; } + private boolean store; + + public boolean store() { + return store; + } + + public TermsLookup store(boolean store) { + this.store = store; + return this; + } + private static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>("terms_lookup", args -> { String index = (String) args[0]; String id = (String) args[1]; @@ -132,6 +149,7 @@ public TermsLookup routing(String routing) { PARSER.declareString(constructorArg(), new ParseField("id")); PARSER.declareString(constructorArg(), new ParseField("path")); PARSER.declareString(TermsLookup::routing, new ParseField("routing")); + PARSER.declareBoolean(TermsLookup::store, new ParseField("store")); } public static TermsLookup parseTermsLookup(XContentParser parser) throws IOException { @@ -151,12 +169,15 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws if (routing != null) { builder.field("routing", routing); } + if (store) { + builder.field("store", true); + } return builder; } @Override public int hashCode() { - return Objects.hash(index, id, path, routing); + return Objects.hash(index, id, path, routing, store); } @Override @@ -171,6 +192,7 @@ public boolean equals(Object obj) { return Objects.equals(index, other.index) && Objects.equals(id, other.id) && Objects.equals(path, other.path) - && Objects.equals(routing, other.routing); + && Objects.equals(routing, other.routing) + && Objects.equals(store, other.store); } } diff --git a/server/src/main/java/org/opensearch/search/query/BitmapDocValuesQuery.java b/server/src/main/java/org/opensearch/search/query/BitmapDocValuesQuery.java new file mode 100644 index 0000000000000..5bfb63e1f722d --- /dev/null +++ b/server/src/main/java/org/opensearch/search/query/BitmapDocValuesQuery.java @@ -0,0 +1,138 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.query; + +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.SortedNumericDocValues; +import org.apache.lucene.search.ConstantScoreScorer; +import org.apache.lucene.search.ConstantScoreWeight; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchNoDocsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.QueryVisitor; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.TwoPhaseIterator; +import org.apache.lucene.search.Weight; +import org.apache.lucene.util.Accountable; +import org.apache.lucene.util.RamUsageEstimator; + +import java.io.IOException; +import java.util.Objects; + +import org.roaringbitmap.RoaringBitmap; + +/** + * Filter with bitmap + *

+ * Similar to Lucene SortedNumericDocValuesSetQuery + */ +public class BitmapDocValuesQuery extends Query implements Accountable { + + final String field; + final RoaringBitmap bitmap; + + public BitmapDocValuesQuery(String field, RoaringBitmap bitmap) { + this.field = field; + this.bitmap = bitmap; + } + + @Override + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + return new ConstantScoreWeight(this, boost) { + @Override + public Scorer scorer(LeafReaderContext context) throws IOException { + SortedNumericDocValues values = DocValues.getSortedNumeric(context.reader(), field); + final NumericDocValues singleton = DocValues.unwrapSingleton(values); + final TwoPhaseIterator iterator; + if (singleton != null) { + iterator = new TwoPhaseIterator(singleton) { + @Override + public boolean matches() throws IOException { + long value = singleton.longValue(); + return bitmap.contains((int) value); + } + + @Override + public float matchCost() { + return 2; + } + }; + } else { + iterator = new TwoPhaseIterator(values) { + @Override + public boolean matches() throws IOException { + int count = values.docValueCount(); + for (int i = 0; i < count; i++) { + final long value = values.nextValue(); + if (bitmap.contains((int) value)) { + return true; + } + } + return false; + } + + @Override + public float matchCost() { + return 2 * values.docValueCount(); + } + }; + } + return new ConstantScoreScorer(this, score(), scoreMode, iterator); + } + + @Override + public boolean isCacheable(LeafReaderContext ctx) { + return DocValues.isCacheable(ctx, field); + } + }; + } + + @Override + public String toString(String field) { + return field + ": " + bitmap.toString(); + } + + @Override + public Query rewrite(IndexSearcher indexSearcher) throws IOException { + if (bitmap.getLongCardinality() == 0) { + return new MatchNoDocsQuery(); + } + return super.rewrite(indexSearcher); + } + + @Override + public boolean equals(Object other) { + if (sameClassAs(other) == false) { + return false; + } + BitmapDocValuesQuery that = (BitmapDocValuesQuery) other; + return field.equals(that.field) && bitmap.equals(that.bitmap); + } + + @Override + public int hashCode() { + return Objects.hash(classHash(), field, bitmap); + } + + @Override + public long ramBytesUsed() { + return RamUsageEstimator.shallowSizeOfInstance(BitmapDocValuesQuery.class) + RamUsageEstimator.sizeOfObject(field) + + RamUsageEstimator.sizeOfObject(bitmap); + } + + @Override + public void visit(QueryVisitor visitor) { + if (visitor.acceptField(field)) { + visitor.visitLeaf(this); + } + } +} diff --git a/server/src/test/java/org/opensearch/index/mapper/NumberFieldTypeTests.java b/server/src/test/java/org/opensearch/index/mapper/NumberFieldTypeTests.java index 96487db6dd512..f91279f263eea 100644 --- a/server/src/test/java/org/opensearch/index/mapper/NumberFieldTypeTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/NumberFieldTypeTests.java @@ -62,6 +62,7 @@ import org.opensearch.common.settings.Settings; import org.opensearch.common.util.BigArrays; import org.opensearch.common.util.io.IOUtils; +import org.opensearch.core.common.bytes.BytesArray; import org.opensearch.core.xcontent.MediaTypeRegistry; import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.index.IndexSettings; @@ -73,18 +74,22 @@ import org.opensearch.index.mapper.NumberFieldMapper.NumberType; import org.opensearch.index.query.QueryShardContext; import org.opensearch.search.MultiValueMode; +import org.opensearch.search.query.BitmapDocValuesQuery; import org.junit.Before; import java.io.ByteArrayInputStream; import java.io.IOException; import java.math.BigDecimal; import java.math.BigInteger; +import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.function.Supplier; +import org.roaringbitmap.RoaringBitmap; + import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.either; import static org.hamcrest.Matchers.equalTo; @@ -144,6 +149,21 @@ public void testLongTermsQueryWithDecimalPart() { assertTrue(ft.termsQuery(Arrays.asList(1.1, 2.1), null) instanceof MatchNoDocsQuery); } + public void testBitmapQuery() { + RoaringBitmap r = new RoaringBitmap(); + r.add(111); + r.add(333); + byte[] array = new byte[r.serializedSizeInBytes()]; + r.serialize(ByteBuffer.wrap(array)); + BytesArray bitmap = new BytesArray(array); + + NumberFieldType ft = new NumberFieldMapper.NumberFieldType("field", NumberType.INTEGER); + assertEquals( + new IndexOrDocValuesQuery(NumberType.bitmapIndexQuery("field", r), new BitmapDocValuesQuery("field", r)), + ft.bitmapQuery(bitmap) + ); + } + public void testByteTermQueryWithDecimalPart() { MappedFieldType ft = new NumberFieldMapper.NumberFieldType("field", NumberType.BYTE); assertTrue(ft.termQuery(42.1, null) instanceof MatchNoDocsQuery); diff --git a/server/src/test/java/org/opensearch/index/query/TermsQueryBuilderTests.java b/server/src/test/java/org/opensearch/index/query/TermsQueryBuilderTests.java index 7b45308a523c8..ce3eeb0e3bf69 100644 --- a/server/src/test/java/org/opensearch/index/query/TermsQueryBuilderTests.java +++ b/server/src/test/java/org/opensearch/index/query/TermsQueryBuilderTests.java @@ -46,6 +46,7 @@ import org.opensearch.OpenSearchException; import org.opensearch.action.get.GetRequest; import org.opensearch.action.get.GetResponse; +import org.opensearch.common.document.DocumentField; import org.opensearch.common.io.stream.BytesStreamOutput; import org.opensearch.common.xcontent.XContentFactory; import org.opensearch.core.common.ParsingException; @@ -59,15 +60,20 @@ import org.junit.Before; import java.io.IOException; +import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.HashMap; import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Objects; import java.util.Set; import java.util.stream.Collectors; +import org.roaringbitmap.RoaringBitmap; + import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.either; import static org.hamcrest.Matchers.instanceOf; @@ -120,10 +126,9 @@ protected TermsQueryBuilder doCreateTestQueryBuilder() { } private TermsLookup randomTermsLookup() { - // Randomly choose between a typeless terms lookup and one with an explicit type to make sure we are TermsLookup lookup = new TermsLookup(randomAlphaOfLength(10), randomAlphaOfLength(10), termsPath); - // testing both cases. lookup.routing(randomBoolean() ? randomAlphaOfLength(10) : null); + lookup.store(randomBoolean()); return lookup; } @@ -245,7 +250,17 @@ public GetResponse executeGet(GetRequest getRequest) { } catch (IOException ex) { throw new OpenSearchException("boom", ex); } - return new GetResponse(new GetResult(getRequest.index(), getRequest.id(), 0, 1, 0, true, new BytesArray(json), null, null)); + Map documentField = new HashMap<>(); + List nonNullTerms = new ArrayList<>(); + for (Object obj : randomTerms) { + if (obj != null) { + nonNullTerms.add(obj); + } + } + documentField.put(termsPath, new DocumentField(termsPath, nonNullTerms)); + return new GetResponse( + new GetResult(getRequest.index(), getRequest.id(), 0, 1, 0, true, new BytesArray(json), documentField, null) + ); } public void testNumeric() throws IOException { @@ -388,4 +403,50 @@ protected QueryBuilder parseQuery(XContentParser parser) throws IOException { } } + public void testFromJsonWithValueType() throws IOException { + String json = "{\n" + + " \"terms\": {\n" + + " \"student_id\": [\"OjAAAAEAAAAAAAEAEAAAAG8A3gA=\"],\n" + + " \"boost\" : 1.0,\n" + + " \"value_type\": \"bitmap\"\n" + + " }\n" + + "}"; + + TermsQueryBuilder parsed = (TermsQueryBuilder) parseQuery(json); + checkGeneratedJson(json, parsed); + assertEquals(json, 1, parsed.values().size()); + } + + public void testFromJsonWithValueTypeFail() { + String json = "{\n" + + " \"terms\": {\n" + + " \"student_id\": [\"OjAAAAEAAAAAAAEAEAAAAG8A3gA=\", \"2\"],\n" + + " \"boost\" : 1.0,\n" + + " \"value_type\": \"bitmap\"\n" + + " }\n" + + "}"; + + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> parseQuery(json)); + assertEquals( + "Invalid value for bitmap type: Expected a single-element array with a base64 encoded serialized bitmap.", + e.getMessage() + ); + } + + public void testTermsLookupBitmap() throws IOException { + RoaringBitmap bitmap = new RoaringBitmap(); + bitmap.add(111); + bitmap.add(333); + byte[] array = new byte[bitmap.serializedSizeInBytes()]; + bitmap.serialize(ByteBuffer.wrap(array)); + randomTerms = List.of(new BytesArray(array)); // this will be fetched back by terms lookup + + TermsQueryBuilder query = new TermsQueryBuilder(INT_FIELD_NAME, randomTermsLookup().store(true)).valueType( + TermsQueryBuilder.ValueType.BITMAP + ); + QueryShardContext context = createShardContext(); + QueryBuilder rewritten = rewriteQuery(query, new QueryShardContext(context)); + Query luceneQuery = rewritten.toQuery(context); + assertTrue(luceneQuery instanceof IndexOrDocValuesQuery); + } } diff --git a/server/src/test/java/org/opensearch/indices/TermsLookupTests.java b/server/src/test/java/org/opensearch/indices/TermsLookupTests.java index 8a7867729f2c1..3f45bc86104dd 100644 --- a/server/src/test/java/org/opensearch/indices/TermsLookupTests.java +++ b/server/src/test/java/org/opensearch/indices/TermsLookupTests.java @@ -48,12 +48,15 @@ public void testTermsLookup() { String id = randomAlphaOfLengthBetween(1, 10); String path = randomAlphaOfLengthBetween(1, 10); String routing = randomAlphaOfLengthBetween(1, 10); + boolean store = randomBoolean(); TermsLookup termsLookup = new TermsLookup(index, id, path); termsLookup.routing(routing); + termsLookup.store(store); assertEquals(index, termsLookup.index()); assertEquals(id, termsLookup.id()); assertEquals(path, termsLookup.path()); assertEquals(routing, termsLookup.routing()); + assertEquals(store, termsLookup.store()); } public void testIllegalArguments() { @@ -109,6 +112,6 @@ public void testXContentParsing() throws IOException { public static TermsLookup randomTermsLookup() { return new TermsLookup(randomAlphaOfLength(10), randomAlphaOfLength(10), randomAlphaOfLength(10).replace('.', '_')).routing( randomBoolean() ? randomAlphaOfLength(10) : null - ); + ).store(randomBoolean()); } }