Skip to content

Commit 5550d32

Browse files
committed
restructure trie to store values as a set
Signed-off-by: Ruirui Zhang <mariazrr@amazon.com>
1 parent 4f987eb commit 5550d32

File tree

5 files changed

+48
-52
lines changed

5 files changed

+48
-52
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
99
- Add temporal routing processors for time-based document routing ([#18920](https://github.com/opensearch-project/OpenSearch/issues/18920))
1010
- Implement Query Rewriting Infrastructure ([#19060](https://github.com/opensearch-project/OpenSearch/pull/19060))
1111
- The dynamic mapping parameter supports false_allow_templates ([#19065](https://github.com/opensearch-project/OpenSearch/pull/19065))
12+
- [Rule-based Auto-tagging] restructure the in-memory trie to store values as a set ([#19344](https://github.com/opensearch-project/OpenSearch/pull/19344))
1213
- Add a toBuilder method in EngineConfig to support easy modification of configs([#19054](https://github.com/opensearch-project/OpenSearch/pull/19054))
1314
- Add StoreFactory plugin interface for custom Store implementations([#19091](https://github.com/opensearch-project/OpenSearch/pull/19091))
1415
- Use S3CrtClient for higher throughput while uploading files to S3 ([#18800](https://github.com/opensearch-project/OpenSearch/pull/18800))

modules/autotagging-commons/common/src/main/java/org/opensearch/rule/storage/AttributeValueStore.java

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@
88

99
package org.opensearch.rule.storage;
1010

11-
import java.util.Optional;
11+
import java.util.List;
12+
import java.util.Set;
1213

1314
/**
1415
* This interface provides apis to store Rule attribute values
@@ -23,16 +24,17 @@ public interface AttributeValueStore<K, V> {
2324

2425
/**
2526
* removes the key and associated value from attribute value store
26-
* @param key to be removed
27+
* @param key key of the value to be removed
28+
* @param value to be removed
2729
*/
28-
void remove(K key);
30+
void remove(K key, V value);
2931

3032
/**
3133
* Returns the value associated with the key
3234
* @param key in the data structure
3335
* @return
3436
*/
35-
Optional<V> get(K key);
37+
List<Set<V>> get(K key);
3638

3739
/**
3840
* Clears all the keys and their associated values from the attribute value store

modules/autotagging-commons/common/src/main/java/org/opensearch/rule/storage/DefaultAttributeValueStore.java

Lines changed: 28 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,10 @@
1010

1111
import org.apache.commons.collections4.trie.PatriciaTrie;
1212

13-
import java.util.Map;
14-
import java.util.Optional;
13+
import java.util.ArrayList;
14+
import java.util.HashSet;
15+
import java.util.List;
16+
import java.util.Set;
1517
import java.util.concurrent.locks.ReentrantReadWriteLock;
1618

1719
/**
@@ -21,7 +23,7 @@
2123
* ref: https://commons.apache.org/proper/commons-collections/javadocs/api-4.4/org/apache/commons/collections4/trie/PatriciaTrie.html
2224
*/
2325
public class DefaultAttributeValueStore<K extends String, V> implements AttributeValueStore<K, V> {
24-
private final PatriciaTrie<V> trie;
26+
private final PatriciaTrie<Set<V>> trie;
2527
private static final ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
2628
private static final ReentrantReadWriteLock.ReadLock readLock = lock.readLock();
2729
private static final ReentrantReadWriteLock.WriteLock writeLock = lock.writeLock();
@@ -37,74 +39,58 @@ public DefaultAttributeValueStore() {
3739
* Main constructor
3840
* @param trie A Patricia Trie
3941
*/
40-
public DefaultAttributeValueStore(PatriciaTrie<V> trie) {
42+
public DefaultAttributeValueStore(PatriciaTrie<Set<V>> trie) {
4143
this.trie = trie;
4244
}
4345

4446
@Override
4547
public void put(K key, V value) {
4648
writeLock.lock();
4749
try {
48-
trie.put(key, value);
50+
trie.computeIfAbsent(key, k -> new HashSet<>()).add(value);
4951
} finally {
5052
writeLock.unlock();
5153
}
5254
}
5355

5456
@Override
55-
public void remove(String key) {
57+
public void remove(K key, V value) {
5658
writeLock.lock();
5759
try {
58-
trie.remove(key);
60+
Set<V> values = trie.get(key);
61+
if (values != null && values.contains(value)) {
62+
values.remove(value);
63+
if (values.isEmpty()) {
64+
trie.remove(key);
65+
}
66+
}
5967
} finally {
6068
writeLock.unlock();
6169
}
6270
}
6371

6472
@Override
65-
public Optional<V> get(String key) {
73+
public List<Set<V>> get(String key) {
6674
readLock.lock();
6775
try {
68-
/**
69-
* Since we are inserting prefixes into the trie and searching for larger strings
70-
* It is important to find the largest matching prefix key in the trie efficiently
71-
* Hence we can do binary search
72-
*/
73-
final String longestMatchingPrefix = findLongestMatchingPrefix(key);
76+
List<Set<V>> results = new ArrayList<>();
77+
StringBuilder prefixBuilder = new StringBuilder(key);
7478

75-
/**
76-
* Now there are following cases for this prefix
77-
* 1. There is a Rule which has this prefix as one of the attribute values. In this case we should return the
78-
* Rule's label otherwise send empty
79-
*/
80-
for (Map.Entry<String, V> possibleMatch : trie.prefixMap(longestMatchingPrefix).entrySet()) {
81-
if (key.startsWith(possibleMatch.getKey())) {
82-
return Optional.of(possibleMatch.getValue());
79+
for (int i = key.length(); i >= 0; i--) {
80+
String prefix = prefixBuilder.toString();
81+
Set<V> value = trie.get(prefix);
82+
if (value != null && !value.isEmpty()) {
83+
results.add(value);
84+
}
85+
if (!prefixBuilder.isEmpty()) {
86+
prefixBuilder.deleteCharAt(prefixBuilder.length() - 1);
8387
}
8488
}
89+
90+
return results;
8591
} finally {
8692
readLock.unlock();
8793
}
88-
return Optional.empty();
89-
}
90-
91-
private String findLongestMatchingPrefix(String key) {
92-
int low = 0;
93-
int high = key.length() - 1;
94-
95-
while (low < high) {
96-
int mid = (high + low + 1) / 2;
97-
/**
98-
* This operation has O(1) complexity because prefixMap returns only the iterator
99-
*/
100-
if (!trie.prefixMap(key.substring(0, mid)).isEmpty()) {
101-
low = mid;
102-
} else {
103-
high = mid - 1;
104-
}
105-
}
106-
107-
return key.substring(0, low);
10894
}
10995

11096
@Override

modules/autotagging-commons/common/src/test/java/org/opensearch/rule/storage/AttributeValueStoreTests.java

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,18 +26,24 @@ public void setUp() throws Exception {
2626

2727
public void testPut() {
2828
subjectUnderTest.put("foo", "bar");
29-
assertEquals("bar", subjectUnderTest.get("foo").get());
29+
assertEquals("bar", subjectUnderTest.get("foo").getFirst().iterator().next());
30+
subjectUnderTest.put("foo", "sing");
31+
assertEquals(1, subjectUnderTest.get("foo").size());
32+
assertEquals(2, subjectUnderTest.get("foo").get(0).size());
33+
assertTrue(subjectUnderTest.get("foo").get(0).contains("sing"));
3034
}
3135

3236
public void testRemove() {
3337
subjectUnderTest.put("foo", "bar");
34-
subjectUnderTest.remove("foo");
38+
subjectUnderTest.remove("foo", "bar");
3539
assertEquals(0, subjectUnderTest.size());
3640
}
3741

3842
public void tesGet() {
3943
subjectUnderTest.put("foo", "bar");
40-
assertEquals("bar", subjectUnderTest.get("foo").get());
44+
assertEquals("bar", subjectUnderTest.get("foo").getFirst());
45+
subjectUnderTest.put("foo", "sing");
46+
assertEquals(2, subjectUnderTest.get("foo").size());
4147
}
4248

4349
public void testGetWhenNoProperPrefixIsPresent() {

modules/autotagging-commons/src/main/java/org/opensearch/rule/InMemoryRuleProcessingService.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ private void perform(Rule rule, BiConsumer<Map.Entry<Attribute, Set<String>>, Ru
6666
private void removeOperation(Map.Entry<Attribute, Set<String>> attributeEntry, Rule rule) {
6767
AttributeValueStore<String, String> valueStore = attributeValueStoreFactory.getAttributeValueStore(attributeEntry.getKey());
6868
for (String value : attributeEntry.getValue()) {
69-
valueStore.remove(value.replace(WILDCARD, ""));
69+
valueStore.remove(value.replace(WILDCARD, ""), rule.getFeatureValue());
7070
}
7171
}
7272

@@ -92,12 +92,13 @@ public Optional<String> evaluateLabel(List<AttributeExtractor<String>> attribute
9292
attributeExtractor.getAttribute()
9393
);
9494
for (String value : attributeExtractor.extract()) {
95-
Optional<String> possibleMatch = valueStore.get(value);
95+
List<Set<String>> candidateMatches = valueStore.get(value);
9696

97-
if (possibleMatch.isEmpty()) {
97+
if (candidateMatches == null || candidateMatches.isEmpty()) {
9898
return Optional.empty();
9999
}
100100

101+
Optional<String> possibleMatch = candidateMatches.get(0).stream().findAny();
101102
if (result.isEmpty()) {
102103
result = possibleMatch;
103104
} else {

0 commit comments

Comments
 (0)