Skip to content

Commit 6433f37

Browse files
committed
restructure trie to store values as a set
Signed-off-by: Ruirui Zhang <mariazrr@amazon.com>
1 parent 488149f commit 6433f37

File tree

5 files changed

+72
-51
lines changed

5 files changed

+72
-51
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
99
- Add temporal routing processors for time-based document routing ([#18920](https://github.com/opensearch-project/OpenSearch/issues/18920))
1010
- Implement Query Rewriting Infrastructure ([#19060](https://github.com/opensearch-project/OpenSearch/pull/19060))
1111
- The dynamic mapping parameter supports false_allow_templates ([#19065](https://github.com/opensearch-project/OpenSearch/pull/19065) ([#19097](https://github.com/opensearch-project/OpenSearch/pull/19097)))
12+
- [Rule-based Auto-tagging] restructure the in-memory trie to store values as a set ([#19344](https://github.com/opensearch-project/OpenSearch/pull/19344))
1213
- Add a toBuilder method in EngineConfig to support easy modification of configs([#19054](https://github.com/opensearch-project/OpenSearch/pull/19054))
1314
- Add StoreFactory plugin interface for custom Store implementations([#19091](https://github.com/opensearch-project/OpenSearch/pull/19091))
1415
- Use S3CrtClient for higher throughput while uploading files to S3 ([#18800](https://github.com/opensearch-project/OpenSearch/pull/18800))

modules/autotagging-commons/common/src/main/java/org/opensearch/rule/storage/AttributeValueStore.java

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,10 @@
88

99
package org.opensearch.rule.storage;
1010

11+
import java.util.ArrayList;
12+
import java.util.List;
1113
import java.util.Optional;
14+
import java.util.Set;
1215

1316
/**
1417
* This interface provides apis to store Rule attribute values
@@ -21,16 +24,32 @@ public interface AttributeValueStore<K, V> {
2124
*/
2225
void put(K key, V value);
2326

27+
/**
28+
* removes the key and associated value from attribute value store
29+
* @param key key of the value to be removed
30+
* @param value to be removed
31+
*/
32+
default void remove(K key, V value) {
33+
remove(key);
34+
}
35+
2436
/**
2537
* removes the key and associated value from attribute value store
2638
* @param key to be removed
2739
*/
2840
void remove(K key);
2941

42+
/**
43+
* Returns the values associated with the key
44+
* @param key in the data structure
45+
*/
46+
default List<Set<V>> getAll(K key) {
47+
return new ArrayList<>();
48+
}
49+
3050
/**
3151
* Returns the value associated with the key
3252
* @param key in the data structure
33-
* @return
3453
*/
3554
Optional<V> get(K key);
3655

modules/autotagging-commons/common/src/main/java/org/opensearch/rule/storage/DefaultAttributeValueStore.java

Lines changed: 35 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,11 @@
1010

1111
import org.apache.commons.collections4.trie.PatriciaTrie;
1212

13-
import java.util.Map;
13+
import java.util.ArrayList;
14+
import java.util.HashSet;
15+
import java.util.List;
1416
import java.util.Optional;
17+
import java.util.Set;
1518
import java.util.concurrent.locks.ReentrantReadWriteLock;
1619

1720
/**
@@ -21,7 +24,7 @@
2124
* ref: https://commons.apache.org/proper/commons-collections/javadocs/api-4.4/org/apache/commons/collections4/trie/PatriciaTrie.html
2225
*/
2326
public class DefaultAttributeValueStore<K extends String, V> implements AttributeValueStore<K, V> {
24-
private final PatriciaTrie<V> trie;
27+
private final PatriciaTrie<Set<V>> trie;
2528
private static final ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
2629
private static final ReentrantReadWriteLock.ReadLock readLock = lock.readLock();
2730
private static final ReentrantReadWriteLock.WriteLock writeLock = lock.writeLock();
@@ -37,74 +40,65 @@ public DefaultAttributeValueStore() {
3740
* Main constructor
3841
* @param trie A Patricia Trie
3942
*/
40-
public DefaultAttributeValueStore(PatriciaTrie<V> trie) {
43+
public DefaultAttributeValueStore(PatriciaTrie<Set<V>> trie) {
4144
this.trie = trie;
4245
}
4346

4447
@Override
4548
public void put(K key, V value) {
4649
writeLock.lock();
4750
try {
48-
trie.put(key, value);
51+
trie.computeIfAbsent(key, k -> new HashSet<>()).add(value);
4952
} finally {
5053
writeLock.unlock();
5154
}
5255
}
5356

5457
@Override
55-
public void remove(String key) {
58+
public void remove(K key, V value) {
5659
writeLock.lock();
5760
try {
58-
trie.remove(key);
61+
trie.computeIfPresent(key, (k, values) -> {
62+
values.remove(value);
63+
return values.isEmpty() ? null : values;
64+
});
5965
} finally {
6066
writeLock.unlock();
6167
}
6268
}
6369

6470
@Override
65-
public Optional<V> get(String key) {
71+
public void remove(K key) {
72+
throw new UnsupportedOperationException("This remove(K key) function is not supported within DefaultAttributeValueStore.");
73+
}
74+
75+
@Override
76+
public Optional<V> get(K key) {
77+
throw new UnsupportedOperationException("This get(K key) function is not supported within DefaultAttributeValueStore.");
78+
}
79+
80+
@Override
81+
public List<Set<V>> getAll(String key) {
6682
readLock.lock();
6783
try {
68-
/**
69-
* Since we are inserting prefixes into the trie and searching for larger strings
70-
* It is important to find the largest matching prefix key in the trie efficiently
71-
* Hence we can do binary search
72-
*/
73-
final String longestMatchingPrefix = findLongestMatchingPrefix(key);
84+
List<Set<V>> results = new ArrayList<>();
85+
StringBuilder prefixBuilder = new StringBuilder(key);
7486

75-
/**
76-
* Now there are following cases for this prefix
77-
* 1. There is a Rule which has this prefix as one of the attribute values. In this case we should return the
78-
* Rule's label otherwise send empty
79-
*/
80-
for (Map.Entry<String, V> possibleMatch : trie.prefixMap(longestMatchingPrefix).entrySet()) {
81-
if (key.startsWith(possibleMatch.getKey())) {
82-
return Optional.of(possibleMatch.getValue());
87+
for (int i = key.length(); i >= 0; i--) {
88+
String prefix = prefixBuilder.toString();
89+
Set<V> value = trie.get(prefix);
90+
if (value != null && !value.isEmpty()) {
91+
results.add(value);
92+
}
93+
if (!prefixBuilder.isEmpty()) {
94+
prefixBuilder.deleteCharAt(prefixBuilder.length() - 1);
8395
}
8496
}
97+
98+
return results;
8599
} finally {
86100
readLock.unlock();
87101
}
88-
return Optional.empty();
89-
}
90-
91-
private String findLongestMatchingPrefix(String key) {
92-
int low = 0;
93-
int high = key.length() - 1;
94-
95-
while (low < high) {
96-
int mid = (high + low + 1) / 2;
97-
/**
98-
* This operation has O(1) complexity because prefixMap returns only the iterator
99-
*/
100-
if (!trie.prefixMap(key.substring(0, mid)).isEmpty()) {
101-
low = mid;
102-
} else {
103-
high = mid - 1;
104-
}
105-
}
106-
107-
return key.substring(0, low);
108102
}
109103

110104
@Override

modules/autotagging-commons/common/src/test/java/org/opensearch/rule/storage/AttributeValueStoreTests.java

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,27 +26,33 @@ public void setUp() throws Exception {
2626

2727
public void testPut() {
2828
subjectUnderTest.put("foo", "bar");
29-
assertEquals("bar", subjectUnderTest.get("foo").get());
29+
assertEquals("bar", subjectUnderTest.getAll("foo").getFirst().iterator().next());
30+
subjectUnderTest.put("foo", "sing");
31+
assertEquals(1, subjectUnderTest.getAll("foo").size());
32+
assertEquals(2, subjectUnderTest.getAll("foo").get(0).size());
33+
assertTrue(subjectUnderTest.getAll("foo").get(0).contains("sing"));
3034
}
3135

3236
public void testRemove() {
3337
subjectUnderTest.put("foo", "bar");
34-
subjectUnderTest.remove("foo");
38+
subjectUnderTest.remove("foo", "bar");
3539
assertEquals(0, subjectUnderTest.size());
3640
}
3741

3842
public void tesGet() {
3943
subjectUnderTest.put("foo", "bar");
40-
assertEquals("bar", subjectUnderTest.get("foo").get());
44+
assertEquals("bar", subjectUnderTest.getAll("foo").getFirst());
45+
subjectUnderTest.put("foo", "sing");
46+
assertEquals(2, subjectUnderTest.getAll("foo").size());
4147
}
4248

4349
public void testGetWhenNoProperPrefixIsPresent() {
4450
subjectUnderTest.put("foo", "bar");
4551
subjectUnderTest.put("foodip", "sing");
46-
assertTrue(subjectUnderTest.get("foxtail").isEmpty());
52+
assertTrue(subjectUnderTest.getAll("foxtail").isEmpty());
4753
subjectUnderTest.put("fox", "lucy");
4854

49-
assertFalse(subjectUnderTest.get("foxtail").isEmpty());
55+
assertFalse(subjectUnderTest.getAll("foxtail").isEmpty());
5056
}
5157

5258
public void testClear() {
@@ -97,7 +103,7 @@ public void run() {
97103
try {
98104
Thread.sleep(random().nextInt(100));
99105
for (String key : toReadKeys) {
100-
subjectUnderTest.get(key);
106+
subjectUnderTest.getAll(key);
101107
}
102108
} catch (InterruptedException e) {}
103109
}

modules/autotagging-commons/src/main/java/org/opensearch/rule/InMemoryRuleProcessingService.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ private void perform(Rule rule, BiConsumer<Map.Entry<Attribute, Set<String>>, Ru
6666
private void removeOperation(Map.Entry<Attribute, Set<String>> attributeEntry, Rule rule) {
6767
AttributeValueStore<String, String> valueStore = attributeValueStoreFactory.getAttributeValueStore(attributeEntry.getKey());
6868
for (String value : attributeEntry.getValue()) {
69-
valueStore.remove(value.replace(WILDCARD, ""));
69+
valueStore.remove(value.replace(WILDCARD, ""), rule.getFeatureValue());
7070
}
7171
}
7272

@@ -92,12 +92,13 @@ public Optional<String> evaluateLabel(List<AttributeExtractor<String>> attribute
9292
attributeExtractor.getAttribute()
9393
);
9494
for (String value : attributeExtractor.extract()) {
95-
Optional<String> possibleMatch = valueStore.get(value);
95+
List<Set<String>> candidateMatches = valueStore.getAll(value);
9696

97-
if (possibleMatch.isEmpty()) {
97+
if (candidateMatches == null || candidateMatches.isEmpty()) {
9898
return Optional.empty();
9999
}
100100

101+
Optional<String> possibleMatch = candidateMatches.get(0).stream().findAny();
101102
if (result.isEmpty()) {
102103
result = possibleMatch;
103104
} else {

0 commit comments

Comments
 (0)