Skip to content

Commit

Permalink
Fixed keyword range ordinal seek status logic and added keyword in tests
Browse files Browse the repository at this point in the history
Signed-off-by: expani <anijainc@amazon.com>
  • Loading branch information
expani committed Jan 24, 2025
1 parent 973edf5 commit 5e7a711
Show file tree
Hide file tree
Showing 7 changed files with 137 additions and 57 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,7 @@ private FixedLengthStarTreeNode binarySearchChild(long dimensionValue, StarTreeN
}

@Override
public void collectChildrenInRange(Long low, Long high, StarTreeNodeCollector collector) throws IOException {
public void collectChildrenInRange(long low, long high, StarTreeNodeCollector collector) throws IOException {
if (low <= high) {
FixedLengthStarTreeNode lowStarTreeNode = binarySearchChild(low, true, null);
if (lowStarTreeNode != null) {
Expand All @@ -307,21 +307,33 @@ public void collectChildrenInRange(Long low, Long high, StarTreeNodeCollector co
}
}

/**
*
* @param dimensionValue : The dimension to match.
* @param matchNextHighest : If true then we try to return @dimensionValue or the next Highest. Else, we return @dimensionValue or the next Lowest.
* @param lastMatchedNode : If not null, we begin the binary search from the node after this.
* @return : Matched node or null.
* @throws IOException
*/
private FixedLengthStarTreeNode binarySearchChild(long dimensionValue, boolean matchNextHighest, StarTreeNode lastMatchedNode)
throws IOException {

int low = firstChildId;
int tempLow = low;
int starNodeId, nullNodeId;
starNodeId = nullNodeId = Integer.MIN_VALUE;

// if the current node is star node, increment the tempLow to reduce the search space
if (matchStarTreeNodeTypeOrNull(new FixedLengthStarTreeNode(in, tempLow), StarTreeNodeType.STAR) != null) {
starNodeId = tempLow;
tempLow++;
}

int high = getInt(LAST_CHILD_ID_OFFSET);
int tempHigh = high;
// if the current node is null node, decrement the tempHigh to reduce the search space
if (matchStarTreeNodeTypeOrNull(new FixedLengthStarTreeNode(in, tempHigh), StarTreeNodeType.NULL) != null) {
nullNodeId = tempHigh;
tempHigh--;
}

Expand All @@ -346,8 +358,7 @@ private FixedLengthStarTreeNode binarySearchChild(long dimensionValue, boolean m
if (midDimensionValue < dimensionValue) { // Going to the right from mid to search next
tempLow = mid + 1;
// We are going out of bounds for this dimension on the right side.
if ((tempLow > high)
|| matchStarTreeNodeTypeOrNull(new FixedLengthStarTreeNode(in, tempLow), StarTreeNodeType.NULL) != null) {
if (tempLow > high || tempLow == nullNodeId) {
return matchNextHighest ? null : midNode;
} else {
FixedLengthStarTreeNode nodeGreaterThanMid = new FixedLengthStarTreeNode(in, tempLow);
Expand All @@ -358,8 +369,7 @@ private FixedLengthStarTreeNode binarySearchChild(long dimensionValue, boolean m
} else { // Going to the left from mid to search next
tempHigh = mid - 1;
// We are going out of bounds for this dimension on the left side.
if ((tempHigh < low)
|| matchStarTreeNodeTypeOrNull(new FixedLengthStarTreeNode(in, tempHigh), StarTreeNodeType.STAR) != null) {
if (tempHigh < low || tempHigh == starNodeId) {
return matchNextHighest ? midNode : null;
} else {
FixedLengthStarTreeNode nodeLessThanMid = new FixedLengthStarTreeNode(in, tempHigh);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ public interface StarTreeNode {

StarTreeNode getChildForDimensionValue(Long dimensionValue, StarTreeNode lastMatchedChild) throws IOException;

void collectChildrenInRange(Long low, Long high, StarTreeNodeCollector collector) throws IOException;
void collectChildrenInRange(long low, long high, StarTreeNodeCollector collector) throws IOException;

/**
* Returns the child star node for a node in the star-tree.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,12 @@ public void initialiseForSegment(StarTreeValues starTreeValues, SearchContext se
Optional<Long> lowOrdinalFound = dimensionFilterMapper.getMatchingOrdinal(dimensionName, low, starTreeValues, lowMatchType);
if (lowOrdinalFound.isPresent()) {
lowOrdinal = lowOrdinalFound.get();
} else { // This is only valid for Non-numeric fields.
} else {
// This is only valid for Non-numeric fields.
// High can't be found since nothing >= low exists.
lowOrdinal = highOrdinal = Long.MAX_VALUE;
skipRangeCollection = true;
return; // High can't be found since nothing >= low exists.
return;
}
}
highOrdinal = Long.MAX_VALUE;
Expand All @@ -76,7 +78,6 @@ public void matchStarTreeNodes(StarTreeNode parentNode, StarTreeValues starTreeV

@Override
public boolean matchDimValue(long ordinal, StarTreeValues starTreeValues) {
// FIXME : Needs to be based on Match Type.
return lowOrdinal <= ordinal && ordinal <= highOrdinal;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
import static org.opensearch.index.mapper.NumberFieldMapper.NumberType.SHORT;
import static org.opensearch.index.mapper.NumberFieldMapper.NumberType.hasDecimalPart;
import static org.opensearch.index.mapper.NumberFieldMapper.NumberType.signum;
import static org.opensearch.search.startree.filter.DimensionFilter.MatchType.GTE;

public interface DimensionFilterMapper {
DimensionFilter getExactMatchFilter(MappedFieldType mappedFieldType, List<Object> rawValues);
Expand Down Expand Up @@ -345,20 +346,36 @@ public Optional<Long> getMatchingOrdinal(
} else {
TermsEnum termsEnum = sortedSetIterator.termsEnum();
TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil((BytesRef) value);
if (matchType == DimensionFilter.MatchType.GT || matchType == DimensionFilter.MatchType.GTE) {
// We reached the end and couldn't match anything, else we found a term which matches.
return (seekStatus == TermsEnum.SeekStatus.END) ? Optional.empty() : Optional.of(termsEnum.ord());
} else { // LT || LTE
// If we found a term just greater, then return ordinal of the term just before it.
if (seekStatus == TermsEnum.SeekStatus.NOT_FOUND) {
long ordGreaterThanValue = termsEnum.ord();
// Checking if we are in bounds for satisfying LT
return ((ordGreaterThanValue - 1) < sortedSetIterator.getValueCount())
? Optional.of(ordGreaterThanValue - 1)
: Optional.empty();
} else {
return Optional.of(termsEnum.ord());
}
// We reached the end and couldn't match anything, else we found a term which matches.
// LT || LTE
// If we found a term just greater, then return ordinal of the term just before it.
// Checking if we are in bounds for satisfying LT
// Checking if we are in bounds for satisfying LT
switch (matchType) {
case GTE:
return seekStatus == TermsEnum.SeekStatus.END ? Optional.empty() : Optional.of(termsEnum.ord());
case GT:
return switch (seekStatus) {
case END -> Optional.empty();
case FOUND -> ((termsEnum.ord() + 1) < sortedSetIterator.getValueCount())
? Optional.of(termsEnum.ord() + 1)
: Optional.empty();
case NOT_FOUND -> Optional.of(termsEnum.ord());
};
case LTE:
if (seekStatus == TermsEnum.SeekStatus.NOT_FOUND) {
return ((termsEnum.ord() - 1) >= 0) ? Optional.of(termsEnum.ord() - 1) : Optional.empty();
} else {
return Optional.of(termsEnum.ord());
}
case LT:
if (seekStatus == TermsEnum.SeekStatus.END) {
return Optional.of(termsEnum.ord());
} else {
return ((termsEnum.ord() - 1) >= 0) ? Optional.of(termsEnum.ord() - 1) : Optional.empty();
}
default:
throw new IllegalStateException("unexpected matchType " + matchType);
}
}
} catch (IOException e) {
Expand All @@ -371,14 +388,16 @@ public Optional<Long> getMatchingOrdinal(

// TODO : Think around making TermBasedFT#indexedValueForSearch() accessor public for reuse here.
private Object parseRawKeyword(String field, Object rawValue, KeywordFieldType keywordFieldType) {
Object parsedValue;
if (keywordFieldType.getTextSearchInfo().getSearchAnalyzer() == Lucene.KEYWORD_ANALYZER) {
parsedValue = BytesRefs.toBytesRef(rawValue);
} else {
if (rawValue instanceof BytesRef) {
rawValue = ((BytesRef) rawValue).utf8ToString();
Object parsedValue = null;
if (rawValue != null) {
if (keywordFieldType.getTextSearchInfo().getSearchAnalyzer() == Lucene.KEYWORD_ANALYZER) {
parsedValue = BytesRefs.toBytesRef(rawValue);
} else {
if (rawValue instanceof BytesRef) {
rawValue = ((BytesRef) rawValue).utf8ToString();
}
parsedValue = keywordFieldType.getTextSearchInfo().getSearchAnalyzer().normalize(field, rawValue.toString());
}
parsedValue = keywordFieldType.getTextSearchInfo().getSearchAnalyzer().normalize(field, rawValue.toString());
}
return parsedValue;
}
Expand Down
Loading

0 comments on commit 5e7a711

Please sign in to comment.