Skip to content
This repository has been archived by the owner on Dec 1, 2022. It is now read-only.

Add support to compare different numerical types in LOOKUP WHERE clause #784

Merged
merged 10 commits into from
Mar 9, 2021
31 changes: 29 additions & 2 deletions src/validator/LookupValidator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,9 @@ Status LookupValidator::rewriteRelExpr(RelationalExpression* expr) {

std::string prop = la->right()->value().getStr();
// rewrite ConstantExpression
auto c = leftIsAE ? checkConstExpr(right, prop) : checkConstExpr(left, prop);
auto relExprType = expr->kind();
auto c = leftIsAE ? checkConstExpr(right, prop, relExprType, leftIsAE)
: checkConstExpr(left, prop, relExprType, leftIsAE);

if (!c.ok()) {
return Status::SemanticError("expression error : %s", left->toString().c_str());
Expand Down Expand Up @@ -387,7 +389,10 @@ Status LookupValidator::rewriteRelExpr(RelationalExpression* expr) {
return Status::OK();
}

StatusOr<Value> LookupValidator::checkConstExpr(Expression* expr, const std::string& prop) {
StatusOr<Value> LookupValidator::checkConstExpr(Expression* expr,
const std::string& prop,
const Expression::Kind kind,
bool leftIsAE) {
if (!evaluableExpr(expr)) {
return Status::SemanticError("'%s' is not an evaluable expression.",
expr->toString().c_str());
Expand All @@ -397,6 +402,28 @@ StatusOr<Value> LookupValidator::checkConstExpr(Expression* expr, const std::str
auto type = schema->getFieldType(prop);
QueryExpressionContext dummy(nullptr);
auto v = Expression::eval(expr, dummy);
// TODO(Aiee) extract the type cast logic as a method if we decide to support more cross-type
// comparisons.

// Allow comparison between different numeric types (INT vs. FLOAT)
if (graph::SchemaUtil::propTypeToValueType(type) == Value::Type::FLOAT && v.isInt()) {
return v.toFloat();
} else if (graph::SchemaUtil::propTypeToValueType(type) == Value::Type::INT && v.isFloat()) {
// Example: `col1 < 10.5` maps to the range [min, 11), while `col1 < 10` maps to [min, 10)
double f = v.getFloat();
int iCeil = ceil(f);
int iFloor = floor(f);
if ((leftIsAE && (kind == Expression::Kind::kRelGE || kind == Expression::Kind::kRelLT)) ||
(!leftIsAE && (kind == Expression::Kind::kRelGT || kind == Expression::Kind::kRelLE))) {
// edge case col1 >= 40.0, no need to round up
if (abs(f - iCeil) < kEpsilon) {
return iFloor;
}
return iCeil;
}
return iFloor;
}

if (v.type() != SchemaUtil::propTypeToValueType(type)) {
return Status::SemanticError("Column type error : %s", prop.c_str());
}
Expand Down
5 changes: 4 additions & 1 deletion src/validator/LookupValidator.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,10 @@ class LookupValidator final : public Validator {

Status rewriteRelExpr(RelationalExpression* expr);

StatusOr<Value> checkConstExpr(Expression* expr, const std::string& prop);
StatusOr<Value> checkConstExpr(Expression* expr,
const std::string& prop,
const Expression::Kind kind,
bool leftIsAE);

Status checkTSService();

Expand Down
177 changes: 176 additions & 1 deletion tests/tck/features/lookup/ByIndex.feature
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
Feature: Lookup by index itself

Background:
Given a graph with space named "nba"
Given an empty graph
And load "nba" csv data to a new space
And wait 3 seconds

Scenario: [1] tag index
When executing query:
Expand Down Expand Up @@ -438,3 +440,176 @@ Feature: Lookup by index itself
LOOKUP ON serve WHERE serve.start_year == serve.end_year YIELD serve.start_year AS startYear
"""
Then a SemanticError should be raised at runtime:

Scenario: [1] Compare INT and FLOAT during IndexScan
# Runs LOOKUP filters on player.age with integer literals (40) and float
# literals (40.0, 40.5). Every expected Age is a whole number, so a fractional
# bound such as 40.5 has to select the same rows as the adjacent integer bound.
# NOTE(review): presumably player.age is an INT column with an index in the
# "nba" data set - confirm against the space schema.

# Baseline: equality against an integer literal.
When executing query:
"""
LOOKUP ON player WHERE player.age == 40 YIELD player.age AS Age
"""
Then the result should be, in any order:
| VertexID | Age |
| "Dirk Nowitzki" | 40 |
| "Kobe Bryant" | 40 |
# Baseline: strict greater-than against an integer literal (40 itself excluded).
When executing query:
"""
LOOKUP ON player WHERE player.age > 40 YIELD player.age AS Age
"""
Then the result should be, in any order:
| VertexID | Age |
| "Grant Hill" | 46 |
| "Jason Kidd" | 45 |
| "Manu Ginobili" | 41 |
| "Ray Allen" | 43 |
| "Shaquile O'Neal" | 47 |
| "Steve Nash" | 45 |
| "Tim Duncan" | 42 |
| "Vince Carter" | 42 |
# Float literal with no fractional part: ">= 40.0" must still include age 40.
When executing query:
"""
LOOKUP ON player WHERE player.age >= 40.0 YIELD player.age AS Age
"""
Then the result should be, in any order:
| VertexID | Age |
| "Grant Hill" | 46 |
| "Jason Kidd" | 45 |
| "Manu Ginobili" | 41 |
| "Ray Allen" | 43 |
| "Shaquile O'Neal" | 47 |
| "Steve Nash" | 45 |
| "Tim Duncan" | 42 |
| "Vince Carter" | 42 |
| "Dirk Nowitzki" | 40 |
| "Kobe Bryant" | 40 |
# Fractional bound: "> 40.5" selects the same rows as "> 40" (ages 41 and up).
When executing query:
"""
LOOKUP ON player WHERE player.age > 40.5 YIELD player.age AS Age
"""
Then the result should be, in any order:
| VertexID | Age |
| "Grant Hill" | 46 |
| "Jason Kidd" | 45 |
| "Manu Ginobili" | 41 |
| "Ray Allen" | 43 |
| "Shaquile O'Neal" | 47 |
| "Steve Nash" | 45 |
| "Tim Duncan" | 42 |
| "Vince Carter" | 42 |
# Fractional bound: ">= 40.5" also starts at age 41; age 40 is excluded.
When executing query:
"""
LOOKUP ON player WHERE player.age >= 40.5 YIELD player.age AS Age
"""
Then the result should be, in any order:
| VertexID | Age |
| "Grant Hill" | 46 |
| "Jason Kidd" | 45 |
| "Manu Ginobili" | 41 |
| "Ray Allen" | 43 |
| "Shaquile O'Neal" | 47 |
| "Steve Nash" | 45 |
| "Tim Duncan" | 42 |
| "Vince Carter" | 42 |
# Upper bound, strict: "< 40" piped through ORDER BY / LIMIT so the first ten
# rows can be checked in a fixed order (Age descending, then Name).
When executing query:
"""
LOOKUP ON player WHERE player.age < 40
YIELD player.age AS Age, player.name AS Name | order by Age DESC, Name| limit 10
"""
Then the result should be, in order, with relax comparison:
| VertexID | Age | Name |
| "Tracy McGrady" | 39 | "Tracy McGrady" |
| "David West" | 38 | "David West" |
| "Paul Gasol" | 38 | "Paul Gasol" |
| "Yao Ming" | 38 | "Yao Ming" |
| "Dwyane Wade" | 37 | "Dwyane Wade" |
| "Amar'e Stoudemire" | 36 | "Amar'e Stoudemire" |
| "Boris Diaw" | 36 | "Boris Diaw" |
| "Tony Parker" | 36 | "Tony Parker" |
| "Carmelo Anthony" | 34 | "Carmelo Anthony" |
| "LeBron James" | 34 | "LeBron James" |
# Upper bound, inclusive: "<= 40" adds the two 40-year-old players at the top
# of the same ordered, limited listing.
When executing query:
"""
LOOKUP ON player WHERE player.age <= 40
YIELD player.age AS Age, player.name AS Name | order by Age DESC, Name| limit 10
"""
Then the result should be, in order, with relax comparison:
| VertexID | Age | Name |
| "Dirk Nowitzki" | 40 | "Dirk Nowitzki" |
| "Kobe Bryant" | 40 | "Kobe Bryant" |
| "Tracy McGrady" | 39 | "Tracy McGrady" |
| "David West" | 38 | "David West" |
| "Paul Gasol" | 38 | "Paul Gasol" |
| "Yao Ming" | 38 | "Yao Ming" |
| "Dwyane Wade" | 37 | "Dwyane Wade" |
| "Amar'e Stoudemire" | 36 | "Amar'e Stoudemire" |
| "Boris Diaw" | 36 | "Boris Diaw" |
| "Tony Parker" | 36 | "Tony Parker" |

Scenario: [2] Compare INT and FLOAT during IndexScan
# Mirror of the INT-column scenario: here the indexed property is a DOUBLE
# (weight.WEIGHT) and the filters use both integer (70) and float (70.4, 70.5)
# literals. Two vertices are inserted, with weights 70.5 and 80.0.

# Set-up: a tag with a single double property, plus an index on that property.
Given having executed:
"""
CREATE TAG weight (WEIGHT double)
"""
And having executed:
"""
CREATE TAG INDEX weight_index
ON weight(WEIGHT)
"""
# NOTE(review): presumably the wait lets the new tag and index become usable
# before the inserts - confirm the required delay.
And wait 6 seconds
When executing query:
"""
INSERT VERTEX weight(WEIGHT)
VALUES "Tim Duncan" : (70.5)
"""
Then the execution should be successful
When executing query:
"""
INSERT VERTEX weight(WEIGHT)
VALUES "Tony Parker" : (80.0)
"""
Then the execution should be successful
# Integer literal against the DOUBLE column: both 70.5 and 80.0 exceed 70.
When executing query:
"""
LOOKUP ON weight
WHERE weight.WEIGHT > 70;
"""
Then the result should be, in any order:
| VertexID |
| "Tim Duncan" |
| "Tony Parker" |
# Float literal just below the smaller stored value: both rows still match.
When executing query:
"""
LOOKUP ON weight
WHERE weight.WEIGHT > 70.4;
"""
Then the result should be, in any order:
| VertexID |
| "Tim Duncan" |
| "Tony Parker" |
# Inclusive bound equal to a stored value: 70.5 itself must be returned.
When executing query:
"""
LOOKUP ON weight
WHERE weight.WEIGHT >= 70.5;
"""
Then the result should be, in any order:
| VertexID |
| "Tim Duncan" |
| "Tony Parker" |
# Clean-up step so the space created for this feature does not linger.
Then drop the used space

# TODO: The cases below are not supported yet because of limited floating-point precision
# When executing query:
# """
# LOOKUP ON weight
# WHERE weight.WEIGHT > 70.5;
# """
# Then the result should be, in any order:
# | VertexID |
# | "Tony Parker" |
# When executing query:
# """
# LOOKUP ON weight
# WHERE weight.WEIGHT <= 80.0;
# """
# Then the result should be, in any order:
# | VertexID |
# | "Tim Duncan" |
# | "Tony Parker" |
Loading