Skip to content

Commit

Permalink
Improve like predicate speed
Browse files (browse the repository at this point in the history)
  • Loading branch information
linxt20 committed Sep 22, 2024
1 parent a37b0ed commit 97e5c35
Show file tree
Hide file tree
Showing 18 changed files with 297 additions and 142 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ public void testCompareOperations() {
"root.test.sg1.s1 = root.test.sg1.s2",
"root.test.sg1.s1 > root.test.sg1.s2",
"root.test.sg1.s1 < root.test.sg1.s2",
"root.test.sg1.s5 LIKE '^test$'",
"root.test.sg1.s5 LIKE 'LikePattern{pattern='test', escape=\\}'",
"root.test.sg1.s2 IN (1,2)",
"root.test.sg1.s2 BETWEEN 1 AND 3",
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,8 @@ private static void createTable() {

statement.execute(
"create table vehicle2(device_id STRING ID, s1 FLOAT MEASUREMENT, s2 DOUBLE MEASUREMENT, empty DOUBLE MEASUREMENT)");
statement.execute(
"create table likeTest(device_id STRING ID, s1 TEXT MEASUREMENT, s2 STRING MEASUREMENT)");
} catch (SQLException throwable) {
fail(throwable.getMessage());
}
Expand All @@ -101,6 +103,12 @@ private static void generateData() {
statement.execute("insert into vehicle1(time,device_id,s5) values(1, 'd1', '2024-01-01')");
statement.execute("insert into vehicle1(time,device_id,s5) values(2, 'd1','2024-01-02')");
statement.execute("insert into vehicle1(time,device_id,s5) values(3, 'd1','2024-01-03')");
statement.execute(
"insert into likeTest(time,device_id,s1,s2) values(1, 'd1','abcdef', '123456')");
statement.execute(
"insert into likeTest(time,device_id,s1,s2) values(2, 'd1','_abcdef', '123\\456')");
statement.execute(
"insert into likeTest(time,device_id,s1,s2) values(3, 'd1','abcdef%', '123#456')");
} catch (SQLException throwable) {
fail(throwable.getMessage());
}
Expand Down Expand Up @@ -403,45 +411,63 @@ public void testBetweenExpression() {
public void testRegularLikeInExpressions() {
try (Connection connection = EnvFactory.getEnv().getConnection(BaseEnv.TABLE_SQL_DIALECT);
Statement statement = connection.createStatement()) {
// String query =
// "SELECT s1 FROM vehicle1 where device_id='d1' WHERE s3 LIKE '_' && s3 REGEXP
// '[0-9]' && s3 IN ('4', '2', '3')";
// try (ResultSet rs = statement.executeQuery(query)) {
// for (int i = 2; i <= 4; i++) {
// Assert.assertTrue(rs.next());
// Assert.assertEquals(i, rs.getLong(1));
// }
// Assert.assertFalse(rs.next());
// }

// String query2 =
// "SELECT s1 FROM vehicle1 where device_id='d1' WHERE s4 LIKE '_' && s4 REGEXP
// '[0-9]' && s4 IN ('4', '2', '3')";
// try (ResultSet rs = statement.executeQuery(query2)) {
// for (int i = 2; i <= 4; i++) {
// Assert.assertTrue(rs.next());
// Assert.assertEquals(i, rs.getLong(1));
// }
// Assert.assertFalse(rs.next());
// }
statement.execute("USE " + DATABASE_NAME);
String[] ans = new String[] {"abcdef"};
String query = "SELECT s1 FROM likeTest where s1 LIKE 'abcdef'";
try (ResultSet rs = statement.executeQuery(query)) {
for (int i = 2; i < 3; i++) {
Assert.assertTrue(rs.next());
Assert.assertEquals(ans[i - 2], rs.getString(1));
}
Assert.assertFalse(rs.next());
}

// String query3 =
// "SELECT time,s1 FROM vehicle1 where device_id='d1' and s5 IN ('2024-01-01',
// '2024-01-02', '2024-01-03')";
// try (ResultSet rs = statement.executeQuery(query3)) {
// for (int i = 1; i <= 3; i++) {
// Assert.assertTrue(rs.next());
// Assert.assertEquals(i, rs.getLong(1));
// }
// Assert.assertFalse(rs.next());
// }

String query4 = "SELECT time,s1 FROM vehicle1 where device_id='d1' and s6 IN (1, 2, 3)";
try (ResultSet rs = statement.executeQuery(query4)) {
for (int i = 1; i <= 3; i++) {
ans = new String[] {"_abcdef"};
query = "SELECT s1 FROM likeTest where s1 LIKE '\\_%' escape '\\'";
try (ResultSet rs = statement.executeQuery(query)) {
for (int i = 2; i < 3; i++) {
Assert.assertTrue(rs.next());
Assert.assertEquals(i, rs.getLong(1));
Assert.assertEquals(ans[i - 2], rs.getString(1));
}
Assert.assertFalse(rs.next());
}

ans = new String[] {"abcdef", "_abcdef", "abcdef%"};
query = "SELECT s1 FROM likeTest where s1 LIKE '%abcde%' escape '\\'";
try (ResultSet rs = statement.executeQuery(query)) {
for (int i = 2; i < 5; i++) {
Assert.assertTrue(rs.next());
Assert.assertEquals(ans[i - 2], rs.getString(1));
}
Assert.assertFalse(rs.next());
}

ans = new String[] {"123456"};
query = "SELECT s2 FROM likeTest where s2 LIKE '12345_'";
try (ResultSet rs = statement.executeQuery(query)) {
for (int i = 2; i < 3; i++) {
Assert.assertTrue(rs.next());
Assert.assertEquals(ans[i - 2], rs.getString(1));
}
Assert.assertFalse(rs.next());
}

ans = new String[] {"123\\456"};
query = "SELECT s2 FROM likeTest where s2 LIKE '%\\\\%' escape '\\'";
try (ResultSet rs = statement.executeQuery(query)) {
for (int i = 2; i < 3; i++) {
Assert.assertTrue(rs.next());
Assert.assertEquals(ans[i - 2], rs.getString(1));
}
Assert.assertFalse(rs.next());
}

ans = new String[] {"123#456"};
query = "SELECT s2 FROM likeTest where s2 LIKE '123##456' escape '#'";
try (ResultSet rs = statement.executeQuery(query)) {
for (int i = 2; i < 3; i++) {
Assert.assertTrue(rs.next());
Assert.assertEquals(ans[i - 2], rs.getString(1));
}
Assert.assertFalse(rs.next());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,8 @@
import org.apache.iotdb.db.queryengine.transformation.dag.column.multi.LogicalOrMultiColumnTransformer;
import org.apache.iotdb.db.queryengine.transformation.dag.column.ternary.BetweenColumnTransformer;
import org.apache.iotdb.db.queryengine.transformation.dag.column.unary.IsNullColumnTransformer;
import org.apache.iotdb.db.queryengine.transformation.dag.column.unary.LikeColumnTransformer;
import org.apache.iotdb.db.queryengine.transformation.dag.column.unary.LogicNotColumnTransformer;
import org.apache.iotdb.db.queryengine.transformation.dag.column.unary.RegularColumnTransformer;
import org.apache.iotdb.db.queryengine.transformation.dag.column.unary.scalar.AbsColumnTransformer;
import org.apache.iotdb.db.queryengine.transformation.dag.column.unary.scalar.AcosColumnTransformer;
import org.apache.iotdb.db.queryengine.transformation.dag.column.unary.scalar.AsinColumnTransformer;
Expand Down Expand Up @@ -148,6 +148,7 @@
import org.apache.iotdb.db.queryengine.transformation.dag.column.unary.scalar.UpperColumnTransformer;

import org.apache.tsfile.common.conf.TSFileConfig;
import org.apache.tsfile.common.regexp.LikePattern;
import org.apache.tsfile.enums.TSDataType;
import org.apache.tsfile.read.common.block.column.BinaryColumn;
import org.apache.tsfile.read.common.block.column.BooleanColumn;
Expand All @@ -173,14 +174,13 @@
import static org.apache.iotdb.db.queryengine.plan.relational.analyzer.predicate.PredicatePushIntoMetadataChecker.isStringLiteral;
import static org.apache.iotdb.db.queryengine.plan.relational.type.InternalTypeManager.getTSDataType;
import static org.apache.iotdb.db.queryengine.plan.relational.type.TypeSignatureTranslator.toTypeSignature;
import static org.apache.tsfile.common.regexp.LikePattern.getEscapeCharacter;
import static org.apache.tsfile.read.common.type.BlobType.BLOB;
import static org.apache.tsfile.read.common.type.BooleanType.BOOLEAN;
import static org.apache.tsfile.read.common.type.DoubleType.DOUBLE;
import static org.apache.tsfile.read.common.type.IntType.INT32;
import static org.apache.tsfile.read.common.type.LongType.INT64;
import static org.apache.tsfile.read.common.type.StringType.STRING;
import static org.apache.tsfile.utils.RegexUtils.compileRegex;
import static org.apache.tsfile.utils.RegexUtils.parseLikePatternToRegex;

public class ColumnTransformerBuilder
extends AstVisitor<ColumnTransformer, ColumnTransformerBuilder.Context> {
Expand Down Expand Up @@ -1153,13 +1153,18 @@ protected ColumnTransformer visitLikePredicate(LikePredicate node, Context conte
context.cache.put(node, identity);
} else {
ColumnTransformer childColumnTransformer = process(node.getValue(), context);
Optional<String> escapeValueOpt =
node.getEscape().isPresent()
? Optional.ofNullable(((StringLiteral) node.getEscape().get()).getValue())
: Optional.empty();
context.cache.put(
node,
new RegularColumnTransformer(
new LikeColumnTransformer(
BOOLEAN,
childColumnTransformer,
compileRegex(
parseLikePatternToRegex(((StringLiteral) node.getPattern()).getValue()))));
LikePattern.compile(
((StringLiteral) node.getPattern()).getValue(),
getEscapeCharacter(escapeValueOpt))));
}
}
ColumnTransformer res = context.cache.get(node);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,36 +24,36 @@
import org.apache.iotdb.db.queryengine.plan.expression.ExpressionType;
import org.apache.iotdb.db.queryengine.plan.expression.visitor.ExpressionVisitor;

import org.apache.tsfile.common.regexp.LikePattern;
import org.apache.tsfile.utils.RamUsageEstimator;
import org.apache.tsfile.utils.ReadWriteIOUtils;

import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.regex.Pattern;
import java.util.Optional;

import static org.apache.tsfile.utils.RegexUtils.compileRegex;
import static org.apache.tsfile.utils.RegexUtils.parseLikePatternToRegex;
import static org.apache.tsfile.common.regexp.LikePattern.getEscapeCharacter;

public class LikeExpression extends UnaryExpression {

private static final long INSTANCE_SIZE =
RamUsageEstimator.shallowSizeOfInstance(LikeExpression.class);

private final String patternString;
private final Pattern pattern;
private final LikePattern pattern;

private final boolean isNot;

public LikeExpression(Expression expression, String patternString, boolean isNot) {
super(expression);
this.patternString = patternString;
this.isNot = isNot;
pattern = compileRegex(parseLikePatternToRegex(patternString));
pattern = LikePattern.compile(patternString, getEscapeCharacter(Optional.of("\\")));
}

public LikeExpression(
Expression expression, String patternString, Pattern pattern, boolean isNot) {
Expression expression, String patternString, LikePattern pattern, boolean isNot) {
super(expression);
this.patternString = patternString;
this.pattern = pattern;
Expand All @@ -64,14 +64,14 @@ public LikeExpression(ByteBuffer byteBuffer) {
super(Expression.deserialize(byteBuffer));
patternString = ReadWriteIOUtils.readString(byteBuffer);
isNot = ReadWriteIOUtils.readBool(byteBuffer);
pattern = compileRegex(parseLikePatternToRegex(patternString));
pattern = LikePattern.compile(patternString, getEscapeCharacter(Optional.of("\\")));
}

public String getPatternString() {
return patternString;
}

public Pattern getPattern() {
public LikePattern getPattern() {
return pattern;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,6 @@
import java.nio.ByteBuffer;
import java.util.regex.Pattern;

import static org.apache.tsfile.utils.RegexUtils.compileRegex;

public class RegularExpression extends UnaryExpression {

private static final long INSTANCE_SIZE =
Expand All @@ -49,7 +47,7 @@ public RegularExpression(Expression expression, String patternString, boolean is
super(expression);
this.patternString = patternString;
this.isNot = isNot;
pattern = compileRegex(patternString);
pattern = Pattern.compile(patternString);
}

public RegularExpression(
Expand All @@ -64,7 +62,7 @@ public RegularExpression(ByteBuffer byteBuffer) {
super(Expression.deserialize(byteBuffer));
patternString = ReadWriteIOUtils.readString(byteBuffer);
isNot = ReadWriteIOUtils.readBool(byteBuffer);
pattern = compileRegex(Validate.notNull(patternString, "patternString cannot be null"));
pattern = Pattern.compile(Validate.notNull(patternString, "patternString cannot be null"));
}

public String getPatternString() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
import org.apache.iotdb.db.queryengine.transformation.dag.column.unary.ArithmeticNegationColumnTransformer;
import org.apache.iotdb.db.queryengine.transformation.dag.column.unary.InColumnTransformer;
import org.apache.iotdb.db.queryengine.transformation.dag.column.unary.IsNullColumnTransformer;
import org.apache.iotdb.db.queryengine.transformation.dag.column.unary.LikeColumnTransformer;
import org.apache.iotdb.db.queryengine.transformation.dag.column.unary.LogicNotColumnTransformer;
import org.apache.iotdb.db.queryengine.transformation.dag.column.unary.RegularColumnTransformer;
import org.apache.iotdb.db.queryengine.transformation.dag.udf.UDTFContext;
Expand Down Expand Up @@ -442,7 +443,7 @@ private ColumnTransformer getConcreteUnaryColumnTransformer(
return new ArithmeticNegationColumnTransformer(returnType, childColumnTransformer);
case LIKE:
LikeExpression likeExpression = (LikeExpression) expression;
return new RegularColumnTransformer(
return new LikeColumnTransformer(
returnType, childColumnTransformer, likeExpression.getPattern());
case REGEXP:
RegularExpression regularExpression = (RegularExpression) expression;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@
import org.apache.iotdb.db.queryengine.transformation.dag.transformer.unary.ArithmeticNegationTransformer;
import org.apache.iotdb.db.queryengine.transformation.dag.transformer.unary.InTransformer;
import org.apache.iotdb.db.queryengine.transformation.dag.transformer.unary.IsNullTransformer;
import org.apache.iotdb.db.queryengine.transformation.dag.transformer.unary.LikeTransformer;
import org.apache.iotdb.db.queryengine.transformation.dag.transformer.unary.LogicNotTransformer;
import org.apache.iotdb.db.queryengine.transformation.dag.transformer.unary.RegularTransformer;
import org.apache.iotdb.db.queryengine.transformation.dag.transformer.unary.TransparentTransformer;
Expand Down Expand Up @@ -311,7 +312,7 @@ private Transformer getConcreteUnaryTransformer(Expression expression, LayerRead
return new ArithmeticNegationTransformer(parentReader);
case LIKE:
LikeExpression likeExpression = (LikeExpression) expression;
return new RegularTransformer(parentReader, likeExpression.getPattern());
return new LikeTransformer(parentReader, likeExpression.getPattern());
case REGEXP:
RegularExpression regularExpression = (RegularExpression) expression;
return new RegularTransformer(parentReader, regularExpression.getPattern());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
import org.apache.iotdb.db.queryengine.plan.relational.type.InternalTypeManager;

import org.apache.tsfile.common.conf.TSFileConfig;
import org.apache.tsfile.common.regexp.LikePattern;
import org.apache.tsfile.enums.TSDataType;
import org.apache.tsfile.read.common.type.Type;
import org.apache.tsfile.read.filter.basic.Filter;
Expand All @@ -58,6 +59,7 @@
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;

Expand All @@ -66,6 +68,7 @@
import static org.apache.iotdb.db.queryengine.plan.relational.analyzer.predicate.ConvertPredicateToTimeFilterVisitor.isTimeColumn;
import static org.apache.iotdb.db.queryengine.plan.relational.analyzer.predicate.PredicatePushIntoScanChecker.isLiteral;
import static org.apache.iotdb.db.queryengine.plan.relational.analyzer.predicate.PredicatePushIntoScanChecker.isSymbolReference;
import static org.apache.tsfile.common.regexp.LikePattern.getEscapeCharacter;

public class ConvertPredicateToFilterVisitor
extends PredicateVisitor<Filter, ConvertPredicateToFilterVisitor.Context> {
Expand Down Expand Up @@ -208,10 +211,17 @@ protected Filter visitLikePredicate(LikePredicate node, Context context) {
SymbolReference operand = (SymbolReference) node.getValue();
checkArgument(context.isMeasurementColumn(operand));
int measurementIndex = context.getMeasurementIndex(operand.getName());
Expression pattern = node.getPattern();
Optional<String> escapeValueOpt =
node.getEscape().isPresent()
? Optional.ofNullable(((StringLiteral) node.getEscape().get()).getValue())
: Optional.empty();
Type type = context.getType(Symbol.from(operand));
TSDataType dataType = InternalTypeManager.getTSDataType(type);
return ValueFilterApi.like(measurementIndex, getStringValue(pattern), dataType);
return ValueFilterApi.like(
measurementIndex,
LikePattern.compile(
((StringLiteral) node.getPattern()).getValue(), getEscapeCharacter(escapeValueOpt)),
dataType);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,12 @@
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.stream.Collectors;

import static com.google.common.base.Preconditions.checkArgument;
import static org.apache.iotdb.db.queryengine.plan.relational.analyzer.predicate.PredicatePushIntoScanChecker.isSymbolReference;
import static org.apache.tsfile.utils.RegexUtils.parseLikePatternToRegex;
import static org.apache.tsfile.common.regexp.LikePattern.getEscapeCharacter;

/**
* The {@link ConvertSchemaPredicateToFilterVisitor} will convert a predicate to {@link
Expand Down Expand Up @@ -116,7 +117,12 @@ protected SchemaFilter visitIsNotNullPredicate(
return null;
}
return wrapIdOrAttributeFilter(
new LikeFilter(parseLikePatternToRegex(((StringLiteral) node.getPattern()).getValue())),
new LikeFilter(
(((StringLiteral) node.getPattern()).getValue()),
node.getEscape().isPresent()
? getEscapeCharacter(
Optional.ofNullable(((StringLiteral) node.getEscape().get()).getValue()))
: Optional.empty()),
((SymbolReference) node.getValue()).getName(),
context);
}
Expand Down
Loading

0 comments on commit 97e5c35

Please sign in to comment.