Skip to content

Commit 88802bc

Browse files
committed
ESQL: URL encoding changes
- Add fixed test cases that fail decoding - Randomize the encoder used in decoding tests
1 parent 5d60753 commit 88802bc

File tree

5 files changed

+97
-46
lines changed

5 files changed

+97
-46
lines changed

x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2609,10 +2609,10 @@ Georgi | georgi
26092609
url_encode mixed input tests
26102610
required_capability: url_encode
26112611

2612-
ROW u = ["hello elastic!", "a+b-c%d", "", ".-_~", "!#$&'()*+,/:;=?@[]"] | EVAL u = URL_ENCODE(u);
2612+
ROW u = ["hello elastic!", "a+b-c%d", "", ".-_~", "!#$&'()*+,/:;=?@[]", "🔥💧"] | EVAL u = URL_ENCODE(u);
26132613

26142614
u:keyword
2615-
["hello+elastic%21", "a%2Bb-c%25d", "", ".-_~", "%21%23%24%26%27%28%29%2A%2B%2C%2F%3A%3B%3D%3F%40%5B%5D"]
2615+
["hello+elastic%21", "a%2Bb-c%25d", "", ".-_~", "%21%23%24%26%27%28%29%2A%2B%2C%2F%3A%3B%3D%3F%40%5B%5D", "%F0%9F%94%A5%F0%9F%92%A7"]
26162616
;
26172617

26182618
url_decode sample for docs

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractUrlEncodeDecodeTestCase.java

Lines changed: 92 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import org.apache.commons.codec.net.PercentCodec;
1212
import org.apache.lucene.util.BytesRef;
1313
import org.elasticsearch.common.lucene.BytesRefs;
14+
import org.elasticsearch.core.Tuple;
1415
import org.elasticsearch.xpack.esql.core.type.DataType;
1516
import org.elasticsearch.xpack.esql.expression.function.scalar.util.UrlCodecUtils;
1617

@@ -23,12 +24,37 @@
2324
import java.util.function.Supplier;
2425

2526
import static org.hamcrest.Matchers.equalTo;
27+
import static org.hamcrest.Matchers.is;
28+
import static org.hamcrest.Matchers.nullValue;
2629

2730
public abstract class AbstractUrlEncodeDecodeTestCase extends AbstractScalarFunctionTestCase {
2831

2932
private static final PercentCodec urlEncodeCodec;
3033
private static final PercentCodec urlEncodeComponentCodec;
3134

35+
public enum PercentCodecTestType {
36+
ENCODE("UrlEncodeEvaluator[val=Attribute[channel=0]]"),
37+
ENCODE_COMPONENT("UrlEncodeComponentEvaluator[val=Attribute[channel=0]]"),
38+
DECODE("UrlDecodeEvaluator[val=Attribute[channel=0]]");
39+
40+
public final String evaluatorToString;
41+
42+
PercentCodecTestType(String evaluatorToString) {
43+
this.evaluatorToString = evaluatorToString;
44+
}
45+
46+
public PercentCodec getCodec() {
47+
return switch (this) {
48+
case ENCODE -> urlEncodeCodec;
49+
case ENCODE_COMPONENT -> urlEncodeComponentCodec;
50+
51+
// Randomized decoder tests apply a random encoder to the input to make it decodable. Fixed bad cases for the decoder skip
52+
// this by design, in order to assert undecodable input is handled gracefully.
53+
case DECODE -> randomBoolean() ? urlEncodeCodec : urlEncodeComponentCodec;
54+
};
55+
}
56+
}
57+
3258
static {
3359
// Both codecs percent-encode all characters in the input except for alphanumerics, '-', '.', '_', and '~'. The space character is a
3460
// special case, as it can be either percent-encoded or replaced with a '+'.
@@ -46,30 +72,20 @@ public abstract class AbstractUrlEncodeDecodeTestCase extends AbstractScalarFunc
4672

4773
private record RandomUrl(String plain, String encoded) {}
4874

49-
public static Iterable<Object[]> createParameters(boolean isEncoderTest, String evaluatorToString) {
75+
public static Iterable<Object[]> createParameters(PercentCodecTestType codecTestType) {
5076
List<TestCaseSupplier> suppliers = new ArrayList<>();
5177

52-
// Pick a random encoder during decoder tests, for more coverage.
53-
boolean useUrlComponentEncoder = isEncoderTest
54-
? evaluatorToString.equals("UrlEncodeComponentEvaluator[val=Attribute[channel=0]]")
55-
: randomBoolean();
56-
5778
for (DataType dataType : DataType.stringTypes()) {
5879
// random URL tests
59-
Supplier<TestCaseSupplier.TestCase> caseSupplier = () -> createTestCaseWithRandomUrl(
60-
dataType,
61-
evaluatorToString,
62-
isEncoderTest,
63-
useUrlComponentEncoder
64-
);
80+
Supplier<TestCaseSupplier.TestCase> caseSupplier = () -> createTestCaseWithRandomUrl(dataType, codecTestType);
6581
suppliers.add(new TestCaseSupplier(List.of(dataType), caseSupplier));
6682

6783
// random strings tests
6884
for (TestCaseSupplier.TypedDataSupplier supplier : TestCaseSupplier.stringCases(dataType)) {
6985
TestCaseSupplier testCaseSupplier = new TestCaseSupplier(
7086
supplier.name(),
7187
List.of(supplier.type()),
72-
() -> createTestCaseWithRandomString(dataType, evaluatorToString, isEncoderTest, useUrlComponentEncoder, supplier)
88+
() -> createTestCaseWithRandomString(dataType, codecTestType, supplier)
7389
);
7490
suppliers.add(testCaseSupplier);
7591
}
@@ -89,68 +105,94 @@ public static Iterable<Object[]> createParameters(boolean isEncoderTest, String
89105
new String(allAsciiChars(), StandardCharsets.UTF_8) };
90106

91107
for (String input : fixedInputs) {
92-
suppliers.add(createFixedTestCase(dataType, input, evaluatorToString, isEncoderTest, useUrlComponentEncoder));
108+
suppliers.add(createFixedTestCase(dataType, input, codecTestType));
109+
}
110+
111+
if (codecTestType == PercentCodecTestType.DECODE) {
112+
// bad inputs for decoder tests aren't encoded first (as they wouldn't be bad then), but are expected to be handled
113+
// gracefully by the decoder.
114+
115+
List<Tuple<String, String>> tuples = List.of(
116+
// incomplete sequence
117+
Tuple.tuple("%1", "Line 1:1: java.lang.IllegalArgumentException: URLDecoder: Incomplete trailing escape (%) pattern"),
118+
119+
// missing sequence
120+
Tuple.tuple("%", "Line 1:1: java.lang.IllegalArgumentException: URLDecoder: Incomplete trailing escape (%) pattern"),
121+
122+
// invalid hex digits
123+
Tuple.tuple(
124+
"%xy",
125+
"Line 1:1: java.lang.IllegalArgumentException: URLDecoder: Illegal hex characters in escape (%) pattern - not a hexadecimal digit: \"x\" = 120"
126+
),
127+
128+
// valid and invalid sequences
129+
Tuple.tuple(
130+
"foo+bar%20qux%mn",
131+
"Line 1:1: java.lang.IllegalArgumentException: URLDecoder: Illegal hex characters in escape (%) pattern - Error at index 0 in: \"mn\""
132+
)
133+
);
134+
135+
for (Tuple<String, String> t : tuples) {
136+
String undecodableInput = t.v1();
137+
String expectedErrorMessage = t.v2();
138+
suppliers.add(createBadDecoderTestCase(dataType, undecodableInput, expectedErrorMessage));
139+
}
93140
}
94141
}
95142

96143
return parameterSuppliersFromTypedDataWithDefaultChecksNoErrors(false, suppliers);
97144

98145
}
99146

100-
public static TestCaseSupplier.TestCase createTestCaseWithRandomUrl(
101-
DataType dataType,
102-
String evaluatorToString,
103-
boolean isEncoderTest,
104-
boolean useUrlComponentEncoder
105-
) {
106-
RandomUrl url = generateRandomUrl(useUrlComponentEncoder);
147+
public static TestCaseSupplier.TestCase createTestCaseWithRandomUrl(DataType dataType, PercentCodecTestType codecTestType) {
148+
boolean isEncoderTest = (codecTestType != PercentCodecTestType.DECODE);
149+
RandomUrl url = generateRandomUrl(codecTestType);
107150
BytesRef input = new BytesRef(isEncoderTest ? url.plain() : url.encoded());
108151
BytesRef output = new BytesRef(isEncoderTest ? url.encoded() : url.plain());
109152
TestCaseSupplier.TypedData fieldTypedData = new TestCaseSupplier.TypedData(input, dataType, "string");
110153

111-
return new TestCaseSupplier.TestCase(List.of(fieldTypedData), evaluatorToString, dataType, equalTo(output));
154+
return new TestCaseSupplier.TestCase(List.of(fieldTypedData), codecTestType.evaluatorToString, dataType, equalTo(output));
112155
}
113156

114157
public static TestCaseSupplier.TestCase createTestCaseWithRandomString(
115158
DataType dataType,
116-
String evaluatorToString,
117-
boolean isEncoderTest,
118-
boolean useUrlComponentEncoder,
159+
PercentCodecTestType codecTestType,
119160
TestCaseSupplier.TypedDataSupplier supplier
120161
) {
162+
boolean isEncoderTest = (codecTestType != PercentCodecTestType.DECODE);
121163
TestCaseSupplier.TypedData fieldTypedData = supplier.get();
122164
String plain = BytesRefs.toBytesRef(fieldTypedData.data()).utf8ToString();
123-
String encoded = encode(plain, useUrlComponentEncoder);
165+
String encoded = encode(plain, codecTestType);
124166
BytesRef input = new BytesRef(isEncoderTest ? plain : encoded);
125167
BytesRef output = new BytesRef(isEncoderTest ? encoded : plain);
126168

127169
return new TestCaseSupplier.TestCase(
128170
List.of(new TestCaseSupplier.TypedData(input, dataType, "string")),
129-
evaluatorToString,
171+
codecTestType.evaluatorToString,
130172
dataType,
131173
equalTo(output)
132174
);
133175
}
134176

135-
private static RandomUrl generateRandomUrl(boolean useUrlComponentEncoder) {
177+
private static RandomUrl generateRandomUrl(PercentCodecTestType codecTestType) {
136178
String protocol = randomFrom("http://", "https://", "");
137179
String domain = String.format(Locale.ROOT, "%s.com", randomAlphaOfLengthBetween(3, 10));
138180
String path = randomFrom("", "/" + randomAlphanumericOfLength(5) + "/");
139181
String query = randomFrom("", "?" + randomAlphaOfLength(5) + "=" + randomAlphanumericOfLength(5));
140182
String space = " "; // ensure the correct encoding for space (+ or %20)
141183

142184
String plain = String.format(Locale.ROOT, "%s%s%s%s%s", protocol, domain, path, query, space);
143-
String encoded = encode(plain, useUrlComponentEncoder);
185+
String encoded = encode(plain, codecTestType);
144186

145187
return new RandomUrl(plain, encoded);
146188
}
147189

148-
private static String encode(String plain, boolean useUrlComponentEncoder) {
190+
private static String encode(String plain, PercentCodecTestType codecTestType) {
149191
byte[] plainBytes = plain.getBytes(StandardCharsets.UTF_8);
150192
byte[] encoded = null;
151193

152194
try {
153-
encoded = useUrlComponentEncoder ? urlEncodeComponentCodec.encode(plainBytes) : urlEncodeCodec.encode(plainBytes);
195+
encoded = codecTestType.getCodec().encode(plainBytes);
154196
} catch (EncoderException ex) {
155197
// Checked exception isn't really thrown, but we must handle it given the signature of PercentCodec.encode().
156198
throw new RuntimeException(ex);
@@ -186,26 +228,35 @@ private static byte[] buildUnsafeBytes(final Set<Character> additionallySafe) {
186228
return bytes;
187229
}
188230

189-
private static TestCaseSupplier createFixedTestCase(
190-
DataType dataType,
191-
String plain,
192-
String evaluatorToString,
193-
boolean isEncoderTest,
194-
boolean useUrlComponentEncoder
195-
) {
231+
private static TestCaseSupplier createFixedTestCase(DataType dataType, String plain, PercentCodecTestType codecTestType) {
196232
return new TestCaseSupplier(List.of(dataType), () -> {
197-
String encoded = encode(plain, useUrlComponentEncoder);
198-
String input = isEncoderTest ? plain : encoded;
233+
boolean isEncoderTest = (codecTestType != PercentCodecTestType.DECODE);
234+
String encoded = encode(plain, codecTestType);
235+
String input = (isEncoderTest) ? plain : encoded;
199236
String output = isEncoderTest ? encoded : plain;
237+
200238
return new TestCaseSupplier.TestCase(
201239
List.of(new TestCaseSupplier.TypedData(new BytesRef(input), dataType, "string")),
202-
evaluatorToString,
240+
codecTestType.evaluatorToString,
203241
dataType,
204242
equalTo(new BytesRef(output))
205243
);
206244
});
207245
}
208246

247+
private static TestCaseSupplier createBadDecoderTestCase(DataType dataType, String undecodable, String exceptionMessage) {
248+
return new TestCaseSupplier(
249+
List.of(dataType),
250+
() -> new TestCaseSupplier.TestCase(
251+
List.of(new TestCaseSupplier.TypedData(new BytesRef(undecodable), dataType, "string")),
252+
PercentCodecTestType.DECODE.evaluatorToString,
253+
dataType,
254+
is(nullValue())
255+
).withWarning("Line 1:1: evaluation of [source] failed, treating result as null. Only first 20 failures recorded.")
256+
.withWarning(exceptionMessage)
257+
);
258+
}
259+
209260
private static byte[] allAsciiChars() {
210261
byte[] bytes = new byte[Byte.MAX_VALUE + 1];
211262
for (int i = 0; i < bytes.length; ++i) {

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/UrlDecodeTests.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ public UrlDecodeTests(@Name("TestCase") Supplier<TestCaseSupplier.TestCase> test
2626

2727
@ParametersFactory
2828
public static Iterable<Object[]> parameters() {
29-
return createParameters(false, "UrlDecodeEvaluator[val=Attribute[channel=0]]");
29+
return createParameters(PercentCodecTestType.DECODE);
3030
}
3131

3232
@Override

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/UrlEncodeComponentTests.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ public UrlEncodeComponentTests(@Name("TestCase") Supplier<TestCaseSupplier.TestC
2626

2727
@ParametersFactory
2828
public static Iterable<Object[]> parameters() {
29-
return createParameters(true, "UrlEncodeComponentEvaluator[val=Attribute[channel=0]]");
29+
return createParameters(PercentCodecTestType.ENCODE_COMPONENT);
3030
}
3131

3232
@Override

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/UrlEncodeTests.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ public UrlEncodeTests(@Name("TestCase") Supplier<TestCaseSupplier.TestCase> test
2626

2727
@ParametersFactory
2828
public static Iterable<Object[]> parameters() {
29-
return createParameters(true, "UrlEncodeEvaluator[val=Attribute[channel=0]]");
29+
return createParameters(PercentCodecTestType.ENCODE);
3030
}
3131

3232
@Override

0 commit comments

Comments
 (0)