1111import org .apache .commons .codec .net .PercentCodec ;
1212import org .apache .lucene .util .BytesRef ;
1313import org .elasticsearch .common .lucene .BytesRefs ;
14+ import org .elasticsearch .core .Tuple ;
1415import org .elasticsearch .xpack .esql .core .type .DataType ;
1516import org .elasticsearch .xpack .esql .expression .function .scalar .util .UrlCodecUtils ;
1617
2324import java .util .function .Supplier ;
2425
2526import static org .hamcrest .Matchers .equalTo ;
27+ import static org .hamcrest .Matchers .is ;
28+ import static org .hamcrest .Matchers .nullValue ;
2629
2730public abstract class AbstractUrlEncodeDecodeTestCase extends AbstractScalarFunctionTestCase {
2831
2932 private static final PercentCodec urlEncodeCodec ;
3033 private static final PercentCodec urlEncodeComponentCodec ;
3134
35+ public enum PercentCodecTestType {
36+ ENCODE ("UrlEncodeEvaluator[val=Attribute[channel=0]]" ),
37+ ENCODE_COMPONENT ("UrlEncodeComponentEvaluator[val=Attribute[channel=0]]" ),
38+ DECODE ("UrlDecodeEvaluator[val=Attribute[channel=0]]" );
39+
40+ public final String evaluatorToString ;
41+
42+ PercentCodecTestType (String evaluatorToString ) {
43+ this .evaluatorToString = evaluatorToString ;
44+ }
45+
46+ public PercentCodec getCodec () {
47+ return switch (this ) {
48+ case ENCODE -> urlEncodeCodec ;
49+ case ENCODE_COMPONENT -> urlEncodeComponentCodec ;
50+
51+ // Randomized decoder tests apply a random encoder to the input to make it decodable. Fixed bad cases for the decoder skip
52+ // this by design, in order to assert undecodable input is handled gracefully.
53+ case DECODE -> randomBoolean () ? urlEncodeCodec : urlEncodeComponentCodec ;
54+ };
55+ }
56+ }
57+
3258 static {
3359 // Both codecs percent-encode all characters in the input except for alphanumerics, '-', '.', '_', and '~'. The space character is a
3460 // special case, as it can be either percent-encoded or replaced with a '+'.
@@ -46,30 +72,20 @@ public abstract class AbstractUrlEncodeDecodeTestCase extends AbstractScalarFunc
4672
4773 private record RandomUrl (String plain , String encoded ) {}
4874
49- public static Iterable <Object []> createParameters (boolean isEncoderTest , String evaluatorToString ) {
75+ public static Iterable <Object []> createParameters (PercentCodecTestType codecTestType ) {
5076 List <TestCaseSupplier > suppliers = new ArrayList <>();
5177
52- // Pick a random encoder during decoder tests, for more coverage.
53- boolean useUrlComponentEncoder = isEncoderTest
54- ? evaluatorToString .equals ("UrlEncodeComponentEvaluator[val=Attribute[channel=0]]" )
55- : randomBoolean ();
56-
5778 for (DataType dataType : DataType .stringTypes ()) {
5879 // random URL tests
59- Supplier <TestCaseSupplier .TestCase > caseSupplier = () -> createTestCaseWithRandomUrl (
60- dataType ,
61- evaluatorToString ,
62- isEncoderTest ,
63- useUrlComponentEncoder
64- );
80+ Supplier <TestCaseSupplier .TestCase > caseSupplier = () -> createTestCaseWithRandomUrl (dataType , codecTestType );
6581 suppliers .add (new TestCaseSupplier (List .of (dataType ), caseSupplier ));
6682
6783 // random strings tests
6884 for (TestCaseSupplier .TypedDataSupplier supplier : TestCaseSupplier .stringCases (dataType )) {
6985 TestCaseSupplier testCaseSupplier = new TestCaseSupplier (
7086 supplier .name (),
7187 List .of (supplier .type ()),
72- () -> createTestCaseWithRandomString (dataType , evaluatorToString , isEncoderTest , useUrlComponentEncoder , supplier )
88+ () -> createTestCaseWithRandomString (dataType , codecTestType , supplier )
7389 );
7490 suppliers .add (testCaseSupplier );
7591 }
@@ -89,68 +105,94 @@ public static Iterable<Object[]> createParameters(boolean isEncoderTest, String
89105 new String (allAsciiChars (), StandardCharsets .UTF_8 ) };
90106
91107 for (String input : fixedInputs ) {
92- suppliers .add (createFixedTestCase (dataType , input , evaluatorToString , isEncoderTest , useUrlComponentEncoder ));
108+ suppliers .add (createFixedTestCase (dataType , input , codecTestType ));
109+ }
110+
111+ if (codecTestType == PercentCodecTestType .DECODE ) {
112+ // bad inputs for decoder tests aren't encoded first (as they wouldn't be bad then), but are expected to be handled
113+ // gracefully by the decoder.
114+
115+ List <Tuple <String , String >> tuples = List .of (
116+ // incomplete sequence
117+ Tuple .tuple ("%1" , "Line 1:1: java.lang.IllegalArgumentException: URLDecoder: Incomplete trailing escape (%) pattern" ),
118+
119+ // missing sequence
120+ Tuple .tuple ("%" , "Line 1:1: java.lang.IllegalArgumentException: URLDecoder: Incomplete trailing escape (%) pattern" ),
121+
122+ // invalid hex digits
123+ Tuple .tuple (
124+ "%xy" ,
125+ "Line 1:1: java.lang.IllegalArgumentException: URLDecoder: Illegal hex characters in escape (%) pattern - not a hexadecimal digit: \" x\" = 120"
126+ ),
127+
128+ // valid and invalid sequences
129+ Tuple .tuple (
130+ "foo+bar%20qux%mn" ,
131+ "Line 1:1: java.lang.IllegalArgumentException: URLDecoder: Illegal hex characters in escape (%) pattern - Error at index 0 in: \" mn\" "
132+ )
133+ );
134+
135+ for (Tuple <String , String > t : tuples ) {
136+ String undecodableInput = t .v1 ();
137+ String expectedErrorMessage = t .v2 ();
138+ suppliers .add (createBadDecoderTestCase (dataType , undecodableInput , expectedErrorMessage ));
139+ }
93140 }
94141 }
95142
96143 return parameterSuppliersFromTypedDataWithDefaultChecksNoErrors (false , suppliers );
97144
98145 }
99146
100- public static TestCaseSupplier .TestCase createTestCaseWithRandomUrl (
101- DataType dataType ,
102- String evaluatorToString ,
103- boolean isEncoderTest ,
104- boolean useUrlComponentEncoder
105- ) {
106- RandomUrl url = generateRandomUrl (useUrlComponentEncoder );
147+ public static TestCaseSupplier .TestCase createTestCaseWithRandomUrl (DataType dataType , PercentCodecTestType codecTestType ) {
148+ boolean isEncoderTest = (codecTestType != PercentCodecTestType .DECODE );
149+ RandomUrl url = generateRandomUrl (codecTestType );
107150 BytesRef input = new BytesRef (isEncoderTest ? url .plain () : url .encoded ());
108151 BytesRef output = new BytesRef (isEncoderTest ? url .encoded () : url .plain ());
109152 TestCaseSupplier .TypedData fieldTypedData = new TestCaseSupplier .TypedData (input , dataType , "string" );
110153
111- return new TestCaseSupplier .TestCase (List .of (fieldTypedData ), evaluatorToString , dataType , equalTo (output ));
154+ return new TestCaseSupplier .TestCase (List .of (fieldTypedData ), codecTestType . evaluatorToString , dataType , equalTo (output ));
112155 }
113156
114157 public static TestCaseSupplier .TestCase createTestCaseWithRandomString (
115158 DataType dataType ,
116- String evaluatorToString ,
117- boolean isEncoderTest ,
118- boolean useUrlComponentEncoder ,
159+ PercentCodecTestType codecTestType ,
119160 TestCaseSupplier .TypedDataSupplier supplier
120161 ) {
162+ boolean isEncoderTest = (codecTestType != PercentCodecTestType .DECODE );
121163 TestCaseSupplier .TypedData fieldTypedData = supplier .get ();
122164 String plain = BytesRefs .toBytesRef (fieldTypedData .data ()).utf8ToString ();
123- String encoded = encode (plain , useUrlComponentEncoder );
165+ String encoded = encode (plain , codecTestType );
124166 BytesRef input = new BytesRef (isEncoderTest ? plain : encoded );
125167 BytesRef output = new BytesRef (isEncoderTest ? encoded : plain );
126168
127169 return new TestCaseSupplier .TestCase (
128170 List .of (new TestCaseSupplier .TypedData (input , dataType , "string" )),
129- evaluatorToString ,
171+ codecTestType . evaluatorToString ,
130172 dataType ,
131173 equalTo (output )
132174 );
133175 }
134176
135- private static RandomUrl generateRandomUrl (boolean useUrlComponentEncoder ) {
177+ private static RandomUrl generateRandomUrl (PercentCodecTestType codecTestType ) {
136178 String protocol = randomFrom ("http://" , "https://" , "" );
137179 String domain = String .format (Locale .ROOT , "%s.com" , randomAlphaOfLengthBetween (3 , 10 ));
138180 String path = randomFrom ("" , "/" + randomAlphanumericOfLength (5 ) + "/" );
139181 String query = randomFrom ("" , "?" + randomAlphaOfLength (5 ) + "=" + randomAlphanumericOfLength (5 ));
140182 String space = " " ; // ensure the correct encoding for space (+ or %20)
141183
142184 String plain = String .format (Locale .ROOT , "%s%s%s%s%s" , protocol , domain , path , query , space );
143- String encoded = encode (plain , useUrlComponentEncoder );
185+ String encoded = encode (plain , codecTestType );
144186
145187 return new RandomUrl (plain , encoded );
146188 }
147189
148- private static String encode (String plain , boolean useUrlComponentEncoder ) {
190+ private static String encode (String plain , PercentCodecTestType codecTestType ) {
149191 byte [] plainBytes = plain .getBytes (StandardCharsets .UTF_8 );
150192 byte [] encoded = null ;
151193
152194 try {
153- encoded = useUrlComponentEncoder ? urlEncodeComponentCodec . encode ( plainBytes ) : urlEncodeCodec .encode (plainBytes );
195+ encoded = codecTestType . getCodec () .encode (plainBytes );
154196 } catch (EncoderException ex ) {
155197 // Checked exception isn't really thrown, but we must handle it given the signature of PercentCodec.encode().
156198 throw new RuntimeException (ex );
@@ -186,26 +228,35 @@ private static byte[] buildUnsafeBytes(final Set<Character> additionallySafe) {
186228 return bytes ;
187229 }
188230
189- private static TestCaseSupplier createFixedTestCase (
190- DataType dataType ,
191- String plain ,
192- String evaluatorToString ,
193- boolean isEncoderTest ,
194- boolean useUrlComponentEncoder
195- ) {
231+ private static TestCaseSupplier createFixedTestCase (DataType dataType , String plain , PercentCodecTestType codecTestType ) {
196232 return new TestCaseSupplier (List .of (dataType ), () -> {
197- String encoded = encode (plain , useUrlComponentEncoder );
198- String input = isEncoderTest ? plain : encoded ;
233+ boolean isEncoderTest = (codecTestType != PercentCodecTestType .DECODE );
234+ String encoded = encode (plain , codecTestType );
235+ String input = (isEncoderTest ) ? plain : encoded ;
199236 String output = isEncoderTest ? encoded : plain ;
237+
200238 return new TestCaseSupplier .TestCase (
201239 List .of (new TestCaseSupplier .TypedData (new BytesRef (input ), dataType , "string" )),
202- evaluatorToString ,
240+ codecTestType . evaluatorToString ,
203241 dataType ,
204242 equalTo (new BytesRef (output ))
205243 );
206244 });
207245 }
208246
247+ private static TestCaseSupplier createBadDecoderTestCase (DataType dataType , String undecodable , String exceptionMessage ) {
248+ return new TestCaseSupplier (
249+ List .of (dataType ),
250+ () -> new TestCaseSupplier .TestCase (
251+ List .of (new TestCaseSupplier .TypedData (new BytesRef (undecodable ), dataType , "string" )),
252+ PercentCodecTestType .DECODE .evaluatorToString ,
253+ dataType ,
254+ is (nullValue ())
255+ ).withWarning ("Line 1:1: evaluation of [source] failed, treating result as null. Only first 20 failures recorded." )
256+ .withWarning (exceptionMessage )
257+ );
258+ }
259+
209260 private static byte [] allAsciiChars () {
210261 byte [] bytes = new byte [Byte .MAX_VALUE + 1 ];
211262 for (int i = 0 ; i < bytes .length ; ++i ) {
0 commit comments