1313import com .fasterxml .jackson .core .JsonParser ;
1414import com .fasterxml .jackson .core .JsonToken ;
1515
16- import org .elasticsearch .common .Strings ;
1716import org .elasticsearch .core .CheckedConsumer ;
1817import org .elasticsearch .test .ESTestCase ;
19- import org .elasticsearch .xcontent .XContentBuilder ;
2018import org .elasticsearch .xcontent .XContentString ;
21- import org .elasticsearch .xcontent .json .JsonXContent ;
2219import org .hamcrest .Matchers ;
2320
2421import java .io .IOException ;
2522import java .nio .charset .StandardCharsets ;
23+ import java .util .Locale ;
2624
2725public class ESUTF8StreamJsonParserTests extends ESTestCase {
2826
@@ -45,11 +43,13 @@ public void testGetValueAsText() throws IOException {
4543 assertThat (parser .nextFieldName (), Matchers .equalTo ("foo" ));
4644 assertThat (parser .nextValue (), Matchers .equalTo (JsonToken .VALUE_STRING ));
4745
48- var textRef = parser .getValueAsText ().bytes ();
49- assertThat (textRef , Matchers .notNullValue ());
50- assertThat (textRef .offset (), Matchers .equalTo (9 ));
51- assertThat (textRef .offset () + textRef .length (), Matchers .equalTo (12 ));
52- assertTextRef (textRef , "bar" );
46+ var text = parser .getValueAsText ();
47+ assertThat (text , Matchers .notNullValue ());
48+
49+ var bytes = text .bytes ();
50+ assertThat (bytes .offset (), Matchers .equalTo (9 ));
51+ assertThat (bytes .offset () + bytes .length (), Matchers .equalTo (12 ));
52+ assertTextRef (bytes , "bar" );
5353
5454 assertThat (parser .getValueAsString (), Matchers .equalTo ("bar" ));
5555 assertThat (parser .getValueAsText (), Matchers .nullValue ());
@@ -62,17 +62,36 @@ public void testGetValueAsText() throws IOException {
6262 assertThat (parser .nextFieldName (), Matchers .equalTo ("foo" ));
6363 assertThat (parser .nextValue (), Matchers .equalTo (JsonToken .VALUE_STRING ));
6464
65+ var text = parser .getValueAsText ();
66+ assertThat (text , Matchers .notNullValue ());
67+ assertTextRef (text .bytes (), "bar\" baz\" " );
68+ });
69+
70+ testParseJson ("{\" foo\" : \" b\\ u00e5r\" }" , parser -> {
71+ assertThat (parser .nextToken (), Matchers .equalTo (JsonToken .START_OBJECT ));
72+ assertThat (parser .nextFieldName (), Matchers .equalTo ("foo" ));
73+ assertThat (parser .nextValue (), Matchers .equalTo (JsonToken .VALUE_STRING ));
74+
6575 assertThat (parser .getValueAsText (), Matchers .nullValue ());
66- assertThat (parser .getValueAsString (), Matchers .equalTo ("bar \" baz \" " ));
76+ assertThat (parser .getValueAsString (), Matchers .equalTo ("bår " ));
6777 });
6878
6979 testParseJson ("{\" foo\" : \" bår\" }" , parser -> {
7080 assertThat (parser .nextToken (), Matchers .equalTo (JsonToken .START_OBJECT ));
7181 assertThat (parser .nextFieldName (), Matchers .equalTo ("foo" ));
7282 assertThat (parser .nextValue (), Matchers .equalTo (JsonToken .VALUE_STRING ));
7383
74- assertThat (parser .getValueAsText (), Matchers .nullValue ());
84+ var text = parser .getValueAsText ();
85+ assertThat (text , Matchers .notNullValue ());
86+
87+ var bytes = text .bytes ();
88+ assertThat (bytes .offset (), Matchers .equalTo (9 ));
89+ assertThat (bytes .offset () + bytes .length (), Matchers .equalTo (13 ));
90+ assertTextRef (bytes , "bår" );
91+
7592 assertThat (parser .getValueAsString (), Matchers .equalTo ("bår" ));
93+
94+ assertThat (parser .nextToken (), Matchers .equalTo (JsonToken .END_OBJECT ));
7695 });
7796
7897 testParseJson ("{\" foo\" : [\" lorem\" , \" ipsum\" , \" dolor\" ]}" , parser -> {
@@ -112,43 +131,97 @@ public void testGetValueAsText() throws IOException {
112131 });
113132 }
114133
115- private boolean validForTextRef (String value ) {
116- for (char c : value .toCharArray ()) {
117- if (c == '"' ) {
118- return false ;
134+ private record TestInput (String input , String result , boolean supportsOptimized ) {}
135+
136+ private static final TestInput [] ESCAPE_SEQUENCES = {
137+ new TestInput ("\\ b" , "\b " , false ),
138+ new TestInput ("\\ t" , "\t " , false ),
139+ new TestInput ("\\ n" , "\n " , false ),
140+ new TestInput ("\\ f" , "\f " , false ),
141+ new TestInput ("\\ r" , "\r " , false ),
142+ new TestInput ("\\ \" " , "\" " , true ),
143+ new TestInput ("\\ /" , "/" , true ),
144+ new TestInput ("\\ \\ " , "\\ " , true ) };
145+
146+ private int randomCodepoint (boolean includeAscii ) {
147+ while (true ) {
148+ char val = Character .toChars (randomInt (0xFFFF ))[0 ];
149+ if (val <= 0x7f && includeAscii == false ) {
150+ continue ;
119151 }
120- if (c == '\\' ) {
121- return false ;
152+ if (val >= Character . MIN_SURROGATE && val <= Character . MAX_SURROGATE ) {
153+ continue ;
122154 }
123- if ((int ) c < 32 || (int ) c >= 128 ) {
124- return false ;
155+ return val ;
156+ }
157+ }
158+
159+ private TestInput buildRandomInput (int length ) {
160+ StringBuilder input = new StringBuilder (length );
161+ StringBuilder result = new StringBuilder (length );
162+ boolean forceSupportOptimized = randomBoolean ();
163+ boolean doesSupportOptimized = true ;
164+ for (int i = 0 ; i < length ; ++i ) {
165+ if (forceSupportOptimized == false && randomBoolean ()) {
166+ switch (randomInt (9 )) {
167+ case 0 -> {
168+ var escape = randomFrom (ESCAPE_SEQUENCES );
169+ input .append (escape .input ());
170+ result .append (escape .result ());
171+ doesSupportOptimized = doesSupportOptimized && escape .supportsOptimized ();
172+ }
173+ case 1 -> {
174+ int value = randomCodepoint (true );
175+ input .append (String .format (Locale .ENGLISH , "\\ u%04x" , value ));
176+ result .append (Character .toChars (value ));
177+ doesSupportOptimized = false ;
178+ }
179+ default -> {
180+ var value = Character .toChars (randomCodepoint (false ));
181+ input .append (value );
182+ result .append (value );
183+ }
184+ }
185+ } else {
186+ var value = randomAlphanumericOfLength (1 );
187+ input .append (value );
188+ result .append (value );
125189 }
126190 }
127- return true ;
191+ return new TestInput ( input . toString (), result . toString (), doesSupportOptimized ) ;
128192 }
129193
130194 public void testGetValueRandomized () throws IOException {
131- XContentBuilder jsonBuilder = JsonXContent .contentBuilder ().startObject ();
195+ StringBuilder inputBuilder = new StringBuilder ();
196+ inputBuilder .append ('{' );
197+
132198 final int numKeys = 128 ;
133199 String [] keys = new String [numKeys ];
134- String [] values = new String [numKeys ];
200+ TestInput [] inputs = new TestInput [numKeys ];
135201 for (int i = 0 ; i < numKeys ; i ++) {
136202 String currKey = randomAlphanumericOfLength (6 );
137- String currVal = randomUnicodeOfLengthBetween (0 , 512 );
138- jsonBuilder .field (currKey , currVal );
203+ var currVal = buildRandomInput (randomInt (512 ));
204+ inputBuilder .append ('"' );
205+ inputBuilder .append (currKey );
206+ inputBuilder .append ("\" :\" " );
207+ inputBuilder .append (currVal .input ());
208+ inputBuilder .append ('"' );
209+ if (i < numKeys - 1 ) {
210+ inputBuilder .append (',' );
211+ }
139212 keys [i ] = currKey ;
140- values [i ] = currVal ;
213+ inputs [i ] = currVal ;
141214 }
142215
143- jsonBuilder . endObject ( );
144- testParseJson (Strings .toString (jsonBuilder ), parser -> {
216+ inputBuilder . append ( '}' );
217+ testParseJson (inputBuilder .toString (), parser -> {
145218 assertThat (parser .nextToken (), Matchers .equalTo (JsonToken .START_OBJECT ));
146219 for (int i = 0 ; i < numKeys ; i ++) {
147220 assertThat (parser .nextFieldName (), Matchers .equalTo (keys [i ]));
148221 assertThat (parser .nextValue (), Matchers .equalTo (JsonToken .VALUE_STRING ));
149222
150- String currVal = values [i ];
151- if (validForTextRef ( currVal )) {
223+ String currVal = inputs [i ]. result () ;
224+ if (inputs [ i ]. supportsOptimized ( )) {
152225 assertTextRef (parser .getValueAsText ().bytes (), currVal );
153226 } else {
154227 assertThat (parser .getValueAsText (), Matchers .nullValue ());
0 commit comments