Skip to content

Commit

Permalink
Merge branch '2.18' into 2.19
Browse files Browse the repository at this point in the history
  • Loading branch information
cowtowncoder committed Nov 15, 2024
2 parents 818647a + 88b4c94 commit 788eb90
Show file tree
Hide file tree
Showing 5 changed files with 57 additions and 14 deletions.
5 changes: 5 additions & 0 deletions release-notes/CREDITS-2.x
Original file line number Diff line number Diff line change
Expand Up @@ -457,6 +457,11 @@ Zhanghao (@zhangOranges)
* Contributed #1305: Make helper methods of `WriterBasedJsonGenerator` non-final to allow overriding
(2.18.0)

Justin Gosselin (@jgosselin-accesso)
* Reported #1359: Non-surrogate characters being incorrectly combined when
`JsonWriteFeature.COMBINE_UNICODE_SURROGATES_IN_UTF8` is enabled
(2.18.2)

Eduard Gomoliako (@Gems)
* Contributed #1356: Make `JsonGenerator::writeTypePrefix` method not write a
`WRAPPER_ARRAY` when `typeIdDef.id == null`
Expand Down
4 changes: 4 additions & 0 deletions release-notes/VERSION-2.x
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ a pure JSON library.

#1353: Use fastdoubleparser 1.0.90
(fixed by @pjfanning)
#1359: Non-surrogate characters being incorrectly combined when
`JsonWriteFeature.COMBINE_UNICODE_SURROGATES_IN_UTF8` is enabled
(reported by Justin G)
(fixed by @pjfanning)

2.18.0 (26-Sep-2024)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,6 @@ public class UTF8JsonGenerator
*/

// @since 2.10
@SuppressWarnings("deprecation")
public UTF8JsonGenerator(IOContext ctxt, int features, ObjectCodec codec,
OutputStream out, char quoteChar)
{
Expand All @@ -131,16 +130,15 @@ public UTF8JsonGenerator(IOContext ctxt, int features, ObjectCodec codec,
_outputBuffer = ctxt.allocWriteEncodingBuffer();
_outputEnd = _outputBuffer.length;

/* To be exact, each char can take up to 6 bytes when escaped (Unicode
* escape with backslash, 'u' and 4 hex digits); but to avoid fluctuation,
* we will actually round down to only do up to 1/8 number of chars
*/
// To be exact, each char can take up to 6 bytes when escaped (Unicode
// escape with backslash, 'u' and 4 hex digits); but to avoid fluctuation,
// we round that 1/6 ratio down to 1/8, limiting contiguous output to a
// number of chars equal to one eighth of the output buffer length
_outputMaxContiguous = _outputEnd >> 3;
_charBuffer = ctxt.allocConcatBuffer();
_charBufferLength = _charBuffer.length;

// By default we use this feature to determine additional quoting
if (isEnabled(Feature.ESCAPE_NON_ASCII)) {
if (isEnabled(JsonWriteFeature.ESCAPE_NON_ASCII.mappedFeature())) {
setHighestNonEscapedChar(127);
}
}
Expand Down Expand Up @@ -1511,7 +1509,7 @@ private final void _writeStringSegment2(final char[] cbuf, int offset, final int
outputBuffer[outputPtr++] = (byte) (0x80 | (ch & 0x3f));
} else {
// 3- or 4-byte character
if (_isSurrogateChar(ch)) {
if (_isStartOfSurrogatePair(ch)) {
final boolean combineSurrogates = Feature.COMBINE_UNICODE_SURROGATES_IN_UTF8.enabledIn(_features);
if (combineSurrogates && offset < end) {
char highSurrogate = (char) ch;
Expand Down Expand Up @@ -1559,7 +1557,7 @@ private final void _writeStringSegment2(final String text, int offset, final int
outputBuffer[outputPtr++] = (byte) (0x80 | (ch & 0x3f));
} else {
// 3- or 4-byte character
if (_isSurrogateChar(ch)) {
if (_isStartOfSurrogatePair(ch)) {
final boolean combineSurrogates = Feature.COMBINE_UNICODE_SURROGATES_IN_UTF8.enabledIn(_features);
if (combineSurrogates && offset < end) {
char highSurrogate = (char) ch;
Expand Down Expand Up @@ -2249,8 +2247,9 @@ private byte[] getHexBytes() {
}

// @since 2.18
private boolean _isSurrogateChar(int ch) {
return (ch & 0xD800) == 0xD800;
/**
 * Checks whether the given value is a high (leading) surrogate, that is,
 * a code unit in the range 0xD800 - 0xDBFF.
 *
 * @param ch Value to check; only its low 16 bits are considered
 * @return {@code true} if the low 16 bits of {@code ch} are a high surrogate
 */
private static boolean _isStartOfSurrogatePair(final int ch) {
    // The narrowing cast keeps the low 16 bits, so this matches a
    // (ch & 0xFC00) == 0xD800 mask test for every int value.
    return Character.isHighSurrogate((char) ch);
}
}

Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,13 @@
import static org.junit.jupiter.api.Assertions.assertEquals;

class ParserFeatureDefaultsTest
extends JUnit5TestBase
extends JUnit5TestBase
{
static class TestParser extends ParserMinimalBase
{
public TestParser() { super(null); }
public TestParser() {
super(StreamReadConstraints.defaults());
}

@Override
public JsonToken nextToken() {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package com.fasterxml.jackson.core.json;
package com.fasterxml.jackson.core.write;

import java.io.ByteArrayOutputStream;
import java.io.StringWriter;
Expand All @@ -7,11 +7,13 @@
import org.junit.jupiter.api.Test;

import com.fasterxml.jackson.core.*;
import com.fasterxml.jackson.core.json.JsonWriteFeature;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;

class Surrogate223Test extends JUnit5TestBase
class SurrogateWrite223Test extends JUnit5TestBase
{
private final JsonFactory DEFAULT_JSON_F = newStreamFactory();

Expand Down Expand Up @@ -90,4 +92,35 @@ void surrogatesCharBacked() throws Exception
assertToken(JsonToken.END_ARRAY, p.nextToken());
p.close();
}

//https://github.com/FasterXML/jackson-core/issues/1359
// With surrogate combining enabled, BMP characters that lie outside the
// surrogate block must appear unchanged in the output, and a supplementary
// code point (written as a surrogate pair) must still come out intact.
@Test
void checkNonSurrogates() throws Exception {
    // Inside the BMP, beyond surrogate block: U+FF0C, full-width comma
    final String fullWidthComma = "\uFF0C";
    // Inside the BMP, beyond surrogate block: U+FE6A, small form percent
    final String smallFormPercent = "\uFE6A";
    // Inside the BMP, before the surrogate block: U+3042, Hiragana A
    final String hiraganaA = "\u3042";
    // Outside the BMP: U+1F60A (emoji), as its UTF-16 surrogate pair
    final String emoji = "\uD83D\uDE0A";

    final JsonFactory factory = JsonFactory.builder()
            .enable(JsonWriteFeature.COMBINE_UNICODE_SURROGATES_IN_UTF8)
            .build();
    final ByteArrayOutputStream bytes = new ByteArrayOutputStream();

    try (JsonGenerator g = factory.createGenerator(bytes)) {
        g.writeStartObject();
        g.writeStringField("test_full_width", "foo" + fullWidthComma + "bar");
        g.writeStringField("test_small_form", "foo" + smallFormPercent + "bar");
        g.writeStringField("test_hiragana", "foo" + hiraganaA + "bar");
        g.writeStringField("test_emoji", emoji);
        g.writeEndObject();
    }

    final String json = bytes.toString("UTF-8");
    assertTrue(json.contains("foo" + fullWidthComma + "bar"));
    assertTrue(json.contains("foo" + smallFormPercent + "bar"));
    assertTrue(json.contains("foo" + hiraganaA + "bar"));
    assertTrue(json.contains("\"test_emoji\":\"" + emoji + "\""));
}
}

0 comments on commit 788eb90

Please sign in to comment.