Skip to content

Commit

Permalink
fix issue with incorrect matches for some surrogate chars (#1360)
Browse files Browse the repository at this point in the history
  • Loading branch information
pjfanning authored Nov 15, 2024
1 parent cc70bc2 commit 88b4c94
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 6 deletions.
5 changes: 5 additions & 0 deletions release-notes/CREDITS-2.x
Original file line number Diff line number Diff line change
Expand Up @@ -456,3 +456,8 @@ Jared Stehler (@jaredstehler)
Zhanghao (@zhangOranges)
* Contributed #1305: Make helper methods of `WriterBasedJsonGenerator` non-final to allow overriding
(2.18.0)

Justin Gosselin (@jgosselin-accesso)
* Reported #1359: Non-surrogate characters being incorrectly combined when
`JsonWriteFeature.COMBINE_UNICODE_SURROGATES_IN_UTF8` is enabled
(2.18.2)
4 changes: 4 additions & 0 deletions release-notes/VERSION-2.x
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ a pure JSON library.

#1353: Use fastdoubleparser 1.0.90
(fixed by @pjfanning)
#1359: Non-surrogate characters being incorrectly combined when
`JsonWriteFeature.COMBINE_UNICODE_SURROGATES_IN_UTF8` is enabled
(reported by Justin G)
(fixed by @pjfanning)

2.18.0 (26-Sep-2024)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1509,7 +1509,7 @@ private final void _writeStringSegment2(final char[] cbuf, int offset, final int
outputBuffer[outputPtr++] = (byte) (0x80 | (ch & 0x3f));
} else {
// 3- or 4-byte character
if (_isSurrogateChar(ch)) {
if (_isStartOfSurrogatePair(ch)) {
final boolean combineSurrogates = Feature.COMBINE_UNICODE_SURROGATES_IN_UTF8.enabledIn(_features);
if (combineSurrogates && offset < end) {
char highSurrogate = (char) ch;
Expand Down Expand Up @@ -1557,7 +1557,7 @@ private final void _writeStringSegment2(final String text, int offset, final int
outputBuffer[outputPtr++] = (byte) (0x80 | (ch & 0x3f));
} else {
// 3- or 4-byte character
if (_isSurrogateChar(ch)) {
if (_isStartOfSurrogatePair(ch)) {
final boolean combineSurrogates = Feature.COMBINE_UNICODE_SURROGATES_IN_UTF8.enabledIn(_features);
if (combineSurrogates && offset < end) {
char highSurrogate = (char) ch;
Expand Down Expand Up @@ -2247,8 +2247,9 @@ private byte[] getHexBytes() {
}

// @since 2.18
private boolean _isSurrogateChar(int ch) {
return (ch & 0xD800) == 0xD800;
// Tells whether the low 16 bits of `ch` form a high (leading) surrogate,
// i.e. a UTF-16 code unit in the range 0xD800 - 0xDBFF that opens a
// surrogate pair. Low surrogates (0xDC00 - 0xDFFF) and every other BMP
// character yield false.
private static boolean _isStartOfSurrogatePair(final int ch) {
    // Narrowing to char keeps only the low 16 bits, so bits above bit 15
    // are ignored here as well.
    return Character.isHighSurrogate((char) ch);
}
}

Original file line number Diff line number Diff line change
@@ -1,17 +1,19 @@
package com.fasterxml.jackson.core.json;
package com.fasterxml.jackson.core.write;

import java.io.ByteArrayOutputStream;
import java.io.StringWriter;
import java.io.Writer;

import com.fasterxml.jackson.core.*;
import com.fasterxml.jackson.core.json.JsonWriteFeature;

import org.junit.jupiter.api.Test;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;

class Surrogate223Test extends JUnit5TestBase
class SurrogateWrite223Test extends JUnit5TestBase
{
private final JsonFactory DEFAULT_JSON_F = newStreamFactory();

Expand Down Expand Up @@ -90,4 +92,35 @@ void surrogatesCharBacked() throws Exception
assertToken(JsonToken.END_ARRAY, p.nextToken());
p.close();
}

// Regression test for https://github.com/FasterXML/jackson-core/issues/1359
// With COMBINE_UNICODE_SURROGATES_IN_UTF8 enabled, BMP characters that are not
// surrogates must be written unchanged, while a real surrogate pair (emoji)
// must still decode back to the same two UTF-16 code units.
@Test
void checkNonSurrogates() throws Exception {
    final JsonFactory factory = JsonFactory.builder()
            .enable(JsonWriteFeature.COMBINE_UNICODE_SURROGATES_IN_UTF8)
            .build();
    final ByteArrayOutputStream bytes = new ByteArrayOutputStream();

    // Inside the BMP, above the surrogate block: U+FF0C, full-width comma
    final String fullWidthComma = new String(Character.toChars(0xFF0C));
    // Inside the BMP, above the surrogate block: U+FE6A, small form percent
    final String smallFormPercent = new String(Character.toChars(0xFE6A));
    // Inside the BMP, below the surrogate block: U+3042, Hiragana A
    final String hiraganaA = new String(Character.toChars(0x3042));
    // Outside the BMP (encoded as a surrogate pair): U+1F60A, emoji
    final String emoji = new String(Character.toChars(0x1F60A));

    try (JsonGenerator g = factory.createGenerator(bytes)) {
        g.writeStartObject();
        g.writeStringField("test_full_width", "foo" + fullWidthComma + "bar");
        g.writeStringField("test_small_form", "foo" + smallFormPercent + "bar");
        g.writeStringField("test_hiragana", "foo" + hiraganaA + "bar");
        g.writeStringField("test_emoji", emoji);
        g.writeEndObject();
    }

    // Decode the written bytes and check every value survived the round trip
    final String json = bytes.toString("UTF-8");
    assertTrue(json.contains("foo\uFF0Cbar"));
    assertTrue(json.contains("foo\uFE6Abar"));
    assertTrue(json.contains("foo\u3042bar"));
    assertTrue(json.contains("\"test_emoji\":\"\uD83D\uDE0A\""));
}
}

0 comments on commit 88b4c94

Please sign in to comment.