Skip to content

Commit fb653d4

Browse files
committed
lib: yyjson: sync latest changes from PR
Signed-off-by: Eduardo Silva <eduardo@chronosphere.io>
1 parent: dd1db8f; commit: fb653d4

File tree

3 files changed

+195
-74
lines changed

3 files changed

+195
-74
lines changed

lib/yyjson-0.12.0/src/yyjson.c

Lines changed: 105 additions & 73 deletions
Original file line number | Diff line number | Diff line change
@@ -4658,7 +4658,8 @@ static_inline bool read_num(u8 **ptr, u8 **pre, yyjson_read_flag flg,
46584658

46594659
/** Read unicode escape sequence. */
46604660
static_inline bool read_uni_esc(u8 **src_ptr, u8 **dst_ptr,
4661-
const char **msg, yyjson_read_flag flg) {
4661+
const char **msg, yyjson_read_flag flg,
4662+
bool *unierr) {
46624663
#define return_err(_end, _msg) *msg = _msg; *src_ptr = _end; return false
46634664

46644665
u8 *src = *src_ptr;
@@ -4669,12 +4670,19 @@ static_inline bool read_uni_esc(u8 **src_ptr, u8 **dst_ptr,
46694670
src += 2; /* skip `\u` */
46704671
if (unlikely(!hex_load_4(src, &hi))) {
46714672
if (has_rflag(flg, YYJSON_READ_REPLACE_INVALID_UNICODE, 1)) {
4672-
usize cnt = 0;
4673+
usize cnt = 0, i;
4674+
u8 ch;
46734675
while (cnt < 4 && char_is_hex(src[cnt])) cnt++;
4676+
ch = src[cnt];
4677+
dst[0] = '\\';
4678+
dst[1] = 'u';
4679+
for (i = 0; i < cnt; i++) dst[2 + i] = src[i];
4680+
dst += 2 + cnt;
46744681
src += cnt;
4675-
*dst++ = 0xEF; *dst++ = 0xBF; *dst++ = 0xBD;
4682+
if (ch && ch != '"' && ch != '\'') src++;
46764683
*src_ptr = src;
46774684
*dst_ptr = dst;
4685+
if (unierr) *unierr = true;
46784686
return true;
46794687
}
46804688
return_err(src - 2, "invalid escaped sequence in string");
@@ -4699,6 +4707,7 @@ static_inline bool read_uni_esc(u8 **src_ptr, u8 **dst_ptr,
46994707
*dst++ = 0xEF; *dst++ = 0xBF; *dst++ = 0xBD;
47004708
*src_ptr = src;
47014709
*dst_ptr = dst;
4710+
if (unierr) *unierr = true;
47024711
return true;
47034712
}
47044713
if (has_allow(INVALID_SURROGATE)) {
@@ -4714,6 +4723,7 @@ static_inline bool read_uni_esc(u8 **src_ptr, u8 **dst_ptr,
47144723
}
47154724
*src_ptr = src;
47164725
*dst_ptr = dst;
4726+
if (unierr) *unierr = true;
47174727
return true;
47184728
}
47194729
return_err(src - 6, "no low surrogate in string");
@@ -4727,6 +4737,7 @@ static_inline bool read_uni_esc(u8 **src_ptr, u8 **dst_ptr,
47274737
*dst++ = 0xEF; *dst++ = 0xBF; *dst++ = 0xBD;
47284738
*src_ptr = src;
47294739
*dst_ptr = dst;
4740+
if (unierr) *unierr = true;
47304741
return true;
47314742
}
47324743
if (has_allow(INVALID_SURROGATE)) {
@@ -4742,6 +4753,7 @@ static_inline bool read_uni_esc(u8 **src_ptr, u8 **dst_ptr,
47424753
}
47434754
*src_ptr = src;
47444755
*dst_ptr = dst;
4756+
if (unierr) *unierr = true;
47454757
return true;
47464758
}
47474759
return_err(src - 6, "invalid escape in string");
@@ -4752,6 +4764,7 @@ static_inline bool read_uni_esc(u8 **src_ptr, u8 **dst_ptr,
47524764
*dst++ = 0xEF; *dst++ = 0xBF; *dst++ = 0xBD;
47534765
*src_ptr = src;
47544766
*dst_ptr = dst;
4767+
if (unierr) *unierr = true;
47554768
return true;
47564769
}
47574770
if (has_allow(INVALID_SURROGATE)) {
@@ -4767,6 +4780,7 @@ static_inline bool read_uni_esc(u8 **src_ptr, u8 **dst_ptr,
47674780
}
47684781
*src_ptr = src;
47694782
*dst_ptr = dst;
4783+
if (unierr) *unierr = true;
47704784
return true;
47714785
}
47724786
return_err(src - 6, "invalid low surrogate in string");
@@ -4783,6 +4797,7 @@ static_inline bool read_uni_esc(u8 **src_ptr, u8 **dst_ptr,
47834797
*dst++ = 0xEF; *dst++ = 0xBF; *dst++ = 0xBD;
47844798
*src_ptr = src;
47854799
*dst_ptr = dst;
4800+
if (unierr) *unierr = true;
47864801
return true;
47874802
}
47884803
if (!has_allow(INVALID_SURROGATE)) {
@@ -4798,6 +4813,7 @@ static_inline bool read_uni_esc(u8 **src_ptr, u8 **dst_ptr,
47984813
} else {
47994814
*dst++ = (u8)hi;
48004815
}
4816+
if (unierr) *unierr = true;
48014817
}
48024818
*src_ptr = src;
48034819
*dst_ptr = dst;
@@ -4839,6 +4855,7 @@ static_inline bool read_str_opt(u8 quo, u8 **ptr, u8 *eof, yyjson_read_flag flg,
48394855
u8 *src = hdr, *dst = NULL, *pos;
48404856
u16 hi, lo;
48414857
u32 uni, tmp;
4858+
bool unierr = false;
48424859

48434860
/* Resume incremental parsing. */
48444861
if (con && unlikely(con[0])) {
@@ -4901,7 +4918,8 @@ static_inline bool read_str_opt(u8 quo, u8 **ptr, u8 *eof, yyjson_read_flag flg,
49014918
gcc_store_barrier(*src);
49024919
if (likely(*src == quo)) {
49034920
val->tag = ((u64)(src - hdr) << YYJSON_TAG_BIT) | YYJSON_TYPE_STR |
4904-
(quo == '"' ? YYJSON_SUBTYPE_NOESC : 0);
4921+
(unierr ? YYJSON_SUBTYPE_UNIERR :
4922+
(quo == '"' ? YYJSON_SUBTYPE_NOESC : 0));
49054923
val->uni.str = (const char *)hdr;
49064924
*src = '\0';
49074925
*end = src + 1;
@@ -4950,14 +4968,13 @@ static_inline bool read_str_opt(u8 quo, u8 **ptr, u8 *eof, yyjson_read_flag flg,
49504968
}
49514969
#endif
49524970
if (unlikely(pos == src)) {
4953-
if (has_rflag(flg, YYJSON_READ_REPLACE_INVALID_UNICODE, 1)) {
4954-
dst = src;
4955-
*dst++ = 0xEF; *dst++ = 0xBF; *dst++ = 0xBD;
4956-
++src;
4957-
goto copy_utf8;
4958-
}
4959-
if (has_allow(INVALID_UNICODE)) ++src;
4960-
else return_err(src, "invalid UTF-8 encoding in string");
4971+
if (!has_allow(INVALID_UNICODE) &&
4972+
!has_rflag(flg, YYJSON_READ_REPLACE_INVALID_UNICODE, 1))
4973+
return_err(src, "invalid UTF-8 encoding in string");
4974+
dst = src;
4975+
*dst++ = *src++;
4976+
unierr = true;
4977+
goto copy_utf8;
49614978
}
49624979
goto skip_ascii;
49634980
}
@@ -4977,7 +4994,7 @@ static_inline bool read_str_opt(u8 quo, u8 **ptr, u8 *eof, yyjson_read_flag flg,
49774994
case 't': *dst++ = '\t'; src++; break;
49784995
case 'u':
49794996
src--;
4980-
if (!read_uni_esc(&src, &dst, msg, flg)) return_err(src, *msg);
4997+
if (!read_uni_esc(&src, &dst, msg, flg, &unierr)) return_err(src, *msg);
49814998
break;
49824999
default: {
49835000
if (has_allow(EXT_ESCAPE)) {
@@ -5029,24 +5046,21 @@ static_inline bool read_str_opt(u8 quo, u8 **ptr, u8 *eof, yyjson_read_flag flg,
50295046
}
50305047
}
50315048
} else if (likely(*src == quo)) {
5032-
val->tag = ((u64)(dst - hdr) << YYJSON_TAG_BIT) | YYJSON_TYPE_STR;
5049+
val->tag = ((u64)(dst - hdr) << YYJSON_TAG_BIT) | YYJSON_TYPE_STR |
5050+
(unierr ? YYJSON_SUBTYPE_UNIERR : 0);
50335051
val->uni.str = (const char *)hdr;
50345052
*dst = '\0';
50355053
*end = src + 1;
50365054
if (con) con[0] = con[1] = NULL;
50375055
return true;
50385056
} else {
5039-
if (has_rflag(flg, YYJSON_READ_REPLACE_INVALID_UNICODE, 1)) {
5040-
if (src >= eof) return_err(src, "unclosed string");
5041-
*dst++ = 0xEF; *dst++ = 0xBF; *dst++ = 0xBD;
5042-
src++;
5043-
} else {
5044-
if (!has_allow(INVALID_UNICODE)) {
5045-
return_err(src, "unexpected control character in string");
5046-
}
5047-
if (src >= eof) return_err(src, "unclosed string");
5048-
*dst++ = *src++;
5057+
if (!has_allow(INVALID_UNICODE) &&
5058+
!has_rflag(flg, YYJSON_READ_REPLACE_INVALID_UNICODE, 1)) {
5059+
return_err(src, "unexpected control character in string");
50495060
}
5061+
if (src >= eof) return_err(src, "unclosed string");
5062+
*dst++ = *src++;
5063+
unierr = true;
50505064
}
50515065

50525066
copy_ascii:
@@ -5134,15 +5148,12 @@ static_inline bool read_str_opt(u8 quo, u8 **ptr, u8 *eof, yyjson_read_flag flg,
51345148
}
51355149
#endif
51365150
if (unlikely(pos == src)) {
5137-
if (has_rflag(flg, YYJSON_READ_REPLACE_INVALID_UNICODE, 1)) {
5138-
*dst++ = 0xEF; *dst++ = 0xBF; *dst++ = 0xBD;
5139-
++src;
5140-
goto copy_utf8;
5141-
}
5142-
if (!has_allow(INVALID_UNICODE)) {
5151+
if (!has_allow(INVALID_UNICODE) &&
5152+
!has_rflag(flg, YYJSON_READ_REPLACE_INVALID_UNICODE, 1))
51435153
return_err(src, MSG_ERR_UTF8);
5144-
}
5145-
goto copy_ascii_stop_1;
5154+
*dst++ = *src++;
5155+
unierr = true;
5156+
goto copy_utf8;
51465157
}
51475158
goto copy_ascii;
51485159
}
@@ -5177,7 +5188,7 @@ static_noinline bool read_str_id(u8 **ptr, u8 *eof, yyjson_read_flag flg,
51775188

51785189
#define return_suc(_str_end, _cur_end) do { \
51795190
val->tag = ((u64)(_str_end - hdr) << YYJSON_TAG_BIT) | \
5180-
(u64)(YYJSON_TYPE_STR); \
5191+
(u64)(YYJSON_TYPE_STR | (unierr ? YYJSON_SUBTYPE_UNIERR : 0)); \
51815192
val->uni.str = (const char *)hdr; \
51825193
*pre = _str_end; *end = _cur_end; \
51835194
return true; \
@@ -5188,6 +5199,7 @@ static_noinline bool read_str_id(u8 **ptr, u8 *eof, yyjson_read_flag flg,
51885199
u8 *src = hdr, *dst = NULL;
51895200
u16 hi, lo;
51905201
u32 uni, tmp;
5202+
bool unierr = false;
51915203

51925204
/* add null-terminator for previous raw string */
51935205
**pre = '\0';
@@ -5232,9 +5244,14 @@ static_noinline bool read_str_id(u8 **ptr, u8 *eof, yyjson_read_flag flg,
52325244
src += 4;
52335245
} else {
52345246
#if !YYJSON_DISABLE_UTF8_VALIDATION
5235-
if (!has_allow(INVALID_UNICODE)) return_err(src, MSG_ERR_UTF8);
5247+
if (!has_allow(INVALID_UNICODE) &&
5248+
!has_rflag(flg, YYJSON_READ_REPLACE_INVALID_UNICODE, 1))
5249+
return_err(src, MSG_ERR_UTF8);
52365250
#endif
5237-
src += 1;
5251+
dst = src;
5252+
*dst++ = *src++;
5253+
unierr = true;
5254+
goto copy_utf8;
52385255
}
52395256
}
52405257
if (char_is_id_ascii(*src)) goto skip_ascii;
@@ -5243,7 +5260,7 @@ static_noinline bool read_str_id(u8 **ptr, u8 *eof, yyjson_read_flag flg,
52435260
dst = src;
52445261
copy_escape:
52455262
if (byte_match_2(src, "\\u")) {
5246-
if (!read_uni_esc(&src, &dst, msg, flg)) return_err(src, *msg);
5263+
if (!read_uni_esc(&src, &dst, msg, flg, &unierr)) return_err(src, *msg);
52475264
} else {
52485265
if (!char_is_id_next(*src)) return_suc(dst, src);
52495266
return_err(src, "unexpected character in key");
@@ -5299,13 +5316,8 @@ static_noinline bool read_str_id(u8 **ptr, u8 *eof, yyjson_read_flag flg,
52995316
!has_rflag(flg, YYJSON_READ_REPLACE_INVALID_UNICODE, 1))
53005317
return_err(src, MSG_ERR_UTF8);
53015318
#endif
5302-
if (has_rflag(flg, YYJSON_READ_REPLACE_INVALID_UNICODE, 1)) {
5303-
*dst++ = 0xEF; *dst++ = 0xBF; *dst++ = 0xBD;
5304-
src += 1;
5305-
} else {
5306-
*dst = *src;
5307-
dst += 1; src += 1;
5308-
}
5319+
*dst++ = *src++;
5320+
unierr = true;
53095321
}
53105322
}
53115323
if (char_is_id_ascii(*src)) goto copy_ascii;
@@ -9120,12 +9132,16 @@ static_inline u8 *yyjson_write_single(yyjson_val *val,
91209132
str_ptr = (const u8 *)unsafe_yyjson_get_str(val);
91219133
check_str_len(str_len);
91229134
incr_len(str_len * 6 + 2 + end_len);
9123-
if (likely(cpy) && unsafe_yyjson_get_subtype(val)) {
9124-
cur = write_str_noesc(cur, str_ptr, str_len);
9125-
} else {
9126-
cur = write_str(cur, esc, inv, str_ptr, str_len, enc_table);
9127-
if (unlikely(!cur)) goto fail_str;
9128-
}
9135+
do {
9136+
yyjson_subtype st = unsafe_yyjson_get_subtype(val);
9137+
if (likely(cpy) && st == YYJSON_SUBTYPE_NOESC) {
9138+
cur = write_str_noesc(cur, str_ptr, str_len);
9139+
} else {
9140+
bool inv2 = inv || (st == YYJSON_SUBTYPE_UNIERR);
9141+
cur = write_str(cur, esc, inv2, str_ptr, str_len, enc_table);
9142+
if (unlikely(!cur)) goto fail_str;
9143+
}
9144+
} while (0);
91299145
break;
91309146

91319147
case YYJSON_TYPE_NUM:
@@ -9257,12 +9273,16 @@ static_inline u8 *yyjson_write_minify(const yyjson_val *root,
92579273
str_ptr = (const u8 *)unsafe_yyjson_get_str(val);
92589274
check_str_len(str_len);
92599275
incr_len(str_len * 6 + 16);
9260-
if (likely(cpy) && unsafe_yyjson_get_subtype(val)) {
9261-
cur = write_str_noesc(cur, str_ptr, str_len);
9262-
} else {
9263-
cur = write_str(cur, esc, inv, str_ptr, str_len, enc_table);
9264-
if (unlikely(!cur)) goto fail_str;
9265-
}
9276+
do {
9277+
yyjson_subtype st = unsafe_yyjson_get_subtype(val);
9278+
if (likely(cpy) && st == YYJSON_SUBTYPE_NOESC) {
9279+
cur = write_str_noesc(cur, str_ptr, str_len);
9280+
} else {
9281+
bool inv2 = inv || (st == YYJSON_SUBTYPE_UNIERR);
9282+
cur = write_str(cur, esc, inv2, str_ptr, str_len, enc_table);
9283+
if (unlikely(!cur)) goto fail_str;
9284+
}
9285+
} while (0);
92669286
*cur++ = is_key ? ':' : ',';
92679287
goto val_end;
92689288
}
@@ -9443,12 +9463,16 @@ static_inline u8 *yyjson_write_pretty(const yyjson_val *root,
94439463
check_str_len(str_len);
94449464
incr_len(str_len * 6 + 16 + (no_indent ? 0 : level * 4));
94459465
cur = write_indent(cur, no_indent ? 0 : level, spaces);
9446-
if (likely(cpy) && unsafe_yyjson_get_subtype(val)) {
9447-
cur = write_str_noesc(cur, str_ptr, str_len);
9448-
} else {
9449-
cur = write_str(cur, esc, inv, str_ptr, str_len, enc_table);
9450-
if (unlikely(!cur)) goto fail_str;
9451-
}
9466+
do {
9467+
yyjson_subtype st = unsafe_yyjson_get_subtype(val);
9468+
if (likely(cpy) && st == YYJSON_SUBTYPE_NOESC) {
9469+
cur = write_str_noesc(cur, str_ptr, str_len);
9470+
} else {
9471+
bool inv2 = inv || (st == YYJSON_SUBTYPE_UNIERR);
9472+
cur = write_str(cur, esc, inv2, str_ptr, str_len, enc_table);
9473+
if (unlikely(!cur)) goto fail_str;
9474+
}
9475+
} while (0);
94529476
*cur++ = is_key ? ':' : ',';
94539477
*cur++ = is_key ? ' ' : '\n';
94549478
goto val_end;
@@ -9812,12 +9836,16 @@ static_inline u8 *yyjson_mut_write_minify(const yyjson_mut_val *root,
98129836
str_ptr = (const u8 *)unsafe_yyjson_get_str(val);
98139837
check_str_len(str_len);
98149838
incr_len(str_len * 6 + 16);
9815-
if (likely(cpy) && unsafe_yyjson_get_subtype(val)) {
9816-
cur = write_str_noesc(cur, str_ptr, str_len);
9817-
} else {
9818-
cur = write_str(cur, esc, inv, str_ptr, str_len, enc_table);
9819-
if (unlikely(!cur)) goto fail_str;
9820-
}
9839+
do {
9840+
yyjson_subtype st = unsafe_yyjson_get_subtype(val);
9841+
if (likely(cpy) && st == YYJSON_SUBTYPE_NOESC) {
9842+
cur = write_str_noesc(cur, str_ptr, str_len);
9843+
} else {
9844+
bool inv2 = inv || (st == YYJSON_SUBTYPE_UNIERR);
9845+
cur = write_str(cur, esc, inv2, str_ptr, str_len, enc_table);
9846+
if (unlikely(!cur)) goto fail_str;
9847+
}
9848+
} while (0);
98219849
*cur++ = is_key ? ':' : ',';
98229850
goto val_end;
98239851
}
@@ -10004,12 +10032,16 @@ static_inline u8 *yyjson_mut_write_pretty(const yyjson_mut_val *root,
1000410032
check_str_len(str_len);
1000510033
incr_len(str_len * 6 + 16 + (no_indent ? 0 : level * 4));
1000610034
cur = write_indent(cur, no_indent ? 0 : level, spaces);
10007-
if (likely(cpy) && unsafe_yyjson_get_subtype(val)) {
10008-
cur = write_str_noesc(cur, str_ptr, str_len);
10009-
} else {
10010-
cur = write_str(cur, esc, inv, str_ptr, str_len, enc_table);
10011-
if (unlikely(!cur)) goto fail_str;
10012-
}
10035+
do {
10036+
yyjson_subtype st = unsafe_yyjson_get_subtype(val);
10037+
if (likely(cpy) && st == YYJSON_SUBTYPE_NOESC) {
10038+
cur = write_str_noesc(cur, str_ptr, str_len);
10039+
} else {
10040+
bool inv2 = inv || (st == YYJSON_SUBTYPE_UNIERR);
10041+
cur = write_str(cur, esc, inv2, str_ptr, str_len, enc_table);
10042+
if (unlikely(!cur)) goto fail_str;
10043+
}
10044+
} while (0);
1001310045
*cur++ = is_key ? ':' : ',';
1001410046
*cur++ = is_key ? ' ' : '\n';
1001510047
goto val_end;

0 commit comments

Comments
 (0)