From 0dc68aa50eaf50f323e18e705963d90221d93a24 Mon Sep 17 00:00:00 2001 From: Brian Lopez Date: Mon, 6 Nov 2017 19:44:23 -0800 Subject: [PATCH 1/2] Fix parsing bad surrogate trailers If a valid surrogate character escape is found, but the following byte sequence isn't a valid unicode escape sequence, insert our replacement character '?' as we would any other place we saw invalid characters while unescaping. Fixes #176 --- ext/yajl/yajl_encode.c | 3 ++- spec/parsing/one_off_spec.rb | 6 ++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/ext/yajl/yajl_encode.c b/ext/yajl/yajl_encode.c index 8535c1b4..b1db84cf 100644 --- a/ext/yajl/yajl_encode.c +++ b/ext/yajl/yajl_encode.c @@ -189,7 +189,8 @@ void yajl_string_decode(yajl_buf buf, const unsigned char * str, break; } default: - assert("this should never happen" == NULL); + unescaped = "?"; + break; } yajl_buf_append(buf, unescaped, (unsigned int)strlen(unescaped)); beg = ++end; diff --git a/spec/parsing/one_off_spec.rb b/spec/parsing/one_off_spec.rb index 9bc6b324..3747d14d 100644 --- a/spec/parsing/one_off_spec.rb +++ b/spec/parsing/one_off_spec.rb @@ -2,6 +2,12 @@ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper.rb') describe "One-off JSON examples" do + it "should not blow up with a bad surrogate trailer" do + bad_json = "{\"e\":{\"\\uD800\\\\DC00\":\"a\"}}" + + Yajl::Parser.new.parse(bad_json) + end + it "should parse 23456789012E666 and return Infinity" do infinity = (1.0/0) silence_warnings do From 85701fc3254f4b06e67c13be79825bab71678e17 Mon Sep 17 00:00:00 2001 From: Brian Lopez Date: Mon, 6 Nov 2017 21:46:42 -0800 Subject: [PATCH 2/2] Don't advance our end pointer until we've checked we have enough buffer left and have peeked ahead to see that a unicode escape is approaching. Thanks @kivikakk for helping me track down the actual bug here! --- ext/yajl/yajl_encode.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/ext/yajl/yajl_encode.c b/ext/yajl/yajl_encode.c index b1db84cf..716ddded 100644 --- a/ext/yajl/yajl_encode.c +++ b/ext/yajl/yajl_encode.c @@ -162,8 +162,8 @@ void yajl_string_decode(yajl_buf buf, const unsigned char * str, end+=3; /* check if this is a surrogate */ if ((codepoint & 0xFC00) == 0xD800) { - end++; - if (str[end] == '\\' && str[end + 1] == 'u') { + if (end + 2 < len && str[end + 1] == '\\' && str[end + 2] == 'u') { + end++; unsigned int surrogate = 0; hexToDigit(&surrogate, str + end + 2); codepoint = @@ -189,8 +189,7 @@ void yajl_string_decode(yajl_buf buf, const unsigned char * str, break; } default: - unescaped = "?"; - break; + assert("this should never happen" == NULL); } yajl_buf_append(buf, unescaped, (unsigned int)strlen(unescaped)); beg = ++end;