From 1640dedb3b2a8d6e54ba7b22290d86d5984768be Mon Sep 17 00:00:00 2001 From: Ben Noordhuis Date: Tue, 3 Mar 2015 15:44:54 +0100 Subject: [PATCH] src: fix ucs-2 buffer encoding regression StringBytes::Write() did a plain memcpy() when is_extern is true but that's wrong when the source is a two-byte string and the destination a one-byte or UTF-8 string. The impact is limited to strings > 1,031,913 bytes because those are normally the only strings that are externalized, although the use of the 'externalize strings' extension (--expose_externalize_string) can also trigger it. This commit also cleans up the bytes versus characters confusion in StringBytes::Write() because that was closely intertwined with the UCS-2 encoding regression. One wasn't fixable without the other. Fixes: https://github.com/iojs/io.js/issues/1024 Fixes: https://github.com/joyent/node/issues/8683 PR-URL: https://github.com/iojs/io.js/pull/1042 Reviewed-By: Trevor Norris --- src/node_buffer.cc | 3 - src/string_bytes.cc | 72 +++++++++++----------- test/parallel/test-stringbytes-external.js | 13 ++++ 3 files changed, 49 insertions(+), 39 deletions(-) diff --git a/src/node_buffer.cc b/src/node_buffer.cc index 2d00ca97cc0f58..fa77c0779762a6 100644 --- a/src/node_buffer.cc +++ b/src/node_buffer.cc @@ -415,9 +415,6 @@ void StringWrite(const FunctionCallbackInfo& args) { if (max_length == 0) return args.GetReturnValue().Set(0); - if (encoding == UCS2) - max_length = max_length / 2; - if (offset >= obj_length) return env->ThrowRangeError("Offset is out of bounds"); diff --git a/src/string_bytes.cc b/src/string_bytes.cc index 1f5e592a32569b..4f896ace3fb693 100644 --- a/src/string_bytes.cc +++ b/src/string_bytes.cc @@ -279,13 +279,15 @@ size_t StringBytes::Write(Isolate* isolate, int* chars_written) { HandleScope scope(isolate); const char* data = nullptr; - size_t len = 0; - bool is_extern = GetExternalParts(isolate, val, &data, &len); - size_t extlen = len; + size_t nbytes = 0; + const bool is_extern = GetExternalParts(isolate, val, &data, &nbytes); + const size_t external_nbytes = nbytes; CHECK(val->IsString() == true); Local str = val.As(); - len = len < buflen ? len : buflen; + + if (nbytes > buflen) + nbytes = buflen; int flags = String::HINT_MANY_WRITES_EXPECTED | String::NO_NULL_TERMINATION | @@ -295,67 +297,65 @@ size_t StringBytes::Write(Isolate* isolate, case ASCII: case BINARY: case BUFFER: - if (is_extern) - memcpy(buf, data, len); - else - len = str->WriteOneByte(reinterpret_cast(buf), - 0, - buflen, - flags); + if (is_extern && str->IsOneByte()) { + memcpy(buf, data, nbytes); + } else { + uint8_t* const dst = reinterpret_cast(buf); + nbytes = str->WriteOneByte(dst, 0, buflen, flags); + } if (chars_written != nullptr) - *chars_written = len; + *chars_written = nbytes; break; case UTF8: - if (is_extern) - // TODO(tjfontaine) should this validate invalid surrogate pairs as - // well? - memcpy(buf, data, len); - else - len = str->WriteUtf8(buf, buflen, chars_written, flags); + nbytes = str->WriteUtf8(buf, buflen, chars_written, flags); break; - case UCS2: - if (is_extern) - memcpy(buf, data, len); - else - len = str->Write(reinterpret_cast(buf), 0, buflen, flags); + case UCS2: { + uint16_t* const dst = reinterpret_cast(buf); + size_t nchars; + if (is_extern && !str->IsOneByte()) { + memcpy(buf, data, nbytes); + nchars = nbytes / sizeof(*dst); + } else { + nchars = buflen / sizeof(*dst); + nchars = str->Write(dst, 0, nchars, flags); + nbytes = nchars * sizeof(*dst); + } if (IsBigEndian()) { // Node's "ucs2" encoding wants LE character data stored in // the Buffer, so we need to reorder on BE platforms. See // http://nodejs.org/api/buffer.html regarding Node's "ucs2" // encoding specification - uint16_t* buf16 = reinterpret_cast(buf); - for (size_t i = 0; i < len; i++) { - buf16[i] = (buf16[i] << 8) | (buf16[i] >> 8); - } + for (size_t i = 0; i < nchars; i++) + dst[i] = dst[i] << 8 | dst[i] >> 8; } if (chars_written != nullptr) - *chars_written = len; - len = len * sizeof(uint16_t); + *chars_written = nchars; break; + } case BASE64: if (is_extern) { - len = base64_decode(buf, buflen, data, extlen); + nbytes = base64_decode(buf, buflen, data, external_nbytes); } else { String::Value value(str); - len = base64_decode(buf, buflen, *value, value.length()); + nbytes = base64_decode(buf, buflen, *value, value.length()); } if (chars_written != nullptr) { - *chars_written = len; + *chars_written = nbytes; } break; case HEX: if (is_extern) { - len = hex_decode(buf, buflen, data, extlen); + nbytes = hex_decode(buf, buflen, data, external_nbytes); } else { String::Value value(str); - len = hex_decode(buf, buflen, *value, value.length()); + nbytes = hex_decode(buf, buflen, *value, value.length()); } if (chars_written != nullptr) { - *chars_written = len * 2; + *chars_written = nbytes; } break; @@ -364,7 +364,7 @@ size_t StringBytes::Write(Isolate* isolate, break; } - return len; + return nbytes; } diff --git a/test/parallel/test-stringbytes-external.js b/test/parallel/test-stringbytes-external.js index e4e3f366a1bb59..5bc4c945e87705 100644 --- a/test/parallel/test-stringbytes-external.js +++ b/test/parallel/test-stringbytes-external.js @@ -106,3 +106,16 @@ var PRE_3OF4_APEX = Math.ceil((EXTERN_APEX / 4) * 3) - RADIOS; } } })(); + +// https://github.com/iojs/io.js/issues/1024 +(function() { + var a = Array(1 << 20).join('x'); + var b = Buffer(a, 'ucs2').toString('ucs2'); + var c = Buffer(b, 'utf8').toString('utf8'); + + assert.equal(a.length, b.length); + assert.equal(b.length, c.length); + + assert.equal(a, b); + assert.equal(b, c); +})();