From 16632ff052c06a94aa74dd4a8ddcc84509652a56 Mon Sep 17 00:00:00 2001 From: Alexey Orlenko Date: Tue, 31 Jan 2017 01:41:11 +0200 Subject: [PATCH 1/2] src,build: improve the native module subsystem * Split `jsrs-impl.cc` into separate modules. * Make some refactoring. * Rename the native addon to `jstp` since there already is a function that is not a part of JSRS. * Fix `binding.gyp`: make `cflags` not ignored on macOS (as it appeared they used to be) and do not use `-O3` in Debug configuration. * Use a macro to throw V8 exceptions to avoid boilerplate code. Backport-of: https://github.com/metarhia/JSTP/pull/36 --- binding.gyp | 34 +- lib/record-serialization.js | 20 +- src/common.h | 11 + src/jsrs-impl.cc | 932 ------------------------------------ src/jsrs-impl.h | 192 -------- src/jsrs.h | 47 -- src/jsrs_parser.cc | 638 ++++++++++++++++++++++++ src/jsrs_parser.h | 127 +++++ src/jsrs_serializer.cc | 215 +++++++++ src/jsrs_serializer.h | 47 ++ src/node_bindings.cc | 88 ++++ src/packet_parser.cc | 65 +++ src/packet_parser.h | 23 + src/unicode_utils.cc | 110 +++++ src/unicode_utils.h | 31 ++ 15 files changed, 1394 insertions(+), 1186 deletions(-) create mode 100644 src/common.h delete mode 100644 src/jsrs-impl.cc delete mode 100644 src/jsrs-impl.h delete mode 100644 src/jsrs.h create mode 100644 src/jsrs_parser.cc create mode 100644 src/jsrs_parser.h create mode 100644 src/jsrs_serializer.cc create mode 100644 src/jsrs_serializer.h create mode 100644 src/node_bindings.cc create mode 100644 src/packet_parser.cc create mode 100644 src/packet_parser.h create mode 100644 src/unicode_utils.cc create mode 100644 src/unicode_utils.h diff --git a/binding.gyp b/binding.gyp index cee60b92..d4fd2190 100644 --- a/binding.gyp +++ b/binding.gyp @@ -1,9 +1,37 @@ { + 'variables': { + 'jstp_base_cflags': ['-Wall', '-Wextra', '-Wno-unused-parameter'], + 'jstp_debug_cflags': ['-g', '-O0'], + 'jstp_release_cflags': ['-O3'] + }, 'targets': [ { - 'target_name': 'jsrs', - 'cflags': 
['-O3'], - 'sources': ['src/jsrs-impl.cc'] + 'target_name': 'jstp', + 'sources': [ + 'src/node_bindings.cc', + 'src/jsrs_serializer.cc', + 'src/jsrs_parser.cc', + 'src/packet_parser.cc', + 'src/unicode_utils.cc' + ], + 'configurations': { + 'Debug': { + 'cflags': ['<@(jstp_debug_cflags)'], + 'xcode_settings': { + 'OTHER_CFLAGS': ['<@(jstp_debug_cflags)'] + } + }, + 'Release': { + 'cflags': ['<@(jstp_release_cflags)'], + 'xcode_settings': { + 'OTHER_CFLAGS': ['<@(jstp_release_cflags)'] + } + } + }, + 'cflags': ['<@(jstp_base_cflags)'], + 'xcode_settings': { + 'OTHER_CFLAGS': ['<@(jstp_base_cflags)'] + } } ] } diff --git a/lib/record-serialization.js b/lib/record-serialization.js index 4139127f..5bdf1a63 100644 --- a/lib/record-serialization.js +++ b/lib/record-serialization.js @@ -1,5 +1,6 @@ 'use strict'; +var common = require('./common'); var serializerFactory = require('./serializer-factory'); var jsrs = {}; @@ -11,16 +12,16 @@ module.exports = jsrs; // one of our priorities to optimize it. var USE_NATIVE_SERIALIZER = false; -var jsrsNative; +var jstpNative; try { - jsrsNative = require('../build/Release/jsrs'); + jstpNative = require('../build/Release/jstp'); } catch (e) { try { - jsrsNative = require('../build/Debug/jsrs'); + jstpNative = require('../build/Debug/jstp'); } catch (e) { console.warn( - 'JSTP Record Serialization native module is not built. ' + + 'JSTP native addon is not built. ' + 'Run `npm install` in order to build it, otherwise you will get ' + 'poor server performance under load.' 
); @@ -28,14 +29,9 @@ try { } } -if (jsrsNative) { - jsrs.parse = jsrsNative.parse; - - if (USE_NATIVE_SERIALIZER) { - jsrs.stringify = jsrsNative.stringify; - } else { +if (jstpNative) { + common.extend(jsrs, jstpNative); + if (!USE_NATIVE_SERIALIZER) { jsrs.stringify = serializerFactory.createSerializer(); } - - jsrs.parseNetworkPackets = jsrsNative.parseNetworkPackets; } diff --git a/src/common.h b/src/common.h new file mode 100644 index 00000000..ba422c39 --- /dev/null +++ b/src/common.h @@ -0,0 +1,11 @@ +// Copyright (c) 2016-2017 JSTP project authors. Use of this source code is +// governed by the MIT license that can be found in the LICENSE file. + +#ifndef SRC_COMMON_H_ +#define SRC_COMMON_H_ + +#define THROW_EXCEPTION(ex_type, ex_msg) \ + isolate->ThrowException(v8::Exception::ex_type( \ + v8::String::NewFromUtf8(isolate, ex_msg))) + +#endif // SRC_COMMON_H_ diff --git a/src/jsrs-impl.cc b/src/jsrs-impl.cc deleted file mode 100644 index 45f8315c..00000000 --- a/src/jsrs-impl.cc +++ /dev/null @@ -1,932 +0,0 @@ -// Copyright (c) 2016-2017 JSTP project authors. Use of this source code is -// governed by the MIT license that can be found in the LICENSE file. 
- -#include "jsrs.h" -#include "jsrs-impl.h" - -#include -#include -#include -#include -#include - -#include - -namespace jstp { - -namespace jsrs { - -v8::Local Stringify(v8::Isolate* isolate, - v8::Local value) { - if (value->IsFunction()) { - return v8::Local(); - } - if (value->IsNumber() || - value->IsBoolean() || - value->IsUndefined() || - value->IsNull()) { - return value->ToString(isolate->GetCurrentContext()).ToLocalChecked(); - } else if (value->IsDate()) { - return serializer::StringifyDate(isolate, value.As()); - } else if (value->IsString()) { - return serializer::StringifyString(isolate, value.As()); - } else if (value->IsArray()) { - return serializer::StringifyArray(isolate, value.As()); - } else if (value->IsObject()) { - return serializer::StringifyObject(isolate, value.As()); - } else { - return v8::Local(); - } -} - -namespace serializer { - -v8::Local StringifyDate(v8::Isolate* isolate, - v8::Local date) { - v8::Local context = isolate->GetCurrentContext(); - v8::Local toISOString = date->Get(context, - v8::String::NewFromUtf8(isolate, "toISOString")).ToLocalChecked(); - v8::Local result = toISOString.As()->Call(context, - date, 0, nullptr).ToLocalChecked(); - v8::Local quotes = v8::String::NewFromUtf8(isolate, "\'"); - v8::Local res_str = result->ToString(); - res_str = v8::String::Concat(quotes, v8::String::Concat(res_str, quotes)); - return res_str->ToString(); -} - -v8::Local StringifyArray(v8::Isolate* isolate, - v8::Local array) { - v8::Local comma = - v8::String::NewFromUtf8(isolate, ","); - - v8::Local result = - v8::String::NewFromUtf8(isolate, "["); - - uint32_t length = array->Length(); - - for (uint32_t index = 0; index < length; index++) { - v8::Local value = array->Get(index); - if (!value->IsUndefined()) { - v8::Local chunk = jstp::jsrs::Stringify(isolate, value); - if (chunk.IsEmpty()) continue; - result = v8::String::Concat(result, chunk); - } - if (index != length - 1) { - result = v8::String::Concat(result, comma); - } - } - 
- result = v8::String::Concat(result, - v8::String::NewFromUtf8(isolate, "]")); - - return result; -} - -v8::Local StringifyString(v8::Isolate* isolate, - v8::Local string) { - uint32_t length = string->Length(); - std::vector result_str; - result_str.reserve((length + 1) * 2); - result_str.push_back('\''); - v8::String::Utf8Value utf8string(string); - const char* c_string = *utf8string; - for (uint32_t i = 0; i < length; i++) { - std::size_t offset; - const char* ch = GetEscapedControlChar(c_string[i], &offset); - if (ch) { - for (std::size_t k = 0; k < offset; k++) { - result_str.push_back(ch[k]); - } - } else { - result_str.push_back(c_string[i]); - } - } - - result_str.push_back('\''); - - return v8::String::NewFromUtf8(isolate, result_str.data(), - v8::NewStringType::kNormal, - static_cast(result_str.size())).ToLocalChecked(); -} - -const char* GetEscapedControlChar(char str, std::size_t* size) { - constexpr static const char* control_chars[0x20] = { - "\\u0000", "\\u0001", "\\u0002", - "\\u0003", "\\u0004", "\\u0005", - "\\u0006", "\\u0007", "\\u0008", - "\\u0009", "\\u000a", "\\u000b", - "\\u000c", "\\u000d", "\\u000e", - "\\u000f", "\\u0010", "\\u0011", - "\\u0012", "\\u0013", "\\u0014", - "\\u0015", "\\u0016", "\\u0017", - "\\u0018", "\\u0019", "\\u001a", - "\\u001b", "\\u001c", "\\u001d", - "\\u001e", "\\u001f" - }; - - *size = 2; - - switch (str) { - case '\b': return "\\b"; - case '\f': return "\\f"; - case '\n': return "\\n"; - case '\r': return "\\r"; - case '\t': return "\\t"; - case '\v': return "\\v"; - case '\\': return "\\\\"; - case '\'': return "\\'"; - case 0x7F: return "\\u007f"; - default: - if (str < 0x20) { - *size = 6; - return control_chars[static_cast(str)]; - } else { - return nullptr; - } - } -} - -v8::Local StringifyObject(v8::Isolate* isolate, - v8::Local object) { - v8::Local comma = v8::String::NewFromUtf8(isolate, ","); - v8::Local colon = v8::String::NewFromUtf8(isolate, ":"); - - v8::Local result = 
v8::String::NewFromUtf8(isolate, "{"); - - v8::Local context = isolate->GetCurrentContext(); - - v8::Local keys = object->GetOwnPropertyNames(context) - .ToLocalChecked(); - v8::Local chunk; - bool first_defined = true; - for (uint32_t i = 0; i < keys->Length(); i++) { - v8::Local key = keys->Get(context, i).ToLocalChecked(); - v8::Local value = object->Get(context, key).ToLocalChecked(); - chunk = jstp::jsrs::Stringify(isolate, value); - if (!value->IsUndefined() && !chunk.IsEmpty()) { - if (i != 0 && first_defined) { - result = v8::String::Concat(result, comma); - } - first_defined = true; - result = v8::String::Concat(result, - StringifyKey(isolate, key->ToString())); - result = v8::String::Concat(result, colon); - result = v8::String::Concat(result, chunk); - } else { - if (i == 0) first_defined = false; - } - } - result = v8::String::Concat(result, v8::String::NewFromUtf8(isolate, "}")); - return result; -} - -v8::Local StringifyKey(v8::Isolate* isolate, - v8::Local key) { - v8::String::Utf8Value key_str(key.As()); - if (!IsValidKey(isolate, key_str)) { - return StringifyString(isolate, key); - } - return key; -} - -bool IsValidKey(v8::Isolate* isolate, const v8::String::Utf8Value& key) { - bool result = true; - const char* key_str = *key; - for (int i = 0; i < key.length(); i++) { - if (key_str[i] == '_') continue; - if ((i == 0 && !isalpha(key_str[i])) || !isalnum(key_str[i])) { - result = false; - break; - } - } - return result; -} - -} // namespace serializer - -v8::Local Parse(v8::Isolate* isolate, - const v8::String::Utf8Value& in) { - std::size_t size; - const char* to_parse = deserializer::PrepareString(isolate, *in, in.length(), - &size); - if (!to_parse) { - return v8::Undefined(isolate); - } - - deserializer::Type type; - if (!deserializer::GetType(to_parse, to_parse + size, &type)) { - isolate->ThrowException(v8::Exception::TypeError( - v8::String::NewFromUtf8(isolate, "Invalid type"))); - return v8::Undefined(isolate); - } - - std::size_t 
parsed_size = 0; - v8::Local result = - (deserializer::kParseFunctions[type])(isolate, to_parse, - to_parse + size, &parsed_size); - if (size != parsed_size) { - isolate->ThrowException(v8::Exception::SyntaxError( - v8::String::NewFromUtf8(isolate, "Invalid format"))); - return v8::Undefined(isolate); - } - - delete []to_parse; - return result; -} - -v8::Local ParseNetworkPackets(v8::Isolate* isolate, - const v8::String::Utf8Value& in, v8::Local out) { - std::size_t total_size = 0; - std::size_t parsed_size = 0; - const char* source = deserializer::PrepareString(isolate, *in, in.length(), - &total_size); - const char* curr_chunk = source; - int index = 0; - - while (parsed_size < total_size) { - auto chunk_size = strlen(curr_chunk); - parsed_size += chunk_size + 1; - - if (parsed_size <= total_size) { - std::size_t parsed_chunk_size = 0; - auto result = deserializer::ParseObject(isolate, curr_chunk, - curr_chunk + chunk_size, &parsed_chunk_size); - - if (parsed_chunk_size != chunk_size) { - delete []source; - isolate->ThrowException(v8::Exception::SyntaxError( - v8::String::NewFromUtf8(isolate, "Invalid format"))); - return v8::String::Empty(isolate); - } - - out->Set(index++, result); - curr_chunk += chunk_size + 1; - } - } - - auto rest = v8::String::NewFromUtf8(isolate, curr_chunk); - delete []source; - return rest; -} - -namespace deserializer { - -bool IsLineTerminatorSequence(const char* str, std::size_t* size) { - if (str[0] == '\x0D' && str[1] == '\x0A') { - *size = 2; - return true; - } else if (str[0] == '\x0D' || str[0] == '\x0A') { - *size = 1; - return true; - } else if (str[0] == '\xE2' && - str[1] == '\x80' && - (str[2] == '\xA8' || - str[2] == '\xA9')) { - *size = 3; - return true; - } - return false; -} - -bool IsWhiteSpaceCharacter(const char* str, std::size_t* size) { - if (str[0] == '\x09' || - str[0] == '\x0B' || - str[0] == '\x0C' || - str[0] == '\x20' || - str[0] == '\xA0') { - *size = 1; - return true; - } else if (str[0] == '\xC2' && str[1] 
== '\xA0') { - *size = 2; - return true; - } else { - bool is_multibyte_space = false; - switch (str[0]) { - case '\xE1': - if (str[1] == '\xBB' && str[2] == '\xBF') { - is_multibyte_space = true; - } - break; - case '\xE2': - if ((str[1] == '\x80' && - ((static_cast(str[2]) & 0x7F) <= 0xA || - str[2] == '\xAF')) || - (str[1] == '\x81' && str[2] == '\x9F')) { - is_multibyte_space = true; - } - break; - case '\xE3': - if (str[1] == '\x80' && str[2] == '\x80') { - is_multibyte_space = true; - } - break; - case '\xEF': - if (str[1] == '\xBB' && str[2] == '\xBF') { - is_multibyte_space = true; - } - break; - } - if (is_multibyte_space) { - *size = 3; - return true; - } - } - return false; -} - -const char* PrepareString(v8::Isolate* isolate, const char* str, - std::size_t length, std::size_t* new_length) { - char* result = new char[length + 1]; - bool string_mode = false; - enum { kDisabled = 0, kOneline, kMultiline } comment_mode = kDisabled; - std::size_t j = 0; - std::size_t size = 0; - - for (std::size_t i = 0; i < length; i++) { - if ((comment_mode == kDisabled) && - (str[i] == '\"' || str[i] == '\'') && - (i == 0 || str[i - 1] != '\\')) { - string_mode = !string_mode; - } - - if (!string_mode) { - if (comment_mode == kDisabled && str[i] == '/') { - switch (str[i + 1]) { - case '/': - comment_mode = kOneline; - break; - case '*': - comment_mode = kMultiline; - break; - } - } - - if (comment_mode == kDisabled) { - if (IsWhiteSpaceCharacter(str + i, &size) || - IsLineTerminatorSequence(str + i, &size)) { - i += size - 1; - } else { - result[j++] = str[i]; - } - } - - if ((comment_mode == kOneline && IsLineTerminatorSequence(str + i, &size)) - || (comment_mode == kMultiline && str[i - 1] == '*' && str[i] == '/')) { - comment_mode = kDisabled; - } - } else if (str[i] == '\\' && IsLineTerminatorSequence(str + i + 1, &size)) { - i += size; - } else if (IsLineTerminatorSequence(str + i, &size)) { - isolate->ThrowException(v8::Exception::SyntaxError( - 
v8::String::NewFromUtf8(isolate, "Unexpected line end in string"))); - delete []result; - return nullptr; - } else { - result[j++] = str[i]; - } - } - - result[j] = '\0'; - *new_length = j; - - return result; -} - -v8::Local ParseUndefined(v8::Isolate* isolate, const char* begin, - const char* end, std::size_t* size) { - if (*begin == ',' || *begin == ']') { - *size = 0; - } else if (*begin == 'u') { - *size = 9; - } else { - isolate->ThrowException(v8::Exception::TypeError( - v8::String::NewFromUtf8(isolate, "Invalid format of undefined value"))); - } - return v8::Undefined(isolate); -} - -v8::Local ParseNull(v8::Isolate* isolate, const char* begin, - const char* end, std::size_t* size) { - *size = 4; - return v8::Null(isolate); -} - -v8::Local ParseBool(v8::Isolate* isolate, const char* begin, - const char* end, std::size_t* size) { - v8::Local result; - if (begin + 4 <= end && strncmp(begin, "true", 4) == 0) { - result = v8::True(isolate); - *size = 4; - } else if (begin + 5 <= end && strncmp(begin, "false", 5) == 0) { - result = v8::False(isolate); - *size = 5; - } else { - isolate->ThrowException(v8::Exception::TypeError( - v8::String::NewFromUtf8(isolate, - "Invalid format: expected boolean"))); - result = v8::Undefined(isolate); - } - return result; -} - -v8::Local ParseNumber(v8::Isolate* isolate, const char* begin, - const char* end, std::size_t* size) { - bool negate_result = false; - const char* number_start = begin; - - if (*begin == '+' || *begin == '-') { - negate_result = *begin == '-'; - number_start++; - } - - int base = 10; - - if (*number_start == '0') { - number_start++; - - if (IsOctalDigit(*number_start)) { - return isolate->ThrowException(v8::Exception::SyntaxError( - v8::String::NewFromUtf8(isolate, "Use new octal literal syntax"))); - } else if (*number_start == 'b') { - base = 2; - number_start++; - } else if (*number_start == 'o') { - base = 8; - number_start++; - } else if (*number_start == 'x') { - base = 16; - number_start++; - } else 
{ - number_start--; - } - } - - if (base == 10) { - return ParseDecimalNumber(isolate, begin, end, size); - } else { - auto value = ParseIntegerNumber(isolate, number_start, end, size, - base, negate_result); - std::size_t offset = static_cast(number_start - begin); - *size += offset; - return value; - } -} - -v8::Local ParseDecimalNumber(v8::Isolate* isolate, const char* begin, - const char* end, std::size_t* size) { - v8::Local result = v8::Number::New(isolate, std::atof(begin)); - *size = end - begin; - std::size_t i = 0; - while (begin[i] != ',' && begin[i] != '}' && begin[i] != ']' && - i < *size) i++; - *size = i; - return result; -} - -v8::Local ParseIntegerNumber(v8::Isolate* isolate, const char* begin, - const char* end, std::size_t* size, - int base, bool negate_result) { - char* number_end; - int32_t value = std::strtol(begin, &number_end, base); - if (negate_result) { - value = -value; - } - - *size = static_cast(number_end - begin); - - return v8::Integer::New(isolate, value); -} - -v8::Local ParseString(v8::Isolate* isolate, const char* begin, - const char* end, std::size_t* size) { - *size = end - begin; - char* result = new char[*size + 1]; - std::memset(result, 0, *size + 1); - enum { kApostrophe = 0, kQMarks} string_mode = - (*begin == '\'') ? 
kApostrophe : kQMarks; - bool is_ended = false; - std::size_t res_index = 0; - std::size_t out_offset, in_offset; - for (std::size_t i = 1; i < *size; i++) { - if ((string_mode == kQMarks && begin[i] == '\"') || - (string_mode == kApostrophe && begin[i] == '\'')) { - is_ended = true; - *size = i + 1; - result[res_index] = '\0'; - break; - } - if (begin[i] == '\\') { - char* symb = GetControlChar(isolate, begin + ++i, &out_offset, - &in_offset); - if (!symb) { - return v8::String::Empty(isolate); - } - std::strncpy(result + res_index, symb, out_offset); - delete []symb; - i += in_offset - 1; - res_index += out_offset; - } else { - result[res_index++] = begin[i]; - } - } - if (!is_ended) { - isolate->ThrowException(v8::Exception::SyntaxError( - v8::String::NewFromUtf8(isolate, "Error while parsing string"))); - return v8::String::Empty(isolate); - } - v8::Local result_str = v8::String::NewFromUtf8(isolate, result, - v8::NewStringType::kNormal, static_cast(res_index)).ToLocalChecked(); - delete []result; - return result_str; -} - -char* CodePointsToUtf8(unsigned int c, std::size_t* size) { - char* result = new char[4]; - char* b = result; - if (c < 0x80) { - *b++ = c; - *size = 1; - } else if (c < 0x800) { - *b++ = 192 + c / 64; - *b++ = 128 + c % 64; - *size = 2; - } else if (c - 0xd800u < 0x800) { - delete []result; - return CodePointsToUtf8(0xFFFD, size); - } else if (c < 0x10000) { - *b++ = 224 + c / 4096; - *b++ = 128 + c / 64 % 64; - *b++ = 128 + c % 64; - *size = 3; - } else if (c < 0x110000) { - *b++ = 240 + c / 262144; - *b++ = 128 + c / 4096 % 64; - *b++ = 128 + c / 64 % 64; - *b++ = 128 + c % 64; - *size = 4; - } else { - delete []result; - return CodePointsToUtf8(0xFFFD, size); - } - return result; -} - -unsigned int ReadHexNumber(const char* str, std::size_t len, bool* ok) { - char t[6]; - char* end; - std::strncpy(t, str, len); - t[len] = '\0'; - unsigned int result = std::strtol(t, &end, 16); - if (end - t != static_cast(len)) { - *ok = false; - } else 
{ - *ok = true; - } - return result; -} - -char* GetControlChar(v8::Isolate* isolate, const char* str, - std::size_t* res_len, std::size_t* size) { - char* result = new char[5]; - *size = 1; - *res_len = 1; - bool ok; - switch (str[0]) { - case 'b': *result = '\b'; break; - case 'f': *result = '\f'; break; - case 'n': *result = '\n'; break; - case 'r': *result = '\r'; break; - case 't': *result = '\t'; break; - case 'v': *result = '\v'; break; - case '0': *result = '\0'; break; - case 'x': { - *result = ReadHexNumber(str + 1, 2, &ok); - if (!ok) { - delete []result; - isolate->ThrowException(v8::Exception::SyntaxError( - v8::String::NewFromUtf8(isolate, - "Invalid hexadecimal escape sequence"))); - return nullptr; - } - *size = 3; - break; - } - case 'u': { - unsigned int symb_code; - if (isxdigit(str[1])) { - symb_code = ReadHexNumber(str + 1, 4, &ok); - *size = 5; - } else if (str[1] == '{') { - std::size_t hex_size; // maximal hex is 10FFFF - for (hex_size = 1; str[hex_size + 2] != '}' && hex_size <= 6; - hex_size++) { - if (str[hex_size + 2] == '\0') { - delete []result; - isolate->ThrowException(v8::Exception::SyntaxError( - v8::String::NewFromUtf8(isolate, - "Invalid Unicode code point escape"))); - return nullptr; - } - } - symb_code = ReadHexNumber(str + 2, hex_size, &ok); - *size = hex_size + 3; - } else { - ok = false; - } - if (!ok) { - delete []result; - isolate->ThrowException(v8::Exception::SyntaxError( - v8::String::NewFromUtf8(isolate, - "Invalid Unicode escape sequence"))); - return nullptr; - } - char* unicode_symbol = CodePointsToUtf8(symb_code, res_len); - delete []result; - return unicode_symbol; - } - default: - *result = str[0]; - } - - return result; -} - -v8::Local ParseKeyInObject(v8::Isolate* isolate, const char* begin, - const char* end, std::size_t* size) { - *size = end - begin; - v8::Local result; - if (begin[0] == '\'' || begin[0] == '"') { - Type current_type; - bool valid = GetType(begin, end, ¤t_type); - if (valid && current_type 
== Type::kString) { - std::size_t offset; - result = ParseString(isolate, begin, end, - &offset).As(); - *size = offset; - return result; - } else { - isolate->ThrowException(v8::Exception::SyntaxError( - v8::String::NewFromUtf8(isolate, - "Invalid format in object: key is invalid string"))); - return v8::Local(); - } - } else { - std::size_t current_length = 0; - for (std::size_t i = 0; i < *size; i++) { - if (begin[i] == ':') { - if (current_length != 0) { - result = v8::String::NewFromUtf8(isolate, begin, - v8::NewStringType::kInternalized, - static_cast(current_length)) - .ToLocalChecked(); - break; - } else { - isolate->ThrowException(v8::Exception::SyntaxError( - v8::String::NewFromUtf8(isolate, - "Unexpected token :"))); - return v8::Local(); - } - } else if (begin[i] == '_' || - (i != 0 ? isalnum(begin[i]) : isalpha(begin[i]))) { - current_length++; - } else { - isolate->ThrowException(v8::Exception::SyntaxError( - v8::String::NewFromUtf8(isolate, - "Invalid format in object: key has invalid type"))); - return v8::Local(); - } - } - *size = current_length; - return result; - } -} - -v8::Local ParseValueInObject(v8::Isolate* isolate, const char* begin, - const char* end, std::size_t* size) { - v8::Local value; - Type current_type; - bool valid = GetType(begin, end, ¤t_type); - if (valid) { - value = (kParseFunctions[current_type])(isolate, begin, end, size); - return value; - } else { - isolate->ThrowException(v8::Exception::TypeError( - v8::String::NewFromUtf8(isolate, "Invalid type in object"))); - return v8::Object::New(isolate); - } -} - -v8::Local ParseObject(v8::Isolate* isolate, const char* begin, - const char* end, std::size_t* size) { - bool key_mode = true; - *size = end - begin; - v8::Local current_key; - v8::Local current_value; - std::size_t current_length = 0; - v8::Local result = v8::Object::New(isolate); - - for (std::size_t i = 1; i < *size; i++) { - if (key_mode) { - if (begin[i] == '}') { - if (begin[i - 1] != ',') { // In case of empty 
object - *size = 2; - } else { // In case of trailing comma - *size = i + 1; - } - break; - } - current_key = ParseKeyInObject(isolate, begin + i, end, - ¤t_length); - i += current_length; - } else { - current_value = ParseValueInObject(isolate, begin + i, end, - ¤t_length); - if (!current_value->IsUndefined()) { - v8::Maybe is_ok = result->Set(isolate->GetCurrentContext(), - current_key, current_value); - if (is_ok.IsNothing()) { - isolate->ThrowException( - v8::Exception::Error(v8::String::NewFromUtf8(isolate, - "Cannot add property to object"))); - } - } - i += current_length; - if (begin[i] != ',' && begin[i] != '}') { - isolate->ThrowException(v8::Exception::SyntaxError( - v8::String::NewFromUtf8(isolate, - "Invalid format in object"))); - return v8::Object::New(isolate); - } else if (begin[i] == '}') { - *size = i + 1; - break; - } - } - key_mode = !key_mode; - } - return result; -} - -bool GetType(const char* begin, const char* end, Type* type) { - bool result = true; - switch (*begin) { - case ',': - case ']': - *type = Type::kUndefined; - break; - case '{': - *type = Type::kObject; - break; - case '[': - *type = Type::kArray; - break; - case '\"': - case '\'': - *type = Type::kString; - break; - case 't': - case 'f': - *type = Type::kBool; - break; - case 'n': - *type = Type::kNull; - if (begin + 4 <= end) { - result = (std::strncmp(begin, "null", 4) == 0); - } - break; - case 'u': - *type = Type::kUndefined; - if (begin + 9 <= end) { - result = (std::strncmp(begin, "undefined", 9) == 0); - } - break; - default: - result = false; - if (isdigit(*begin) || *begin == '.' 
|| *begin == '+' || *begin == '-') { - *type = Type::kNumber; - result = true; - } - } - return result; -} - -v8::Local ParseArray(v8::Isolate* isolate, const char* begin, - const char* end, std::size_t* size) { - Type current_type; - v8::Local array = v8::Array::New(isolate); - std::size_t current_length = 0; - *size = end - begin; - if (*begin == '[' && *(begin + 1) == ']') { // In case of empty array - *size = 2; - return array; - } - v8::Local t; - std::size_t current_element = 0; - for (std::size_t i = 1; i < *size; i++) { - bool valid = GetType(begin + i, end, ¤t_type); - if (valid) { - t = kParseFunctions[current_type](isolate, - begin + i, end, ¤t_length); - if (!(current_type == Type::kUndefined && - begin[i] == ']')) { - array->Set(static_cast(current_element++), t); - } - i += current_length; - - current_length = 0; - if (begin[i] != ',' && begin[i] != ']') { - isolate->ThrowException(v8::Exception::SyntaxError( - v8::String::NewFromUtf8(isolate, - "Invalid format in array: missed comma"))); - return v8::Array::New(isolate); - } else if (begin[i] == ']') { - *size = i + 1; - break; - } - } else { - isolate->ThrowException(v8::Exception::TypeError( - v8::String::NewFromUtf8(isolate, "Invalid type in array"))); - return v8::Array::New(isolate); - } - } - - return array; -} - -} // namespace deserializer - -namespace bindings { - -void Stringify(const v8::FunctionCallbackInfo& args) { - v8::Isolate* isolate = args.GetIsolate(); - - if (args.Length() != 1) { - isolate->ThrowException(v8::Exception::TypeError( - v8::String::NewFromUtf8(isolate, "Wrong number of arguments"))); - return; - } - - v8::HandleScope scope(isolate); - - v8::Local result = jstp::jsrs::Stringify(isolate, args[0]); - args.GetReturnValue().Set(result); -} - -void Parse(const v8::FunctionCallbackInfo& args) { - v8::Isolate* isolate = args.GetIsolate(); - - if (args.Length() != 1) { - isolate->ThrowException(v8::Exception::TypeError( - v8::String::NewFromUtf8(isolate, "Wrong number of 
arguments"))); - return; - } - if (!args[0]->IsString() && !args[0]->IsUint8Array()) { - isolate->ThrowException(v8::Exception::TypeError( - v8::String::NewFromUtf8(isolate, "Wrong argument type"))); - return; - } - - v8::HandleScope scope(isolate); - - v8::String::Utf8Value str(args[0]->ToString()); - - v8::Local result = jstp::jsrs::Parse(isolate, str); - args.GetReturnValue().Set(result); -} - -void ParseNetworkPackets(const v8::FunctionCallbackInfo& args) { - v8::Isolate* isolate = args.GetIsolate(); - - if (args.Length() != 2) { - isolate->ThrowException(v8::Exception::TypeError( - v8::String::NewFromUtf8(isolate, "Wrong number of arguments"))); - return; - } - - if (!args[0]->IsString() || !args[1]->IsArray()) { - isolate->ThrowException(v8::Exception::TypeError( - v8::String::NewFromUtf8(isolate, "Wrong argument type"))); - return; - } - - v8::HandleScope scope(isolate); - - v8::String::Utf8Value str(args[0]->ToString()); - auto array = v8::Local::Cast(args[1]); - auto result = jstp::jsrs::ParseNetworkPackets(isolate, str, array); - - args.GetReturnValue().Set(result); -} - -void Init(v8::Local target) { - NODE_SET_METHOD(target, "stringify", Stringify); - NODE_SET_METHOD(target, "parse", Parse); - NODE_SET_METHOD(target, "parseNetworkPackets", ParseNetworkPackets); -} - -NODE_MODULE(jsrs, Init); - -} // namespace bindings - -} // namespace jsrs - -} // namespace jstp diff --git a/src/jsrs-impl.h b/src/jsrs-impl.h deleted file mode 100644 index a55f16cb..00000000 --- a/src/jsrs-impl.h +++ /dev/null @@ -1,192 +0,0 @@ -// Copyright (c) 2016-2017 JSTP project authors. Use of this source code is -// governed by the MIT license that can be found in the LICENSE file. - -#ifndef JSRS_IMPL_H_ -#define JSRS_IMPL_H_ - -#include - -#include -#include - -namespace jstp { - -namespace jsrs { - -namespace serializer { - -// Serializes a JavaScript date into string. 
-v8::Local StringifyDate(v8::Isolate* isolate, - v8::Local date); - -// Serializes a JavaScript string into another string representing it as it -// would have been written in JavaScript source code. -v8::Local StringifyString(v8::Isolate* isolate, - v8::Local string); - -// Serializes a JavaScript array into string. -v8::Local StringifyArray(v8::Isolate* isolate, - v8::Local array); - -// Serializes a JavaScript object into string. -v8::Local StringifyObject(v8::Isolate* isolate, - v8::Local object); - -// Serializes a key inside a JavaScript object into string without -// quotes if possible, with single quotes otherwise. -v8::Local StringifyKey(v8::Isolate* isolate, - v8::Local key); - -// Checks if a string can be a non-quoted object key. -bool IsValidKey(v8::Isolate* isolate, const v8::String::Utf8Value& key); - -// Returns a string representing an escaped control character. -// If the given character is not a control one, returns nullptr. -// The `size` is being incremented by the length of the resulting -// string, but always at least by two, even when the function -// returns nullptr. -const char* GetEscapedControlChar(char str, std::size_t* size); - -} // namespace serializer - -namespace deserializer { - -// Enumeration of supported JavaScript types used for deserialization -// function selection. -enum Type { - kUndefined = 0, kNull, kBool, kNumber, kString, kArray, kObject, kDate -}; - -// Parses an undefined value from `begin` but never past `end` and returns the -// parsed JavaScript value. The `size` is incremented by the number of -// characters the function has used in the string so that the calling side -// knows where to continue from. -v8::Local ParseUndefined(v8::Isolate* isolate, const char* begin, - const char* end, std::size_t* size); - -// Parses a null value from `begin` but never past `end` and returns the parsed -// JavaScript value. 
The `size` is incremented by the number of characters the -// function has used in the string so that the calling side knows where to -// continue from. -v8::Local ParseNull(v8::Isolate* isolate, const char* begin, - const char* end, std::size_t* size); - -// Parses a boolean value from `begin` but never past `end` and returns the -// parsed JavaScript value. The `size` is incremented by the number of -// characters the function has used in the string so that the calling side -// knows where to continue from. -v8::Local ParseBool(v8::Isolate* isolate, const char* begin, - const char* end, std::size_t* size); - -// Parses a numeric value from `begin` but never past `end` and returns the -// parsed JavaScript value. The `size` is incremented by the number of -// characters the function has used in the string so that the calling side -// knows where to continue from. -v8::Local ParseNumber(v8::Isolate* isolate, const char* begin, - const char* end, std::size_t* size); - -// Parses a string value from `begin` but never past `end` and returns the -// parsed JavaScript value. The `size` is incremented by the number of -// characters the function has used in the string so that the calling side -// knows where to continue from. -v8::Local ParseString(v8::Isolate* isolate, const char* begin, - const char* end, std::size_t* size); - -// Parses an array from `begin` but never past `end` and returns the parsed -// JavaScript value. The `size` is incremented by the number of characters the -// function has used in the string so that the calling side knows where to -// continue from. -v8::Local ParseArray(v8::Isolate* isolate, const char* begin, - const char* end, std::size_t* size); - -// Parses an object key from `begin` but never past `end` and returns -// the parsed JavaScript value. The `size` is incremented by the number -// of characters the function has used in the string so that the calling side -// knows where to continue from. 
-v8::Local ParseKeyInObject(v8::Isolate* isolate, const char* begin, - const char* end, std::size_t* size); - -// Parses a value corresponding to key inside object from `begin` -// but never past `end` and returns the parsed JavaScript value. -// The `size` is incremented by the number of characters the function has used -// in the string so that the calling side knows where to continue from. -v8::Local ParseValueInObject(v8::Isolate* isolate, const char* begin, - const char* end, std::size_t* size); - -// Parses an object from `begin` but never past `end` and returns the parsed -// JavaScript value. The `size` is incremented by the number of characters the -// function has used in the string so that the calling side knows where to -// continue from. -v8::Local ParseObject(v8::Isolate* isolate, const char* begin, - const char* end, std::size_t* size); - -// The table of parsing functions indexed with the values of the Type -// enumeration. -constexpr static v8::Local (*kParseFunctions[])(v8::Isolate*, - const char *, const char *, std::size_t *) = { - &ParseUndefined, - &ParseNull, - &ParseBool, - &ParseNumber, - &ParseString, - &ParseArray, - &ParseObject -}; - -// The maximum length of object keys. -static const std::size_t kMaxKeyLength = 256; - -// Returns true if `str` points to a valid Line Terminator Sequence code point, -// false otherwise. `size` will receive the number of bytes used by this -// code point (1, 2, 3). -bool IsLineTerminatorSequence(const char* str, std::size_t* size); - -// Returns true if `str` points to a valid White space code point, -// false otherwise. `size` will receive the number of bytes used by this -// code point (1, 2, 3). -bool IsWhiteSpaceCharacter(const char* str, std::size_t* size); - -// Prepares a source string for parsing throwing out whitespace and comments. 
-const char* PrepareString(v8::Isolate* isolate, const char* str, - std::size_t length, std::size_t* new_length); - -// Parses the type of the serialized JavaScript value at the position `begin` -// and before `end`. Returns true if it was able to detect the type, false -// otherwise. -bool GetType(const char* begin, const char* end, Type* type); - -// Encodes a Unicode code point in UTF-8. `size` will receive the number of -// bytes used (1, 2, 3 or 4). -char* CodePointsToUtf8(unsigned int c, std::size_t* size); - -// Parses a hexadecimal number into unsigned int. Whether the parsing -// was successful is determined by the value of `ok`. -unsigned int ReadHexNumber(const char* str, std::size_t len, bool* ok); - -// Parses a part of a JavaScript string representation after the backslash -// character (i.e., an escape sequence without \) into an unescaped control -// character. -char* GetControlChar(v8::Isolate* isolate, const char* str, - std::size_t* res_len, std::size_t* size); - -// Checks if a character is an octal digit. -inline bool IsOctalDigit(char character) { - return character >= '0' && character <= '7'; -} - -// Parses a decimal number, either integer or float. -v8::Local ParseDecimalNumber(v8::Isolate* isolate, const char* begin, - const char* end, std::size_t* size); - -// Parses an integer number in arbitrary base without prefixes. -v8::Local ParseIntegerNumber(v8::Isolate* isolate, const char* begin, - const char* end, std::size_t* size, - int base, bool negate_result); - -} // namespace deserializer - -} // namespace jsrs - -} // namespace jstp - -#endif // JSRS_IMPL_H_ diff --git a/src/jsrs.h b/src/jsrs.h deleted file mode 100644 index 16e0e63d..00000000 --- a/src/jsrs.h +++ /dev/null @@ -1,47 +0,0 @@ -// Copyright (c) 2016-2017 JSTP project authors. Use of this source code is -// governed by the MIT license that can be found in the LICENSE file. 
- -#ifndef JSRS_H_ -#define JSRS_H_ - -#include -#include - -namespace jstp { - -namespace jsrs { - -// Serializes a JavaScript value using the JSTP Record Serialization format -// and returns a string representing it. -v8::Local Stringify(v8::Isolate* isolate, - v8::Local value); - -// Deserializes a UTF-8 encoded string in the JSTP Record Serialization format -// into a JavaScript value and returns a handle to it. -v8::Local Parse(v8::Isolate* isolate, - const v8::String::Utf8Value& in); - -// Efficiently parses JSTP packets for transports that require packet -// delimiters eliminating the need to split the stream data into parts before -// parsing and allowing to do that in one pass. -v8::Local ParseNetworkPackets(v8::Isolate* isolate, - const v8::String::Utf8Value& in, v8::Local out); - -namespace bindings { - -// JavaScript binding to jstp::jsrs::Stringify. -void Stringify(const v8::FunctionCallbackInfo& args); - -// JavaScript binding to jstp::jsrs::Parse. -void Parse(const v8::FunctionCallbackInfo& args); - -// JavaScript binding to jstp::jsrs::ParseNetworkPackets. -void ParseNetworkPackets(const v8::FunctionCallbackInfo& args); - -} // namespace bindings - -} // namespace jsrs - -} // namespace jstp - -#endif // JSRS_H_ diff --git a/src/jsrs_parser.cc b/src/jsrs_parser.cc new file mode 100644 index 00000000..410aab75 --- /dev/null +++ b/src/jsrs_parser.cc @@ -0,0 +1,638 @@ +// Copyright (c) 2016-2017 JSTP project authors. Use of this source code is +// governed by the MIT license that can be found in the LICENSE file. 
+ +#include "jsrs_parser.h" + +#include +#include +#include +#include +#include + +#include "common.h" +#include "unicode_utils.h" + +using std::atof; +using std::isalnum; +using std::isalpha; +using std::isdigit; +using std::isxdigit; +using std::memset; +using std::ptrdiff_t; +using std::size_t; +using std::strncmp; +using std::strncpy; +using std::strtol; + +using v8::Array; +using v8::False; +using v8::Integer; +using v8::Isolate; +using v8::Local; +using v8::Maybe; +using v8::NewStringType; +using v8::Null; +using v8::Number; +using v8::Object; +using v8::String; +using v8::True; +using v8::Undefined; +using v8::Value; + +using jstp::unicode_utils::CodePointToUtf8; +using jstp::unicode_utils::IsWhiteSpaceCharacter; +using jstp::unicode_utils::IsLineTerminatorSequence; + +namespace jstp { + +namespace parser { + +// Enumeration of supported JavaScript types used for deserialization +// function selection. +enum Type { + kUndefined = 0, kNull, kBool, kNumber, kString, kArray, kObject, kDate +}; + +// Parses the type of the serialized JavaScript value at the position `begin` +// and before `end`. Returns true if it was able to detect the type, false +// otherwise. +static bool GetType(const char* begin, const char* end, Type* type); + +// The table of parsing functions indexed with the values of the Type +// enumeration. 
+static constexpr Local<Value> (*kParseFunctions[])(Isolate*,
+                                                   const char*,
+                                                   const char*,
+                                                   size_t*) = {
+  &internal::ParseUndefined,
+  &internal::ParseNull,
+  &internal::ParseBool,
+  &internal::ParseNumber,
+  &internal::ParseString,
+  &internal::ParseArray,
+  &internal::ParseObject
+};
+
+// Deserializes `in` into a JavaScript value. On any error a JavaScript
+// exception is scheduled on `isolate` and `undefined` is returned.
+// The temporary buffer produced by PrepareString is released on every path.
+Local<Value> Parse(Isolate* isolate, const String::Utf8Value& in) {
+  size_t size;
+  const char* to_parse =
+      internal::PrepareString(isolate, *in, in.length(), &size);
+  if (!to_parse) {
+    // PrepareString has already thrown and freed its buffer.
+    return Undefined(isolate);
+  }
+
+  Type type;
+  if (!GetType(to_parse, to_parse + size, &type)) {
+    delete[] to_parse;
+    THROW_EXCEPTION(TypeError, "Invalid type");
+    return Undefined(isolate);
+  }
+
+  size_t parsed_size = 0;
+  Local<Value> result =
+      (kParseFunctions[type])(isolate, to_parse, to_parse + size, &parsed_size);
+
+  // The parsed value does not reference the buffer, so it can be released
+  // before deciding whether the whole input has been consumed.
+  delete[] to_parse;
+
+  if (size != parsed_size) {
+    THROW_EXCEPTION(SyntaxError, "Invalid format");
+    return Undefined(isolate);
+  }
+
+  return result;
+}
+
+// Detects the type of the value starting at `begin` by looking at its first
+// character. For `null` and `undefined` the whole keyword must fit before
+// `end` and match, because their parsers report a fixed length without
+// re-checking the input.
+static bool GetType(const char* begin, const char* end, Type* type) {
+  bool result = true;
+  switch (*begin) {
+    case ',':
+    case ']':
+      *type = Type::kUndefined;
+      break;
+    case '{':
+      *type = Type::kObject;
+      break;
+    case '[':
+      *type = Type::kArray;
+      break;
+    case '\"':
+    case '\'':
+      *type = Type::kString;
+      break;
+    case 't':
+    case 'f':
+      *type = Type::kBool;
+      break;
+    case 'n':
+      *type = Type::kNull;
+      result = (begin + 4 <= end) && (strncmp(begin, "null", 4) == 0);
+      break;
+    case 'u':
+      *type = Type::kUndefined;
+      result = (begin + 9 <= end) && (strncmp(begin, "undefined", 9) == 0);
+      break;
+    default:
+      result = false;
+      if (isdigit(*begin) || *begin == '.'
|| *begin == '+' || *begin == '-') {
+        *type = Type::kNumber;
+        result = true;
+      }
+  }
+  return result;
+}
+
+namespace internal {
+
+// Copies `str` into a newly allocated, NUL-terminated buffer, dropping
+// comments and all whitespace outside of string literals. The caller owns
+// the returned buffer and must release it with delete[]. Returns nullptr
+// (after throwing a SyntaxError) when a string literal contains an unescaped
+// line terminator.
+const char* PrepareString(Isolate* isolate,
+                          const char* str,
+                          size_t length,
+                          size_t* new_length) {
+  char* result = new char[length + 1];
+  bool string_mode = false;
+  // The quote character that opened the current string literal; only the
+  // same character can close it, so that "it's" stays a single string.
+  char opening_quote = '\0';
+  enum { kDisabled = 0, kOneline, kMultiline } comment_mode = kDisabled;
+  size_t j = 0;
+  size_t size = 0;
+
+  for (size_t i = 0; i < length; i++) {
+    if ((comment_mode == kDisabled) &&
+        (str[i] == '\"' || str[i] == '\'') &&
+        (i == 0 || str[i - 1] != '\\')) {
+      if (!string_mode) {
+        string_mode = true;
+        opening_quote = str[i];
+      } else if (str[i] == opening_quote) {
+        string_mode = false;
+      }
+    }
+
+    if (!string_mode) {
+      if (comment_mode == kDisabled && str[i] == '/') {
+        switch (str[i + 1]) {
+          case '/':
+            comment_mode = kOneline;
+            break;
+          case '*':
+            comment_mode = kMultiline;
+            break;
+        }
+      }
+
+      if (comment_mode == kDisabled) {
+        if (IsWhiteSpaceCharacter(str + i, &size) ||
+            IsLineTerminatorSequence(str + i, &size)) {
+          i += size - 1;
+        } else {
+          result[j++] = str[i];
+        }
+      }
+
+      // `i > 0` keeps str[i - 1] inside the buffer when the input itself
+      // starts with a multiline comment.
+      if ((comment_mode == kOneline &&
+           IsLineTerminatorSequence(str + i, &size)) ||
+          (comment_mode == kMultiline &&
+           i > 0 && str[i - 1] == '*' && str[i] == '/')) {
+        comment_mode = kDisabled;
+      }
+    } else if (str[i] == '\\' && IsLineTerminatorSequence(str + i + 1, &size)) {
+      // Line continuation inside a string: skip the backslash and the
+      // line terminator.
+      i += size;
+    } else if (IsLineTerminatorSequence(str + i, &size)) {
+      THROW_EXCEPTION(SyntaxError, "Unexpected line end in string");
+      delete[] result;
+      return nullptr;
+    } else {
+      result[j++] = str[i];
+    }
+  }
+
+  result[j] = '\0';
+  *new_length = j;
+
+  return result;
+}
+
+// An omitted array element (the value starts with `,` or `]`) consumes no
+// characters; the `undefined` keyword consumes nine.
+Local<Value> ParseUndefined(Isolate* isolate,
+                            const char* begin,
+                            const char* end,
+                            size_t* size) {
+  if (*begin == ',' || *begin == ']') {
+    *size = 0;
+  } else if (*begin == 'u') {
+    *size = 9;
+  } else {
+    THROW_EXCEPTION(TypeError, "Invalid format of undefined value");
+  }
+  return Undefined(isolate);
+}
+
+// Always reports the four characters of the `null` keyword as consumed.
+Local<Value> ParseNull(Isolate* isolate,
+                       const char* begin,
+                       const char* end,
+                       size_t* size) {
+  *size = 4;
+  return Null(isolate);
+}
+
+Local<Value>
ParseBool(Isolate* isolate,
+                       const char* begin,
+                       const char* end,
+                       size_t* size) {
+  Local<Value> result;
+  if (begin + 4 <= end && strncmp(begin, "true", 4) == 0) {
+    result = True(isolate);
+    *size = 4;
+  } else if (begin + 5 <= end && strncmp(begin, "false", 5) == 0) {
+    result = False(isolate);
+    *size = 5;
+  } else {
+    THROW_EXCEPTION(TypeError, "Invalid format: expected boolean");
+    result = Undefined(isolate);
+  }
+  return result;
+}
+
+// Checks if a character is an octal digit.
+inline bool IsOctalDigit(char character) {
+  return character >= '0' && character <= '7';
+}
+
+// Looks at the optional sign and the `0b`/`0o`/`0x` prefix to choose between
+// the decimal and the integer parser. Legacy octal literals (a leading zero
+// followed by an octal digit) are rejected with a SyntaxError.
+Local<Value> ParseNumber(Isolate* isolate,
+                         const char* begin,
+                         const char* end,
+                         size_t* size) {
+  bool negate_result = false;
+  const char* number_start = begin;
+
+  if (*begin == '+' || *begin == '-') {
+    negate_result = *begin == '-';
+    number_start++;
+  }
+
+  int base = 10;
+
+  if (*number_start == '0') {
+    number_start++;
+
+    if (IsOctalDigit(*number_start)) {
+      THROW_EXCEPTION(SyntaxError, "Use new octal literal syntax");
+      return Undefined(isolate);
+    } else if (*number_start == 'b') {
+      base = 2;
+      number_start++;
+    } else if (*number_start == 'o') {
+      base = 8;
+      number_start++;
+    } else if (*number_start == 'x') {
+      base = 16;
+      number_start++;
+    } else {
+      // A plain zero or a fraction such as `0.5`: step back onto the zero.
+      number_start--;
+    }
+  }
+
+  if (base == 10) {
+    return ParseDecimalNumber(isolate, begin, end, size);
+  } else {
+    auto value = ParseIntegerNumber(isolate, number_start, end, size,
+                                    base, negate_result);
+    // Account for the sign and the prefix skipped above.
+    auto offset = static_cast<size_t>(number_start - begin);
+    *size += offset;
+    return value;
+  }
+}
+
+// The numeric value is produced by atof; the reported length is the distance
+// to the next `,`, `}` or `]`, or to `end` if there is none.
+Local<Value> ParseDecimalNumber(Isolate* isolate,
+                                const char* begin,
+                                const char* end,
+                                size_t* size) {
+  auto result = Number::New(isolate, atof(begin));
+  *size = end - begin;
+  size_t i = 0;
+  // The bound is tested first so that begin[i] is only read in front of
+  // `end`.
+  while (i < *size &&
+         begin[i] != ',' &&
+         begin[i] != '}' &&
+         begin[i] != ']') {
+    i++;
+  }
+  *size = i;
+  return result;
+}
+
+Local<Value> ParseIntegerNumber(Isolate* isolate,
+                                const char* begin,
+                                const char* end,
+                                size_t* size,
+                                int base,
+                                bool
negate_result) {
+  char* number_end;
+  // Prefixed literals such as 0xFFFFFFFF do not fit into int32_t, so the
+  // value is kept in a wider type and returned as a JavaScript Number.
+  long long value = std::strtoll(begin, &number_end, base);
+  if (negate_result) {
+    value = -value;
+  }
+  *size = static_cast<size_t>(number_end - begin);
+  return Number::New(isolate, static_cast<double>(value));
+}
+
+static char* GetControlChar(Isolate* isolate,
+                            const char* str,
+                            size_t* res_len,
+                            size_t* size);
+
+// Parses a quoted string literal starting at `begin`, resolving escape
+// sequences. `size` receives the length of the literal including both
+// quotes. On error an exception is thrown and an empty string is returned;
+// the scratch buffer is released on every path.
+Local<Value> ParseString(Isolate* isolate,
+                         const char* begin,
+                         const char* end,
+                         size_t* size) {
+  *size = end - begin;
+  char* result = new char[*size + 1];
+  memset(result, 0, *size + 1);
+
+  enum { kApostrophe = 0, kQMarks} string_mode = (*begin == '\'') ?
+                                                  kApostrophe :
+                                                  kQMarks;
+  bool is_ended = false;
+  size_t res_index = 0;
+  size_t out_offset, in_offset;
+
+  for (size_t i = 1; i < *size; i++) {
+    if ((string_mode == kQMarks && begin[i] == '\"') ||
+        (string_mode == kApostrophe && begin[i] == '\'')) {
+      is_ended = true;
+      *size = i + 1;
+      result[res_index] = '\0';
+      break;
+    }
+
+    if (begin[i] == '\\') {
+      char* symb =
+          GetControlChar(isolate, begin + ++i, &out_offset, &in_offset);
+      if (!symb) {
+        // GetControlChar has already thrown.
+        delete[] result;
+        return String::Empty(isolate);
+      }
+      strncpy(result + res_index, symb, out_offset);
+      delete[] symb;
+      i += in_offset - 1;
+      res_index += out_offset;
+    } else {
+      result[res_index++] = begin[i];
+    }
+  }
+
+  if (!is_ended) {
+    delete[] result;
+    THROW_EXCEPTION(SyntaxError, "Error while parsing string");
+    return String::Empty(isolate);
+  }
+
+  Local<String> result_str = String::NewFromUtf8(isolate, result,
+      NewStringType::kNormal, static_cast<int>(res_index)).ToLocalChecked();
+  delete[] result;
+  return result_str;
+}
+
+static unsigned int ReadHexNumber(const char* str, size_t len, bool* ok);
+
+// Parses a part of a JavaScript string representation after the backslash
+// character (i.e., an escape sequence without \) into an unescaped control
+// character.
+// Returns a newly allocated buffer (released by the caller with delete[])
+// holding the unescaped bytes, or nullptr after throwing a SyntaxError.
+// `res_len` receives the number of bytes in the buffer and `size` the number
+// of input characters the escape sequence occupies after the backslash.
+static char* GetControlChar(Isolate* isolate,
+                            const char* str,
+                            size_t* res_len,
+                            size_t* size) {
+  char* result = new char[5];
+  *size = 1;
+  *res_len = 1;
+  bool ok;
+  switch (str[0]) {
+    case 'b': *result = '\b'; break;
+    case 'f': *result = '\f'; break;
+    case 'n': *result = '\n'; break;
+    case 'r': *result = '\r'; break;
+    case 't': *result = '\t'; break;
+    case 'v': *result = '\v'; break;
+    case '0': *result = '\0'; break;
+
+    case 'x': {
+      *result = ReadHexNumber(str + 1, 2, &ok);
+      if (!ok) {
+        delete[] result;
+        THROW_EXCEPTION(SyntaxError, "Invalid hexadecimal escape sequence");
+        return nullptr;
+      }
+      *size = 3;
+      break;
+    }
+
+    case 'u': {
+      unsigned int symb_code = 0;
+      if (isxdigit(static_cast<unsigned char>(str[1]))) {
+        symb_code = ReadHexNumber(str + 1, 4, &ok);
+        *size = 5;
+      } else if (str[1] == '{') {
+        // A code point escape holds from one to six hex digits
+        // (maximal hex is 10FFFF) followed by a closing brace.
+        static const size_t kMaxCodePointDigits = 6;
+        size_t hex_size = 0;
+        while (hex_size < kMaxCodePointDigits &&
+               str[hex_size + 2] != '}' &&
+               str[hex_size + 2] != '\0') {
+          hex_size++;
+        }
+        if (hex_size == 0 || str[hex_size + 2] != '}') {
+          delete[] result;
+          THROW_EXCEPTION(SyntaxError, "Invalid Unicode code point escape");
+          return nullptr;
+        }
+        symb_code = ReadHexNumber(str + 2, hex_size, &ok);
+        // Values above U+10FFFF are not Unicode code points.
+        ok = ok && symb_code <= 0x10FFFF;
+        *size = hex_size + 3;
+      } else {
+        ok = false;
+      }
+
+      if (!ok) {
+        delete[] result;
+        THROW_EXCEPTION(SyntaxError, "Invalid Unicode escape sequence");
+        return nullptr;
+      }
+      char* unicode_symbol = CodePointToUtf8(symb_code, res_len);
+      delete[] result;
+      return unicode_symbol;
+    }
+
+    default:
+      // Any other escaped character stands for itself.
+      *result = str[0];
+  }
+
+  return result;
+}
+
+// Parses a hexadecimal number into unsigned int. Whether the parsing
+// was successful is determined by the value of `ok`.
+// Exactly `len` characters starting at `str` must be hexadecimal digits;
+// signs, whitespace and a `0x` prefix are rejected. `len` must be between
+// one and eight so that the value fits into 32 bits. On failure `ok` is set
+// to false and zero is returned.
+static unsigned int ReadHexNumber(const char* str, size_t len, bool* ok) {
+  static const size_t kMaxHexDigits = 8;
+  *ok = false;
+  if (len == 0 || len > kMaxHexDigits) {
+    return 0;
+  }
+
+  unsigned int result = 0;
+  for (size_t i = 0; i < len; i++) {
+    const char c = str[i];
+    unsigned int digit;
+    if (c >= '0' && c <= '9') {
+      digit = static_cast<unsigned int>(c - '0');
+    } else if (c >= 'a' && c <= 'f') {
+      digit = static_cast<unsigned int>(c - 'a') + 10;
+    } else if (c >= 'A' && c <= 'F') {
+      digit = static_cast<unsigned int>(c - 'A') + 10;
+    } else {
+      // Also reached on a premature NUL terminator.
+      return 0;
+    }
+    result = result * 16 + digit;
+  }
+
+  *ok = true;
+  return result;
+}
+
+Local<String> ParseKeyInObject(Isolate* isolate,
+                               const char* begin,
+                               const char* end,
+                               size_t* size) {
+  *size = end - begin;
+  Local<String> result;
+  if (begin[0] == '\'' || begin[0] == '"') {
+    Type current_type;
+    bool valid = GetType(begin, end, &current_type);
+    if (valid && current_type == Type::kString) {
+      size_t offset;
+      result = ParseString(isolate, begin, end, &offset).As<String>();
+      *size = offset;
+      return result;
+    } else {
+      THROW_EXCEPTION(SyntaxError,
+          "Invalid format in object: key is invalid string");
+      return Local<String>();
+    }
+  } else {
+    size_t current_length = 0;
+    for (size_t i = 0; i < *size; i++) {
+      if (begin[i] == ':') {
+        if (current_length != 0) {
+          result = String::NewFromUtf8(isolate, begin,
+                                       NewStringType::kInternalized,
+                                       static_cast<int>(current_length))
+                       .ToLocalChecked();
+          break;
+        } else {
+          THROW_EXCEPTION(SyntaxError, "Unexpected token :");
+          return Local<String>();
+        }
+      } else if (begin[i] == '_' || (i != 0 ?
isalnum(begin[i]) :
+                                        isalpha(begin[i]))) {
+        current_length++;
+      } else {
+        THROW_EXCEPTION(SyntaxError,
+            "Invalid format in object: key has invalid type");
+        return Local<String>();
+      }
+    }
+    *size = current_length;
+    return result;
+  }
+}
+
+// Detects the type of the value at `begin` and dispatches to the matching
+// parser from kParseFunctions.
+Local<Value> ParseValueInObject(Isolate* isolate,
+                                const char* begin,
+                                const char* end,
+                                size_t* size) {
+  Local<Value> value;
+  Type current_type;
+  bool valid = GetType(begin, end, &current_type);
+  if (valid) {
+    value = (kParseFunctions[current_type])(isolate, begin, end, size);
+    return value;
+  } else {
+    THROW_EXCEPTION(TypeError, "Invalid type in object");
+    return Object::New(isolate);
+  }
+}
+
+// Alternates between parsing a key and parsing its value until the closing
+// brace is reached. Properties whose value is `undefined` are not added to
+// the result.
+Local<Value> ParseObject(Isolate* isolate,
+                         const char* begin,
+                         const char* end,
+                         size_t* size) {
+  bool key_mode = true;
+  *size = end - begin;
+  Local<String> current_key;
+  Local<Value> current_value;
+  size_t current_length = 0;
+  auto result = Object::New(isolate);
+
+  for (size_t i = 1; i < *size; i++) {
+    if (key_mode) {
+      if (begin[i] == '}') {
+        if (begin[i - 1] != ',') {  // In case of empty object
+          *size = 2;
+        } else {  // In case of trailing comma
+          *size = i + 1;
+        }
+        break;
+      }
+      current_key = ParseKeyInObject(isolate,
+                                     begin + i,
+                                     end,
+                                     &current_length);
+      i += current_length;
+      // The loop increment below skips the separator, so make sure it
+      // really is a colon. This also covers a key that could not be parsed
+      // or that runs up to the end of the input.
+      if (begin[i] != ':') {
+        THROW_EXCEPTION(SyntaxError,
+            "Invalid format in object: missed colon");
+        return Object::New(isolate);
+      }
+    } else {
+      current_value = ParseValueInObject(isolate,
+                                         begin + i,
+                                         end,
+                                         &current_length);
+      if (!current_value->IsUndefined()) {
+        Maybe<bool> is_ok = result->Set(isolate->GetCurrentContext(),
+                                        current_key,
+                                        current_value);
+        if (is_ok.IsNothing()) {
+          THROW_EXCEPTION(Error, "Cannot add property to object");
+        }
+      }
+      i += current_length;
+      if (begin[i] != ',' && begin[i] != '}') {
+        THROW_EXCEPTION(SyntaxError, "Invalid format in object");
+        return Object::New(isolate);
+      } else if (begin[i] == '}') {
+        *size = i + 1;
+        break;
+      }
+    }
+    key_mode = !key_mode;
+  }
+  return result;
+}
+
+Local<Value> ParseArray(Isolate* isolate,
+                        const char* begin,
+                        const char* end,
+                        size_t* size) {
+  auto array = Array::New(isolate);
+  size_t current_length = 0;
+  *size = end - begin;
+  if (*begin == '['
&& *(begin + 1) == ']') { // In case of empty array + *size = 2; + return array; + } + + size_t current_element = 0; + for (size_t i = 1; i < *size; i++) { + Type current_type; + bool valid = GetType(begin + i, end, ¤t_type); + if (valid) { + auto t = kParseFunctions[current_type](isolate, + begin + i, + end, + ¤t_length); + if (!(current_type == Type::kUndefined && begin[i] == ']')) { + array->Set(static_cast(current_element++), t); + } + + i += current_length; + current_length = 0; + + if (begin[i] != ',' && begin[i] != ']') { + THROW_EXCEPTION(SyntaxError, "Invalid format in array: missed comma"); + return Array::New(isolate); + } else if (begin[i] == ']') { + *size = i + 1; + break; + } + } else { + THROW_EXCEPTION(TypeError, "Invalid type in array"); + return Array::New(isolate); + } + } + + return array; +} + +} // namespace internal + +} // namespace parser + +} // namespace jstp diff --git a/src/jsrs_parser.h b/src/jsrs_parser.h new file mode 100644 index 00000000..712faf4f --- /dev/null +++ b/src/jsrs_parser.h @@ -0,0 +1,127 @@ +// Copyright (c) 2016-2017 JSTP project authors. Use of this source code is +// governed by the MIT license that can be found in the LICENSE file. + +#ifndef SRC_JSRS_PARSER_H_ +#define SRC_JSRS_PARSER_H_ + +#include + +namespace jstp { + +namespace parser { + +// Deserializes a UTF-8 encoded string in the JSTP Record Serialization format +// into a JavaScript value and returns a handle to it. +v8::Local Parse(v8::Isolate* isolate, + const v8::String::Utf8Value& in); + +namespace internal { + +// Prepares a source string for parsing throwing out whitespace and comments. +const char* PrepareString(v8::Isolate* isolate, + const char* str, + std::size_t length, + std::size_t* new_length); + +// Parses an undefined value from `begin` but never past `end` and returns the +// parsed JavaScript value. 
The `size` is incremented by the number of +// characters the function has used in the string so that the calling side +// knows where to continue from. +v8::Local ParseUndefined(v8::Isolate* isolate, + const char* begin, + const char* end, + std::size_t* size); + +// Parses a null value from `begin` but never past `end` and returns the parsed +// JavaScript value. The `size` is incremented by the number of characters the +// function has used in the string so that the calling side knows where to +// continue from. +v8::Local ParseNull(v8::Isolate* isolate, + const char* begin, + const char* end, + std::size_t* size); + +// Parses a boolean value from `begin` but never past `end` and returns the +// parsed JavaScript value. The `size` is incremented by the number of +// characters the function has used in the string so that the calling side +// knows where to continue from. +v8::Local ParseBool(v8::Isolate* isolate, + const char* begin, + const char* end, + std::size_t* size); + +// Parses a numeric value from `begin` but never past `end` and returns the +// parsed JavaScript value. The `size` is incremented by the number of +// characters the function has used in the string so that the calling side +// knows where to continue from. +v8::Local ParseNumber(v8::Isolate* isolate, + const char* begin, + const char* end, + std::size_t* size); + +// Parses a string value from `begin` but never past `end` and returns the +// parsed JavaScript value. The `size` is incremented by the number of +// characters the function has used in the string so that the calling side +// knows where to continue from. +v8::Local ParseString(v8::Isolate* isolate, + const char* begin, + const char* end, + std::size_t* size); + +// Parses an array from `begin` but never past `end` and returns the parsed +// JavaScript value. The `size` is incremented by the number of characters the +// function has used in the string so that the calling side knows where to +// continue from. 
+v8::Local ParseArray(v8::Isolate* isolate, + const char* begin, + const char* end, + std::size_t* size); + +// Parses an object key from `begin` but never past `end` and returns +// the parsed JavaScript value. The `size` is incremented by the number +// of characters the function has used in the string so that the calling side +// knows where to continue from. +v8::Local ParseKeyInObject(v8::Isolate* isolate, + const char* begin, + const char* end, + std::size_t* size); + +// Parses a value corresponding to key inside object from `begin` +// but never past `end` and returns the parsed JavaScript value. +// The `size` is incremented by the number of characters the function has used +// in the string so that the calling side knows where to continue from. +v8::Local ParseValueInObject(v8::Isolate* isolate, + const char* begin, + const char* end, + std::size_t* size); + +// Parses an object from `begin` but never past `end` and returns the parsed +// JavaScript value. The `size` is incremented by the number of characters the +// function has used in the string so that the calling side knows where to +// continue from. +v8::Local ParseObject(v8::Isolate* isolate, + const char* begin, + const char* end, + std::size_t* size); + +// Parses a decimal number, either integer or float. +v8::Local ParseDecimalNumber(v8::Isolate* isolate, + const char* begin, + const char* end, + std::size_t* size); + +// Parses an integer number in arbitrary base without prefixes. +v8::Local ParseIntegerNumber(v8::Isolate* isolate, + const char* begin, + const char* end, + std::size_t* size, + int base, + bool negate_result); + +} // namespace internal + +} // namespace parser + +} // namespace jstp + +#endif // SRC_JSRS_PARSER_H_ diff --git a/src/jsrs_serializer.cc b/src/jsrs_serializer.cc new file mode 100644 index 00000000..702a16e9 --- /dev/null +++ b/src/jsrs_serializer.cc @@ -0,0 +1,215 @@ +// Copyright (c) 2016-2017 JSTP project authors. 
Use of this source code is +// governed by the MIT license that can be found in the LICENSE file. + +#include "jsrs_serializer.h" + +#include +#include +#include + +#include + +using std::isalpha; +using std::isalnum; +using std::size_t; + +using v8::Array; +using v8::Context; +using v8::Date; +using v8::Function; +using v8::Isolate; +using v8::Local; +using v8::Object; +using v8::NewStringType; +using v8::String; +using v8::Value; + +namespace jstp { + +namespace serializer { + +Local Stringify(Isolate* isolate, Local value) { + if (value->IsFunction()) { + return Local(); + } + if (value->IsNumber() || + value->IsBoolean() || + value->IsUndefined() || + value->IsNull()) { + return value->ToString(isolate->GetCurrentContext()).ToLocalChecked(); + } else if (value->IsDate()) { + return StringifyDate(isolate, value.As()); + } else if (value->IsString()) { + return StringifyString(isolate, value.As()); + } else if (value->IsArray()) { + return StringifyArray(isolate, value.As()); + } else if (value->IsObject()) { + return StringifyObject(isolate, value.As()); + } else { + return Local(); + } +} + +Local StringifyDate(Isolate* isolate, Local date) { + auto context = isolate->GetCurrentContext(); + auto toISOString = date->Get(context, + String::NewFromUtf8(isolate, "toISOString")).ToLocalChecked(); + auto result = toISOString.As()->Call(context, + date, 0, nullptr).ToLocalChecked(); + auto quotes = String::NewFromUtf8(isolate, "\'"); + auto res_str = result->ToString(); + res_str = String::Concat(quotes, String::Concat(res_str, quotes)); + return res_str->ToString(); +} + +Local StringifyArray(Isolate* isolate, Local array) { + auto comma = String::NewFromUtf8(isolate, ","); + auto result = String::NewFromUtf8(isolate, "["); + uint32_t length = array->Length(); + + for (uint32_t index = 0; index < length; index++) { + auto value = array->Get(index); + if (!value->IsUndefined()) { + auto chunk = Stringify(isolate, value); + if (chunk.IsEmpty()) continue; + result = 
String::Concat(result, chunk);
+    }
+    if (index != length - 1) {
+      result = String::Concat(result, comma);
+    }
+  }
+
+  result = String::Concat(result, String::NewFromUtf8(isolate, "]"));
+  return result;
+}
+
+static const char* GetEscapedControlChar(char str, size_t* size);
+
+// Wraps the UTF-8 representation of `string` in single quotes, replacing
+// control characters, backslashes and single quotes with escape sequences.
+Local<String> StringifyString(Isolate* isolate, Local<String> string) {
+  String::Utf8Value utf8string(string);
+  const char* c_string = *utf8string;
+  // The loop walks over UTF-8 bytes, so it must be bounded by the byte
+  // length of the conversion and not by the number of UTF-16 code units
+  // reported by String::Length(), which is smaller for non-ASCII text.
+  const size_t length = static_cast<size_t>(utf8string.length());
+
+  std::vector<char> result_str;
+  result_str.reserve((length + 1) * 2);
+  result_str.push_back('\'');
+
+  for (size_t i = 0; i < length; i++) {
+    size_t offset;
+    // Bytes of multi-byte UTF-8 sequences never need escaping and are
+    // copied through without consulting the escape table.
+    const bool is_ascii = static_cast<unsigned char>(c_string[i]) < 0x80;
+    const char* ch =
+        is_ascii ? GetEscapedControlChar(c_string[i], &offset) : nullptr;
+    if (ch) {
+      for (size_t k = 0; k < offset; k++) {
+        result_str.push_back(ch[k]);
+      }
+    } else {
+      result_str.push_back(c_string[i]);
+    }
+  }
+
+  result_str.push_back('\'');
+
+  return String::NewFromUtf8(isolate, result_str.data(),
+                             NewStringType::kNormal,
+                             static_cast<int>(result_str.size()))
+      .ToLocalChecked();
+}
+
+// Returns a string representing an escaped control character.
+// If the given character is not a control one, returns nullptr.
+// The `size` is being incremented by the length of the resulting
+// string, but always at least by two, even when the function
+// returns nullptr.
+// `size` is set to the length of the returned escape sequence: two for the
+// short forms, six for the \uXXXX forms. When nullptr is returned `size` is
+// left at two.
+static const char* GetEscapedControlChar(char str, size_t* size) {
+  static constexpr const char* control_chars[0x20] = {
+    "\\u0000", "\\u0001", "\\u0002",
+    "\\u0003", "\\u0004", "\\u0005",
+    "\\u0006", "\\u0007", "\\u0008",
+    "\\u0009", "\\u000a", "\\u000b",
+    "\\u000c", "\\u000d", "\\u000e",
+    "\\u000f", "\\u0010", "\\u0011",
+    "\\u0012", "\\u0013", "\\u0014",
+    "\\u0015", "\\u0016", "\\u0017",
+    "\\u0018", "\\u0019", "\\u001a",
+    "\\u001b", "\\u001c", "\\u001d",
+    "\\u001e", "\\u001f"
+  };
+
+  // `char` may be signed: bytes above 0x7F would compare less than 0x20
+  // and index the table out of bounds, so the range check is done on the
+  // unsigned value.
+  const unsigned char code = static_cast<unsigned char>(str);
+
+  *size = 2;
+
+  switch (str) {
+    case '\b': return "\\b";
+    case '\f': return "\\f";
+    case '\n': return "\\n";
+    case '\r': return "\\r";
+    case '\t': return "\\t";
+    case '\v': return "\\v";
+    case '\\': return "\\\\";
+    case '\'': return "\\'";
+    case 0x7F:
+      // The whole six-character sequence has to be copied by the caller.
+      *size = 6;
+      return "\\u007f";
+    default:
+      if (code < 0x20) {
+        *size = 6;
+        return control_chars[code];
+      } else {
+        return nullptr;
+      }
+  }
+}
+
+// Serializes the own properties of `object` as `{key:value,...}`.
+// Properties whose value is undefined or cannot be serialized are skipped.
+Local<String> StringifyObject(Isolate* isolate, Local<Object> object) {
+  auto comma = String::NewFromUtf8(isolate, ",");
+  auto colon = String::NewFromUtf8(isolate, ":");
+  auto result = String::NewFromUtf8(isolate, "{");
+  auto context = isolate->GetCurrentContext();
+  auto keys = object->GetOwnPropertyNames(context).ToLocalChecked();
+
+  // Stays false until the first property has been written, so that no
+  // comma is emitted in front of it.
+  bool first_defined = true;
+  for (uint32_t i = 0; i < keys->Length(); i++) {
+    auto key = keys->Get(context, i).ToLocalChecked();
+    auto value = object->Get(context, key).ToLocalChecked();
+    auto chunk = Stringify(isolate, value);
+    if (!value->IsUndefined() && !chunk.IsEmpty()) {
+      if (i != 0 && first_defined) {
+        result = String::Concat(result, comma);
+      }
+      first_defined = true;
+      result = String::Concat(result,
+                              StringifyKey(isolate, key->ToString()));
+      result = String::Concat(result, colon);
+      result = String::Concat(result, chunk);
+    } else {
+      if (i == 0) first_defined = false;
+    }
+  }
+
+  result = String::Concat(result, String::NewFromUtf8(isolate, "}"));
+  return result;
+}
+
+static bool IsValidKey(Isolate* isolate, const
String::Utf8Value& key); + +Local StringifyKey(Isolate* isolate, Local key) { + String::Utf8Value key_str(key.As()); + if (!IsValidKey(isolate, key_str)) { + return StringifyString(isolate, key); + } + return key; +} + +// Checks if a string can be a non-quoted object key. +static bool IsValidKey(Isolate* isolate, const String::Utf8Value& key) { + bool result = true; + const char* key_str = *key; + for (int i = 0; i < key.length(); i++) { + if (key_str[i] == '_') continue; + if ((i == 0 && !isalpha(key_str[i])) || !isalnum(key_str[i])) { + result = false; + break; + } + } + return result; +} + +} // namespace serializer + +} // namespace jstp diff --git a/src/jsrs_serializer.h b/src/jsrs_serializer.h new file mode 100644 index 00000000..541cb80c --- /dev/null +++ b/src/jsrs_serializer.h @@ -0,0 +1,47 @@ +// Copyright (c) 2016-2017 JSTP project authors. Use of this source code is +// governed by the MIT license that can be found in the LICENSE file. + +#ifndef SRC_JSRS_SERIALIZER_H_ +#define SRC_JSRS_SERIALIZER_H_ + +#include + +namespace jstp { + +namespace serializer { + +// Serializes a JavaScript value using the JSTP Record Serialization format +// and returns a string representing it. +v8::Local Stringify(v8::Isolate* isolate, + v8::Local value); + +// Serializes a JavaScript date using the JSTP Record Serialization format +// and returns a string representing it. +v8::Local StringifyDate(v8::Isolate* isolate, + v8::Local date); + +// Serializes a JavaScript array using the JSTP Record Serialization format +// and returns a string representing it. +v8::Local StringifyArray(v8::Isolate* isolate, + v8::Local array); + +// Serializes a JavaScript string using the JSTP Record Serialization format +// and returns a string representing it. +v8::Local StringifyString(v8::Isolate* isolate, + v8::Local string); + +// Serializes a JavaScript object using the JSTP Record Serialization format +// and returns a string representing it. 
+v8::Local StringifyObject(v8::Isolate* isolate, + v8::Local object); + +// Serializes a key of an object using the JSTP Record Serialization format +// and returns a string representing it. +v8::Local StringifyKey(v8::Isolate* isolate, + v8::Local key); + +} // namespace serializer + +} // namespace jstp + +#endif // SRC_JSRS_SERIALIZER_H_ diff --git a/src/node_bindings.cc b/src/node_bindings.cc new file mode 100644 index 00000000..59df22a8 --- /dev/null +++ b/src/node_bindings.cc @@ -0,0 +1,88 @@ +// Copyright (c) 2016-2017 JSTP project authors. Use of this source code is +// governed by the MIT license that can be found in the LICENSE file. + +#include +#include + +#include "common.h" +#include "jsrs_parser.h" +#include "jsrs_serializer.h" +#include "packet_parser.h" + +using v8::Array; +using v8::FunctionCallbackInfo; +using v8::HandleScope; +using v8::Isolate; +using v8::Local; +using v8::Object; +using v8::String; +using v8::Value; + +namespace jstp { + +namespace bindings { + +void Stringify(const FunctionCallbackInfo& args) { + Isolate* isolate = args.GetIsolate(); + + if (args.Length() != 1) { + THROW_EXCEPTION(TypeError, "Wrong number of arguments"); + return; + } + + HandleScope scope(isolate); + + auto result = jstp::serializer::Stringify(isolate, args[0]); + args.GetReturnValue().Set(result); +} + +void Parse(const FunctionCallbackInfo& args) { + Isolate* isolate = args.GetIsolate(); + + if (args.Length() != 1) { + THROW_EXCEPTION(TypeError, "Wrong number of arguments"); + return; + } + if (!args[0]->IsString() && !args[0]->IsUint8Array()) { + THROW_EXCEPTION(TypeError, "Wrong argument type"); + return; + } + + HandleScope scope(isolate); + + String::Utf8Value str(args[0]->ToString()); + auto result = jstp::parser::Parse(isolate, str); + args.GetReturnValue().Set(result); +} + +void ParseNetworkPackets(const FunctionCallbackInfo& args) { + Isolate* isolate = args.GetIsolate(); + + if (args.Length() != 2) { + THROW_EXCEPTION(TypeError, "Wrong number of 
arguments"); + return; + } + if (!args[0]->IsString() || !args[1]->IsArray()) { + THROW_EXCEPTION(TypeError, "Wrong argument type"); + return; + } + + HandleScope scope(isolate); + + String::Utf8Value str(args[0]->ToString()); + auto array = args[1].As(); + auto result = jstp::packet_parser::ParseNetworkPackets(isolate, str, array); + args.GetReturnValue().Set(result); +} + +void Init(Local target) { + NODE_SET_METHOD(target, "stringify", Stringify); + NODE_SET_METHOD(target, "parse", Parse); + NODE_SET_METHOD(target, "parseNetworkPackets", ParseNetworkPackets); +} + +NODE_MODULE(jsrs, Init); + +} // namespace bindings + +} // namespace jstp diff --git a/src/packet_parser.cc b/src/packet_parser.cc new file mode 100644 index 00000000..8bb9de72 --- /dev/null +++ b/src/packet_parser.cc @@ -0,0 +1,65 @@ +// Copyright (c) 2016-2017 JSTP project authors. Use of this source code is +// governed by the MIT license that can be found in the LICENSE file. + +#include "packet_parser.h" + +#include +#include + +#include + +#include "common.h" +#include "jsrs_parser.h" + +using std::size_t; +using std::strlen; + +using v8::Array; +using v8::Isolate; +using v8::Local; +using v8::String; + +using jstp::parser::internal::PrepareString; +using jstp::parser::internal::ParseObject; + +namespace jstp { + +namespace packet_parser { + +Local ParseNetworkPackets(Isolate* isolate, + const String::Utf8Value& in, + Local out) { + size_t total_size = 0; + size_t parsed_size = 0; + const char* source = PrepareString(isolate, *in, in.length(), &total_size); + const char* curr_chunk = source; + int index = 0; + + while (parsed_size < total_size) { + auto chunk_size = strlen(curr_chunk); + parsed_size += chunk_size + 1; + + if (parsed_size <= total_size) { + size_t parsed_chunk_size = 0; + auto result = ParseObject(isolate, curr_chunk, + curr_chunk + chunk_size, &parsed_chunk_size); + + if (parsed_chunk_size != chunk_size) { + delete[] source; + THROW_EXCEPTION(SyntaxError, "Invalid format"); + 
return String::Empty(isolate); + } + + out->Set(index++, result); + curr_chunk += chunk_size + 1; + } + } + + auto rest = String::NewFromUtf8(isolate, curr_chunk); + delete[] source; + return rest; +} + +} // namespace packet_parser + +} // namespace jstp diff --git a/src/packet_parser.h b/src/packet_parser.h new file mode 100644 index 00000000..19931d3d --- /dev/null +++ b/src/packet_parser.h @@ -0,0 +1,23 @@ +// Copyright (c) 2016-2017 JSTP project authors. Use of this source code is +// governed by the MIT license that can be found in the LICENSE file. + +#ifndef SRC_PACKET_PARSER_H_ +#define SRC_PACKET_PARSER_H_ + +#include + +namespace jstp { + +namespace packet_parser { + +// Efficiently parses JSTP packets for transports that require packet +// delimiters eliminating the need to split the stream data into parts before +// parsing and allowing to do that in one pass. +v8::Local ParseNetworkPackets(v8::Isolate* isolate, + const v8::String::Utf8Value& in, v8::Local out); + +} // namespace packet_parser + +} // namespace jstp + +#endif // SRC_PACKET_PARSER_H_ diff --git a/src/unicode_utils.cc b/src/unicode_utils.cc new file mode 100644 index 00000000..962ebdc5 --- /dev/null +++ b/src/unicode_utils.cc @@ -0,0 +1,110 @@ +// Copyright (c) 2016-2017 JSTP project authors. Use of this source code is +// governed by the MIT license that can be found in the LICENSE file. 
// ===========================================================================
// src/unicode_utils.h
// ===========================================================================
// Copyright (c) 2016-2017 JSTP project authors. Use of this source code is
// governed by the MIT license that can be found in the LICENSE file.

#ifndef SRC_UNICODE_UTILS_H_
#define SRC_UNICODE_UTILS_H_

#include <cstddef>

namespace jstp {

namespace unicode_utils {

// Returns true if `str` points to a valid Line Terminator Sequence code point,
// false otherwise. `size` will receive the number of bytes used by this
// code point (1, 2, 3). `size` is left untouched when false is returned.
bool IsLineTerminatorSequence(const char* str, std::size_t* size);

// Returns true if `str` points to a valid White space code point,
// false otherwise. `size` will receive the number of bytes used by this
// code point (1, 2, 3). `size` is left untouched when false is returned.
bool IsWhiteSpaceCharacter(const char* str, std::size_t* size);

// Encodes a Unicode code point in UTF-8. `size` will receive the number of
// bytes used (1, 2, 3 or 4). The returned buffer is allocated with new[], is
// not NUL-terminated and must be released by the caller with delete[].
char* CodePointToUtf8(unsigned int c, std::size_t* size);

}  // namespace unicode_utils

}  // namespace jstp

#endif  // SRC_UNICODE_UTILS_H_

// ===========================================================================
// src/unicode_utils.cc (implementation of the declarations above)
// ===========================================================================

#include <cstddef>

using std::size_t;

namespace jstp {

namespace unicode_utils {

// Recognizes CR LF, a lone CR or LF, and U+2028 / U+2029 (E2 80 A8 / E2 80 A9)
// at the start of the NUL-terminated UTF-8 string `str`.
bool IsLineTerminatorSequence(const char* str, size_t* size) {
  // CR LF has to be tested before the single-byte terminators so that it is
  // reported as one two-byte sequence.
  if (str[0] == '\x0D' && str[1] == '\x0A') {
    *size = 2;
    return true;
  }
  if (str[0] == '\x0D' || str[0] == '\x0A') {
    *size = 1;
    return true;
  }
  // U+2028 LINE SEPARATOR and U+2029 PARAGRAPH SEPARATOR.
  if (str[0] == '\xE2' &&
      str[1] == '\x80' &&
      (str[2] == '\xA8' ||
       str[2] == '\xA9')) {
    *size = 3;
    return true;
  }
  return false;
}

// Recognizes a white space code point at the start of the NUL-terminated
// UTF-8 string `str`.
//
// Every byte after the first one is only inspected once the preceding byte
// has matched a non-zero value, so the function never reads past the
// terminating NUL, and it never reports a size that extends past it.
bool IsWhiteSpaceCharacter(const char* str, size_t* size) {
  // Compare as unsigned bytes so that range checks on values >= 0x80 behave
  // the same regardless of the signedness of plain `char`.
  const unsigned char* bytes = reinterpret_cast<const unsigned char*>(str);
  const unsigned char lead = bytes[0];

  // TAB, VT, FF, SPACE. A bare 0xA0 byte (NBSP in Latin-1, not valid UTF-8 on
  // its own) is tolerated as well, as it always has been.
  if (lead == 0x09 || lead == 0x0B || lead == 0x0C ||
      lead == 0x20 || lead == 0xA0) {
    *size = 1;
    return true;
  }

  // U+00A0 NO-BREAK SPACE.
  if (lead == 0xC2 && bytes[1] == 0xA0) {
    *size = 2;
    return true;
  }

  bool is_multibyte_space = false;
  switch (lead) {
    case 0xE1:
      // U+1680 OGHAM SPACE MARK. (The previous check for E1 BB BF matched
      // U+1EFF, which is a letter, and missed this code point.)
      is_multibyte_space = bytes[1] == 0x9A && bytes[2] == 0x80;
      break;
    case 0xE2:
      if (bytes[1] == 0x80) {
        // U+2000..U+200A (E2 80 80..8A) and U+202F NARROW NO-BREAK SPACE.
        // The third byte must be a real continuation byte: accepting
        // 0x00..0x0A here would treat a truncated "E2 80" followed by the
        // terminating NUL as a three-byte space.
        is_multibyte_space =
            (bytes[2] >= 0x80 && bytes[2] <= 0x8A) || bytes[2] == 0xAF;
      } else if (bytes[1] == 0x81) {
        // U+205F MEDIUM MATHEMATICAL SPACE.
        is_multibyte_space = bytes[2] == 0x9F;
      }
      break;
    case 0xE3:
      // U+3000 IDEOGRAPHIC SPACE.
      is_multibyte_space = bytes[1] == 0x80 && bytes[2] == 0x80;
      break;
    case 0xEF:
      // U+FEFF ZERO WIDTH NO-BREAK SPACE (byte order mark).
      is_multibyte_space = bytes[1] == 0xBB && bytes[2] == 0xBF;
      break;
    default:
      break;
  }

  if (is_multibyte_space) {
    *size = 3;
    return true;
  }
  return false;
}

// Encodes the code point `c` as UTF-8 into a freshly allocated 4-byte buffer.
// UTF-16 surrogates (U+D800..U+DFFF) and values above U+10FFFF cannot be
// encoded and are replaced with U+FFFD REPLACEMENT CHARACTER.
char* CodePointToUtf8(unsigned int c, size_t* size) {
  // Substitute invalid code points up front instead of allocating a buffer,
  // discarding it and recursing.
  const bool is_surrogate = c >= 0xD800 && c <= 0xDFFF;
  if (is_surrogate || c > 0x10FFFF) {
    c = 0xFFFD;
  }

  char* result = new char[4];
  char* b = result;
  if (c < 0x80) {
    *b++ = static_cast<char>(c);
  } else if (c < 0x800) {
    *b++ = static_cast<char>(0xC0 | (c >> 6));
    *b++ = static_cast<char>(0x80 | (c & 0x3F));
  } else if (c < 0x10000) {
    *b++ = static_cast<char>(0xE0 | (c >> 12));
    *b++ = static_cast<char>(0x80 | ((c >> 6) & 0x3F));
    *b++ = static_cast<char>(0x80 | (c & 0x3F));
  } else {
    *b++ = static_cast<char>(0xF0 | (c >> 18));
    *b++ = static_cast<char>(0x80 | ((c >> 12) & 0x3F));
    *b++ = static_cast<char>(0x80 | ((c >> 6) & 0x3F));
    *b++ = static_cast<char>(0x80 | (c & 0x3F));
  }
  *size = static_cast<size_t>(b - result);
  return result;
}

}  // namespace unicode_utils

}  // namespace jstp

// NOTE(review): the follow-up patch of this series ("src,build: add missing
// header", https://github.com/metarhia/JSTP/pull/64) adds a standard header
// include to src/jsrs_parser.h so that `std::size_t` is declared there. The
// header name was lost in this copy of the patch (presumably <cstddef>) and
// the hunk itself is cut off, so it is only recorded here.