Skip to content

Commit

Permalink
improve stringwrapper bytestring unwrap
Browse files Browse the repository at this point in the history
  • Loading branch information
anonrig committed Oct 1, 2024
1 parent 924b27e commit 3e5af61
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 13 deletions.
2 changes: 1 addition & 1 deletion src/workerd/jsg/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ wd_cc_library(
"web-idl.h",
"wrappable.h",
],
implementation_deps = ["@simdutf"],
visibility = ["//visibility:public"],
deps = [
":exception",
Expand All @@ -65,6 +64,7 @@ wd_cc_library(
"//src/workerd/util:thread-scopes",
"//src/workerd/util:uuid",
"@capnp-cpp//src/kj",
"@simdutf",
"@workerd-v8//:v8",
],
)
Expand Down
30 changes: 18 additions & 12 deletions src/workerd/jsg/value.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
// Handling of various basic value types: numbers, booleans, strings, optionals, maybes, variants,
// arrays, buffers, dicts.

#include "simdutf.h"
#include "util.h"
#include "web-idl.h"
#include "wrappable.h"
Expand Down Expand Up @@ -468,20 +469,25 @@ class StringWrapper {
kj::Maybe<v8::Local<v8::Object>> parentObject) {
// TODO(cleanup): Move to a HeaderStringWrapper in the api directory.
v8::Local<v8::String> str = check(handle->ToString(context));
auto result =
ByteString(KJ_ASSERT_NONNULL(tryUnwrap(context, str, (kj::String*)nullptr, parentObject)));
if (str->ContainsOnlyOneByte()) {
auto buf = kj::heapArray<kj::byte>(str->Length() + 1);
str->WriteOneByte(context->GetIsolate(), buf.begin());
for (auto b: buf) {
if (b >= 128) {
result.warning = ByteString::Warning::CONTAINS_EXTENDED_ASCII;
break;
}
auto result = ByteString(KJ_ASSERT_NONNULL(
tryUnwrap(context, str, static_cast<kj::String*>(nullptr), parentObject)));

if (!simdutf::validate_ascii(result.begin(), result.size())) {
v8::String::ValueView str_view(context->GetIsolate(), str);

// If storage is one-byte or the string contains only one-byte
// characters, we know that it contains extended ASCII characters.
//
// The order of execution matters, since ContainsOnlyOneByte()
// will scan the whole string for two-byte storage.
if (str_view.is_one_byte() || str->ContainsOnlyOneByte()) {
result.warning = ByteString::Warning::CONTAINS_EXTENDED_ASCII;
} else {
// Storage is two-bytes and it contains two-byte characters.
result.warning = ByteString::Warning::CONTAINS_UNICODE;
}
} else {
result.warning = ByteString::Warning::CONTAINS_UNICODE;
}

return kj::mv(result);
}
};
Expand Down

0 comments on commit 3e5af61

Please sign in to comment.