Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Backport WriteUtf8V2 patch from v8 13.5 #3697

Merged
merged 2 commits into from
Mar 12, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions build/deps/v8.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ PATCHES = [
"0022-Reset-code_range_-before-pointer-compression-cage.patch",
"0023-Move-tear-down-in-IsolateGroup-Release-into-destruct.patch",
"0024-Modify-where-to-look-for-fast_float-and-simdutf.patch",
"0025-add-processed_characters-option-to-WriteUtf8V2.patch",
]

# V8 and its dependencies
Expand Down
2 changes: 1 addition & 1 deletion docs/v8-updates.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ To update the version of V8 used by workerd, the steps are:
the command would be:

```sh
git format-patch --full-index -k --no-signature --no-stat HEAD~19
git format-patch --full-index -k --no-signature --no-stat --zero-commit HEAD~19
```

8. Remove the existing patches from `workerd/patches/v8` and copy over the latest generated patches
Expand Down
2 changes: 1 addition & 1 deletion patches/v8/0003-Allow-Windows-builds-under-Bazel.patch
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ Subject: Allow Windows builds under Bazel


diff --git a/BUILD.bazel b/BUILD.bazel
index 967f7bbfb2810366de89b7a7a90f284efab6c9c6..bfe3365942400fc77f346f2138ac31014f1020aa 100644
index 967f7bbfb2810366de89b7a7a90f284efab6c9c6..702648004a7c68721c80e7b43cdcd355ceb16710 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -3829,6 +3829,8 @@ filegroup(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ speedup for the build: Components like ICU were previously compiled in
both target and exec configurations as generator tools depend on them.

diff --git a/BUILD.bazel b/BUILD.bazel
index bfe3365942400fc77f346f2138ac31014f1020aa..96a35feb537ddc9edf864cadba402dfcc40f4ed6 100644
index 702648004a7c68721c80e7b43cdcd355ceb16710..e65bd03bec247979b2825584f5b59e5ab0c40283 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -17,6 +17,7 @@ load(
Expand All @@ -19,7 +19,7 @@ index bfe3365942400fc77f346f2138ac31014f1020aa..96a35feb537ddc9edf864cadba402dfc
)
load(":bazel/v8-non-pointer-compression.bzl", "v8_binary_non_pointer_compression")

@@ -4219,22 +4220,20 @@ filegroup(
@@ -4217,22 +4218,20 @@ filegroup(
],
)

Expand Down Expand Up @@ -48,7 +48,7 @@ index bfe3365942400fc77f346f2138ac31014f1020aa..96a35feb537ddc9edf864cadba402dfc
)

v8_mksnapshot(
@@ -4443,7 +4442,6 @@ v8_binary(
@@ -4441,7 +4440,6 @@ v8_binary(
srcs = [
"src/regexp/gen-regexp-special-case.cc",
"src/regexp/special-case.h",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@ Subject: Modify where to look for fp16 dependency. This dependency is normally


diff --git a/BUILD.bazel b/BUILD.bazel
index 96a35feb537ddc9edf864cadba402dfcc40f4ed6..bf957ac52fc0d99ec4c18b325bef455e1c92c4c3 100644
index e65bd03bec247979b2825584f5b59e5ab0c40283..bfc3ab07e3efddc81965bb8dfbe7ce1f50aa72b2 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -3843,16 +3843,22 @@ filegroup(
@@ -3841,16 +3841,22 @@ filegroup(
}),
)

Expand Down
8 changes: 4 additions & 4 deletions patches/v8/0019-Enable-V8-shared-linkage.patch
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ Subject: Enable V8 shared linkage


diff --git a/BUILD.bazel b/BUILD.bazel
index bf957ac52fc0d99ec4c18b325bef455e1c92c4c3..0b95fa690001e06ef39227c66dbd0866d33b2926 100644
index bfc3ab07e3efddc81965bb8dfbe7ce1f50aa72b2..aa72436fed2ce640b919caefac2ccad700f1c400 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -1372,6 +1372,7 @@ filegroup(
Expand Down Expand Up @@ -41,15 +41,15 @@ index bf957ac52fc0d99ec4c18b325bef455e1c92c4c3..0b95fa690001e06ef39227c66dbd0866
"src/builtins/setup-builtins-internal.cc",
"src/builtins/torque-csa-header-includes.h",
"src/codegen/turboshaft-builtins-assembler-inl.h",
@@ -3909,6 +3906,7 @@ filegroup(
@@ -3907,6 +3904,7 @@ filegroup(
"src/snapshot/snapshot-empty.cc",
"src/snapshot/static-roots-gen.cc",
"src/snapshot/static-roots-gen.h",
+ "src/execution/isolate.cc",
],
)

@@ -4019,6 +4017,10 @@ filegroup(
@@ -4017,6 +4015,10 @@ filegroup(
name = "noicu/snapshot_files",
srcs = [
"src/init/setup-isolate-deserialize.cc",
Expand All @@ -60,7 +60,7 @@ index bf957ac52fc0d99ec4c18b325bef455e1c92c4c3..0b95fa690001e06ef39227c66dbd0866
] + select({
"@v8//bazel/config:v8_target_arm": [
"google3/snapshots/arm/noicu/embedded.S",
@@ -4036,6 +4038,7 @@ filegroup(
@@ -4034,6 +4036,7 @@ filegroup(
name = "icu/snapshot_files",
srcs = [
"src/init/setup-isolate-deserialize.cc",
Expand Down
4 changes: 2 additions & 2 deletions patches/v8/0020-Fix-macOS-build.patch
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@ and hwy bindings as a whole are no longer needed; this patch can be dropped.
Change-Id: I7c52113596247f8f254ac5f882b41da1ba32e3b1

diff --git a/BUILD.bazel b/BUILD.bazel
index 0b95fa690001e06ef39227c66dbd0866d33b2926..42198db354f84480b3a16ef8df2556bbfb65819a 100644
index aa72436fed2ce640b919caefac2ccad700f1c400..f19f85b5b94a36b0b00c37b075e6cc7291ee0b34 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -4302,6 +4302,10 @@ cc_library(
@@ -4300,6 +4300,10 @@ cc_library(
"src/torque/kythe-data.h",
"src/torque/torque-compiler.h",
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Subject: Modify where to look for fast_float and simdutf.
Similar to fp16, these dependencies now needs to be downloaded by bazel.

diff --git a/BUILD.bazel b/BUILD.bazel
index 42198db354f84480b3a16ef8df2556bbfb65819a..053ad3d3c264e6e098fea9ac3b2a371aeac3b765 100644
index f19f85b5b94a36b0b00c37b075e6cc7291ee0b34..19d7d942c9dc49b48425285d92d6c93e27371ab9 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -2542,8 +2542,6 @@ filegroup(
Expand All @@ -18,7 +18,7 @@ index 42198db354f84480b3a16ef8df2556bbfb65819a..053ad3d3c264e6e098fea9ac3b2a371a
"third_party/siphash/halfsiphash.cc",
"third_party/siphash/halfsiphash.h",
"third_party/utf8-decoder/utf8-decoder.h",
@@ -4327,12 +4325,6 @@ cc_library(
@@ -4325,12 +4323,6 @@ cc_library(
],
)

Expand All @@ -31,7 +31,7 @@ index 42198db354f84480b3a16ef8df2556bbfb65819a..053ad3d3c264e6e098fea9ac3b2a371a
v8_library(
name = "v8_libshared",
srcs = [
@@ -4361,9 +4353,9 @@ v8_library(
@@ -4359,9 +4351,9 @@ v8_library(
":noicu/generated_torque_definitions",
],
deps = [
Expand Down
189 changes: 189 additions & 0 deletions patches/v8/0025-add-processed_characters-option-to-WriteUtf8V2.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Yagiz Nizipli <yagiz@nizipli.com>
Date: Tue, 18 Feb 2025 11:21:51 -0500
Subject: add processed_characters option to WriteUtf8V2

Bug: https://issues.chromium.org/issues/397377176

Change-Id: I22086a675eb5565bef254a94ac1b6827a1c61a51
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/6276706
Reviewed-by: Erik Corry <erikcorry@chromium.org>
Auto-Submit: Yagiz Nizipli <yagiz@nizipli.com>
Commit-Queue: Leszek Swirski <leszeks@chromium.org>
Reviewed-by: Leszek Swirski <leszeks@chromium.org>
Cr-Commit-Position: refs/heads/main@{#98780}

diff --git a/include/v8-primitive.h b/include/v8-primitive.h
index 01773bcaff9b921e77ae70ef09d8a30c1637d533..b608535abae0ed2d5ee74a327203a4bffb9847fd 100644
--- a/include/v8-primitive.h
+++ b/include/v8-primitive.h
@@ -257,11 +257,14 @@ class V8_EXPORT String : public Name {
* \param buffer The buffer into which the string will be written.
* \param capacity The number of bytes available in the output buffer.
* \param flags Various flags that influence the behavior of this operation.
+ * \param processed_characters_return The number of processed characters from
+ * the buffer.
* \return The number of bytes copied to the buffer including the null
* terminator (if written).
*/
size_t WriteUtf8V2(Isolate* isolate, char* buffer, size_t capacity,
- int flags = WriteFlags::kNone) const;
+ int flags = WriteFlags::kNone,
+ size_t* processed_characters_return = nullptr) const;

/**
* A zero length string.
diff --git a/src/api/api.cc b/src/api/api.cc
index fccbd853f957617c79d97dbdd69fec7c39f65af5..43540a968a5b7e94c3e883d1bf8b5f51072bae05 100644
--- a/src/api/api.cc
+++ b/src/api/api.cc
@@ -6163,7 +6163,8 @@ void String::WriteOneByteV2(Isolate* v8_isolate, uint32_t offset,
}

size_t String::WriteUtf8V2(Isolate* v8_isolate, char* buffer, size_t capacity,
- int flags) const {
+ int flags,
+ size_t* processed_characters_return) const {
auto str = Utils::OpenDirectHandle(this);
i::Isolate* i_isolate = reinterpret_cast<i::Isolate*>(v8_isolate);
API_RCS_SCOPE(i_isolate, String, WriteUtf8);
@@ -6175,7 +6176,8 @@ size_t String::WriteUtf8V2(Isolate* v8_isolate, char* buffer, size_t capacity,
if (flags & String::WriteFlags::kReplaceInvalidUtf8) {
i_flags |= i::String::Utf8EncodingFlag::kReplaceInvalid;
}
- return i::String::WriteUtf8(i_isolate, str, buffer, capacity, i_flags);
+ return i::String::WriteUtf8(i_isolate, str, buffer, capacity, i_flags,
+ processed_characters_return);
}

namespace {
diff --git a/src/objects/string.cc b/src/objects/string.cc
index e6ad2e286bdb05273f0152c214f34f332d67854c..a6013dd168a31e336e79ce5019b36482b6096211 100644
--- a/src/objects/string.cc
+++ b/src/objects/string.cc
@@ -1111,8 +1111,8 @@ void String::WriteToFlat2(SinkCharT* dst, Tagged<ConsString> src,

// static
size_t String::WriteUtf8(Isolate* isolate, DirectHandle<String> string,
- char* buffer, size_t capacity,
- Utf8EncodingFlags flags) {
+ char* buffer, size_t capacity, Utf8EncodingFlags flags,
+ size_t* processed_characters_return) {
DCHECK_IMPLIES(flags & Utf8EncodingFlag::kNullTerminate, capacity > 0);
DCHECK_IMPLIES(capacity > 0, buffer != nullptr);

@@ -1121,19 +1121,22 @@ size_t String::WriteUtf8(Isolate* isolate, DirectHandle<String> string,
DisallowGarbageCollection no_gc;
FlatContent content = string->GetFlatContent(no_gc);
DCHECK(content.IsFlat());
- if (content.IsOneByte()) {
- return unibrow::Utf8::Encode<uint8_t>(
- content.ToOneByteVector(), buffer, capacity,
- flags & Utf8EncodingFlag::kNullTerminate,
- flags & Utf8EncodingFlag::kReplaceInvalid)
- .bytes_written;
- } else {
- return unibrow::Utf8::Encode<uint16_t>(
- content.ToUC16Vector(), buffer, capacity,
- flags & Utf8EncodingFlag::kNullTerminate,
- flags & Utf8EncodingFlag::kReplaceInvalid)
- .bytes_written;
+
+ auto encoding_result = content.IsOneByte()
+ ? unibrow::Utf8::Encode<uint8_t>(
+ content.ToOneByteVector(), buffer, capacity,
+ flags & Utf8EncodingFlag::kNullTerminate,
+ flags & Utf8EncodingFlag::kReplaceInvalid)
+ : unibrow::Utf8::Encode<uint16_t>(
+ content.ToUC16Vector(), buffer, capacity,
+ flags & Utf8EncodingFlag::kNullTerminate,
+ flags & Utf8EncodingFlag::kReplaceInvalid);
+
+ if (processed_characters_return != nullptr) {
+ *processed_characters_return = encoding_result.characters_processed;
}
+
+ return encoding_result.bytes_written;
}

template <typename SourceChar>
diff --git a/src/objects/string.h b/src/objects/string.h
index d456749e52cbbab17b95334bcc6fee18a597fe58..238310eb3d89a2e9206ff6f58c4fcfacd00bba33 100644
--- a/src/objects/string.h
+++ b/src/objects/string.h
@@ -553,7 +553,8 @@ V8_OBJECT class String : public Name {
using Utf8EncodingFlags = base::Flags<Utf8EncodingFlag>;
static size_t WriteUtf8(Isolate* isolate, DirectHandle<String> string,
char* buffer, size_t capacity,
- Utf8EncodingFlags flags);
+ Utf8EncodingFlags flags,
+ size_t* processed_characters_return = nullptr);

// Returns true if this string has no unpaired surrogates and false otherwise.
static inline bool IsWellFormedUnicode(Isolate* isolate,
diff --git a/test/cctest/test-api.cc b/test/cctest/test-api.cc
index 3e7f2aa117f64c9ef00bcd9f3492d4e39e5945f0..434a6f56ce6389f9eec463386e19cb61c08e6206 100644
--- a/test/cctest/test-api.cc
+++ b/test/cctest/test-api.cc
@@ -8607,6 +8607,7 @@ THREADED_TEST(StringWrite) {
char utf8buf[0xD800 * 3];
uint16_t wbuf[100];
size_t len;
+ size_t processed_characters;

memset(utf8buf, 0x1, 1000);
len = v8::String::Empty(isolate)->WriteUtf8V2(
@@ -8621,8 +8622,10 @@ THREADED_TEST(StringWrite) {
CHECK_EQ(0, strcmp(utf8buf, "abc\xC3\xB0\xE2\x98\x83"));

memset(utf8buf, 0x1, 1000);
- len = str2->WriteUtf8V2(isolate, utf8buf, 8);
+ len = str2->WriteUtf8V2(isolate, utf8buf, 8, String::WriteFlags::kNone,
+ &processed_characters);
CHECK_EQ(8, len);
+ CHECK_EQ(5, processed_characters);
CHECK_EQ(0, strncmp(utf8buf, "abc\xC3\xB0\xE2\x98\x83\x01", 9));

memset(utf8buf, 0x1, 1000);
@@ -8828,8 +8831,10 @@ THREADED_TEST(StringWrite) {

memset(utf8buf, 0x1, sizeof(utf8buf));
utf8buf[5] = 'X';
- len = str->WriteUtf8V2(isolate, utf8buf, sizeof(utf8buf));
+ len = str->WriteUtf8V2(isolate, utf8buf, sizeof(utf8buf),
+ String::WriteFlags::kNone, &processed_characters);
CHECK_EQ(5, len);
+ CHECK_EQ(5, processed_characters);
CHECK_EQ('X', utf8buf[5]); // Test that the sixth character is untouched.
utf8buf[5] = '\0';
CHECK_EQ(0, strcmp(utf8buf, "abcde"));
@@ -8846,6 +8851,29 @@ THREADED_TEST(StringWrite) {
str->WriteV2(isolate, 0, 0, nullptr);
len = str->WriteUtf8V2(isolate, nullptr, 0);
CHECK_EQ(0, len);
+
+ std::tuple<const char*, size_t, size_t> cases[] = {
+ {"\xC3\xA9", 0, 0}, // é (2-byte) but buffer is 0
+ {"\xC3\xA9", 1, 0}, // é (2-byte) but buffer is 1
+ {"\xE2\x82\xAC", 0, 0}, // € (3-byte) but buffer is 0
+ {"\xE2\x82\xAC", 1, 0}, // € (3-byte) but buffer is 1
+ {"\xE2\x82\xAC", 2, 0}, // € (3-byte) but buffer is 2
+ {"\xF0\x9F\x98\x81", 0, 0}, // 😁 (4-byte) but buffer is 0
+ {"\xF0\x9F\x98\x81", 1, 0}, // 😁 (4-byte) but buffer is 1
+ {"\xF0\x9F\x98\x81", 2, 0}, // 😁 (4-byte) but buffer is 2
+ };
+
+ for (const auto& test_case : cases) {
+ auto test_str =
+ String::NewFromUtf8(isolate, std::get<0>(test_case)).ToLocalChecked();
+ auto test_buffer_capacity = std::get<1>(test_case);
+ char test_buffer[4];
+ len =
+ test_str->WriteUtf8V2(isolate, test_buffer, test_buffer_capacity,
+ String::WriteFlags::kNone, &processed_characters);
+ CHECK_EQ(std::get<2>(test_case), len);
+ CHECK_EQ(0, processed_characters);
+ }
}

static void Utf16Helper(LocalContext& context, const char* name,