Skip to content

Commit 52d3e9d

Browse files
authored
Merge pull request #3697 from cloudflare/ketan/backport-writeutf-v8
Backport WriteUtf8V2 patch from v8 13.5
2 parents c03a25b + 9a6196c commit 52d3e9d

9 files changed

+206
-16
lines changed

build/deps/v8.bzl

+1
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ PATCHES = [
3030
"0022-Reset-code_range_-before-pointer-compression-cage.patch",
3131
"0023-Move-tear-down-in-IsolateGroup-Release-into-destruct.patch",
3232
"0024-Modify-where-to-look-for-fast_float-and-simdutf.patch",
33+
"0025-add-processed_characters-option-to-WriteUtf8V2.patch",
3334
]
3435

3536
# V8 and its dependencies

docs/v8-updates.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ To update the version of V8 used by workerd, the steps are:
5656
the command would be:
5757

5858
```sh
59-
git format-patch --full-index -k --no-signature --no-stat HEAD~19
59+
git format-patch --full-index -k --no-signature --no-stat --zero-commit HEAD~19
6060
```
6161

6262
8. Remove the existing patches from `workerd/patches/v8` and copy over the latest generated patches

patches/v8/0003-Allow-Windows-builds-under-Bazel.patch

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ Subject: Allow Windows builds under Bazel
55

66

77
diff --git a/BUILD.bazel b/BUILD.bazel
8-
index 967f7bbfb2810366de89b7a7a90f284efab6c9c6..bfe3365942400fc77f346f2138ac31014f1020aa 100644
8+
index 967f7bbfb2810366de89b7a7a90f284efab6c9c6..702648004a7c68721c80e7b43cdcd355ceb16710 100644
99
--- a/BUILD.bazel
1010
+++ b/BUILD.bazel
1111
@@ -3829,6 +3829,8 @@ filegroup(

patches/v8/0005-Speed-up-V8-bazel-build-by-always-using-target-cfg.patch

+3-3
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ speedup for the build: Components like ICU were previously compiled in
88
both target and exec configurations as generator tools depend on them.
99

1010
diff --git a/BUILD.bazel b/BUILD.bazel
11-
index bfe3365942400fc77f346f2138ac31014f1020aa..96a35feb537ddc9edf864cadba402dfcc40f4ed6 100644
11+
index 702648004a7c68721c80e7b43cdcd355ceb16710..e65bd03bec247979b2825584f5b59e5ab0c40283 100644
1212
--- a/BUILD.bazel
1313
+++ b/BUILD.bazel
1414
@@ -17,6 +17,7 @@ load(
@@ -19,7 +19,7 @@ index bfe3365942400fc77f346f2138ac31014f1020aa..96a35feb537ddc9edf864cadba402dfc
1919
)
2020
load(":bazel/v8-non-pointer-compression.bzl", "v8_binary_non_pointer_compression")
2121

22-
@@ -4219,22 +4220,20 @@ filegroup(
22+
@@ -4217,22 +4218,20 @@ filegroup(
2323
],
2424
)
2525

@@ -48,7 +48,7 @@ index bfe3365942400fc77f346f2138ac31014f1020aa..96a35feb537ddc9edf864cadba402dfc
4848
)
4949

5050
v8_mksnapshot(
51-
@@ -4443,7 +4442,6 @@ v8_binary(
51+
@@ -4441,7 +4440,6 @@ v8_binary(
5252
srcs = [
5353
"src/regexp/gen-regexp-special-case.cc",
5454
"src/regexp/special-case.h",

patches/v8/0010-Modify-where-to-look-for-fp16-dependency.-This-depen.patch

+2-2
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,10 @@ Subject: Modify where to look for fp16 dependency. This dependency is normally
77

88

99
diff --git a/BUILD.bazel b/BUILD.bazel
10-
index 96a35feb537ddc9edf864cadba402dfcc40f4ed6..bf957ac52fc0d99ec4c18b325bef455e1c92c4c3 100644
10+
index e65bd03bec247979b2825584f5b59e5ab0c40283..bfc3ab07e3efddc81965bb8dfbe7ce1f50aa72b2 100644
1111
--- a/BUILD.bazel
1212
+++ b/BUILD.bazel
13-
@@ -3843,16 +3843,22 @@ filegroup(
13+
@@ -3841,16 +3841,22 @@ filegroup(
1414
}),
1515
)
1616

patches/v8/0019-Enable-V8-shared-linkage.patch

+4-4
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ Subject: Enable V8 shared linkage
55

66

77
diff --git a/BUILD.bazel b/BUILD.bazel
8-
index bf957ac52fc0d99ec4c18b325bef455e1c92c4c3..0b95fa690001e06ef39227c66dbd0866d33b2926 100644
8+
index bfc3ab07e3efddc81965bb8dfbe7ce1f50aa72b2..aa72436fed2ce640b919caefac2ccad700f1c400 100644
99
--- a/BUILD.bazel
1010
+++ b/BUILD.bazel
1111
@@ -1372,6 +1372,7 @@ filegroup(
@@ -41,15 +41,15 @@ index bf957ac52fc0d99ec4c18b325bef455e1c92c4c3..0b95fa690001e06ef39227c66dbd0866
4141
"src/builtins/setup-builtins-internal.cc",
4242
"src/builtins/torque-csa-header-includes.h",
4343
"src/codegen/turboshaft-builtins-assembler-inl.h",
44-
@@ -3909,6 +3906,7 @@ filegroup(
44+
@@ -3907,6 +3904,7 @@ filegroup(
4545
"src/snapshot/snapshot-empty.cc",
4646
"src/snapshot/static-roots-gen.cc",
4747
"src/snapshot/static-roots-gen.h",
4848
+ "src/execution/isolate.cc",
4949
],
5050
)
5151

52-
@@ -4019,6 +4017,10 @@ filegroup(
52+
@@ -4017,6 +4015,10 @@ filegroup(
5353
name = "noicu/snapshot_files",
5454
srcs = [
5555
"src/init/setup-isolate-deserialize.cc",
@@ -60,7 +60,7 @@ index bf957ac52fc0d99ec4c18b325bef455e1c92c4c3..0b95fa690001e06ef39227c66dbd0866
6060
] + select({
6161
"@v8//bazel/config:v8_target_arm": [
6262
"google3/snapshots/arm/noicu/embedded.S",
63-
@@ -4036,6 +4038,7 @@ filegroup(
63+
@@ -4034,6 +4036,7 @@ filegroup(
6464
name = "icu/snapshot_files",
6565
srcs = [
6666
"src/init/setup-isolate-deserialize.cc",

patches/v8/0020-Fix-macOS-build.patch

+2-2
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,10 @@ and hwy bindings as a whole are no longer needed; this patch can be dropped.
1313
Change-Id: I7c52113596247f8f254ac5f882b41da1ba32e3b1
1414

1515
diff --git a/BUILD.bazel b/BUILD.bazel
16-
index 0b95fa690001e06ef39227c66dbd0866d33b2926..42198db354f84480b3a16ef8df2556bbfb65819a 100644
16+
index aa72436fed2ce640b919caefac2ccad700f1c400..f19f85b5b94a36b0b00c37b075e6cc7291ee0b34 100644
1717
--- a/BUILD.bazel
1818
+++ b/BUILD.bazel
19-
@@ -4302,6 +4302,10 @@ cc_library(
19+
@@ -4300,6 +4300,10 @@ cc_library(
2020
"src/torque/kythe-data.h",
2121
"src/torque/torque-compiler.h",
2222
],

patches/v8/0024-Modify-where-to-look-for-fast_float-and-simdutf.patch

+3-3
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ Subject: Modify where to look for fast_float and simdutf.
66
Similar to fp16, these dependencies now need to be downloaded by bazel.
77

88
diff --git a/BUILD.bazel b/BUILD.bazel
9-
index 42198db354f84480b3a16ef8df2556bbfb65819a..053ad3d3c264e6e098fea9ac3b2a371aeac3b765 100644
9+
index f19f85b5b94a36b0b00c37b075e6cc7291ee0b34..19d7d942c9dc49b48425285d92d6c93e27371ab9 100644
1010
--- a/BUILD.bazel
1111
+++ b/BUILD.bazel
1212
@@ -2542,8 +2542,6 @@ filegroup(
@@ -18,7 +18,7 @@ index 42198db354f84480b3a16ef8df2556bbfb65819a..053ad3d3c264e6e098fea9ac3b2a371a
1818
"third_party/siphash/halfsiphash.cc",
1919
"third_party/siphash/halfsiphash.h",
2020
"third_party/utf8-decoder/utf8-decoder.h",
21-
@@ -4327,12 +4325,6 @@ cc_library(
21+
@@ -4325,12 +4323,6 @@ cc_library(
2222
],
2323
)
2424

@@ -31,7 +31,7 @@ index 42198db354f84480b3a16ef8df2556bbfb65819a..053ad3d3c264e6e098fea9ac3b2a371a
3131
v8_library(
3232
name = "v8_libshared",
3333
srcs = [
34-
@@ -4361,9 +4353,9 @@ v8_library(
34+
@@ -4359,9 +4351,9 @@ v8_library(
3535
":noicu/generated_torque_definitions",
3636
],
3737
deps = [
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,189 @@
1+
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
2+
From: Yagiz Nizipli <yagiz@nizipli.com>
3+
Date: Tue, 18 Feb 2025 11:21:51 -0500
4+
Subject: add processed_characters option to WriteUtf8V2
5+
6+
Bug: https://issues.chromium.org/issues/397377176
7+
8+
Change-Id: I22086a675eb5565bef254a94ac1b6827a1c61a51
9+
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/6276706
10+
Reviewed-by: Erik Corry <erikcorry@chromium.org>
11+
Auto-Submit: Yagiz Nizipli <yagiz@nizipli.com>
12+
Commit-Queue: Leszek Swirski <leszeks@chromium.org>
13+
Reviewed-by: Leszek Swirski <leszeks@chromium.org>
14+
Cr-Commit-Position: refs/heads/main@{#98780}
15+
16+
diff --git a/include/v8-primitive.h b/include/v8-primitive.h
17+
index 01773bcaff9b921e77ae70ef09d8a30c1637d533..b608535abae0ed2d5ee74a327203a4bffb9847fd 100644
18+
--- a/include/v8-primitive.h
19+
+++ b/include/v8-primitive.h
20+
@@ -257,11 +257,14 @@ class V8_EXPORT String : public Name {
21+
* \param buffer The buffer into which the string will be written.
22+
* \param capacity The number of bytes available in the output buffer.
23+
* \param flags Various flags that influence the behavior of this operation.
24+
+ * \param processed_characters_return If non-null, receives the number of
25+
+ * characters of the string that were processed.
26+
* \return The number of bytes copied to the buffer including the null
27+
* terminator (if written).
28+
*/
29+
size_t WriteUtf8V2(Isolate* isolate, char* buffer, size_t capacity,
30+
- int flags = WriteFlags::kNone) const;
31+
+ int flags = WriteFlags::kNone,
32+
+ size_t* processed_characters_return = nullptr) const;
33+
34+
/**
35+
* A zero length string.
36+
diff --git a/src/api/api.cc b/src/api/api.cc
37+
index fccbd853f957617c79d97dbdd69fec7c39f65af5..43540a968a5b7e94c3e883d1bf8b5f51072bae05 100644
38+
--- a/src/api/api.cc
39+
+++ b/src/api/api.cc
40+
@@ -6163,7 +6163,8 @@ void String::WriteOneByteV2(Isolate* v8_isolate, uint32_t offset,
41+
}
42+
43+
size_t String::WriteUtf8V2(Isolate* v8_isolate, char* buffer, size_t capacity,
44+
- int flags) const {
45+
+ int flags,
46+
+ size_t* processed_characters_return) const {
47+
auto str = Utils::OpenDirectHandle(this);
48+
i::Isolate* i_isolate = reinterpret_cast<i::Isolate*>(v8_isolate);
49+
API_RCS_SCOPE(i_isolate, String, WriteUtf8);
50+
@@ -6175,7 +6176,8 @@ size_t String::WriteUtf8V2(Isolate* v8_isolate, char* buffer, size_t capacity,
51+
if (flags & String::WriteFlags::kReplaceInvalidUtf8) {
52+
i_flags |= i::String::Utf8EncodingFlag::kReplaceInvalid;
53+
}
54+
- return i::String::WriteUtf8(i_isolate, str, buffer, capacity, i_flags);
55+
+ return i::String::WriteUtf8(i_isolate, str, buffer, capacity, i_flags,
56+
+ processed_characters_return);
57+
}
58+
59+
namespace {
60+
diff --git a/src/objects/string.cc b/src/objects/string.cc
61+
index e6ad2e286bdb05273f0152c214f34f332d67854c..a6013dd168a31e336e79ce5019b36482b6096211 100644
62+
--- a/src/objects/string.cc
63+
+++ b/src/objects/string.cc
64+
@@ -1111,8 +1111,8 @@ void String::WriteToFlat2(SinkCharT* dst, Tagged<ConsString> src,
65+
66+
// static
67+
size_t String::WriteUtf8(Isolate* isolate, DirectHandle<String> string,
68+
- char* buffer, size_t capacity,
69+
- Utf8EncodingFlags flags) {
70+
+ char* buffer, size_t capacity, Utf8EncodingFlags flags,
71+
+ size_t* processed_characters_return) {
72+
DCHECK_IMPLIES(flags & Utf8EncodingFlag::kNullTerminate, capacity > 0);
73+
DCHECK_IMPLIES(capacity > 0, buffer != nullptr);
74+
75+
@@ -1121,19 +1121,22 @@ size_t String::WriteUtf8(Isolate* isolate, DirectHandle<String> string,
76+
DisallowGarbageCollection no_gc;
77+
FlatContent content = string->GetFlatContent(no_gc);
78+
DCHECK(content.IsFlat());
79+
- if (content.IsOneByte()) {
80+
- return unibrow::Utf8::Encode<uint8_t>(
81+
- content.ToOneByteVector(), buffer, capacity,
82+
- flags & Utf8EncodingFlag::kNullTerminate,
83+
- flags & Utf8EncodingFlag::kReplaceInvalid)
84+
- .bytes_written;
85+
- } else {
86+
- return unibrow::Utf8::Encode<uint16_t>(
87+
- content.ToUC16Vector(), buffer, capacity,
88+
- flags & Utf8EncodingFlag::kNullTerminate,
89+
- flags & Utf8EncodingFlag::kReplaceInvalid)
90+
- .bytes_written;
91+
+
92+
+ auto encoding_result = content.IsOneByte()
93+
+ ? unibrow::Utf8::Encode<uint8_t>(
94+
+ content.ToOneByteVector(), buffer, capacity,
95+
+ flags & Utf8EncodingFlag::kNullTerminate,
96+
+ flags & Utf8EncodingFlag::kReplaceInvalid)
97+
+ : unibrow::Utf8::Encode<uint16_t>(
98+
+ content.ToUC16Vector(), buffer, capacity,
99+
+ flags & Utf8EncodingFlag::kNullTerminate,
100+
+ flags & Utf8EncodingFlag::kReplaceInvalid);
101+
+
102+
+ if (processed_characters_return != nullptr) {
103+
+ *processed_characters_return = encoding_result.characters_processed;
104+
}
105+
+
106+
+ return encoding_result.bytes_written;
107+
}
108+
109+
template <typename SourceChar>
110+
diff --git a/src/objects/string.h b/src/objects/string.h
111+
index d456749e52cbbab17b95334bcc6fee18a597fe58..238310eb3d89a2e9206ff6f58c4fcfacd00bba33 100644
112+
--- a/src/objects/string.h
113+
+++ b/src/objects/string.h
114+
@@ -553,7 +553,8 @@ V8_OBJECT class String : public Name {
115+
using Utf8EncodingFlags = base::Flags<Utf8EncodingFlag>;
116+
static size_t WriteUtf8(Isolate* isolate, DirectHandle<String> string,
117+
char* buffer, size_t capacity,
118+
- Utf8EncodingFlags flags);
119+
+ Utf8EncodingFlags flags,
120+
+ size_t* processed_characters_return = nullptr);
121+
122+
// Returns true if this string has no unpaired surrogates and false otherwise.
123+
static inline bool IsWellFormedUnicode(Isolate* isolate,
124+
diff --git a/test/cctest/test-api.cc b/test/cctest/test-api.cc
125+
index 3e7f2aa117f64c9ef00bcd9f3492d4e39e5945f0..434a6f56ce6389f9eec463386e19cb61c08e6206 100644
126+
--- a/test/cctest/test-api.cc
127+
+++ b/test/cctest/test-api.cc
128+
@@ -8607,6 +8607,7 @@ THREADED_TEST(StringWrite) {
129+
char utf8buf[0xD800 * 3];
130+
uint16_t wbuf[100];
131+
size_t len;
132+
+ size_t processed_characters;
133+
134+
memset(utf8buf, 0x1, 1000);
135+
len = v8::String::Empty(isolate)->WriteUtf8V2(
136+
@@ -8621,8 +8622,10 @@ THREADED_TEST(StringWrite) {
137+
CHECK_EQ(0, strcmp(utf8buf, "abc\xC3\xB0\xE2\x98\x83"));
138+
139+
memset(utf8buf, 0x1, 1000);
140+
- len = str2->WriteUtf8V2(isolate, utf8buf, 8);
141+
+ len = str2->WriteUtf8V2(isolate, utf8buf, 8, String::WriteFlags::kNone,
142+
+ &processed_characters);
143+
CHECK_EQ(8, len);
144+
+ CHECK_EQ(5, processed_characters);
145+
CHECK_EQ(0, strncmp(utf8buf, "abc\xC3\xB0\xE2\x98\x83\x01", 9));
146+
147+
memset(utf8buf, 0x1, 1000);
148+
@@ -8828,8 +8831,10 @@ THREADED_TEST(StringWrite) {
149+
150+
memset(utf8buf, 0x1, sizeof(utf8buf));
151+
utf8buf[5] = 'X';
152+
- len = str->WriteUtf8V2(isolate, utf8buf, sizeof(utf8buf));
153+
+ len = str->WriteUtf8V2(isolate, utf8buf, sizeof(utf8buf),
154+
+ String::WriteFlags::kNone, &processed_characters);
155+
CHECK_EQ(5, len);
156+
+ CHECK_EQ(5, processed_characters);
157+
CHECK_EQ('X', utf8buf[5]); // Test that the sixth character is untouched.
158+
utf8buf[5] = '\0';
159+
CHECK_EQ(0, strcmp(utf8buf, "abcde"));
160+
@@ -8846,6 +8851,29 @@ THREADED_TEST(StringWrite) {
161+
str->WriteV2(isolate, 0, 0, nullptr);
162+
len = str->WriteUtf8V2(isolate, nullptr, 0);
163+
CHECK_EQ(0, len);
164+
+
165+
+ std::tuple<const char*, size_t, size_t> cases[] = {
166+
+ {"\xC3\xA9", 0, 0}, // é (2-byte) but buffer is 0
167+
+ {"\xC3\xA9", 1, 0}, // é (2-byte) but buffer is 1
168+
+ {"\xE2\x82\xAC", 0, 0}, // € (3-byte) but buffer is 0
169+
+ {"\xE2\x82\xAC", 1, 0}, // € (3-byte) but buffer is 1
170+
+ {"\xE2\x82\xAC", 2, 0}, // € (3-byte) but buffer is 2
171+
+ {"\xF0\x9F\x98\x81", 0, 0}, // 😁 (4-byte) but buffer is 0
172+
+ {"\xF0\x9F\x98\x81", 1, 0}, // 😁 (4-byte) but buffer is 1
173+
+ {"\xF0\x9F\x98\x81", 2, 0}, // 😁 (4-byte) but buffer is 2
174+
+ };
175+
+
176+
+ for (const auto& test_case : cases) {
177+
+ auto test_str =
178+
+ String::NewFromUtf8(isolate, std::get<0>(test_case)).ToLocalChecked();
179+
+ auto test_buffer_capacity = std::get<1>(test_case);
180+
+ char test_buffer[4];
181+
+ len =
182+
+ test_str->WriteUtf8V2(isolate, test_buffer, test_buffer_capacity,
183+
+ String::WriteFlags::kNone, &processed_characters);
184+
+ CHECK_EQ(std::get<2>(test_case), len);
185+
+ CHECK_EQ(0, processed_characters);
186+
+ }
187+
}
188+
189+
static void Utf16Helper(LocalContext& context, const char* name,

0 commit comments

Comments
 (0)