Skip to content

Commit 75e4d0d

Browse files
Mert Can Altinjasnell
Mert Can Altin
authored and committed
node-api: add support for UTF-8 and Latin-1 property keys
PR-URL: #52984 Reviewed-By: James M Snell <jasnell@gmail.com> Reviewed-By: Chengzhong Wu <legendecas@gmail.com> Reviewed-By: Vladimir Morozov <vmorozov@microsoft.com>
1 parent b4088f5 commit 75e4d0d

File tree

5 files changed

+224
-129
lines changed

5 files changed

+224
-129
lines changed

doc/api/n-api.md

+77-11
Original file line numberDiff line numberDiff line change
@@ -3088,6 +3088,54 @@ The native string is copied.
30883088
The JavaScript `string` type is described in
30893089
[Section 6.1.4][] of the ECMAScript Language Specification.
30903090

3091+
### Functions to create optimized property keys
3092+
3093+
Many JavaScript engines including V8 use internalized strings as keys
3094+
to set and get property values. They typically use a hash table to create
3095+
and lookup such strings. While it adds some cost per key creation, it improves
3096+
the performance after that by enabling comparison of string pointers instead
3097+
of the whole strings.
3098+
3099+
If a new JavaScript string is intended to be used as a property key, then for
3100+
some JavaScript engines it will be more efficient to use the functions in this
3101+
section. Otherwise, use the `napi_create_string_*` or
3102+
`node_api_create_external_string_*` family of functions, as there may be
3103+
additional overhead in creating/storing strings with the property key
3104+
creation methods.
3105+
3106+
#### `node_api_create_property_key_latin1`
3107+
3108+
<!-- YAML
3109+
added: REPLACEME
3110+
-->
3111+
3112+
> Stability: 1 - Experimental
3113+
3114+
```c
3115+
napi_status NAPI_CDECL node_api_create_property_key_latin1(napi_env env,
3116+
const char* str,
3117+
size_t length,
3118+
napi_value* result);
3119+
```
3120+
3121+
* `[in] env`: The environment that the API is invoked under.
3122+
* `[in] str`: Character buffer representing an ISO-8859-1-encoded string.
3123+
* `[in] length`: The length of the string in bytes, or `NAPI_AUTO_LENGTH` if it
3124+
is null-terminated.
3125+
* `[out] result`: A `napi_value` representing an optimized JavaScript `string`
3126+
to be used as a property key for objects.
3127+
3128+
Returns `napi_ok` if the API succeeded.
3129+
3130+
This API creates an optimized JavaScript `string` value from
3131+
an ISO-8859-1-encoded C string to be used as a property key for objects.
3132+
The native string is copied. In contrast with `napi_create_string_latin1`,
3133+
subsequent calls to this function with the same string content may benefit from a speedup
3134+
in the creation of the requested `napi_value`, depending on the engine.
3135+
3136+
The JavaScript `string` type is described in
3137+
[Section 6.1.4][] of the ECMAScript Language Specification.
3138+
30913139
#### `node_api_create_property_key_utf16`
30923140

30933141
<!-- YAML
@@ -3118,18 +3166,36 @@ This API creates an optimized JavaScript `string` value from
31183166
a UTF16-LE-encoded C string to be used as a property key for objects.
31193167
The native string is copied.
31203168

3121-
Many JavaScript engines including V8 use internalized strings as keys
3122-
to set and get property values. They typically use a hash table to create
3123-
and lookup such strings. While it adds some cost per key creation, it improves
3124-
the performance after that by enabling comparison of string pointers instead
3125-
of the whole strings.
3169+
The JavaScript `string` type is described in
3170+
[Section 6.1.4][] of the ECMAScript Language Specification.
31263171

3127-
If a new JavaScript string is intended to be used as a property key, then for
3128-
some JavaScript engines it will be more efficient to use
3129-
the `node_api_create_property_key_utf16` function.
3130-
Otherwise, use the `napi_create_string_utf16` or
3131-
`node_api_create_external_string_utf16` functions as there may be additional
3132-
overhead in creating/storing strings with this method.
3172+
#### `node_api_create_property_key_utf8`
3173+
3174+
<!-- YAML
3175+
added: REPLACEME
3176+
-->
3177+
3178+
> Stability: 1 - Experimental
3179+
3180+
```c
3181+
napi_status NAPI_CDECL node_api_create_property_key_utf8(napi_env env,
3182+
const char* str,
3183+
size_t length,
3184+
napi_value* result);
3185+
```
3186+
3187+
* `[in] env`: The environment that the API is invoked under.
3188+
* `[in] str`: Character buffer representing a UTF8-encoded string.
3189+
* `[in] length`: The length of the string in bytes, or
3190+
`NAPI_AUTO_LENGTH` if it is null-terminated.
3191+
* `[out] result`: A `napi_value` representing an optimized JavaScript `string`
3192+
to be used as a property key for objects.
3193+
3194+
Returns `napi_ok` if the API succeeded.
3195+
3196+
This API creates an optimized JavaScript `string` value from
3197+
a UTF8-encoded C string to be used as a property key for objects.
3198+
The native string is copied.
31333199

31343200
The JavaScript `string` type is described in
31353201
[Section 6.1.4][] of the ECMAScript Language Specification.

src/js_native_api.h

+4
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,10 @@ node_api_create_external_string_utf16(napi_env env,
114114

115115
#ifdef NAPI_EXPERIMENTAL
116116
#define NODE_API_EXPERIMENTAL_HAS_PROPERTY_KEYS
117+
NAPI_EXTERN napi_status NAPI_CDECL node_api_create_property_key_latin1(
118+
napi_env env, const char* str, size_t length, napi_value* result);
119+
NAPI_EXTERN napi_status NAPI_CDECL node_api_create_property_key_utf8(
120+
napi_env env, const char* str, size_t length, napi_value* result);
117121
NAPI_EXTERN napi_status NAPI_CDECL node_api_create_property_key_utf16(
118122
napi_env env, const char16_t* str, size_t length, napi_value* result);
119123
#endif // NAPI_EXPERIMENTAL

src/js_native_api_v8.cc

+24
Original file line numberDiff line numberDiff line change
@@ -1704,6 +1704,30 @@ napi_status NAPI_CDECL node_api_create_external_string_utf16(
17041704
});
17051705
}
17061706

1707+
napi_status node_api_create_property_key_latin1(napi_env env,
1708+
const char* str,
1709+
size_t length,
1710+
napi_value* result) {
1711+
return v8impl::NewString(env, str, length, result, [&](v8::Isolate* isolate) {
1712+
return v8::String::NewFromOneByte(isolate,
1713+
reinterpret_cast<const uint8_t*>(str),
1714+
v8::NewStringType::kInternalized,
1715+
length);
1716+
});
1717+
}
1718+
1719+
napi_status node_api_create_property_key_utf8(napi_env env,
1720+
const char* str,
1721+
size_t length,
1722+
napi_value* result) {
1723+
return v8impl::NewString(env, str, length, result, [&](v8::Isolate* isolate) {
1724+
return v8::String::NewFromUtf8(isolate,
1725+
str,
1726+
v8::NewStringType::kInternalized,
1727+
static_cast<int>(length));
1728+
});
1729+
}
1730+
17071731
napi_status NAPI_CDECL node_api_create_property_key_utf16(napi_env env,
17081732
const char16_t* str,
17091733
size_t length,

test/js-native-api/test_string/test.js

+64-118
Original file line numberDiff line numberDiff line change
@@ -4,131 +4,77 @@ const assert = require('assert');
44

55
// Testing api calls for string
66
const test_string = require(`./build/${common.buildType}/test_string`);
7+
// The insufficient buffer test case allocates a buffer of size 4, including
8+
// the null terminator.
9+
const kInsufficientIdx = 3;
710

8-
const empty = '';
9-
assert.strictEqual(test_string.TestLatin1(empty), empty);
10-
assert.strictEqual(test_string.TestUtf8(empty), empty);
11-
assert.strictEqual(test_string.TestUtf16(empty), empty);
12-
assert.strictEqual(test_string.TestLatin1AutoLength(empty), empty);
13-
assert.strictEqual(test_string.TestUtf8AutoLength(empty), empty);
14-
assert.strictEqual(test_string.TestUtf16AutoLength(empty), empty);
15-
assert.strictEqual(test_string.TestLatin1External(empty), empty);
16-
assert.strictEqual(test_string.TestUtf16External(empty), empty);
17-
assert.strictEqual(test_string.TestLatin1ExternalAutoLength(empty), empty);
18-
assert.strictEqual(test_string.TestUtf16ExternalAutoLength(empty), empty);
19-
assert.strictEqual(test_string.TestPropertyKeyUtf16(empty), empty);
20-
assert.strictEqual(test_string.TestPropertyKeyUtf16AutoLength(empty), empty);
21-
assert.strictEqual(test_string.Utf16Length(empty), 0);
22-
assert.strictEqual(test_string.Utf8Length(empty), 0);
11+
const asciiCases = [
12+
'',
13+
'hello world',
14+
'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789',
15+
'?!@#$%^&*()_+-=[]{}/.,<>\'"\\',
16+
];
2317

24-
const str1 = 'hello world';
25-
assert.strictEqual(test_string.TestLatin1(str1), str1);
26-
assert.strictEqual(test_string.TestUtf8(str1), str1);
27-
assert.strictEqual(test_string.TestUtf16(str1), str1);
28-
assert.strictEqual(test_string.TestLatin1AutoLength(str1), str1);
29-
assert.strictEqual(test_string.TestUtf8AutoLength(str1), str1);
30-
assert.strictEqual(test_string.TestUtf16AutoLength(str1), str1);
31-
assert.strictEqual(test_string.TestLatin1External(str1), str1);
32-
assert.strictEqual(test_string.TestUtf16External(str1), str1);
33-
assert.strictEqual(test_string.TestLatin1ExternalAutoLength(str1), str1);
34-
assert.strictEqual(test_string.TestUtf16ExternalAutoLength(str1), str1);
35-
assert.strictEqual(test_string.TestLatin1Insufficient(str1), str1.slice(0, 3));
36-
assert.strictEqual(test_string.TestUtf8Insufficient(str1), str1.slice(0, 3));
37-
assert.strictEqual(test_string.TestUtf16Insufficient(str1), str1.slice(0, 3));
38-
assert.strictEqual(test_string.TestPropertyKeyUtf16(str1), str1);
39-
assert.strictEqual(test_string.TestPropertyKeyUtf16AutoLength(str1), str1);
40-
assert.strictEqual(test_string.Utf16Length(str1), 11);
41-
assert.strictEqual(test_string.Utf8Length(str1), 11);
18+
const latin1Cases = [
19+
{
20+
str: '¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿',
21+
utf8Length: 62,
22+
utf8InsufficientIdx: 1,
23+
},
24+
{
25+
str: 'ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþ',
26+
utf8Length: 126,
27+
utf8InsufficientIdx: 1,
28+
},
29+
];
4230

43-
const str2 = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789';
44-
assert.strictEqual(test_string.TestLatin1(str2), str2);
45-
assert.strictEqual(test_string.TestUtf8(str2), str2);
46-
assert.strictEqual(test_string.TestUtf16(str2), str2);
47-
assert.strictEqual(test_string.TestLatin1AutoLength(str2), str2);
48-
assert.strictEqual(test_string.TestUtf8AutoLength(str2), str2);
49-
assert.strictEqual(test_string.TestUtf16AutoLength(str2), str2);
50-
assert.strictEqual(test_string.TestLatin1External(str2), str2);
51-
assert.strictEqual(test_string.TestUtf16External(str2), str2);
52-
assert.strictEqual(test_string.TestLatin1ExternalAutoLength(str2), str2);
53-
assert.strictEqual(test_string.TestUtf16ExternalAutoLength(str2), str2);
54-
assert.strictEqual(test_string.TestLatin1Insufficient(str2), str2.slice(0, 3));
55-
assert.strictEqual(test_string.TestUtf8Insufficient(str2), str2.slice(0, 3));
56-
assert.strictEqual(test_string.TestUtf16Insufficient(str2), str2.slice(0, 3));
57-
assert.strictEqual(test_string.TestPropertyKeyUtf16(str2), str2);
58-
assert.strictEqual(test_string.TestPropertyKeyUtf16AutoLength(str2), str2);
59-
assert.strictEqual(test_string.Utf16Length(str2), 62);
60-
assert.strictEqual(test_string.Utf8Length(str2), 62);
31+
const unicodeCases = [
32+
{
33+
str: '\u{2003}\u{2101}\u{2001}\u{202}\u{2011}',
34+
utf8Length: 14,
35+
utf8InsufficientIdx: 1,
36+
},
37+
];
6138

62-
const str3 = '?!@#$%^&*()_+-=[]{}/.,<>\'"\\';
63-
assert.strictEqual(test_string.TestLatin1(str3), str3);
64-
assert.strictEqual(test_string.TestUtf8(str3), str3);
65-
assert.strictEqual(test_string.TestUtf16(str3), str3);
66-
assert.strictEqual(test_string.TestLatin1AutoLength(str3), str3);
67-
assert.strictEqual(test_string.TestUtf8AutoLength(str3), str3);
68-
assert.strictEqual(test_string.TestUtf16AutoLength(str3), str3);
69-
assert.strictEqual(test_string.TestLatin1External(str3), str3);
70-
assert.strictEqual(test_string.TestUtf16External(str3), str3);
71-
assert.strictEqual(test_string.TestLatin1ExternalAutoLength(str3), str3);
72-
assert.strictEqual(test_string.TestUtf16ExternalAutoLength(str3), str3);
73-
assert.strictEqual(test_string.TestLatin1Insufficient(str3), str3.slice(0, 3));
74-
assert.strictEqual(test_string.TestUtf8Insufficient(str3), str3.slice(0, 3));
75-
assert.strictEqual(test_string.TestUtf16Insufficient(str3), str3.slice(0, 3));
76-
assert.strictEqual(test_string.TestPropertyKeyUtf16(str3), str3);
77-
assert.strictEqual(test_string.TestPropertyKeyUtf16AutoLength(str3), str3);
78-
assert.strictEqual(test_string.Utf16Length(str3), 27);
79-
assert.strictEqual(test_string.Utf8Length(str3), 27);
39+
function testLatin1Cases(str) {
40+
assert.strictEqual(test_string.TestLatin1(str), str);
41+
assert.strictEqual(test_string.TestLatin1AutoLength(str), str);
42+
assert.strictEqual(test_string.TestLatin1External(str), str);
43+
assert.strictEqual(test_string.TestLatin1ExternalAutoLength(str), str);
44+
assert.strictEqual(test_string.TestPropertyKeyLatin1(str), str);
45+
assert.strictEqual(test_string.TestPropertyKeyLatin1AutoLength(str), str);
46+
assert.strictEqual(test_string.Latin1Length(str), str.length);
8047

81-
const str4 = '¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿';
82-
assert.strictEqual(test_string.TestLatin1(str4), str4);
83-
assert.strictEqual(test_string.TestUtf8(str4), str4);
84-
assert.strictEqual(test_string.TestUtf16(str4), str4);
85-
assert.strictEqual(test_string.TestLatin1AutoLength(str4), str4);
86-
assert.strictEqual(test_string.TestUtf8AutoLength(str4), str4);
87-
assert.strictEqual(test_string.TestUtf16AutoLength(str4), str4);
88-
assert.strictEqual(test_string.TestLatin1External(str4), str4);
89-
assert.strictEqual(test_string.TestUtf16External(str4), str4);
90-
assert.strictEqual(test_string.TestLatin1ExternalAutoLength(str4), str4);
91-
assert.strictEqual(test_string.TestUtf16ExternalAutoLength(str4), str4);
92-
assert.strictEqual(test_string.TestLatin1Insufficient(str4), str4.slice(0, 3));
93-
assert.strictEqual(test_string.TestUtf8Insufficient(str4), str4.slice(0, 1));
94-
assert.strictEqual(test_string.TestUtf16Insufficient(str4), str4.slice(0, 3));
95-
assert.strictEqual(test_string.TestPropertyKeyUtf16(str4), str4);
96-
assert.strictEqual(test_string.TestPropertyKeyUtf16AutoLength(str4), str4);
97-
assert.strictEqual(test_string.Utf16Length(str4), 31);
98-
assert.strictEqual(test_string.Utf8Length(str4), 62);
48+
if (str !== '') {
49+
assert.strictEqual(test_string.TestLatin1Insufficient(str), str.slice(0, kInsufficientIdx));
50+
}
51+
}
9952

100-
const str5 = 'ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþ';
101-
assert.strictEqual(test_string.TestLatin1(str5), str5);
102-
assert.strictEqual(test_string.TestUtf8(str5), str5);
103-
assert.strictEqual(test_string.TestUtf16(str5), str5);
104-
assert.strictEqual(test_string.TestLatin1AutoLength(str5), str5);
105-
assert.strictEqual(test_string.TestUtf8AutoLength(str5), str5);
106-
assert.strictEqual(test_string.TestUtf16AutoLength(str5), str5);
107-
assert.strictEqual(test_string.TestLatin1External(str5), str5);
108-
assert.strictEqual(test_string.TestUtf16External(str5), str5);
109-
assert.strictEqual(test_string.TestLatin1ExternalAutoLength(str5), str5);
110-
assert.strictEqual(test_string.TestUtf16ExternalAutoLength(str5), str5);
111-
assert.strictEqual(test_string.TestLatin1Insufficient(str5), str5.slice(0, 3));
112-
assert.strictEqual(test_string.TestUtf8Insufficient(str5), str5.slice(0, 1));
113-
assert.strictEqual(test_string.TestUtf16Insufficient(str5), str5.slice(0, 3));
114-
assert.strictEqual(test_string.TestPropertyKeyUtf16(str5), str5);
115-
assert.strictEqual(test_string.TestPropertyKeyUtf16AutoLength(str5), str5);
116-
assert.strictEqual(test_string.Utf16Length(str5), 63);
117-
assert.strictEqual(test_string.Utf8Length(str5), 126);
53+
function testUnicodeCases(str, utf8Length, utf8InsufficientIdx) {
54+
assert.strictEqual(test_string.TestUtf8(str), str);
55+
assert.strictEqual(test_string.TestUtf16(str), str);
56+
assert.strictEqual(test_string.TestUtf8AutoLength(str), str);
57+
assert.strictEqual(test_string.TestUtf16AutoLength(str), str);
58+
assert.strictEqual(test_string.TestUtf16External(str), str);
59+
assert.strictEqual(test_string.TestUtf16ExternalAutoLength(str), str);
60+
assert.strictEqual(test_string.TestPropertyKeyUtf8(str), str);
61+
assert.strictEqual(test_string.TestPropertyKeyUtf8AutoLength(str), str);
62+
assert.strictEqual(test_string.TestPropertyKeyUtf16(str), str);
63+
assert.strictEqual(test_string.TestPropertyKeyUtf16AutoLength(str), str);
64+
assert.strictEqual(test_string.Utf8Length(str), utf8Length);
65+
assert.strictEqual(test_string.Utf16Length(str), str.length);
11866

119-
const str6 = '\u{2003}\u{2101}\u{2001}\u{202}\u{2011}';
120-
assert.strictEqual(test_string.TestUtf8(str6), str6);
121-
assert.strictEqual(test_string.TestUtf16(str6), str6);
122-
assert.strictEqual(test_string.TestUtf8AutoLength(str6), str6);
123-
assert.strictEqual(test_string.TestUtf16AutoLength(str6), str6);
124-
assert.strictEqual(test_string.TestUtf16External(str6), str6);
125-
assert.strictEqual(test_string.TestUtf16ExternalAutoLength(str6), str6);
126-
assert.strictEqual(test_string.TestUtf8Insufficient(str6), str6.slice(0, 1));
127-
assert.strictEqual(test_string.TestUtf16Insufficient(str6), str6.slice(0, 3));
128-
assert.strictEqual(test_string.TestPropertyKeyUtf16(str6), str6);
129-
assert.strictEqual(test_string.TestPropertyKeyUtf16AutoLength(str6), str6);
130-
assert.strictEqual(test_string.Utf16Length(str6), 5);
131-
assert.strictEqual(test_string.Utf8Length(str6), 14);
67+
if (str !== '') {
68+
assert.strictEqual(test_string.TestUtf8Insufficient(str), str.slice(0, utf8InsufficientIdx));
69+
assert.strictEqual(test_string.TestUtf16Insufficient(str), str.slice(0, kInsufficientIdx));
70+
}
71+
}
72+
73+
asciiCases.forEach(testLatin1Cases);
74+
asciiCases.forEach((str) => testUnicodeCases(str, str.length, kInsufficientIdx));
75+
latin1Cases.forEach((it) => testLatin1Cases(it.str));
76+
latin1Cases.forEach((it) => testUnicodeCases(it.str, it.utf8Length, it.utf8InsufficientIdx));
77+
unicodeCases.forEach((it) => testUnicodeCases(it.str, it.utf8Length, it.utf8InsufficientIdx));
13278

13379
assert.throws(() => {
13480
test_string.TestLargeUtf8();

0 commit comments

Comments
 (0)