Skip to content

Commit 8df4bbf

Browse files
committed
src: add node:encoding module
1 parent b2d4c4e commit 8df4bbf

File tree

10 files changed

+229
-0
lines changed

10 files changed

+229
-0
lines changed

doc/api/encoding.md

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
# Encoding
2+
3+
<!-- introduced_in=REPLACEME -->
4+
5+
> Stability: 1 - Experimental
6+
7+
<!-- source_link=lib/encoding.js -->
8+
9+
The `node:encoding` module provides Unicode validation and transcoding.
10+
To access it:
11+
12+
```mjs
13+
import encoding from 'node:encoding';
14+
```
15+
16+
```cjs
17+
const encoding = require('node:encoding');
18+
```
19+
20+
This module is only available under the `node:` scheme. The following will not
21+
work:
22+
23+
```mjs
24+
import encoding from 'encoding';
25+
```
26+
27+
```cjs
28+
const encoding = require('encoding');
29+
```
30+
31+
## `isAscii(input)`
32+
33+
<!-- YAML
34+
added: REPLACEME
35+
-->
36+
37+
* input {Buffer | Uint8Array | string} The ASCII input to validate.
38+
* Returns: {boolean} Returns true if and only if the input is valid ASCII.
39+
40+
This function checks whether `input` consists entirely of ASCII code points (characters). Input of any other type returns `false`.
41+
42+
## `isUtf8(input)`
43+
44+
<!-- YAML
45+
added: REPLACEME
46+
-->
47+
48+
* input {Buffer | Uint8Array} The UTF-8 input to validate.
49+
* Returns: {boolean} Returns true if and only if the input is valid UTF-8.
50+
51+
This function checks whether `input` is a valid UTF-8 byte sequence. Input that is not a `Buffer` or `Uint8Array` (including strings) returns `false`.
52+
53+
## `countUtf8(input)`
54+
55+
<!-- YAML
56+
added: REPLACEME
57+
-->
58+
59+
* input {Buffer | Uint8Array}
60+
* Returns: {number}
61+
62+
This function counts the number of code points (characters) in the
63+
input, assuming that it is valid UTF-8.

doc/api/index.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
* [DNS](dns.md)
3030
* [Domain](domain.md)
3131
* [Errors](errors.md)
32+
* [Encoding](encoding.md)
3233
* [Events](events.md)
3334
* [File system](fs.md)
3435
* [Globals](globals.md)

lib/encoding.js

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
'use strict';
2+
3+
const {
4+
isAscii: _isAscii,
5+
isUtf8: _isUtf8,
6+
countUtf8: _countUtf8,
7+
} = internalBinding('encoding_methods');
8+
9+
const {
10+
isUint8Array,
11+
} = require('internal/util/types');
12+
13+
const {
14+
emitExperimentalWarning,
15+
} = require('internal/util');
16+
17+
const { TextEncoder } = require('util');
18+
const { Buffer } = require('buffer');
19+
20+
const encoder = new TextEncoder();
21+
22+
emitExperimentalWarning('Encoding');
23+
24+
/**
 * Checks whether the input is made up exclusively of ASCII data.
 *
 * Strings are encoded with the module-level `TextEncoder` before being handed
 * to the native validator. Any other input type yields `false`.
 * @param {Buffer | Uint8Array | string} input The value to validate.
 * @returns {boolean} `true` if the native validator accepts the input.
 */
function isAscii(input) {
  if (Buffer.isBuffer(input) || isUint8Array(input)) {
    // A view may expose only a window of its backing ArrayBuffer (pooled
    // Buffers, `subarray()` results). Passing `input.buffer` directly would
    // validate bytes outside that window, so hand over just the viewed bytes.
    const { buffer, byteOffset, byteLength } = input;
    return _isAscii(buffer.slice(byteOffset, byteOffset + byteLength));
  }

  if (typeof input === 'string') {
    const { buffer } = encoder.encode(input);
    return _isAscii(buffer);
  }

  return false;
}
36+
37+
/**
 * Checks whether the bytes of the input form valid UTF-8.
 *
 * Input that is neither a `Buffer` nor a `Uint8Array` yields `false`.
 * @param {Buffer | Uint8Array} input The bytes to validate.
 * @returns {boolean} `true` if the native validator accepts the input.
 */
function isUtf8(input) {
  if (Buffer.isBuffer(input) || isUint8Array(input)) {
    // A view may expose only a window of its backing ArrayBuffer (pooled
    // Buffers, `subarray()` results). Passing `input.buffer` directly would
    // validate bytes outside that window, so hand over just the viewed bytes.
    const { buffer, byteOffset, byteLength } = input;
    return _isUtf8(buffer.slice(byteOffset, byteOffset + byteLength));
  }

  return false;
}
44+
45+
/**
 * Counts the code points in the input, which is expected to be valid UTF-8.
 *
 * Input that is neither a `Buffer` nor a `Uint8Array` yields `0`.
 * @param {Buffer | Uint8Array} input The UTF-8 bytes to count.
 * @returns {number} The count reported by the native binding.
 */
function countUtf8(input) {
  if (Buffer.isBuffer(input) || isUint8Array(input)) {
    // A view may expose only a window of its backing ArrayBuffer (pooled
    // Buffers, `subarray()` results). Passing `input.buffer` directly would
    // count bytes outside that window, so hand over just the viewed bytes.
    const { buffer, byteOffset, byteLength } = input;
    return _countUtf8(buffer.slice(byteOffset, byteOffset + byteLength));
  }

  return 0;
}
52+
53+
module.exports = {
54+
isAscii,
55+
isUtf8,
56+
countUtf8,
57+
};

lib/internal/bootstrap/loaders.js

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ const internalBindingAllowlist = new SafeSet([
8585
'constants',
8686
'contextify',
8787
'crypto',
88+
'encoding_methods',
8889
'fs',
8990
'fs_event_wrap',
9091
'http_parser',
@@ -124,6 +125,7 @@ const legacyWrapperList = new SafeSet([
124125

125126
// Modules that can only be imported via the node: scheme.
126127
const schemelessBlockList = new SafeSet([
128+
'encoding',
127129
'test',
128130
]);
129131

node.gyp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -501,6 +501,7 @@
501501
'src/node_dir.cc',
502502
'src/node_env_var.cc',
503503
'src/node_errors.cc',
504+
'src/node_encoding.cc',
504505
'src/node_external_reference.cc',
505506
'src/node_file.cc',
506507
'src/node_http_parser.cc',

src/node_binding.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
V(contextify) \
4444
V(credentials) \
4545
V(errors) \
46+
V(encoding_methods) \
4647
V(fs) \
4748
V(fs_dir) \
4849
V(fs_event_wrap) \

src/node_encoding.cc

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
#include "env-inl.h"
2+
#include "node.h"
3+
#include "node_errors.h"
4+
#include "node_external_reference.h"
5+
#include "util-inl.h"
6+
7+
#include "simdutf.h"
8+
9+
namespace node {
10+
11+
using v8::ArrayBuffer;
12+
using v8::BackingStore;
13+
using v8::CFunction;
14+
using v8::Context;
15+
using v8::FastApiTypedArray;
16+
using v8::FunctionCallbackInfo;
17+
using v8::Isolate;
18+
using v8::Local;
19+
using v8::MaybeLocal;
20+
using v8::Object;
21+
using v8::String;
22+
using v8::Uint32Array;
23+
using v8::Uint8Array;
24+
using v8::Value;
25+
26+
// TODO(anonrig): Replace this with encoding when encoding enum is renamed.
27+
namespace encoding_methods {
28+
29+
static void IsAscii(const FunctionCallbackInfo<Value>& args) {
30+
CHECK_GE(args.Length(), 1);
31+
CHECK(args[0]->IsArrayBuffer());
32+
Local<ArrayBuffer> input = args[0].As<ArrayBuffer>();
33+
auto external_resource = static_cast<const char*>(input->Data());
34+
args.GetReturnValue().Set(
35+
simdutf::validate_ascii(external_resource, input->ByteLength()));
36+
}
37+
38+
static void IsUtf8(const FunctionCallbackInfo<Value>& args) {
39+
CHECK_GE(args.Length(), 1);
40+
CHECK(args[0]->IsArrayBuffer());
41+
Local<ArrayBuffer> input = args[0].As<ArrayBuffer>();
42+
auto external_resource = static_cast<const char*>(input->Data());
43+
args.GetReturnValue().Set(
44+
simdutf::validate_utf8(external_resource, input->ByteLength()));
45+
}
46+
47+
static void CountUtf8(const FunctionCallbackInfo<Value>& args) {
48+
CHECK_GE(args.Length(), 1);
49+
CHECK(args[0]->IsArrayBuffer());
50+
Local<ArrayBuffer> input = args[0].As<ArrayBuffer>();
51+
auto external_resource = static_cast<const char*>(input->Data());
52+
int count = simdutf::count_utf8(external_resource, input->ByteLength());
53+
args.GetReturnValue().Set(count);
54+
}
55+
56+
static void Initialize(Local<Object> target,
57+
Local<Value> unused,
58+
Local<Context> context,
59+
void* priv) {
60+
SetMethodNoSideEffect(context, target, "isAscii", IsAscii);
61+
SetMethodNoSideEffect(context, target, "isUtf8", IsUtf8);
62+
SetMethodNoSideEffect(context, target, "countUtf8", CountUtf8);
63+
}
64+
65+
void RegisterExternalReferences(ExternalReferenceRegistry* registry) {
66+
registry->Register(IsAscii);
67+
registry->Register(IsUtf8);
68+
registry->Register(CountUtf8);
69+
}
70+
71+
} // namespace encoding_methods
72+
} // namespace node
73+
74+
NODE_BINDING_CONTEXT_AWARE_INTERNAL(encoding_methods,
75+
node::encoding_methods::Initialize)
76+
NODE_BINDING_EXTERNAL_REFERENCE(
77+
encoding_methods, node::encoding_methods::RegisterExternalReferences)

src/node_external_reference.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ class ExternalReferenceRegistry {
6767
V(credentials) \
6868
V(env_var) \
6969
V(errors) \
70+
V(encoding_methods) \
7071
V(fs) \
7172
V(fs_dir) \
7273
V(fs_event_wrap) \

test/parallel/test-encoding.js

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
// Flags: --no-warnings
2+
'use strict';
3+
require('../common');
4+
5+
const assert = require('assert');
6+
const encoding = require('node:encoding');
7+
const { TextEncoder } = require('util');
8+
9+
const encoder = new TextEncoder();
10+
11+
// The API returns primitives (booleans and numbers), so plain strict equality
// is the appropriate assertion; deep comparison adds nothing here.

// isAscii accepts both byte views and strings.
assert.strictEqual(encoding.isAscii(encoder.encode('hello')), true);
assert.strictEqual(encoding.isAscii(encoder.encode('ğ')), false);
assert.strictEqual(encoding.isAscii('hello'), true);
assert.strictEqual(encoding.isAscii('ğ'), false);

// isUtf8 accepts byte views; 0xf8 can never start a UTF-8 sequence.
assert.strictEqual(encoding.isUtf8(encoder.encode('hello')), true);
assert.strictEqual(encoding.isUtf8(encoder.encode('ğ')), true);
assert.strictEqual(encoding.isUtf8(Buffer.from([0xf8])), false);

// countUtf8 counts code points, not bytes ('Yağız' is 7 bytes long).
assert.strictEqual(encoding.countUtf8(encoder.encode('hello')), 5);
assert.strictEqual(encoding.countUtf8(encoder.encode('Yağız')), 5);
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
// Typings for internalBinding('encoding_methods'). The method names mirror the
// ones the native binding registers (isAscii, isUtf8, countUtf8); each native
// implementation CHECKs that its argument is an ArrayBuffer.
declare function InternalBinding(binding: 'encoding_methods'): {
  isAscii(input: ArrayBuffer): boolean;
  isUtf8(input: ArrayBuffer): boolean;
  countUtf8(input: ArrayBuffer): number;
};

0 commit comments

Comments
 (0)