Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add functions to get JsString as UTF-16 #944

Merged
merged 7 commits into from
Nov 29, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions crates/neon/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@ edition = "2018"

[dev-dependencies]
semver = "1"
psd = "0.3.1" # used for a doc example
anyhow = "1.0.58" # used for a doc example
psd = "0.3.1" # used for a doc example
anyhow = "1.0.58" # used for a doc example
widestring = "1.0.2" # used for a doc example

[target.'cfg(not(target = "windows"))'.dev-dependencies]
# Avoid `clang` as a dependency on windows
Expand Down
11 changes: 11 additions & 0 deletions crates/neon/src/sys/bindings/functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,17 @@ mod napi1 {
result: *mut usize,
) -> Status;

// The `buf` argument is defined as a `char16_t` which _should_ be a `u16` on most
// platforms. When generating bindings with `rust-bindgen` it unconditionally defines
// it as `u16` as well.
fn get_value_string_utf16(
env: Env,
value: Value,
buf: *mut u16,
dnaka91 marked this conversation as resolved.
Show resolved Hide resolved
bufsize: usize,
result: *mut usize,
) -> Status;

fn create_type_error(env: Env, code: Value, msg: Value, result: *mut Value) -> Status;

fn create_range_error(env: Env, code: Value, msg: Value, result: *mut Value) -> Status;
Expand Down
29 changes: 23 additions & 6 deletions crates/neon/src/sys/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,23 +11,40 @@ pub unsafe fn new(out: &mut Local, env: Env, data: *const u8, len: i32) -> bool
status == napi::Status::Ok
}

pub unsafe fn utf8_len(env: Env, value: Local) -> isize {
pub unsafe fn utf8_len(env: Env, value: Local) -> usize {
let mut len = MaybeUninit::uninit();
let status = napi::get_value_string_utf8(env, value, ptr::null_mut(), 0, len.as_mut_ptr());

assert_eq!(status, napi::Status::Ok);

len.assume_init() as isize
len.assume_init()
}

pub unsafe fn data(env: Env, out: *mut u8, len: isize, value: Local) -> isize {
pub unsafe fn data(env: Env, out: *mut u8, len: usize, value: Local) -> usize {
let mut read = MaybeUninit::uninit();
let status =
napi::get_value_string_utf8(env, value, out as *mut _, len as usize, read.as_mut_ptr());
let status = napi::get_value_string_utf8(env, value, out as *mut _, len, read.as_mut_ptr());

assert_eq!(status, napi::Status::Ok);

read.assume_init() as isize
read.assume_init()
}

/// Ask Node-API how long `value` is when encoded as UTF-16.
///
/// The query passes a null buffer with a size of `0`, so nothing is copied; only the
/// length reported through the out-parameter is returned. Callers in this crate add one
/// to the result before allocating, which suggests the count excludes a trailing
/// terminator (NOTE(review): confirm against the Node-API documentation).
///
/// # Panics
///
/// Panics if the Node-API call does not report `napi::Status::Ok`.
///
/// # Safety
///
/// NOTE(review): presumably `env` must be a live environment and `value` a valid string
/// handle belonging to it; neither is checked here.
pub unsafe fn utf16_len(env: Env, value: Local) -> usize {
    let mut units = MaybeUninit::uninit();
    let units_out = units.as_mut_ptr();

    let status = napi::get_value_string_utf16(env, value, ptr::null_mut(), 0, units_out);
    assert_eq!(status, napi::Status::Ok);

    // Only read after the status check above has passed.
    units.assume_init()
}

/// Copy the UTF-16 contents of `value` into the buffer at `out`.
///
/// `len` is forwarded to Node-API as the buffer size, and the count that Node-API reports
/// through its out-parameter is returned to the caller.
///
/// # Panics
///
/// Panics if the Node-API call does not report `napi::Status::Ok`.
///
/// # Safety
///
/// NOTE(review): presumably `out` must be valid for writes of `len` `u16`s, and `env` /
/// `value` must be live handles; none of this is checked here.
pub unsafe fn data_utf16(env: Env, out: *mut u16, len: usize, value: Local) -> usize {
    let mut copied = MaybeUninit::uninit();
    let copied_out = copied.as_mut_ptr();

    let status = napi::get_value_string_utf16(env, value, out, len, copied_out);
    assert_eq!(status, napi::Status::Ok);

    // Only read after the status check above has passed.
    copied.assume_init()
}

pub unsafe fn run_script(out: &mut Local, env: Env, value: Local) -> bool {
Expand Down
122 changes: 116 additions & 6 deletions crates/neon/src/types_impl/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -343,22 +343,132 @@ impl private::ValueInternal for JsString {
}

impl JsString {
pub fn size<'a, C: Context<'a>>(&self, cx: &mut C) -> isize {
/// Return the length, in bytes, of this string once encoded as UTF-8.
///
/// This is the byte length of the Rust [`String`] produced by [`JsString::value`].
///
/// # Example
///
/// A function that checks the UTF-8 length of the JavaScript string it receives. For the
/// input `hello 🥹` the result is 10 bytes:
///
/// - 6 bytes for `hello ` (including the space).
/// - 4 bytes for the emoji `🥹`.
///
/// ```rust
/// # use neon::prelude::*;
/// fn string_len(mut cx: FunctionContext) -> JsResult<JsUndefined> {
///     let len = cx.argument::<JsString>(0)?.size(&mut cx);
///     // assuming the function is called with the JS string `hello 🥹`.
///     assert_eq!(10, len);
///
///     Ok(cx.undefined())
/// }
/// ```
pub fn size<'a, C: Context<'a>>(&self, cx: &mut C) -> usize {
    let raw_env = cx.env().to_raw();
    let raw_string = self.to_raw();

    // Both raw handles are taken from `cx` and `self` immediately before the call.
    unsafe { sys::string::utf8_len(raw_env, raw_string) }
}

/// Return the length of this string in UTF-16 code units (`u16`s).
///
/// This is the length of the vector produced by [`JsString::to_utf16`].
///
/// # Example
///
/// A function that checks the UTF-16 length of the JavaScript string it receives. For the
/// input `hello 🥹` the result is 8 `u16`s:
///
/// - 6 `u16`s for `hello ` (including the space).
/// - 2 `u16`s for the emoji `🥹`.
///
/// ```rust
/// # use neon::prelude::*;
/// fn string_len_utf16(mut cx: FunctionContext) -> JsResult<JsUndefined> {
///     let len = cx.argument::<JsString>(0)?.size_utf16(&mut cx);
///     // assuming the function is called with the JS string `hello 🥹`.
///     assert_eq!(8, len);
///
///     Ok(cx.undefined())
/// }
/// ```
pub fn size_utf16<'a, C: Context<'a>>(&self, cx: &mut C) -> usize {
    let raw_env = cx.env().to_raw();
    let raw_string = self.to_raw();

    // Both raw handles are taken from `cx` and `self` immediately before the call.
    unsafe { sys::string::utf16_len(raw_env, raw_string) }
}

/// Convert the JavaScript string into a Rust [`String`].
///
/// # Example
///
/// A function that expects a single JavaScript string as argument and prints it out.
///
/// ```rust
/// # use neon::prelude::*;
/// fn print_string(mut cx: FunctionContext) -> JsResult<JsUndefined> {
/// let s = cx.argument::<JsString>(0)?.value(&mut cx);
/// println!("JavaScript string content: {}", s);
///
/// Ok(cx.undefined())
/// }
/// ```
pub fn value<'a, C: Context<'a>>(&self, cx: &mut C) -> String {
let env = cx.env().to_raw();

unsafe {
let capacity = sys::string::utf8_len(env, self.to_raw()) + 1;
let mut buffer: Vec<u8> = Vec::with_capacity(capacity as usize);
let p = buffer.as_mut_ptr();
std::mem::forget(buffer);
let len = sys::string::data(env, p, capacity, self.to_raw());
String::from_raw_parts(p, len as usize, capacity as usize)
let mut buffer: Vec<u8> = Vec::with_capacity(capacity);
let len = sys::string::data(env, buffer.as_mut_ptr(), capacity, self.to_raw());
buffer.set_len(len);
String::from_utf8_unchecked(buffer)
}
}

/// Convert the JavaScript String into a UTF-16 encoded [`Vec<u16>`].
///
/// The returned vector is guaranteed to be valid UTF-16. Therefore, any external crate that
/// handles UTF-16 encoded strings, can assume the content to be valid and skip eventual
/// validation steps.
///
/// # Example
///
/// A function that expects a single JavaScript string as argument and prints it out as a raw
/// vector of `u16`s.
///
/// ```rust
/// # use neon::prelude::*;
/// fn print_string_as_utf16(mut cx: FunctionContext) -> JsResult<JsUndefined> {
/// let s = cx.argument::<JsString>(0)?.to_utf16(&mut cx);
/// println!("JavaScript string as raw UTF-16: {:?}", s);
///
/// Ok(cx.undefined())
/// }
/// ```
///
/// Again a function that expects a single JavaScript string as argument, but utilizes the
/// [`widestring`](https://crates.io/crates/widestring) crate to handle the raw [`Vec<u16>`] as
/// a typical string.
///
/// ```rust
/// # use neon::prelude::*;
/// fn print_with_widestring(mut cx: FunctionContext) -> JsResult<JsUndefined> {
/// let s = cx.argument::<JsString>(0)?.to_utf16(&mut cx);
/// // The returned vector is guaranteed to be valid UTF-16.
/// // Therefore, we can skip the validation step.
/// let s = unsafe { widestring::Utf16String::from_vec_unchecked(s) };
dnaka91 marked this conversation as resolved.
Show resolved Hide resolved
/// println!("JavaScript string as UTF-16: {}", s);
///
/// Ok(cx.undefined())
/// }
/// ```
pub fn to_utf16<'a, C: Context<'a>>(&self, cx: &mut C) -> Vec<u16> {
let env = cx.env().to_raw();

unsafe {
let capacity = sys::string::utf16_len(env, self.to_raw()) + 1;
dnaka91 marked this conversation as resolved.
Show resolved Hide resolved
let mut buffer: Vec<u16> = Vec::with_capacity(capacity);
let len = sys::string::data_utf16(env, buffer.as_mut_ptr(), capacity, self.to_raw());
buffer.set_len(len);
buffer
}
}

Expand Down
12 changes: 12 additions & 0 deletions test/napi/lib/strings.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,18 @@ describe("JsString", function () {
it("should return a JsString built in Rust", function () {
assert.equal(addon.return_js_string(), "hello node");
});
it("should return a raw valid UTF-16 string built in Rust", function () {
const decoder = new TextDecoder("utf-16");
assert.equal(decoder.decode(addon.return_js_string_utf16()), "hello 🥹");
});
describe("encoding", function () {
it("should return the UTF-8 string length", function () {
assert.equal(addon.return_length_utf8("hello 🥹"), 10);
});
it("should return the UTF-16 string length", function () {
dnaka91 marked this conversation as resolved.
Show resolved Hide resolved
assert.equal(addon.return_length_utf16("hello 🥹"), 8);
});
});
describe("run_as_script", function () {
it("should return the evaluated value", function () {
assert.equal(addon.run_string_as_script("6 * 7"), 42);
Expand Down
15 changes: 15 additions & 0 deletions test/napi/src/js/strings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,21 @@ pub fn return_js_string(mut cx: FunctionContext) -> JsResult<JsString> {
Ok(cx.string("hello node"))
}

/// Test export: returns the UTF-16 code units of `hello 🥹` as a `u16` typed array.
pub fn return_js_string_utf16(mut cx: FunctionContext) -> JsResult<JsTypedArray<u16>> {
    let code_units: Vec<u16> = "hello 🥹".encode_utf16().collect();

    JsTypedArray::from_slice(&mut cx, &code_units)
}

/// Test export: converts the first argument to a Rust `String` and returns its byte length.
pub fn return_length_utf8(mut cx: FunctionContext) -> JsResult<JsNumber> {
    let js_string = cx.argument::<JsString>(0)?;
    let byte_len = js_string.value(&mut cx).len();

    Ok(cx.number(byte_len as f64))
}

/// Test export: converts the first argument to UTF-16 and returns the number of `u16`s.
pub fn return_length_utf16(mut cx: FunctionContext) -> JsResult<JsNumber> {
    let js_string = cx.argument::<JsString>(0)?;
    let unit_count = js_string.to_utf16(&mut cx).len();

    Ok(cx.number(unit_count as f64))
}

pub fn run_string_as_script(mut cx: FunctionContext) -> JsResult<JsValue> {
let string_script = cx.argument::<JsString>(0)?;
eval(&mut cx, string_script)
Expand Down
3 changes: 3 additions & 0 deletions test/napi/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,9 @@ fn main(mut cx: ModuleContext) -> NeonResult<()> {
cx.export_function("add1", add1)?;

cx.export_function("return_js_string", return_js_string)?;
cx.export_function("return_js_string_utf16", return_js_string_utf16)?;
cx.export_function("return_length_utf8", return_length_utf8)?;
cx.export_function("return_length_utf16", return_length_utf16)?;
cx.export_function("run_string_as_script", run_string_as_script)?;

cx.export_function("return_js_number", return_js_number)?;
Expand Down