Skip to content

Commit

Permalink
Merge pull request #944 from dnaka91/string-utf16
Browse files Browse the repository at this point in the history
Add functions to get JsString as UTF-16
  • Loading branch information
kjvalencik authored Nov 29, 2022
2 parents 45e60fb + b9de324 commit 681a71d
Show file tree
Hide file tree
Showing 8 changed files with 190 additions and 14 deletions.
7 changes: 7 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions crates/neon/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@ edition = "2018"

[dev-dependencies]
semver = "1"
psd = "0.3.1" # used for a doc example
anyhow = "1.0.58" # used for a doc example
psd = "0.3.1" # used for a doc example
anyhow = "1.0.58" # used for a doc example
widestring = "1.0.2" # used for a doc example

[target.'cfg(not(target = "windows"))'.dev-dependencies]
# Avoid `clang` as a dependency on windows
Expand Down
11 changes: 11 additions & 0 deletions crates/neon/src/sys/bindings/functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,17 @@ mod napi1 {
result: *mut usize,
) -> Status;

// Binding for the Node-API call that reads a JavaScript string as UTF-16 code units.
//
// The C header declares `buf` in terms of `char16_t`, which _should_ be equivalent to `u16`
// on most platforms. `rust-bindgen` unconditionally maps it to `u16` as well, so `*mut u16`
// is used here.
//
// As used by the callers in this crate: `buf`/`bufsize` describe the destination buffer (a
// null `buf` with `bufsize == 0` is passed to query the length only) and `result` receives
// the number of code units reported by the call.
fn get_value_string_utf16(
env: Env,
value: Value,
buf: *mut u16,
bufsize: usize,
result: *mut usize,
) -> Status;

fn create_type_error(env: Env, code: Value, msg: Value, result: *mut Value) -> Status;

fn create_range_error(env: Env, code: Value, msg: Value, result: *mut Value) -> Status;
Expand Down
29 changes: 23 additions & 6 deletions crates/neon/src/sys/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,23 +11,40 @@ pub unsafe fn new(out: &mut Local, env: Env, data: *const u8, len: i32) -> bool
status == napi::Status::Ok
}

/// Reports the length of `value` as measured by `napi::get_value_string_utf8`.
///
/// The call is made with a null buffer and a capacity of `0`, so no string data is copied
/// here; only the length written through the out-pointer is returned.
///
/// # Panics
///
/// Panics if the call does not return `napi::Status::Ok`.
///
/// # Safety
///
/// Presumably `env` and `value` must be valid handles for the current call — TODO confirm
/// the exact contract against the callers.
// NOTE(review): this diff hunk shows both revisions. The lines using `isize` are the removed
// (pre-change) form; the lines using `usize` are the added (post-change) form.
pub unsafe fn utf8_len(env: Env, value: Local) -> isize {
pub unsafe fn utf8_len(env: Env, value: Local) -> usize {
let mut len = MaybeUninit::uninit();
let status = napi::get_value_string_utf8(env, value, ptr::null_mut(), 0, len.as_mut_ptr());

// Any status other than `Ok` is treated as a broken invariant rather than a recoverable error.
assert_eq!(status, napi::Status::Ok);

len.assume_init() as isize
len.assume_init()
}

/// Copies `value` into the buffer at `out` via `napi::get_value_string_utf8` and returns the
/// count that the call reports through its out-pointer.
///
/// `len` is forwarded as the buffer size of `out`.
///
/// # Panics
///
/// Panics if the call does not return `napi::Status::Ok`.
///
/// # Safety
///
/// Presumably `out` must be valid for writes of `len` bytes and `env`/`value` must be valid
/// handles — TODO confirm the exact contract against the callers.
// NOTE(review): this diff hunk shows both revisions. The `isize` signature, the two-line
// `let status =` statement with `len as usize`, and `read.assume_init() as isize` are the
// removed (pre-change) lines; the `usize` variants are the added (post-change) lines.
pub unsafe fn data(env: Env, out: *mut u8, len: isize, value: Local) -> isize {
pub unsafe fn data(env: Env, out: *mut u8, len: usize, value: Local) -> usize {
let mut read = MaybeUninit::uninit();
let status =
napi::get_value_string_utf8(env, value, out as *mut _, len as usize, read.as_mut_ptr());
let status = napi::get_value_string_utf8(env, value, out as *mut _, len, read.as_mut_ptr());

// Any status other than `Ok` is treated as a broken invariant rather than a recoverable error.
assert_eq!(status, napi::Status::Ok);

read.assume_init() as isize
read.assume_init()
}

/// Reports the length of `value` as measured by `napi::get_value_string_utf16`.
///
/// No string data is copied: the call is made with a null buffer and zero capacity, and only
/// the count written through the out-pointer is returned.
///
/// # Panics
///
/// Panics if the call does not return `napi::Status::Ok`.
///
/// # Safety
///
/// Presumably `env` and `value` must be valid handles for the current call — TODO confirm
/// the exact contract against the callers.
pub unsafe fn utf16_len(env: Env, value: Local) -> usize {
    let mut units = MaybeUninit::<usize>::uninit();

    let status = napi::get_value_string_utf16(
        env,
        value,
        ptr::null_mut(), // no destination buffer: length query only
        0,
        units.as_mut_ptr(),
    );
    assert_eq!(status, napi::Status::Ok);

    units.assume_init()
}

/// Copies `value` into the `u16` buffer at `out` via `napi::get_value_string_utf16` and
/// returns the count that the call reports through its out-pointer.
///
/// `len` is forwarded as the buffer size of `out`.
///
/// # Panics
///
/// Panics if the call does not return `napi::Status::Ok`.
///
/// # Safety
///
/// Presumably `out` must be valid for writes of `len` `u16` elements and `env`/`value` must
/// be valid handles — TODO confirm the exact contract against the callers.
pub unsafe fn data_utf16(env: Env, out: *mut u16, len: usize, value: Local) -> usize {
    let mut written = MaybeUninit::<usize>::uninit();

    let status = napi::get_value_string_utf16(env, value, out, len, written.as_mut_ptr());
    assert_eq!(status, napi::Status::Ok);

    written.assume_init()
}

pub unsafe fn run_script(out: &mut Local, env: Env, value: Local) -> bool {
Expand Down
122 changes: 116 additions & 6 deletions crates/neon/src/types_impl/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -343,22 +343,132 @@ impl private::ValueInternal for JsString {
}

impl JsString {
// NOTE(review): this diff hunk shows both revisions of the signature. The line returning
// `isize` directly below is the removed (pre-change) form; the one returning `usize` after
// the doc comment is the added (post-change) form.
pub fn size<'a, C: Context<'a>>(&self, cx: &mut C) -> isize {
/// Returns the size in bytes of this string when converted to a Rust [`String`] with
/// [`JsString::value`].
///
/// # Example
///
/// A function that verifies the length of the passed JavaScript string. The string is assumed
/// to be `hello 🥹` here, which encodes as 10 bytes in UTF-8:
///
/// - 6 bytes for `hello ` (including the space).
/// - 4 bytes for the emoji `🥹`.
///
/// ```rust
/// # use neon::prelude::*;
/// fn string_len(mut cx: FunctionContext) -> JsResult<JsUndefined> {
///     let len = cx.argument::<JsString>(0)?.size(&mut cx);
///     // assuming the function is called with the JS string `hello 🥹`.
///     assert_eq!(10, len);
///
///     Ok(cx.undefined())
/// }
/// ```
pub fn size<'a, C: Context<'a>>(&self, cx: &mut C) -> usize {
let env = cx.env().to_raw();

// Delegates to the raw length query for the UTF-8 representation.
unsafe { sys::string::utf8_len(env, self.to_raw()) }
}

/// Returns how many `u16` code units this string occupies when encoded as UTF-16, i.e. the
/// representation produced by [`JsString::to_utf16`].
///
/// # Example
///
/// The function below checks the UTF-16 length of its argument. For the JavaScript string
/// `hello 🥹` the expected result is 8 `u16`s:
///
/// - 6 `u16`s for `hello ` (including the space).
/// - 2 `u16`s for the emoji `🥹`.
///
/// ```rust
/// # use neon::prelude::*;
/// fn string_len_utf16(mut cx: FunctionContext) -> JsResult<JsUndefined> {
///     let len = cx.argument::<JsString>(0)?.size_utf16(&mut cx);
///     // assuming the function is called with the JS string `hello 🥹`.
///     assert_eq!(8, len);
///
///     Ok(cx.undefined())
/// }
/// ```
pub fn size_utf16<'a, C: Context<'a>>(&self, cx: &mut C) -> usize {
    let raw_env = cx.env().to_raw();
    let raw_string = self.to_raw();

    // Delegates to the raw length query for the UTF-16 representation.
    unsafe { sys::string::utf16_len(raw_env, raw_string) }
}

/// Converts the JavaScript string into a Rust [`String`].
///
/// # Example
///
/// A function that expects a single JavaScript string as argument and prints it out.
///
/// ```rust
/// # use neon::prelude::*;
/// fn print_string(mut cx: FunctionContext) -> JsResult<JsUndefined> {
///     let s = cx.argument::<JsString>(0)?.value(&mut cx);
///     println!("JavaScript string content: {}", s);
///
///     Ok(cx.undefined())
/// }
/// ```
pub fn value<'a, C: Context<'a>>(&self, cx: &mut C) -> String {
let env = cx.env().to_raw();

unsafe {
// One element more than the reported length; presumably room for a terminator written by
// the underlying call — TODO confirm against the Node-API documentation.
let capacity = sys::string::utf8_len(env, self.to_raw()) + 1;
// NOTE(review): this diff hunk shows both revisions of the body. The next five lines
// (through `String::from_raw_parts`) are the removed (pre-change) code, which leaked the
// `Vec` with `mem::forget` and rebuilt a `String` from its raw parts.
let mut buffer: Vec<u8> = Vec::with_capacity(capacity as usize);
let p = buffer.as_mut_ptr();
std::mem::forget(buffer);
let len = sys::string::data(env, p, capacity, self.to_raw());
String::from_raw_parts(p, len as usize, capacity as usize)
// The remaining lines are the added (post-change) code: the `Vec` keeps ownership of the
// allocation, its length is set to the count reported by `sys::string::data`, and it is
// then converted without re-validating the bytes as UTF-8.
let mut buffer: Vec<u8> = Vec::with_capacity(capacity);
let len = sys::string::data(env, buffer.as_mut_ptr(), capacity, self.to_raw());
buffer.set_len(len);
String::from_utf8_unchecked(buffer)
}
}

/// Copies the JavaScript string into a [`Vec<u16>`] of UTF-16 code units.
///
/// # Validity of the returned data
///
/// This method copies the code units as reported by Node-API and performs no validation of
/// its own. JavaScript strings are sequences of 16-bit code units and are allowed to contain
/// unpaired surrogates, so the returned vector is **not** guaranteed to be well-formed
/// UTF-16. Validate it (or convert it lossily, e.g. with [`String::from_utf16_lossy`]) before
/// passing it to an API that requires well-formed input, and do not hand it to `unsafe`
/// `*_unchecked` constructors.
///
/// # Example
///
/// A function that expects a single JavaScript string as argument and prints it out as a raw
/// vector of `u16`s.
///
/// ```rust
/// # use neon::prelude::*;
/// fn print_string_as_utf16(mut cx: FunctionContext) -> JsResult<JsUndefined> {
///     let s = cx.argument::<JsString>(0)?.to_utf16(&mut cx);
///     println!("JavaScript string as raw UTF-16: {:?}", s);
///
///     Ok(cx.undefined())
/// }
/// ```
///
/// Again a function that expects a single JavaScript string as argument, but utilizes the
/// [`widestring`](https://crates.io/crates/widestring) crate to handle the raw [`Vec<u16>`] as
/// a typical string. The checked constructor is used so that a string containing an unpaired
/// surrogate is reported as a JavaScript error instead of causing undefined behavior.
///
/// ```rust
/// # use neon::prelude::*;
/// fn print_with_widestring(mut cx: FunctionContext) -> JsResult<JsUndefined> {
///     let s = cx.argument::<JsString>(0)?.to_utf16(&mut cx);
///     // Validate the code units; JavaScript strings may be ill-formed UTF-16.
///     let s = widestring::Utf16String::from_vec(s)
///         .or_else(|err| cx.throw_error(err.to_string()))?;
///     println!("JavaScript string as UTF-16: {}", s);
///
///     Ok(cx.undefined())
/// }
/// ```
pub fn to_utf16<'a, C: Context<'a>>(&self, cx: &mut C) -> Vec<u16> {
    let env = cx.env().to_raw();

    unsafe {
        // One element more than the reported length; presumably room for a terminator written
        // by the underlying call — TODO confirm against the Node-API documentation.
        let capacity = sys::string::utf16_len(env, self.to_raw()) + 1;
        let mut buffer: Vec<u16> = Vec::with_capacity(capacity);
        let len = sys::string::data_utf16(env, buffer.as_mut_ptr(), capacity, self.to_raw());
        // Relies on `data_utf16` having initialized the first `len` elements and on
        // `len <= capacity`; the terminator slot, if any, is not part of the result.
        buffer.set_len(len);
        buffer
    }
}

Expand Down
12 changes: 12 additions & 0 deletions test/napi/lib/strings.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,18 @@ describe("JsString", function () {
it("should return a JsString built in Rust", function () {
assert.equal(addon.return_js_string(), "hello node");
});
it("should return a raw valid UTF-16 string built in Rust", function () {
  // The addon returns the code units as a typed array; decode them back into a JS string.
  const utf16Decoder = new TextDecoder("utf-16");
  const codeUnits = addon.return_js_string_utf16();
  const decoded = utf16Decoder.decode(codeUnits);

  assert.equal(decoded, "hello 🥹");
});
describe("encoding", function () {
  // "hello " is 6 units in both encodings; the emoji takes 4 UTF-8 bytes or 2 UTF-16 units.
  it("should return the UTF-8 string length", function () {
    const byteLength = addon.return_length_utf8("hello 🥹");
    assert.equal(byteLength, 10);
  });

  it("should return the UTF-16 string length", function () {
    const unitLength = addon.return_length_utf16("hello 🥹");
    assert.equal(unitLength, 8);
  });
});
describe("run_as_script", function () {
it("should return the evaluated value", function () {
assert.equal(addon.run_string_as_script("6 * 7"), 42);
Expand Down
15 changes: 15 additions & 0 deletions test/napi/src/js/strings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,21 @@ pub fn return_js_string(mut cx: FunctionContext) -> JsResult<JsString> {
Ok(cx.string("hello node"))
}

/// Returns the UTF-16 code units of `hello 🥹` to JavaScript as a typed array of `u16`.
pub fn return_js_string_utf16(mut cx: FunctionContext) -> JsResult<JsTypedArray<u16>> {
    let code_units: Vec<u16> = "hello 🥹".encode_utf16().collect();

    JsTypedArray::from_slice(&mut cx, &code_units)
}

/// Converts the first argument to a Rust `String` and returns its length in bytes.
pub fn return_length_utf8(mut cx: FunctionContext) -> JsResult<JsNumber> {
    let js_string = cx.argument::<JsString>(0)?;
    let byte_len = js_string.value(&mut cx).len();

    Ok(cx.number(byte_len as f64))
}

/// Converts the first argument to UTF-16 and returns the number of `u16` code units.
pub fn return_length_utf16(mut cx: FunctionContext) -> JsResult<JsNumber> {
    let js_string = cx.argument::<JsString>(0)?;
    let unit_len = js_string.to_utf16(&mut cx).len();

    Ok(cx.number(unit_len as f64))
}

pub fn run_string_as_script(mut cx: FunctionContext) -> JsResult<JsValue> {
let string_script = cx.argument::<JsString>(0)?;
eval(&mut cx, string_script)
Expand Down
3 changes: 3 additions & 0 deletions test/napi/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,9 @@ fn main(mut cx: ModuleContext) -> NeonResult<()> {
cx.export_function("add1", add1)?;

cx.export_function("return_js_string", return_js_string)?;
cx.export_function("return_js_string_utf16", return_js_string_utf16)?;
cx.export_function("return_length_utf8", return_length_utf8)?;
cx.export_function("return_length_utf16", return_length_utf16)?;
cx.export_function("run_string_as_script", run_string_as_script)?;

cx.export_function("return_js_number", return_js_number)?;
Expand Down

0 comments on commit 681a71d

Please sign in to comment.