Skip to content

Commit

Permalink
Optionally trust the C API to always receive valid UTF-8 (#597)
Browse files Browse the repository at this point in the history
This adds a new feature to the C API that allows it to skip UTF-8
validation on strings passed to it. Depending on where the C API is used
from, you may already be certain that only valid UTF-8 strings are passed,
so no additional validation is necessary. On top of that, since we
always know that the strings passed from JavaScript on the web are
valid UTF-8, the validation is always skipped there.
  • Loading branch information
CryZe authored Nov 8, 2022
1 parent 7e673df commit 7a02ef2
Showing 4 changed files with 38 additions and 257 deletions.
1 change: 1 addition & 0 deletions capi/Cargo.toml
Original file line number Diff line number Diff line change
@@ -20,3 +20,4 @@ image-shrinking = ["livesplit-core/image-shrinking"]
software-rendering = ["livesplit-core/software-rendering"]
wasm-web = ["livesplit-core/wasm-web"]
auto-splitting = ["livesplit-core/auto-splitting"]
assume-str-parameters-are-utf8 = []
135 changes: 8 additions & 127 deletions capi/bind_gen/src/wasm.rs
Original file line number Diff line number Diff line change
@@ -472,69 +472,10 @@ export async function load(path?: string) {
wasm = await WebAssembly.instantiate(bytes, imports);
}
let encodeUtf8: (str: string) => Uint8Array;
if (!(global as any)["TextEncoder"]) {
encodeUtf8 = (str) => {
var utf8 = [];
for (var i = 0; i < str.length; i++) {
var charcode = str.charCodeAt(i);
if (charcode < 0x80) {
utf8.push(charcode);
} else if (charcode < 0x800) {
utf8.push(0xc0 | (charcode >> 6),
0x80 | (charcode & 0x3f));
} else if (charcode < 0xd800 || charcode >= 0xe000) {
utf8.push(0xe0 | (charcode >> 12),
0x80 | ((charcode >> 6) & 0x3f),
0x80 | (charcode & 0x3f));
} else {
i++;
charcode = 0x10000 + (((charcode & 0x3ff) << 10)
| (str.charCodeAt(i) & 0x3ff))
utf8.push(0xf0 | (charcode >> 18),
0x80 | ((charcode >> 12) & 0x3f),
0x80 | ((charcode >> 6) & 0x3f),
0x80 | (charcode & 0x3f));
}
}
return new Uint8Array(utf8);
};
} else {
const encoder = new TextEncoder("UTF-8");
encodeUtf8 = (str) => encoder.encode(str);
}
let decodeUtf8: (data: Uint8Array) => string;
if (!(global as any)["TextDecoder"]) {
decodeUtf8 = (data) => {
var str = '',
i;
for (i = 0; i < data.length; i++) {
var value = data[i];
if (value < 0x80) {
str += String.fromCharCode(value);
} else if (value > 0xBF && value < 0xE0) {
str += String.fromCharCode((value & 0x1F) << 6 | data[i + 1] & 0x3F);
i += 1;
} else if (value > 0xDF && value < 0xF0) {
str += String.fromCharCode((value & 0x0F) << 12 | (data[i + 1] & 0x3F) << 6 | data[i + 2] & 0x3F);
i += 2;
} else {
var charCode = ((value & 0x07) << 18 | (data[i + 1] & 0x3F) << 12 | (data[i + 2] & 0x3F) << 6 | data[i + 3] & 0x3F) - 0x010000;
str += String.fromCharCode(charCode >> 10 | 0xD800, charCode & 0x03FF | 0xDC00);
i += 3;
}
}
return str;
};
} else {
const decoder = new TextDecoder("UTF-8");
decodeUtf8 = (data) => decoder.decode(data);
}
const encoder = new TextEncoder("UTF-8");
const decoder = new TextDecoder("UTF-8");
const encodeUtf8: (str: string) => Uint8Array = (str) => encoder.encode(str);
const decodeUtf8: (data: Uint8Array) => string = (data) => decoder.decode(data);
interface Slice {
ptr: number,
@@ -645,70 +586,10 @@ exports.load = async function (path) {
wasm = await WebAssembly.instantiate(bytes, imports);
}
let encodeUtf8;
if (!global["TextEncoder"]) {
encodeUtf8 = (str) => {
var utf8 = [];
for (var i = 0; i < str.length; i++) {
var charcode = str.charCodeAt(i);
if (charcode < 0x80) {
utf8.push(charcode);
} else if (charcode < 0x800) {
utf8.push(0xc0 | (charcode >> 6),
0x80 | (charcode & 0x3f));
}
else if (charcode < 0xd800 || charcode >= 0xe000) {
utf8.push(0xe0 | (charcode >> 12),
0x80 | ((charcode >> 6) & 0x3f),
0x80 | (charcode & 0x3f));
} else {
i++;
charcode = 0x10000 + (((charcode & 0x3ff) << 10)
| (str.charCodeAt(i) & 0x3ff))
utf8.push(0xf0 | (charcode >> 18),
0x80 | ((charcode >> 12) & 0x3f),
0x80 | ((charcode >> 6) & 0x3f),
0x80 | (charcode & 0x3f));
}
}
return new Uint8Array(utf8);
};
} else {
const encoder = new TextEncoder("UTF-8");
encodeUtf8 = (str) => encoder.encode(str);
}
let decodeUtf8;
if (!global["TextDecoder"]) {
decodeUtf8 = (data) => {
var str = '',
i;
for (i = 0; i < data.length; i++) {
var value = data[i];
if (value < 0x80) {
str += String.fromCharCode(value);
} else if (value > 0xBF && value < 0xE0) {
str += String.fromCharCode((value & 0x1F) << 6 | data[i + 1] & 0x3F);
i += 1;
} else if (value > 0xDF && value < 0xF0) {
str += String.fromCharCode((value & 0x0F) << 12 | (data[i + 1] & 0x3F) << 6 | data[i + 2] & 0x3F);
i += 2;
} else {
var charCode = ((value & 0x07) << 18 | (data[i + 1] & 0x3F) << 12 | (data[i + 2] & 0x3F) << 6 | data[i + 3] & 0x3F) - 0x010000;
str += String.fromCharCode(charCode >> 10 | 0xD800, charCode & 0x03FF | 0xDC00);
i += 3;
}
}
return str;
};
} else {
const decoder = new TextDecoder("UTF-8");
decodeUtf8 = (data) => decoder.decode(data);
}
const encoder = new TextEncoder("UTF-8");
const decoder = new TextDecoder("UTF-8");
const encodeUtf8 = (str) => encoder.encode(str);
const decodeUtf8 = (data) => decoder.decode(data);
function allocInt8Array(src) {
const len = src.length;
135 changes: 8 additions & 127 deletions capi/bind_gen/src/wasm_bindgen.rs
Original file line number Diff line number Diff line change
@@ -392,69 +392,10 @@ declare namespace TextEncoding {
}
}
let encodeUtf8: (str: string) => Uint8Array;
if (!(global as any)["TextEncoder"]) {
encodeUtf8 = (str) => {
var utf8 = [];
for (var i = 0; i < str.length; i++) {
var charcode = str.charCodeAt(i);
if (charcode < 0x80) {
utf8.push(charcode);
} else if (charcode < 0x800) {
utf8.push(0xc0 | (charcode >> 6),
0x80 | (charcode & 0x3f));
} else if (charcode < 0xd800 || charcode >= 0xe000) {
utf8.push(0xe0 | (charcode >> 12),
0x80 | ((charcode >> 6) & 0x3f),
0x80 | (charcode & 0x3f));
} else {
i++;
charcode = 0x10000 + (((charcode & 0x3ff) << 10)
| (str.charCodeAt(i) & 0x3ff))
utf8.push(0xf0 | (charcode >> 18),
0x80 | ((charcode >> 12) & 0x3f),
0x80 | ((charcode >> 6) & 0x3f),
0x80 | (charcode & 0x3f));
}
}
return new Uint8Array(utf8);
};
} else {
const encoder = new TextEncoder("UTF-8");
encodeUtf8 = (str) => encoder.encode(str);
}
let decodeUtf8: (data: Uint8Array) => string;
if (!(global as any)["TextDecoder"]) {
decodeUtf8 = (data) => {
var str = '',
i;
for (i = 0; i < data.length; i++) {
var value = data[i];
if (value < 0x80) {
str += String.fromCharCode(value);
} else if (value > 0xBF && value < 0xE0) {
str += String.fromCharCode((value & 0x1F) << 6 | data[i + 1] & 0x3F);
i += 1;
} else if (value > 0xDF && value < 0xF0) {
str += String.fromCharCode((value & 0x0F) << 12 | (data[i + 1] & 0x3F) << 6 | data[i + 2] & 0x3F);
i += 2;
} else {
var charCode = ((value & 0x07) << 18 | (data[i + 1] & 0x3F) << 12 | (data[i + 2] & 0x3F) << 6 | data[i + 3] & 0x3F) - 0x010000;
str += String.fromCharCode(charCode >> 10 | 0xD800, charCode & 0x03FF | 0xDC00);
i += 3;
}
}
return str;
};
} else {
const decoder = new TextDecoder("UTF-8");
decodeUtf8 = (data) => decoder.decode(data);
}
const encoder = new TextEncoder("UTF-8");
const decoder = new TextDecoder("UTF-8");
const encodeUtf8: (str: string) => Uint8Array = (str) => encoder.encode(str);
const decodeUtf8: (data: Uint8Array) => string = (data) => decoder.decode(data);
interface Slice {
ptr: number,
@@ -506,70 +447,10 @@ function dealloc(slice: Slice) {
"{}",
r#"import * as wasm from "./livesplit_core_bg.wasm";
let encodeUtf8;
if (!global["TextEncoder"]) {
encodeUtf8 = (str) => {
var utf8 = [];
for (var i = 0; i < str.length; i++) {
var charcode = str.charCodeAt(i);
if (charcode < 0x80) {
utf8.push(charcode);
} else if (charcode < 0x800) {
utf8.push(0xc0 | (charcode >> 6),
0x80 | (charcode & 0x3f));
}
else if (charcode < 0xd800 || charcode >= 0xe000) {
utf8.push(0xe0 | (charcode >> 12),
0x80 | ((charcode >> 6) & 0x3f),
0x80 | (charcode & 0x3f));
} else {
i++;
charcode = 0x10000 + (((charcode & 0x3ff) << 10)
| (str.charCodeAt(i) & 0x3ff))
utf8.push(0xf0 | (charcode >> 18),
0x80 | ((charcode >> 12) & 0x3f),
0x80 | ((charcode >> 6) & 0x3f),
0x80 | (charcode & 0x3f));
}
}
return new Uint8Array(utf8);
};
} else {
const encoder = new TextEncoder("UTF-8");
encodeUtf8 = (str) => encoder.encode(str);
}
let decodeUtf8;
if (!global["TextDecoder"]) {
decodeUtf8 = (data) => {
var str = '',
i;
for (i = 0; i < data.length; i++) {
var value = data[i];
if (value < 0x80) {
str += String.fromCharCode(value);
} else if (value > 0xBF && value < 0xE0) {
str += String.fromCharCode((value & 0x1F) << 6 | data[i + 1] & 0x3F);
i += 1;
} else if (value > 0xDF && value < 0xF0) {
str += String.fromCharCode((value & 0x0F) << 12 | (data[i + 1] & 0x3F) << 6 | data[i + 2] & 0x3F);
i += 2;
} else {
var charCode = ((value & 0x07) << 18 | (data[i + 1] & 0x3F) << 12 | (data[i + 2] & 0x3F) << 6 | data[i + 3] & 0x3F) - 0x010000;
str += String.fromCharCode(charCode >> 10 | 0xD800, charCode & 0x03FF | 0xDC00);
i += 3;
}
}
return str;
};
} else {
const decoder = new TextDecoder("UTF-8");
decodeUtf8 = (data) => decoder.decode(data);
}
const encoder = new TextEncoder("UTF-8");
const decoder = new TextDecoder("UTF-8");
const encodeUtf8 = (str) => encoder.encode(str);
const decodeUtf8 = (data) => decoder.decode(data);
function allocUint8Array(src) {
const len = src.length;
24 changes: 21 additions & 3 deletions capi/src/lib.rs
Original file line number Diff line number Diff line change
@@ -143,7 +143,25 @@ unsafe fn str(s: *const c_char) -> &'static str {
if s.is_null() {
""
} else {
CStr::from_ptr(s as _).to_str().unwrap()
let bytes = CStr::from_ptr(s as _).to_bytes();

// Depending on where the C API is used, you may be able to fully trust
// that the caller always passes valid UTF-8. On the web we use the
// `TextEncoder` which always produces valid UTF-8.
#[cfg(any(
feature = "assume-str-parameters-are-utf8",
all(target_family = "wasm", feature = "wasm-web"),
))]
{
std::str::from_utf8_unchecked(bytes)
}
#[cfg(not(any(
feature = "assume-str-parameters-are-utf8",
all(target_family = "wasm", feature = "wasm-web"),
)))]
{
simdutf8::basic::from_utf8(bytes).unwrap()
}
}
}

@@ -166,7 +184,7 @@ unsafe fn get_file(_: i64) -> ManuallyDrop<File> {
}

/// Allocate memory.
#[cfg(all(target_arch = "wasm32", not(target_os = "wasi")))]
#[cfg(all(target_family = "wasm", not(target_os = "wasi")))]
#[no_mangle]
pub extern "C" fn alloc(size: usize) -> *mut u8 {
let mut buf = Vec::with_capacity(size);
@@ -176,7 +194,7 @@ pub extern "C" fn alloc(size: usize) -> *mut u8 {
}

/// Deallocate memory.
#[cfg(all(target_arch = "wasm32", not(target_os = "wasi")))]
#[cfg(all(target_family = "wasm", not(target_os = "wasi")))]
#[no_mangle]
pub extern "C" fn dealloc(ptr: *mut u8, cap: usize) {
unsafe {

0 comments on commit 7a02ef2

Please sign in to comment.