Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf(node:util): fast path for extractedSplitNewLines #15838

Merged
merged 10 commits into from
Dec 20, 2024
67 changes: 67 additions & 0 deletions src/bun.js/node/node_util_binding.zig
Original file line number Diff line number Diff line change
Expand Up @@ -105,3 +105,70 @@ pub fn internalErrorName(globalThis: *JSC.JSGlobalObject, callframe: *JSC.CallFr
var fmtstring = bun.String.createFormat("Unknown system error {d}", .{err_int}) catch bun.outOfMemory();
return fmtstring.transferToJS(globalThis);
}

/// `extractedSplitNewLines` fast path for Latin-1 and all-ASCII UTF-8 strings.
/// Asserts that it is called with exactly one argument and that it is a string.
///
/// Splits the string after every `\n` (the newline stays at the end of its
/// line) and returns the pieces as a JS array. Returns `undefined` when the
/// string is UTF-16, or is not 8-bit and contains non-ASCII bytes, so that the
/// caller can fall back to the regex implementation:
///
/// ```js
/// // util.js
/// const extractedNewLineRe = new RegExp("(?<=\\n)");
/// extractedSplitNewLines = value => RegExpPrototypeSymbolSplit(extractedNewLineRe, value);
/// ```
pub fn extractedSplitNewLinesFastPathStringsOnly(globalThis: *JSC.JSGlobalObject, callframe: *JSC.CallFrame) bun.JSError!JSC.JSValue {
    var fallback = std.heap.stackFallback(1024, bun.default_allocator);
    const allocator = fallback.get();
    bun.assert(callframe.argumentsCount() == 1);
    const value = callframe.argument(0);
    bun.assert(value.isString());

    const str = try value.toBunString2(globalThis);
    // NOTE(review): assumes `toBunString2` hands back an owned reference that
    // must be released by the caller — confirm. The line slices built below
    // borrow from `str`; this defer runs only after the JS array is created.
    defer str.deref();

    const encoding = str.encoding();

    // The scan below works on single bytes. UTF-16 code units are two bytes
    // wide, so slicing at `newline_index + 1` would cut a code unit in half,
    // and a 0x0A byte may belong to an unrelated code unit. Let the caller
    // take the slow path instead of producing corrupted lines.
    if (encoding == .utf16) return JSC.JSValue.jsUndefined();

    if (!str.is8Bit() and !bun.strings.isAllASCII(str.byteSlice())) {
        return JSC.JSValue.jsUndefined();
    }

    var lines: std.ArrayListUnmanaged(bun.String) = .{};
    defer {
        for (lines.items) |out| {
            out.deref();
        }
        lines.deinit(allocator);
    }

    const bytes = str.byteSlice();
    var start: usize = 0;

    // Each iteration emits one line: up to and including the next newline, or
    // the unterminated remainder of the input when no newline is left.
    while (start < bytes.len) {
        const end = if (std.mem.indexOfScalarPos(u8, bytes, start, '\n')) |newline_index|
            newline_index + 1
        else
            bytes.len;
        const buf = bytes[start..end];
        const line = switch (encoding) {
            // NOTE(review): confirm `fromBytes` keeps Latin-1 bytes >= 0x80
            // as Latin-1 rather than reinterpreting them as UTF-8.
            .latin1 => bun.String.fromBytes(buf),
            .utf8 => bun.String.fromUTF8(buf),
            // Rejected above before any slicing happens.
            .utf16 => unreachable,
        };
        try lines.append(allocator, line);
        start = end;
    }

    return bun.String.toJSArray(globalThis, lines.items);
}
28 changes: 25 additions & 3 deletions src/js/internal/util/inspect.js
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,21 @@ const kRejected = Symbol("kRejected"); // state ID 2
const ALL_PROPERTIES = 0;
const ONLY_ENUMERABLE = 2;

/**
 * Native fast path for {@link extractedSplitNewLines}, intended for
 * ASCII/Latin1 strings.
 *
 * Bound to the `extractedSplitNewLinesFastPathStringsOnly` function in
 * `node_util_binding.zig`.
 *
 * @returns `value` split on newlines (newline included at end of each line),
 * or `undefined` when the native side declines to handle the string (e.g.
 * non-ascii UTF8/UTF16). Callers must fall back to the regex-based
 * implementation when `undefined` is returned.
 *
 * Passing this a non-string will cause a panic.
 *
 * @type {(value: string) => string[] | undefined}
 */
const extractedSplitNewLinesFastPathStringsOnly = $newZigFunction(
"node_util_binding.zig",
"extractedSplitNewLinesFastPathStringsOnly",
1,
);

/**
 * Reports whether `v` is a function whose source text starts with "async".
 */
const isAsyncFunction = v => {
  if (typeof v !== "function") {
    return false;
  }
  const sourceText = FunctionPrototypeToString(v);
  return StringPrototypeStartsWith(sourceText, "async");
};
const isGeneratorFunction = v =>
Expand Down Expand Up @@ -397,7 +412,7 @@ let strEscapeSequencesRegExp,
strEscapeSequencesReplacer,
strEscapeSequencesRegExpSingle,
strEscapeSequencesReplacerSingle,
extractedSplitNewLines;
extractedSplitNewLinesSlow;
try {
// Change from regex literals to RegExp constructors to avoid unrecoverable
// syntax error at load time.
Expand All @@ -416,7 +431,7 @@ try {
"g",
);
const extractedNewLineRe = new RegExp("(?<=\\n)");
extractedSplitNewLines = value => RegExpPrototypeSymbolSplit(extractedNewLineRe, value);
extractedSplitNewLinesSlow = value => RegExpPrototypeSymbolSplit(extractedNewLineRe, value);
// CI doesn't run in an elderly runtime
} catch {
// These are from a previous version of node,
Expand All @@ -426,7 +441,7 @@ try {
strEscapeSequencesReplacer = /[\x00-\x1f\x27\x5c\x7f-\x9f]/g;
strEscapeSequencesRegExpSingle = /[\x00-\x1f\x5c\x7f-\x9f]/;
strEscapeSequencesReplacerSingle = /[\x00-\x1f\x5c\x7f-\x9f]/g;
extractedSplitNewLines = value => {
extractedSplitNewLinesSlow = value => {
const lines = RegExpPrototypeSymbolSplit(/\n/, value);
const last = ArrayPrototypePop(lines);
const nlLines = ArrayPrototypeMap(lines, line => line + "\n");
Expand All @@ -437,6 +452,13 @@ try {
};
}

/**
 * Splits `value` into lines, keeping each newline at the end of its line.
 *
 * Strings are first offered to the native fast path; when that returns a
 * falsy value (it yields `undefined` for inputs it does not handle), or when
 * `value` is not a string, the regex-based slow path is used instead.
 */
const extractedSplitNewLines = value => {
  if (typeof value !== "string") {
    return extractedSplitNewLinesSlow(value);
  }
  const fastResult = extractedSplitNewLinesFastPathStringsOnly(value);
  return fastResult || extractedSplitNewLinesSlow(value);
};

const keyStrRegExp = /^[a-zA-Z_][a-zA-Z_0-9]*$/;
const numberRegExp = /^(0|[1-9][0-9]*)$/;

Expand Down