Skip to content

Commit

Permalink
perf(node:util): fast path for extractedSplitNewLines (oven-sh#15838)
Browse files Browse the repository at this point in the history
Co-authored-by: Don Isaac <don@bun.sh>
Co-authored-by: DonIsaac <DonIsaac@users.noreply.github.com>
  • Loading branch information
3 people authored and probably-neb committed Jan 7, 2025
1 parent b944c7d commit 5d66e06
Show file tree
Hide file tree
Showing 3 changed files with 117 additions and 5 deletions.
90 changes: 90 additions & 0 deletions src/bun.js/node/node_util_binding.zig
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
const std = @import("std");
const bun = @import("root").bun;
const Allocator = std.mem.Allocator;
const Environment = bun.Environment;
const JSC = bun.JSC;
const string = bun.string;
Expand Down Expand Up @@ -105,3 +106,92 @@ pub fn internalErrorName(globalThis: *JSC.JSGlobalObject, callframe: *JSC.CallFr
var fmtstring = bun.String.createFormat("Unknown system error {d}", .{err_int}) catch bun.outOfMemory();
return fmtstring.transferToJS(globalThis);
}

/// `extractedSplitNewLines` for ASCII/Latin1 strings. Panics if passed a non-string.
/// Returns `undefined` if param is utf8 or utf16 and not fully ascii.
///
/// ```js
/// // util.js
/// const extractedNewLineRe = new RegExp("(?<=\\n)");
/// extractedSplitNewLines = value => RegExpPrototypeSymbolSplit(extractedNewLineRe, value);
/// ```
pub fn extractedSplitNewLinesFastPathStringsOnly(globalThis: *JSC.JSGlobalObject, callframe: *JSC.CallFrame) bun.JSError!JSC.JSValue {
bun.assert(callframe.argumentsCount() == 1);
const value = callframe.argument(0);
bun.assert(value.isString());

const str = try value.toBunString2(globalThis);
defer str.deref();

return switch (str.encoding()) {
inline .utf16, .latin1 => |encoding| split(encoding, globalThis, bun.default_allocator, &str),
.utf8 => if (bun.strings.isAllASCII(str.byteSlice()))
return split(.utf8, globalThis, bun.default_allocator, &str)
else
return JSC.JSValue.jsUndefined(),
};
}

fn split(
comptime encoding: bun.strings.EncodingNonAscii,
globalThis: *JSC.JSGlobalObject,
allocator: Allocator,
str: *const bun.String,
) bun.JSError!JSC.JSValue {
std.debug.print("{any}\n", .{encoding});
var fallback = std.heap.stackFallback(1024, allocator);
const alloc = fallback.get();
const Char = switch (encoding) {
.utf8, .latin1 => u8,
.utf16 => u16,
};

var lines: std.ArrayListUnmanaged(bun.String) = .{};
defer {
for (lines.items) |out| {
out.deref();
}
lines.deinit(alloc);
}

const buffer: []const Char = if (encoding == .utf16)
str.utf16()
else
str.byteSlice();
var it: SplitNewlineIterator(Char) = .{ .buffer = buffer, .index = 0 };
while (it.next()) |line| {
const encoded_line = switch (encoding) {
inline .utf8 => bun.String.fromUTF8(line),
inline .latin1 => bun.String.createLatin1(line),
inline .utf16 => bun.String.fromUTF16(line),
};
errdefer encoded_line.deref();
try lines.append(alloc, encoded_line);
}

return bun.String.toJSArray(globalThis, lines.items);
}

pub fn SplitNewlineIterator(comptime T: type) type {
return struct {
buffer: []const T,
index: ?usize,

const Self = @This();

/// Returns a slice of the next field, or null if splitting is complete.
pub fn next(self: *Self) ?[]const T {
const start = self.index orelse return null;

if (std.mem.indexOfScalarPos(T, self.buffer, start, '\n')) |delim_start| {
const end = delim_start + 1;
const slice = self.buffer[start..end];
self.index = end;
return slice;
} else {
self.index = null;
return self.buffer[start..];
}
}
};
}
28 changes: 25 additions & 3 deletions src/js/internal/util/inspect.js
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,21 @@ const kRejected = Symbol("kRejected"); // state ID 2
const ALL_PROPERTIES = 0;
const ONLY_ENUMERABLE = 2;

/**
* Fast path for {@link extractedSplitNewLines} for ASCII/Latin1 strings.
* @returns `value` split on newlines (newline included at end), or `undefined`
* if non-ascii UTF8/UTF16.
*
* Passing this a non-string will cause a panic.
*
* @type {(value: string) => string[] | undefined}
*/
const extractedSplitNewLinesFastPathStringsOnly = $newZigFunction(
"node_util_binding.zig",
"extractedSplitNewLinesFastPathStringsOnly",
1,
);

const isAsyncFunction = v =>
typeof v === "function" && StringPrototypeStartsWith(FunctionPrototypeToString(v), "async");
const isGeneratorFunction = v =>
Expand Down Expand Up @@ -397,7 +412,7 @@ let strEscapeSequencesRegExp,
strEscapeSequencesReplacer,
strEscapeSequencesRegExpSingle,
strEscapeSequencesReplacerSingle,
extractedSplitNewLines;
extractedSplitNewLinesSlow;
try {
// Change from regex literals to RegExp constructors to avoid unrecoverable
// syntax error at load time.
Expand All @@ -416,7 +431,7 @@ try {
"g",
);
const extractedNewLineRe = new RegExp("(?<=\\n)");
extractedSplitNewLines = value => RegExpPrototypeSymbolSplit(extractedNewLineRe, value);
extractedSplitNewLinesSlow = value => RegExpPrototypeSymbolSplit(extractedNewLineRe, value);
// CI doesn't run in an elderly runtime
} catch {
// These are from a previous version of node,
Expand All @@ -426,7 +441,7 @@ try {
strEscapeSequencesReplacer = /[\x00-\x1f\x27\x5c\x7f-\x9f]/g;
strEscapeSequencesRegExpSingle = /[\x00-\x1f\x5c\x7f-\x9f]/;
strEscapeSequencesReplacerSingle = /[\x00-\x1f\x5c\x7f-\x9f]/g;
extractedSplitNewLines = value => {
extractedSplitNewLinesSlow = value => {
const lines = RegExpPrototypeSymbolSplit(/\n/, value);
const last = ArrayPrototypePop(lines);
const nlLines = ArrayPrototypeMap(lines, line => line + "\n");
Expand All @@ -437,6 +452,13 @@ try {
};
}

const extractedSplitNewLines = value => {
if (typeof value === "string") {
return extractedSplitNewLinesFastPathStringsOnly(value) || extractedSplitNewLinesSlow(value);
}
return extractedSplitNewLinesSlow(value);
}

const keyStrRegExp = /^[a-zA-Z_][a-zA-Z_0-9]*$/;
const numberRegExp = /^(0|[1-9][0-9]*)$/;

Expand Down
4 changes: 2 additions & 2 deletions test/js/web/fetch/fetch.test.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { AnyFunction, serve, ServeOptions, Server, sleep, TCPSocketListener } from "bun";
import { afterAll, afterEach, beforeAll, beforeEach, describe, expect, it } from "bun:test";
import { chmodSync, readFileSync, rmSync, writeFileSync } from "fs";
import { bunEnv, bunExe, gc, isWindows, tls, tmpdirSync, withoutAggressiveGC } from "harness";
import { bunEnv, bunExe, gc, isBroken, isWindows, tls, tmpdirSync, withoutAggressiveGC } from "harness";
import { mkfifo } from "mkfifo";
import net from "net";
import { join } from "path";
Expand Down Expand Up @@ -2010,7 +2010,7 @@ describe("http/1.1 response body length", () => {
expect(response.arrayBuffer()).resolves.toHaveLength(0);
});

it("should ignore body on HEAD", async () => {
it.todoIf(isBroken)("should ignore body on HEAD", async () => {
const response = await fetch(`http://${getHost()}/text`, { method: "HEAD" });
expect(response.status).toBe(200);
expect(response.arrayBuffer()).resolves.toHaveLength(0);
Expand Down

0 comments on commit 5d66e06

Please sign in to comment.