From 68f6d0a0f37ef166adb75e43cfaee61f90ff8242 Mon Sep 17 00:00:00 2001 From: Jarred Sumner <709451+Jarred-Sumner@users.noreply.github.com> Date: Thu, 3 Aug 2023 19:21:30 -0700 Subject: [PATCH] Fix thread safety issue in async fs functions file paths --- src/ArenaAllocator.zig | 11 +++++------ src/bun.js/bindings/BunString.cpp | 11 +++++++++++ src/bun.js/node/node_fs.zig | 9 ++++++--- src/bun.js/node/types.zig | 12 +++++++++++ src/string.zig | 24 ++++++++++++++++++++++ test/js/node/fs/fs.test.ts | 33 +++++++++++++++++++++++++++++++ 6 files changed, 91 insertions(+), 9 deletions(-) diff --git a/src/ArenaAllocator.zig b/src/ArenaAllocator.zig index 2ccb08d19aea5..c2d8718fd4d3a 100644 --- a/src/ArenaAllocator.zig +++ b/src/ArenaAllocator.zig @@ -1,4 +1,3 @@ -/// TODO: delete this once we've upgraded Zig and https://github.com/ziglang/zig/pull/15985 is merged. const std = @import("std"); const assert = std.debug.assert; const mem = std.mem; @@ -152,7 +151,7 @@ pub const ArenaAllocator = struct { return false; }; self.child_allocator.rawFree(first_alloc_buf, align_bits, @returnAddress()); - const node = @as(*BufNode, @ptrCast(@alignCast(new_ptr))); + const node: *BufNode = @ptrCast(@alignCast(new_ptr)); node.* = .{ .data = total_size }; self.state.buffer_list.first = node; } @@ -167,7 +166,7 @@ pub const ArenaAllocator = struct { const log2_align = comptime std.math.log2_int(usize, @alignOf(BufNode)); const ptr = self.child_allocator.rawAlloc(len, log2_align, @returnAddress()) orelse return null; - const buf_node = @as(*BufNode, @ptrCast(@alignCast(ptr))); + const buf_node: *BufNode = @ptrCast(@alignCast(ptr)); buf_node.* = .{ .data = len }; self.state.buffer_list.prepend(buf_node); self.state.end_index = 0; @@ -175,7 +174,7 @@ pub const ArenaAllocator = struct { } fn alloc(ctx: *anyopaque, n: usize, log2_ptr_align: u8, ra: usize) ?[*]u8 { - const self = @as(*ArenaAllocator, @ptrCast(@alignCast(ctx))); + const self: *ArenaAllocator = @ptrCast(@alignCast(ctx)); _ = ra; const ptr_align = @as(usize, 1) << @as(Allocator.Log2Align, @intCast(log2_ptr_align)); @@ -209,7 +208,7 @@ pub const ArenaAllocator = struct { } fn resize(ctx: *anyopaque, buf: []u8, log2_buf_align: u8, new_len: usize, ret_addr: usize) bool { - const self = @as(*ArenaAllocator, @ptrCast(@alignCast(ctx))); + const self: *ArenaAllocator = @ptrCast(@alignCast(ctx)); _ = log2_buf_align; _ = ret_addr; @@ -236,7 +235,7 @@ pub const ArenaAllocator = struct { _ = log2_buf_align; _ = ret_addr; - const self = @as(*ArenaAllocator, @ptrCast(@alignCast(ctx))); + const self: *ArenaAllocator = @ptrCast(@alignCast(ctx)); const cur_node = self.state.buffer_list.first orelse return; const cur_buf = @as([*]u8, @ptrCast(cur_node))[@sizeOf(BufNode)..cur_node.data]; diff --git a/src/bun.js/bindings/BunString.cpp b/src/bun.js/bindings/BunString.cpp index 630d9be76684a..09b545cba0880 100644 --- a/src/bun.js/bindings/BunString.cpp +++ b/src/bun.js/bindings/BunString.cpp @@ -96,6 +96,17 @@ BunString fromJS(JSC::JSGlobalObject* globalObject, JSValue value) return { BunStringTag::WTFStringImpl, { .wtf = wtfString.impl() } }; } +extern "C" void BunString__toThreadSafe(BunString* str) +{ + if (str->tag == BunStringTag::WTFStringImpl) { + auto impl = str->impl.wtf->isolatedCopy(); + if (impl.ptr() != str->impl.wtf) { + impl->ref(); + str->impl.wtf = &impl.leakRef(); + } + } +} + BunString toString(JSC::JSGlobalObject* globalObject, JSValue value) { return fromJS(globalObject, value); diff --git a/src/bun.js/node/node_fs.zig b/src/bun.js/node/node_fs.zig index 2068cca3f3a85..826fde635a39d 100644 --- a/src/bun.js/node/node_fs.zig +++ b/src/bun.js/node/node_fs.zig @@ -74,6 +74,7 @@ pub const AsyncReaddirTask = struct { .arena = arena, }; task.ref.ref(vm); + task.args.path.toThreadSafe(); JSC.WorkPool.schedule(&task.task); @@ -157,6 +158,7 @@ pub const AsyncStatTask = struct { .arena = arena, }; task.ref.ref(vm); + task.args.path.toThreadSafe(); JSC.WorkPool.schedule(&task.task); @@ -240,6 +242,7 @@ pub const AsyncReadFileTask = struct { .arena = arena, }; task.ref.ref(vm); + task.args.path.toThreadSafe(); JSC.WorkPool.schedule(&task.task); @@ -904,7 +907,7 @@ pub const Arguments = struct { } pub fn fromJS(ctx: JSC.C.JSContextRef, arguments: *ArgumentsSlice, exception: JSC.C.ExceptionRef) ?Stat { - const path = PathLike.fromJS(ctx, arguments, exception) orelse { + const path = PathLike.fromJSWithAllocator(ctx, arguments, bun.default_allocator, exception) orelse { if (exception.* == null) { JSC.throwInvalidArguments( "path must be a string or TypedArray", @@ -1405,7 +1408,7 @@ pub const Arguments = struct { } pub fn fromJS(ctx: JSC.C.JSContextRef, arguments: *ArgumentsSlice, exception: JSC.C.ExceptionRef) ?Readdir { - const path = PathLike.fromJS(ctx, arguments, exception) orelse { + const path = PathLike.fromJSWithAllocator(ctx, arguments, bun.default_allocator, exception) orelse { if (exception.* == null) { JSC.throwInvalidArguments( "path must be a string or TypedArray", @@ -1972,7 +1975,7 @@ pub const Arguments = struct { } pub fn fromJS(ctx: JSC.C.JSContextRef, arguments: *ArgumentsSlice, exception: JSC.C.ExceptionRef) ?ReadFile { - const path = PathOrFileDescriptor.fromJS(ctx, arguments, arguments.arena.allocator(), exception) orelse { + const path = PathOrFileDescriptor.fromJS(ctx, arguments, bun.default_allocator, exception) orelse { if (exception.* == null) { JSC.throwInvalidArguments( "path must be a string or a file descriptor", diff --git a/src/bun.js/node/types.zig b/src/bun.js/node/types.zig index 81073463a9c2f..3ba3db5b94b0c 100644 --- a/src/bun.js/node/types.zig +++ b/src/bun.js/node/types.zig @@ -629,6 +629,12 @@ pub const PathLike = union(Tag) { } } + pub fn toThreadSafe(this: *PathLike) void { + if (this.* == .slice_with_underlying_string) { + this.slice_with_underlying_string.toThreadSafe(); + } + } + pub fn deinitAndUnprotect(this: *const PathLike) void { switch (this.*) { .slice_with_underlying_string => |val| { @@ -1050,6 +1056,12 @@ pub const PathOrFileDescriptor = union(Tag) { } } + pub fn toThreadSafe(this: *PathOrFileDescriptor) void { + if (this.* == .path) { + this.path.toThreadSafe(); + } + } + pub fn deinitAndUnprotect(this: PathOrFileDescriptor) void { if (this == .path) { this.path.deinitAndUnprotect(); diff --git a/src/string.zig b/src/string.zig index 4fb6e001a0bb1..9002234cf37ca 100644 --- a/src/string.zig +++ b/src/string.zig @@ -829,6 +829,13 @@ pub const String = extern struct { return bun.strings.eqlLong(this.byteSlice(), value, true); } + extern fn BunString__toThreadSafe(this: *String) void; + pub fn toThreadSafe(this: *String) void { + if (this.tag == .WTFStringImpl) { + BunString__toThreadSafe(this); + } + } + pub fn eql(this: String, other: String) bool { return this.toZigString().eql(other.toZigString()); } @@ -838,6 +845,23 @@ pub const SliceWithUnderlyingString = struct { utf8: ZigString.Slice, underlying: String, + pub fn toThreadSafe(this: *SliceWithUnderlyingString) void { + std.debug.assert(this.underlying.tag == .WTFStringImpl); + + var orig = this.underlying.value.WTFStringImpl; + this.underlying.toThreadSafe(); + if (this.underlying.value.WTFStringImpl != orig) { + orig.deref(); + + if (this.utf8.allocator.get()) |allocator| { + if (String.isWTFAllocator(allocator)) { + this.utf8.deinit(); + this.utf8 = this.underlying.toUTF8(bun.default_allocator); + } + } + } + } + pub fn deinit(this: SliceWithUnderlyingString) void { this.utf8.deinit(); this.underlying.deref(); diff --git a/test/js/node/fs/fs.test.ts b/test/js/node/fs/fs.test.ts index 17c010fb63abb..86e64b7132740 100644 --- a/test/js/node/fs/fs.test.ts +++ b/test/js/node/fs/fs.test.ts @@ -229,6 +229,39 @@ it("promises.readFile", async () => { } }); +it("promises.readFile - UTF16 file path", async () => { + const dest = `/tmp/superduperduperdupduperdupersuperduperduperduperduperduperdupersuperduperduperduperduperduperdupersuperduperduperdupe-Bun-👍-${Date.now()}-${ + (Math.random() * 1024000) | 0 + }.txt`; + await fs.promises.copyFile(import.meta.path, dest); + const expected = readFileSync(import.meta.path, "utf-8"); + Bun.gc(true); + for (let i = 0; i < 100; i++) { + expect(await fs.promises.readFile(dest, "utf-8")).toEqual(expected); + } + Bun.gc(true); +}); + +it("promises.readFile - atomized file path", async () => { + const destInput = `/tmp/superduperduperdupduperdupersuperduperduperduperduperduperdupersuperduperduperduperduperduperdupersuperduperduperdupe-Bun-👍-${Date.now()}-${ + (Math.random() * 1024000) | 0 + }.txt`; + // Force it to become an atomized string by making it a property access + const dest: string = ( + { + [destInput]: destInput, + boop: 123, + } as const + )[destInput] as string; + await fs.promises.copyFile(import.meta.path, dest); + const expected = readFileSync(import.meta.path, "utf-8"); + Bun.gc(true); + for (let i = 0; i < 100; i++) { + expect(await fs.promises.readFile(dest, "utf-8")).toEqual(expected); + } + Bun.gc(true); +}); + it("promises.readFile with buffer as file path", async () => { for (let i = 0; i < 10; i++) expect(await fs.promises.readFile(Buffer.from(import.meta.path), "utf-8")).toEqual(