From a5144d19b7a3585122dafbe05f7a1ce21f61a992 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Sat, 30 Sep 2023 23:00:39 -0700 Subject: [PATCH 1/6] std.tar: support symlinks closes #16678 --- lib/std/tar.zig | 56 ++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 53 insertions(+), 3 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index fc0d71ce02ee..d927cce30380 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -3,8 +3,13 @@ pub const Options = struct { strip_components: u32 = 0, /// How to handle the "mode" property of files from within the tar file. mode_mode: ModeMode = .executable_bit_only, + /// Provide this to receive detailed error messages. + /// When this is provided, some errors which would otherwise be returned immediately + /// will instead be added to this structure. The API user must check the errors + /// in diagnostics to know whether the operation succeeded or failed. + diagnostics: ?*Diagnostics = null, - const ModeMode = enum { + pub const ModeMode = enum { /// The mode from the tar file is completely ignored. Files are created /// with the default mode when creating files. ignore, @@ -13,6 +18,32 @@ pub const Options = struct { /// Other bits of the mode are left as the default when creating files. 
executable_bit_only, }; + + pub const Diagnostics = struct { + allocator: std.mem.Allocator, + errors: std.ArrayListUnmanaged(Error) = .{}, + + pub const Error = union(enum) { + unable_to_create_sym_link: struct { + code: anyerror, + file_name: []const u8, + link_name: []const u8, + }, + }; + + pub fn deinit(d: *Diagnostics) void { + for (d.errors.items) |item| { + switch (item) { + .unable_to_create_sym_link => |info| { + d.allocator.free(info.file_name); + d.allocator.free(info.link_name); + }, + } + } + d.errors.deinit(d.allocator); + d.* = undefined; + } + }; }; pub const Header = struct { @@ -65,6 +96,10 @@ pub const Header = struct { return str(header, 0, 0 + 100); } + pub fn linkName(header: Header) []const u8 { + return str(header, 157, 157 + 100); + } + pub fn prefix(header: Header) []const u8 { return str(header, 345, 345 + 155); } @@ -148,7 +183,7 @@ pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !voi const header: Header = .{ .bytes = chunk[0..512] }; const file_size = try header.fileSize(); const rounded_file_size = std.mem.alignForward(u64, file_size, 512); - const pad_len = @as(usize, @intCast(rounded_file_size - file_size)); + const pad_len: usize = @intCast(rounded_file_size - file_size); const unstripped_file_name = if (file_name_override_len > 0) file_name_buffer[0..file_name_override_len] else @@ -228,7 +263,22 @@ pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !voi buffer.skip(reader, @intCast(rounded_file_size)) catch return error.TarHeadersTooBig; }, .hard_link => return error.TarUnsupportedFileType, - .symbolic_link => return error.TarUnsupportedFileType, + .symbolic_link => { + const file_name = try stripComponents(unstripped_file_name, options.strip_components); + const link_name = header.linkName(); + + dir.symLink(link_name, file_name, .{}) catch |err| { + if (options.diagnostics) |d| { + try d.errors.append(d.allocator, .{ .unable_to_create_sym_link = .{ + .code = err, + 
.file_name = try d.allocator.dupe(u8, file_name), + .link_name = try d.allocator.dupe(u8, link_name), + } }); + } else { + return error.UnableToCreateSymLink; + } + }; + }, else => return error.TarUnsupportedFileType, } } From a4352982b3ad4370543e0d4486347b58a958ed6b Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Sun, 1 Oct 2023 18:15:08 -0700 Subject: [PATCH 2/6] compiler: extract package hashing logic to separate file There are no functional changes in this commit. --- CMakeLists.txt | 1 + src/Package.zig | 128 +----------------------------------------- src/Package/hash.zig | 131 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 133 insertions(+), 127 deletions(-) create mode 100644 src/Package/hash.zig diff --git a/CMakeLists.txt b/CMakeLists.txt index ef194d3d03d1..eb53ac33dfc3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -527,6 +527,7 @@ set(ZIG_STAGE2_SOURCES "${CMAKE_SOURCE_DIR}/src/Liveness.zig" "${CMAKE_SOURCE_DIR}/src/Module.zig" "${CMAKE_SOURCE_DIR}/src/Package.zig" + "${CMAKE_SOURCE_DIR}/src/Package/hash.zig" "${CMAKE_SOURCE_DIR}/src/RangeSet.zig" "${CMAKE_SOURCE_DIR}/src/Sema.zig" "${CMAKE_SOURCE_DIR}/src/TypedValue.zig" diff --git a/src/Package.zig b/src/Package.zig index 5802dde911f6..3c4148a5c607 100644 --- a/src/Package.zig +++ b/src/Package.zig @@ -10,7 +10,6 @@ const assert = std.debug.assert; const log = std.log.scoped(.package); const main = @import("main.zig"); const ThreadPool = std.Thread.Pool; -const WaitGroup = std.Thread.WaitGroup; const Compilation = @import("Compilation.zig"); const Module = @import("Module.zig"); @@ -18,6 +17,7 @@ const Cache = std.Build.Cache; const build_options = @import("build_options"); const Manifest = @import("Manifest.zig"); const git = @import("git.zig"); +const computePackageHash = @import("Package/hash.zig").compute; pub const Table = std.StringHashMapUnmanaged(*Package); @@ -1147,81 +1147,6 @@ fn unpackGitPack( try out_dir.deleteTree(".git"); } -const HashedFile = struct { - fs_path: 
[]const u8, - normalized_path: []const u8, - hash: [Manifest.Hash.digest_length]u8, - failure: Error!void, - - const Error = fs.File.OpenError || fs.File.ReadError || fs.File.StatError; - - fn lessThan(context: void, lhs: *const HashedFile, rhs: *const HashedFile) bool { - _ = context; - return mem.lessThan(u8, lhs.normalized_path, rhs.normalized_path); - } -}; - -fn computePackageHash( - thread_pool: *ThreadPool, - pkg_dir: fs.IterableDir, -) ![Manifest.Hash.digest_length]u8 { - const gpa = thread_pool.allocator; - - // We'll use an arena allocator for the path name strings since they all - // need to be in memory for sorting. - var arena_instance = std.heap.ArenaAllocator.init(gpa); - defer arena_instance.deinit(); - const arena = arena_instance.allocator(); - - // Collect all files, recursively, then sort. - var all_files = std.ArrayList(*HashedFile).init(gpa); - defer all_files.deinit(); - - var walker = try pkg_dir.walk(gpa); - defer walker.deinit(); - - { - // The final hash will be a hash of each file hashed independently. This - // allows hashing in parallel. 
- var wait_group: WaitGroup = .{}; - defer wait_group.wait(); - - while (try walker.next()) |entry| { - switch (entry.kind) { - .directory => continue, - .file => {}, - else => return error.IllegalFileTypeInPackage, - } - const hashed_file = try arena.create(HashedFile); - const fs_path = try arena.dupe(u8, entry.path); - hashed_file.* = .{ - .fs_path = fs_path, - .normalized_path = try normalizePath(arena, fs_path), - .hash = undefined, // to be populated by the worker - .failure = undefined, // to be populated by the worker - }; - wait_group.start(); - try thread_pool.spawn(workerHashFile, .{ pkg_dir.dir, hashed_file, &wait_group }); - - try all_files.append(hashed_file); - } - } - - mem.sort(*HashedFile, all_files.items, {}, HashedFile.lessThan); - - var hasher = Manifest.Hash.init(.{}); - var any_failures = false; - for (all_files.items) |hashed_file| { - hashed_file.failure catch |err| { - any_failures = true; - std.log.err("unable to hash '{s}': {s}", .{ hashed_file.fs_path, @errorName(err) }); - }; - hasher.update(&hashed_file.hash); - } - if (any_failures) return error.PackageHashUnavailable; - return hasher.finalResult(); -} - /// Compute the hash of a file path. fn computePathHash(gpa: Allocator, dir: Compilation.Directory, path: []const u8) ![Manifest.Hash.digest_length]u8 { const resolved_path = try std.fs.path.resolve(gpa, &.{ dir.path.?, path }); @@ -1240,57 +1165,6 @@ fn isDirectory(root_dir: Compilation.Directory, path: []const u8) !bool { return true; } -/// Make a file system path identical independently of operating system path inconsistencies. -/// This converts backslashes into forward slashes. 
-fn normalizePath(arena: Allocator, fs_path: []const u8) ![]const u8 { - const canonical_sep = '/'; - - if (fs.path.sep == canonical_sep) - return fs_path; - - const normalized = try arena.dupe(u8, fs_path); - for (normalized) |*byte| { - switch (byte.*) { - fs.path.sep => byte.* = canonical_sep, - else => continue, - } - } - return normalized; -} - -fn workerHashFile(dir: fs.Dir, hashed_file: *HashedFile, wg: *WaitGroup) void { - defer wg.finish(); - hashed_file.failure = hashFileFallible(dir, hashed_file); -} - -fn hashFileFallible(dir: fs.Dir, hashed_file: *HashedFile) HashedFile.Error!void { - var buf: [8000]u8 = undefined; - var file = try dir.openFile(hashed_file.fs_path, .{}); - defer file.close(); - var hasher = Manifest.Hash.init(.{}); - hasher.update(hashed_file.normalized_path); - hasher.update(&.{ 0, @intFromBool(try isExecutable(file)) }); - while (true) { - const bytes_read = try file.read(&buf); - if (bytes_read == 0) break; - hasher.update(buf[0..bytes_read]); - } - hasher.final(&hashed_file.hash); -} - -fn isExecutable(file: fs.File) !bool { - if (builtin.os.tag == .windows) { - // TODO check the ACL on Windows. - // Until this is implemented, this could be a false negative on - // Windows, which is why we do not yet set executable_bit_only above - // when unpacking the tarball. 
- return false; - } else { - const stat = try file.stat(); - return (stat.mode & std.os.S.IXUSR) != 0; - } -} - fn renameTmpIntoCache( cache_dir: fs.Dir, tmp_dir_sub_path: []const u8, diff --git a/src/Package/hash.zig b/src/Package/hash.zig new file mode 100644 index 000000000000..f8afa61e22fc --- /dev/null +++ b/src/Package/hash.zig @@ -0,0 +1,131 @@ +const builtin = @import("builtin"); +const std = @import("std"); +const fs = std.fs; +const ThreadPool = std.Thread.Pool; +const WaitGroup = std.Thread.WaitGroup; +const Allocator = std.mem.Allocator; + +const Hash = @import("../Manifest.zig").Hash; + +pub fn compute(thread_pool: *ThreadPool, pkg_dir: fs.IterableDir) ![Hash.digest_length]u8 { + const gpa = thread_pool.allocator; + + // We'll use an arena allocator for the path name strings since they all + // need to be in memory for sorting. + var arena_instance = std.heap.ArenaAllocator.init(gpa); + defer arena_instance.deinit(); + const arena = arena_instance.allocator(); + + // Collect all files, recursively, then sort. + var all_files = std.ArrayList(*HashedFile).init(gpa); + defer all_files.deinit(); + + var walker = try pkg_dir.walk(gpa); + defer walker.deinit(); + + { + // The final hash will be a hash of each file hashed independently. This + // allows hashing in parallel. 
+ var wait_group: WaitGroup = .{}; + defer wait_group.wait(); + + while (try walker.next()) |entry| { + switch (entry.kind) { + .directory => continue, + .file => {}, + else => return error.IllegalFileTypeInPackage, + } + const hashed_file = try arena.create(HashedFile); + const fs_path = try arena.dupe(u8, entry.path); + hashed_file.* = .{ + .fs_path = fs_path, + .normalized_path = try normalizePath(arena, fs_path), + .hash = undefined, // to be populated by the worker + .failure = undefined, // to be populated by the worker + }; + wait_group.start(); + try thread_pool.spawn(workerHashFile, .{ pkg_dir.dir, hashed_file, &wait_group }); + + try all_files.append(hashed_file); + } + } + + std.mem.sortUnstable(*HashedFile, all_files.items, {}, HashedFile.lessThan); + + var hasher = Hash.init(.{}); + var any_failures = false; + for (all_files.items) |hashed_file| { + hashed_file.failure catch |err| { + any_failures = true; + std.log.err("unable to hash '{s}': {s}", .{ hashed_file.fs_path, @errorName(err) }); + }; + hasher.update(&hashed_file.hash); + } + if (any_failures) return error.PackageHashUnavailable; + return hasher.finalResult(); +} + +const HashedFile = struct { + fs_path: []const u8, + normalized_path: []const u8, + hash: [Hash.digest_length]u8, + failure: Error!void, + + const Error = fs.File.OpenError || fs.File.ReadError || fs.File.StatError; + + fn lessThan(context: void, lhs: *const HashedFile, rhs: *const HashedFile) bool { + _ = context; + return std.mem.lessThan(u8, lhs.normalized_path, rhs.normalized_path); + } +}; + +/// Make a file system path identical independently of operating system path inconsistencies. +/// This converts backslashes into forward slashes. 
+fn normalizePath(arena: Allocator, fs_path: []const u8) ![]const u8 { + const canonical_sep = '/'; + + if (fs.path.sep == canonical_sep) + return fs_path; + + const normalized = try arena.dupe(u8, fs_path); + for (normalized) |*byte| { + switch (byte.*) { + fs.path.sep => byte.* = canonical_sep, + else => continue, + } + } + return normalized; +} + +fn workerHashFile(dir: fs.Dir, hashed_file: *HashedFile, wg: *WaitGroup) void { + defer wg.finish(); + hashed_file.failure = hashFileFallible(dir, hashed_file); +} + +fn hashFileFallible(dir: fs.Dir, hashed_file: *HashedFile) HashedFile.Error!void { + var buf: [8000]u8 = undefined; + var file = try dir.openFile(hashed_file.fs_path, .{}); + defer file.close(); + var hasher = Hash.init(.{}); + hasher.update(hashed_file.normalized_path); + hasher.update(&.{ 0, @intFromBool(try isExecutable(file)) }); + while (true) { + const bytes_read = try file.read(&buf); + if (bytes_read == 0) break; + hasher.update(buf[0..bytes_read]); + } + hasher.final(&hashed_file.hash); +} + +fn isExecutable(file: fs.File) !bool { + if (builtin.os.tag == .windows) { + // TODO check the ACL on Windows. + // Until this is implemented, this could be a false negative on + // Windows, which is why we do not yet set executable_bit_only above + // when unpacking the tarball. 
+ return false; + } else { + const stat = try file.stat(); + return (stat.mode & std.os.S.IXUSR) != 0; + } +} From 309c53295f26999065e4dc76cef4d90f8d85fb38 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Sun, 1 Oct 2023 23:04:32 -0700 Subject: [PATCH 3/6] std.fs: give readLink an explicit error set --- lib/std/fs.zig | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/std/fs.zig b/lib/std/fs.zig index 2429243ddaf7..0d1d506143fa 100644 --- a/lib/std/fs.zig +++ b/lib/std/fs.zig @@ -2003,10 +2003,12 @@ pub const Dir = struct { return os.windows.CreateSymbolicLink(self.fd, sym_link_path_w, target_path_w, flags.is_directory); } + pub const ReadLinkError = os.ReadLinkError; + /// Read value of a symbolic link. /// The return value is a slice of `buffer`, from index `0`. /// Asserts that the path parameter has no null bytes. - pub fn readLink(self: Dir, sub_path: []const u8, buffer: []u8) ![]u8 { + pub fn readLink(self: Dir, sub_path: []const u8, buffer: []u8) ReadLinkError![]u8 { if (builtin.os.tag == .wasi and !builtin.link_libc) { return self.readLinkWasi(sub_path, buffer); } From ef9966c9855dd855afda767f212abec6e5a36307 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Sun, 1 Oct 2023 23:05:01 -0700 Subject: [PATCH 4/6] introduce the 'zig fetch' command + symlink support zig fetch [options] <url> zig fetch [options] <path> Fetches a package which is found at <url> or <path> into the global cache directory, printing the package hash to stdout. 
Closes #16972 Related to #14280 Additionally, this commit: * Adds uncompressed .tar support to package fetching * Introduces symlink support to package fetching --- lib/std/tar.zig | 2 +- src/Package.zig | 236 ++++++++++++++++++++++++++----------------- src/Package/hash.zig | 44 ++++++-- src/main.zig | 127 +++++++++++++++++++++++ 4 files changed, 302 insertions(+), 107 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index d927cce30380..5ed6deed6aee 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -210,7 +210,7 @@ pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !voi while (true) { const temp = try buffer.readChunk(reader, @intCast(rounded_file_size + 512 - file_off)); if (temp.len == 0) return error.UnexpectedEndOfStream; - const slice = temp[0..@as(usize, @intCast(@min(file_size - file_off, temp.len)))]; + const slice = temp[0..@intCast(@min(file_size - file_off, temp.len))]; try file.writeAll(slice); file_off += slice.len; diff --git a/src/Package.zig b/src/Package.zig index 3c4148a5c607..9b3edf0d28d8 100644 --- a/src/Package.zig +++ b/src/Package.zig @@ -15,10 +15,10 @@ const Compilation = @import("Compilation.zig"); const Module = @import("Module.zig"); const Cache = std.Build.Cache; const build_options = @import("build_options"); -const Manifest = @import("Manifest.zig"); const git = @import("git.zig"); const computePackageHash = @import("Package/hash.zig").compute; +pub const Manifest = @import("Manifest.zig"); pub const Table = std.StringHashMapUnmanaged(*Package); root_src_directory: Compilation.Directory, @@ -454,8 +454,8 @@ pub fn createFilePkg( return createWithDir(gpa, cache_directory, o_dir_sub_path, basename); } -const Report = struct { - ast: *const std.zig.Ast, +pub const Report = struct { + ast: ?*const std.zig.Ast, directory: Compilation.Directory, error_bundle: *std.zig.ErrorBundle.Wip, @@ -465,6 +465,7 @@ const Report = struct { comptime fmt_string: []const u8, fmt_args: anytype, ) error{ 
PackageFetchFailed, OutOfMemory } { + const ast = report.ast orelse main.fatal(fmt_string, fmt_args); const gpa = report.error_bundle.gpa; const file_path = try report.directory.join(gpa, &.{Manifest.basename}); @@ -473,7 +474,7 @@ const Report = struct { const msg = try std.fmt.allocPrint(gpa, fmt_string, fmt_args); defer gpa.free(msg); - try addErrorMessage(report.ast.*, file_path, report.error_bundle, 0, .{ + try addErrorMessage(ast.*, file_path, report.error_bundle, 0, .{ .tok = tok, .off = 0, .msg = msg, @@ -482,6 +483,18 @@ const Report = struct { return error.PackageFetchFailed; } + fn addErrorWithNotes( + report: Report, + notes_len: u32, + msg: Manifest.ErrorMessage, + ) error{OutOfMemory}!void { + const ast = report.ast orelse main.fatal("{s}", .{msg.msg}); + const gpa = report.error_bundle.gpa; + const file_path = try report.directory.join(gpa, &.{Manifest.basename}); + defer gpa.free(file_path); + return addErrorMessage(ast.*, file_path, report.error_bundle, notes_len, msg); + } + fn addErrorMessage( ast: std.zig.Ast, file_path: []const u8, @@ -508,7 +521,7 @@ const Report = struct { } }; -const FetchLocation = union(enum) { +pub const FetchLocation = union(enum) { /// The relative path to a file or directory. /// This may be a file that requires unpacking (such as a .tar.gz), /// or the path to the root directory of a package. 
@@ -517,30 +530,27 @@ const FetchLocation = union(enum) { http_request: std.Uri, git_request: std.Uri, - pub fn init(gpa: Allocator, dep: Manifest.Dependency, root_dir: Compilation.Directory, report: Report) !FetchLocation { + pub fn init( + gpa: Allocator, + dep: Manifest.Dependency, + root_dir: Compilation.Directory, + report: Report, + ) !FetchLocation { switch (dep.location) { .url => |url| { const uri = std.Uri.parse(url) catch |err| switch (err) { error.UnexpectedCharacter => return report.fail(dep.location_tok, "failed to parse dependency location as URI", .{}), else => return err, }; - if (ascii.eqlIgnoreCase(uri.scheme, "file")) { - return report.fail(dep.location_tok, "'file' scheme is not allowed for URLs. Use '.path' instead", .{}); - } else if (ascii.eqlIgnoreCase(uri.scheme, "http") or ascii.eqlIgnoreCase(uri.scheme, "https")) { - return .{ .http_request = uri }; - } else if (ascii.eqlIgnoreCase(uri.scheme, "git+http") or ascii.eqlIgnoreCase(uri.scheme, "git+https")) { - return .{ .git_request = uri }; - } else { - return report.fail(dep.location_tok, "Unsupported URL scheme: {s}", .{uri.scheme}); - } + return initUri(uri, dep.location_tok, report); }, .path => |path| { if (fs.path.isAbsolute(path)) { - return report.fail(dep.location_tok, "Absolute paths are not allowed. Use a relative path instead", .{}); + return report.fail(dep.location_tok, "absolute paths are not allowed. 
Use a relative path instead", .{}); } const is_dir = isDirectory(root_dir, path) catch |err| switch (err) { - error.FileNotFound => return report.fail(dep.location_tok, "File not found: {s}", .{path}), + error.FileNotFound => return report.fail(dep.location_tok, "file not found: {s}", .{path}), else => return err, }; @@ -552,9 +562,21 @@ const FetchLocation = union(enum) { } } + pub fn initUri(uri: std.Uri, location_tok: std.zig.Ast.TokenIndex, report: Report) !FetchLocation { + if (ascii.eqlIgnoreCase(uri.scheme, "file")) { + return report.fail(location_tok, "'file' scheme is not allowed for URLs. Use '.path' instead", .{}); + } else if (ascii.eqlIgnoreCase(uri.scheme, "http") or ascii.eqlIgnoreCase(uri.scheme, "https")) { + return .{ .http_request = uri }; + } else if (ascii.eqlIgnoreCase(uri.scheme, "git+http") or ascii.eqlIgnoreCase(uri.scheme, "git+https")) { + return .{ .git_request = uri }; + } else { + return report.fail(location_tok, "unsupported URL scheme: {s}", .{uri.scheme}); + } + } + pub fn deinit(f: *FetchLocation, gpa: Allocator) void { switch (f.*) { - inline .file, .directory => |path| gpa.free(path), + .file, .directory => |path| gpa.free(path), .http_request, .git_request => {}, } f.* = undefined; @@ -565,7 +587,7 @@ const FetchLocation = union(enum) { gpa: Allocator, root_dir: Compilation.Directory, http_client: *std.http.Client, - dep: Manifest.Dependency, + dep_location_tok: std.zig.Ast.TokenIndex, report: Report, ) !ReadableResource { switch (f) { @@ -588,7 +610,7 @@ const FetchLocation = union(enum) { try req.wait(); if (req.response.status != .ok) { - return report.fail(dep.location_tok, "Expected response status '200 OK' got '{} {s}'", .{ + return report.fail(dep_location_tok, "expected response status '200 OK' got '{} {s}'", .{ @intFromEnum(req.response.status), req.response.status.phrase() orelse "", }); @@ -607,7 +629,7 @@ const FetchLocation = union(enum) { session.discoverCapabilities(gpa, &redirect_uri) catch |e| switch (e) { 
error.Redirected => { defer gpa.free(redirect_uri); - return report.fail(dep.location_tok, "Repository moved to {s}", .{redirect_uri}); + return report.fail(dep_location_tok, "repository moved to {s}", .{redirect_uri}); }, else => |other| return other, }; @@ -634,19 +656,16 @@ const FetchLocation = union(enum) { break :want_oid ref.peeled orelse ref.oid; } } - return report.fail(dep.location_tok, "Ref not found: {s}", .{want_ref}); + return report.fail(dep_location_tok, "ref not found: {s}", .{want_ref}); }; if (uri.fragment == null) { - const file_path = try report.directory.join(gpa, &.{Manifest.basename}); - defer gpa.free(file_path); - - const eb = report.error_bundle; const notes_len = 1; - try Report.addErrorMessage(report.ast.*, file_path, eb, notes_len, .{ - .tok = dep.location_tok, + try report.addErrorWithNotes(notes_len, .{ + .tok = dep_location_tok, .off = 0, .msg = "url field is missing an explicit ref", }); + const eb = report.error_bundle; const notes_start = try eb.reserveNotes(notes_len); eb.extra.items[notes_start] = @intFromEnum(try eb.addErrorMessage(.{ .msg = try eb.printString("try .url = \"{+/}#{}\",", .{ uri, std.fmt.fmtSliceHexLower(&want_oid) }), @@ -669,12 +688,13 @@ const FetchLocation = union(enum) { } }; -const ReadableResource = struct { +pub const ReadableResource = struct { path: []const u8, resource: union(enum) { file: fs.File, http_request: std.http.Client.Request, git_fetch_stream: git.Session.FetchStream, + dir: fs.IterableDir, }, /// Unpack the package into the global cache directory. 
@@ -685,12 +705,12 @@ const ReadableResource = struct { allocator: Allocator, thread_pool: *ThreadPool, global_cache_directory: Compilation.Directory, - dep: Manifest.Dependency, + dep_location_tok: std.zig.Ast.TokenIndex, report: Report, pkg_prog_node: *std.Progress.Node, ) !PackageLocation { switch (rr.resource) { - inline .file, .http_request, .git_fetch_stream => |*r| { + inline .file, .http_request, .git_fetch_stream, .dir => |*r, tag| { const s = fs.path.sep_str; const rand_int = std.crypto.random.int(u64); const tmp_dir_sub_path = "tmp" ++ s ++ Manifest.hex64(rand_int); @@ -710,45 +730,58 @@ const ReadableResource = struct { }; defer tmp_directory.closeAndFree(allocator); - const opt_content_length = try rr.getSize(); - - var prog_reader: ProgressReader(@TypeOf(r.reader())) = .{ - .child_reader = r.reader(), - .prog_node = pkg_prog_node, - .unit = if (opt_content_length) |content_length| unit: { - const kib = content_length / 1024; - const mib = kib / 1024; - if (mib > 0) { - pkg_prog_node.setEstimatedTotalItems(@intCast(mib)); - pkg_prog_node.setUnit("MiB"); - break :unit .mib; - } else { - pkg_prog_node.setEstimatedTotalItems(@intCast(@max(1, kib))); - pkg_prog_node.setUnit("KiB"); - break :unit .kib; + if (tag != .dir) { + const opt_content_length = try rr.getSize(); + + var prog_reader: ProgressReader(@TypeOf(r.reader())) = .{ + .child_reader = r.reader(), + .prog_node = pkg_prog_node, + .unit = if (opt_content_length) |content_length| unit: { + const kib = content_length / 1024; + const mib = kib / 1024; + if (mib > 0) { + pkg_prog_node.setEstimatedTotalItems(@intCast(mib)); + pkg_prog_node.setUnit("MiB"); + break :unit .mib; + } else { + pkg_prog_node.setEstimatedTotalItems(@intCast(@max(1, kib))); + pkg_prog_node.setUnit("KiB"); + break :unit .kib; + } + } else .any, + }; + + switch (try rr.getFileType(dep_location_tok, report)) { + .tar => try unpackTarball(prog_reader.reader(), tmp_directory.handle), + .@"tar.gz" => try 
unpackTarballCompressed(allocator, prog_reader, tmp_directory.handle, std.compress.gzip), + .@"tar.xz" => try unpackTarballCompressed(allocator, prog_reader, tmp_directory.handle, std.compress.xz), + .git_pack => try unpackGitPack(allocator, &prog_reader, git.parseOid(rr.path) catch unreachable, tmp_directory.handle), + } + } else { + // Recursive directory copy. + var it = try r.walk(allocator); + defer it.deinit(); + while (try it.next()) |entry| { + switch (entry.kind) { + .directory => try tmp_directory.handle.makePath(entry.path), + .file => try r.dir.copyFile( + entry.path, + tmp_directory.handle, + entry.path, + .{}, + ), + .sym_link => { + var buf: [fs.MAX_PATH_BYTES]u8 = undefined; + const link_name = try r.dir.readLink(entry.path, &buf); + // TODO: if this would create a symlink to outside + // the destination directory, fail with an error instead. + try tmp_directory.handle.symLink(link_name, entry.path, .{}); + }, + else => return error.IllegalFileTypeInPackage, } - } else .any, - }; - pkg_prog_node.context.refresh(); - - switch (try rr.getFileType(dep, report)) { - .@"tar.gz" => try unpackTarball(allocator, prog_reader, tmp_directory.handle, std.compress.gzip), - // I have not checked what buffer sizes the xz decompression implementation uses - // by default, so the same logic applies for buffering the reader as for gzip. - .@"tar.xz" => try unpackTarball(allocator, prog_reader, tmp_directory.handle, std.compress.xz), - .git_pack => try unpackGitPack(allocator, &prog_reader, git.parseOid(rr.path) catch unreachable, tmp_directory.handle), + } } - // Unpack completed - stop showing amount as progress - pkg_prog_node.setEstimatedTotalItems(0); - pkg_prog_node.setCompletedItems(0); - pkg_prog_node.context.refresh(); - - // TODO: delete files not included in the package prior to computing the package hash. - // for example, if the ini file has directives to include/not include certain files, - // apply those rules directly to the filesystem right here. 
This ensures that files - // not protected by the hash are not present on the file system. - break :h try computePackageHash(thread_pool, .{ .dir = tmp_directory.handle }); }; @@ -769,6 +802,7 @@ const ReadableResource = struct { } const FileType = enum { + tar, @"tar.gz", @"tar.xz", git_pack, @@ -780,21 +814,28 @@ const ReadableResource = struct { // TODO: Handle case of chunked content-length .http_request => |req| return req.response.content_length, .git_fetch_stream => |stream| return stream.request.response.content_length, + .dir => unreachable, } } - pub fn getFileType(rr: ReadableResource, dep: Manifest.Dependency, report: Report) !FileType { + pub fn getFileType( + rr: ReadableResource, + dep_location_tok: std.zig.Ast.TokenIndex, + report: Report, + ) !FileType { switch (rr.resource) { .file => { return fileTypeFromPath(rr.path) orelse - return report.fail(dep.location_tok, "Unknown file type", .{}); + return report.fail(dep_location_tok, "unknown file type", .{}); }, .http_request => |req| { const content_type = req.response.headers.getFirstValue("Content-Type") orelse - return report.fail(dep.location_tok, "Missing 'Content-Type' header", .{}); + return report.fail(dep_location_tok, "missing 'Content-Type' header", .{}); // If the response has a different content type than the URI indicates, override // the previously assumed file type. 
+ if (ascii.eqlIgnoreCase(content_type, "application/x-tar")) return .tar; + return if (ascii.eqlIgnoreCase(content_type, "application/gzip") or ascii.eqlIgnoreCase(content_type, "application/x-gzip") or ascii.eqlIgnoreCase(content_type, "application/tar+gzip")) @@ -805,22 +846,21 @@ const ReadableResource = struct { // support gitlab tarball urls such as https://gitlab.com/<namespace>/<project>/-/archive/<oid>/<project>-<oid>.tar.gz // whose content-disposition header is: 'attachment; filename="<project>-<oid>.tar.gz"' const content_disposition = req.response.headers.getFirstValue("Content-Disposition") orelse - return report.fail(dep.location_tok, "Missing 'Content-Disposition' header for Content-Type=application/octet-stream", .{}); + return report.fail(dep_location_tok, "missing 'Content-Disposition' header for Content-Type=application/octet-stream", .{}); break :ty getAttachmentType(content_disposition) orelse - return report.fail(dep.location_tok, "Unsupported 'Content-Disposition' header value: '{s}' for Content-Type=application/octet-stream", .{content_disposition}); - } else return report.fail(dep.location_tok, "Unrecognized value for 'Content-Type' header: {s}", .{content_type}); + return report.fail(dep_location_tok, "unsupported 'Content-Disposition' header value: '{s}' for Content-Type=application/octet-stream", .{content_disposition}); + } else return report.fail(dep_location_tok, "unrecognized value for 'Content-Type' header: {s}", .{content_type}); }, .git_fetch_stream => return .git_pack, + .dir => unreachable, } } fn fileTypeFromPath(file_path: []const u8) ?FileType { - return if (ascii.endsWithIgnoreCase(file_path, ".tar.gz")) - .@"tar.gz" - else if (ascii.endsWithIgnoreCase(file_path, ".tar.xz")) - .@"tar.xz" - else - null; + if (ascii.endsWithIgnoreCase(file_path, ".tar")) return .tar; + if (ascii.endsWithIgnoreCase(file_path, ".tar.gz")) return .@"tar.gz"; + if (ascii.endsWithIgnoreCase(file_path, ".tar.xz")) return .@"tar.xz"; + return null; } fn getAttachmentType(content_disposition: []const 
u8) ?FileType { @@ -847,6 +887,7 @@ const ReadableResource = struct { .file => |file| file.close(), .http_request => |*req| req.deinit(), .git_fetch_stream => |*stream| stream.deinit(), + .dir => |*dir| dir.close(), } rr.* = undefined; } @@ -908,7 +949,7 @@ fn ProgressReader(comptime ReaderType: type) type { } }, } - self.prog_node.context.maybeRefresh(); + self.prog_node.activate(); return amt; } @@ -993,7 +1034,7 @@ fn getDirectoryModule( if (all_modules.get(hex_digest)) |mod| return .{ mod.?, true }; var pkg_dir = directory.handle.openDir(fetch_location.directory, .{}) catch |err| switch (err) { - error.FileNotFound => return report.fail(dep.location_tok, "File not found: {s}", .{fetch_location.directory}), + error.FileNotFound => return report.fail(dep.location_tok, "file not found: {s}", .{fetch_location.directory}), else => |e| return e, }; defer pkg_dir.close(); @@ -1032,12 +1073,18 @@ fn fetchAndUnpack( var pkg_prog_node = root_prog_node.start(name_for_prog, 0); defer pkg_prog_node.end(); pkg_prog_node.activate(); - pkg_prog_node.context.refresh(); - var readable_resource = try fetch_location.fetch(gpa, directory, http_client, dep, report); + var readable_resource = try fetch_location.fetch(gpa, directory, http_client, dep.location_tok, report); defer readable_resource.deinit(gpa); - var package_location = try readable_resource.unpack(gpa, thread_pool, global_cache_directory, dep, report, &pkg_prog_node); + var package_location = try readable_resource.unpack( + gpa, + thread_pool, + global_cache_directory, + dep.location_tok, + report, + &pkg_prog_node, + ); defer package_location.deinit(gpa); const actual_hex = Manifest.hexDigest(package_location.hash); @@ -1048,16 +1095,13 @@ fn fetchAndUnpack( }); } } else { - const file_path = try report.directory.join(gpa, &.{Manifest.basename}); - defer gpa.free(file_path); - - const eb = report.error_bundle; const notes_len = 1; - try Report.addErrorMessage(report.ast.*, file_path, eb, notes_len, .{ + try 
report.addErrorWithNotes(notes_len, .{ .tok = dep.location_tok, .off = 0, .msg = "dependency is missing hash field", }); + const eb = report.error_bundle; const notes_start = try eb.reserveNotes(notes_len); eb.extra.items[notes_start] = @intFromEnum(try eb.addErrorMessage(.{ .msg = try eb.printString("expected .hash = \"{s}\",", .{&actual_hex}), @@ -1080,18 +1124,22 @@ fn fetchAndUnpack( return module; } -fn unpackTarball( +fn unpackTarballCompressed( gpa: Allocator, reader: anytype, out_dir: fs.Dir, - comptime compression: type, + comptime Compression: type, ) !void { var br = std.io.bufferedReaderSize(std.crypto.tls.max_ciphertext_record_len, reader); - var decompress = try compression.decompress(gpa, br.reader()); + var decompress = try Compression.decompress(gpa, br.reader()); defer decompress.deinit(); - try std.tar.pipeToFileSystem(out_dir, decompress.reader(), .{ + return unpackTarball(decompress.reader(), out_dir); +} + +fn unpackTarball(reader: anytype, out_dir: fs.Dir) !void { + try std.tar.pipeToFileSystem(out_dir, reader, .{ .strip_components = 1, // TODO: we would like to set this to executable_bit_only, but two // things need to happen before that: @@ -1126,7 +1174,6 @@ fn unpackGitPack( var index_prog_node = reader.prog_node.start("Index pack", 0); defer index_prog_node.end(); index_prog_node.activate(); - index_prog_node.context.refresh(); var index_buffered_writer = std.io.bufferedWriter(index_file.writer()); try git.indexPack(gpa, pack_file, index_buffered_writer.writer()); try index_buffered_writer.flush(); @@ -1137,7 +1184,6 @@ fn unpackGitPack( var checkout_prog_node = reader.prog_node.start("Checkout", 0); defer checkout_prog_node.end(); checkout_prog_node.activate(); - checkout_prog_node.context.refresh(); var repository = try git.Repository.init(gpa, pack_file, index_file); defer repository.deinit(); try repository.checkout(out_dir, want_oid); diff --git a/src/Package/hash.zig b/src/Package/hash.zig index f8afa61e22fc..b14ec70244a3 100644 
--- a/src/Package/hash.zig +++ b/src/Package/hash.zig @@ -16,6 +16,11 @@ pub fn compute(thread_pool: *ThreadPool, pkg_dir: fs.IterableDir) ![Hash.digest_ defer arena_instance.deinit(); const arena = arena_instance.allocator(); + // TODO: delete files not included in the package prior to computing the package hash. + // for example, if the ini file has directives to include/not include certain files, + // apply those rules directly to the filesystem right here. This ensures that files + // not protected by the hash are not present on the file system. + // Collect all files, recursively, then sort. var all_files = std.ArrayList(*HashedFile).init(gpa); defer all_files.deinit(); @@ -30,16 +35,18 @@ pub fn compute(thread_pool: *ThreadPool, pkg_dir: fs.IterableDir) ![Hash.digest_ defer wait_group.wait(); while (try walker.next()) |entry| { - switch (entry.kind) { + const kind: HashedFile.Kind = switch (entry.kind) { .directory => continue, - .file => {}, + .file => .file, + .sym_link => .sym_link, else => return error.IllegalFileTypeInPackage, - } + }; const hashed_file = try arena.create(HashedFile); const fs_path = try arena.dupe(u8, entry.path); hashed_file.* = .{ .fs_path = fs_path, .normalized_path = try normalizePath(arena, fs_path), + .kind = kind, .hash = undefined, // to be populated by the worker .failure = undefined, // to be populated by the worker }; @@ -70,8 +77,15 @@ const HashedFile = struct { normalized_path: []const u8, hash: [Hash.digest_length]u8, failure: Error!void, + kind: Kind, - const Error = fs.File.OpenError || fs.File.ReadError || fs.File.StatError; + const Error = + fs.File.OpenError || + fs.File.ReadError || + fs.File.StatError || + fs.Dir.ReadLinkError; + + const Kind = enum { file, sym_link }; fn lessThan(context: void, lhs: *const HashedFile, rhs: *const HashedFile) bool { _ = context; @@ -104,15 +118,23 @@ fn workerHashFile(dir: fs.Dir, hashed_file: *HashedFile, wg: *WaitGroup) void { fn hashFileFallible(dir: fs.Dir, hashed_file: 
*HashedFile) HashedFile.Error!void { var buf: [8000]u8 = undefined; - var file = try dir.openFile(hashed_file.fs_path, .{}); - defer file.close(); var hasher = Hash.init(.{}); hasher.update(hashed_file.normalized_path); - hasher.update(&.{ 0, @intFromBool(try isExecutable(file)) }); - while (true) { - const bytes_read = try file.read(&buf); - if (bytes_read == 0) break; - hasher.update(buf[0..bytes_read]); + switch (hashed_file.kind) { + .file => { + var file = try dir.openFile(hashed_file.fs_path, .{}); + defer file.close(); + hasher.update(&.{ 0, @intFromBool(try isExecutable(file)) }); + while (true) { + const bytes_read = try file.read(&buf); + if (bytes_read == 0) break; + hasher.update(buf[0..bytes_read]); + } + }, + .sym_link => { + const link_name = try dir.readLink(hashed_file.fs_path, &buf); + hasher.update(link_name); + }, } hasher.final(&hashed_file.hash); } diff --git a/src/main.zig b/src/main.zig index 26b08a9d47ab..e26efc325117 100644 --- a/src/main.zig +++ b/src/main.zig @@ -84,6 +84,7 @@ const normal_usage = \\Commands: \\ \\ build Build project from build.zig + \\ fetch Copy a package into global cache and print its hash \\ init-exe Initialize a `zig build` application in the cwd \\ init-lib Initialize a `zig build` library in the cwd \\ @@ -303,6 +304,8 @@ pub fn mainArgs(gpa: Allocator, arena: Allocator, args: []const []const u8) !voi return cmdFmt(gpa, arena, cmd_args); } else if (mem.eql(u8, cmd, "objcopy")) { return @import("objcopy.zig").cmdObjCopy(gpa, arena, cmd_args); + } else if (mem.eql(u8, cmd, "fetch")) { + return cmdFetch(gpa, arena, cmd_args); } else if (mem.eql(u8, cmd, "libc")) { return cmdLibC(gpa, cmd_args); } else if (mem.eql(u8, cmd, "init-exe")) { @@ -6589,3 +6592,127 @@ fn parseRcIncludes(arg: []const u8) Compilation.RcIncludes { return std.meta.stringToEnum(Compilation.RcIncludes, arg) orelse fatal("unsupported rc includes type: '{s}'", .{arg}); } + +pub const usage_fetch = + \\Usage: zig fetch [options] + \\Usage: zig 
fetch [options] + \\ + \\ Copy a package into the global cache and print its hash. + \\ + \\Options: + \\ -h, --help Print this help and exit + \\ --global-cache-dir [path] Override path to global Zig cache directory + \\ +; + +fn cmdFetch( + gpa: Allocator, + arena: Allocator, + args: []const []const u8, +) !void { + var opt_url: ?[]const u8 = null; + var override_global_cache_dir: ?[]const u8 = try optionalStringEnvVar(arena, "ZIG_GLOBAL_CACHE_DIR"); + + { + var i: usize = 0; + while (i < args.len) : (i += 1) { + const arg = args[i]; + if (mem.startsWith(u8, arg, "-")) { + if (mem.eql(u8, arg, "-h") or mem.eql(u8, arg, "--help")) { + const stdout = io.getStdOut().writer(); + try stdout.writeAll(usage_fetch); + return cleanExit(); + } else if (mem.eql(u8, arg, "--global-cache-dir")) { + if (i + 1 >= args.len) fatal("expected argument after '{s}'", .{arg}); + i += 1; + override_global_cache_dir = args[i]; + continue; + } else { + fatal("unrecognized parameter: '{s}'", .{arg}); + } + } else if (opt_url != null) { + fatal("unexpected extra parameter: '{s}'", .{arg}); + } else { + opt_url = arg; + } + } + } + + const url = opt_url orelse fatal("missing url or path parameter", .{}); + + var thread_pool: ThreadPool = undefined; + try thread_pool.init(.{ .allocator = gpa }); + defer thread_pool.deinit(); + + var http_client: std.http.Client = .{ .allocator = gpa }; + defer http_client.deinit(); + + var progress: std.Progress = .{ .dont_print_on_dumb = true }; + const root_prog_node = progress.start("Fetch", 0); + defer root_prog_node.end(); + + var report: Package.Report = .{ + .ast = null, + .directory = undefined, + .error_bundle = undefined, + }; + + var global_cache_directory: Compilation.Directory = l: { + const p = override_global_cache_dir orelse try introspect.resolveGlobalCacheDir(arena); + break :l .{ + .handle = try fs.cwd().makeOpenPath(p, .{}), + .path = p, + }; + }; + defer global_cache_directory.handle.close(); + + var readable_resource: 
Package.ReadableResource = rr: { + if (fs.cwd().openIterableDir(url, .{})) |dir| { + break :rr .{ + .path = try gpa.dupe(u8, url), + .resource = .{ .dir = dir }, + }; + } else |dir_err| { + const file_err = if (dir_err == error.NotDir) e: { + if (fs.cwd().openFile(url, .{})) |f| { + break :rr .{ + .path = try gpa.dupe(u8, url), + .resource = .{ .file = f }, + }; + } else |err| break :e err; + } else dir_err; + + const uri = std.Uri.parse(url) catch |uri_err| { + fatal("'{s}' could not be recognized as a file path ({s}) or an URL ({s})", .{ + url, @errorName(file_err), @errorName(uri_err), + }); + }; + const fetch_location = try Package.FetchLocation.initUri(uri, 0, report); + const cwd: Cache.Directory = .{ + .handle = fs.cwd(), + .path = null, + }; + break :rr try fetch_location.fetch(gpa, cwd, &http_client, 0, report); + } + }; + defer readable_resource.deinit(gpa); + + var package_location = try readable_resource.unpack( + gpa, + &thread_pool, + global_cache_directory, + 0, + report, + root_prog_node, + ); + defer package_location.deinit(gpa); + + const hex_digest = Package.Manifest.hexDigest(package_location.hash); + + progress.done = true; + progress.refresh(); + + try io.getStdOut().writeAll(hex_digest ++ "\n"); + + return cleanExit(); +} From 21181181bf1060d5e55738651c109d7c47647633 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Mon, 2 Oct 2023 17:00:45 -0700 Subject: [PATCH 5/6] zig fetch: enhanced error reporting * Package: use std.tar diagnostics to give detailed error messages * std.tar: add diagnostic for unsupported file type --- lib/std/tar.zig | 36 +++++++---- lib/std/zig/ErrorBundle.zig | 24 ++++---- src/Package.zig | 119 ++++++++++++++++++++++++++++-------- src/main.zig | 24 ++++++-- 4 files changed, 148 insertions(+), 55 deletions(-) diff --git a/lib/std/tar.zig b/lib/std/tar.zig index 5ed6deed6aee..b71c118d52b0 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -29,6 +29,10 @@ pub const Options = struct { file_name: []const u8, link_name: 
[]const u8, }, + unsupported_file_type: struct { + file_name: []const u8, + file_type: Header.FileType, + }, }; pub fn deinit(d: *Diagnostics) void { @@ -38,6 +42,9 @@ pub const Options = struct { d.allocator.free(info.file_name); d.allocator.free(info.link_name); }, + .unsupported_file_type => |info| { + d.allocator.free(info.file_name); + }, } } d.errors.deinit(d.allocator); @@ -50,6 +57,7 @@ pub const Header = struct { bytes: *const [512]u8, pub const FileType = enum(u8) { + normal_alias = 0, normal = '0', hard_link = '1', symbolic_link = '2', @@ -105,8 +113,9 @@ pub const Header = struct { } pub fn fileType(header: Header) FileType { - const result = @as(FileType, @enumFromInt(header.bytes[156])); - return if (result == @as(FileType, @enumFromInt(0))) .normal else result; + const result: FileType = @enumFromInt(header.bytes[156]); + if (result == .normal_alias) return .normal; + return result; } fn str(header: Header, start: usize, end: usize) []const u8 { @@ -268,18 +277,21 @@ pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !voi const link_name = header.linkName(); dir.symLink(link_name, file_name, .{}) catch |err| { - if (options.diagnostics) |d| { - try d.errors.append(d.allocator, .{ .unable_to_create_sym_link = .{ - .code = err, - .file_name = try d.allocator.dupe(u8, file_name), - .link_name = try d.allocator.dupe(u8, link_name), - } }); - } else { - return error.UnableToCreateSymLink; - } + const d = options.diagnostics orelse return error.UnableToCreateSymLink; + try d.errors.append(d.allocator, .{ .unable_to_create_sym_link = .{ + .code = err, + .file_name = try d.allocator.dupe(u8, file_name), + .link_name = try d.allocator.dupe(u8, link_name), + } }); }; }, - else => return error.TarUnsupportedFileType, + else => |file_type| { + const d = options.diagnostics orelse return error.TarUnsupportedFileType; + try d.errors.append(d.allocator, .{ .unsupported_file_type = .{ + .file_name = try d.allocator.dupe(u8, 
unstripped_file_name), + .file_type = file_type, + } }); + }, } } } diff --git a/lib/std/zig/ErrorBundle.zig b/lib/std/zig/ErrorBundle.zig index 141cdb119531..cbde70bd7076 100644 --- a/lib/std/zig/ErrorBundle.zig +++ b/lib/std/zig/ErrorBundle.zig @@ -202,7 +202,7 @@ fn renderErrorMessageToWriter( try counting_stderr.writeAll(": "); // This is the length of the part before the error message: // e.g. "file.zig:4:5: error: " - const prefix_len = @as(usize, @intCast(counting_stderr.context.bytes_written)); + const prefix_len: usize = @intCast(counting_stderr.context.bytes_written); try ttyconf.setColor(stderr, .reset); try ttyconf.setColor(stderr, .bold); if (err_msg.count == 1) { @@ -356,7 +356,7 @@ pub const Wip = struct { } const compile_log_str_index = if (compile_log_text.len == 0) 0 else str: { - const str = @as(u32, @intCast(wip.string_bytes.items.len)); + const str: u32 = @intCast(wip.string_bytes.items.len); try wip.string_bytes.ensureUnusedCapacity(gpa, compile_log_text.len + 1); wip.string_bytes.appendSliceAssumeCapacity(compile_log_text); wip.string_bytes.appendAssumeCapacity(0); @@ -364,8 +364,8 @@ pub const Wip = struct { }; wip.setExtra(0, ErrorMessageList{ - .len = @as(u32, @intCast(wip.root_list.items.len)), - .start = @as(u32, @intCast(wip.extra.items.len)), + .len = @intCast(wip.root_list.items.len), + .start = @intCast(wip.extra.items.len), .compile_log_text = compile_log_str_index, }); try wip.extra.appendSlice(gpa, @as([]const u32, @ptrCast(wip.root_list.items))); @@ -385,7 +385,7 @@ pub const Wip = struct { pub fn addString(wip: *Wip, s: []const u8) !u32 { const gpa = wip.gpa; - const index = @as(u32, @intCast(wip.string_bytes.items.len)); + const index: u32 = @intCast(wip.string_bytes.items.len); try wip.string_bytes.ensureUnusedCapacity(gpa, s.len + 1); wip.string_bytes.appendSliceAssumeCapacity(s); wip.string_bytes.appendAssumeCapacity(0); @@ -394,7 +394,7 @@ pub const Wip = struct { pub fn printString(wip: *Wip, comptime fmt: []const u8, 
args: anytype) !u32 { const gpa = wip.gpa; - const index = @as(u32, @intCast(wip.string_bytes.items.len)); + const index: u32 = @intCast(wip.string_bytes.items.len); try wip.string_bytes.writer(gpa).print(fmt, args); try wip.string_bytes.append(gpa, 0); return index; @@ -406,15 +406,15 @@ pub const Wip = struct { } pub fn addErrorMessage(wip: *Wip, em: ErrorMessage) !MessageIndex { - return @as(MessageIndex, @enumFromInt(try addExtra(wip, em))); + return @enumFromInt(try addExtra(wip, em)); } pub fn addErrorMessageAssumeCapacity(wip: *Wip, em: ErrorMessage) MessageIndex { - return @as(MessageIndex, @enumFromInt(addExtraAssumeCapacity(wip, em))); + return @enumFromInt(addExtraAssumeCapacity(wip, em)); } pub fn addSourceLocation(wip: *Wip, sl: SourceLocation) !SourceLocationIndex { - return @as(SourceLocationIndex, @enumFromInt(try addExtra(wip, sl))); + return @enumFromInt(try addExtra(wip, sl)); } pub fn addReferenceTrace(wip: *Wip, rt: ReferenceTrace) !void { @@ -430,7 +430,7 @@ pub const Wip = struct { const other_list = other.getMessages(); // The ensureUnusedCapacity call above guarantees this. 
- const notes_start = wip.reserveNotes(@as(u32, @intCast(other_list.len))) catch unreachable; + const notes_start = wip.reserveNotes(@intCast(other_list.len)) catch unreachable; for (notes_start.., other_list) |note, message| { wip.extra.items[note] = @intFromEnum(wip.addOtherMessage(other, message) catch unreachable); } @@ -455,7 +455,7 @@ pub const Wip = struct { try wip.extra.ensureUnusedCapacity(wip.gpa, notes_len + notes_len * @typeInfo(ErrorBundle.ErrorMessage).Struct.fields.len); wip.extra.items.len += notes_len; - return @as(u32, @intCast(wip.extra.items.len - notes_len)); + return @intCast(wip.extra.items.len - notes_len); } fn addOtherMessage(wip: *Wip, other: ErrorBundle, msg_index: MessageIndex) !MessageIndex { @@ -510,7 +510,7 @@ pub const Wip = struct { fn addExtraAssumeCapacity(wip: *Wip, extra: anytype) u32 { const fields = @typeInfo(@TypeOf(extra)).Struct.fields; - const result = @as(u32, @intCast(wip.extra.items.len)); + const result: u32 = @intCast(wip.extra.items.len); wip.extra.items.len += fields.len; setExtra(wip, result, extra); return result; diff --git a/src/Package.zig b/src/Package.zig index 9b3edf0d28d8..37e5d2cf305e 100644 --- a/src/Package.zig +++ b/src/Package.zig @@ -285,7 +285,8 @@ pub fn fetchAndAddDependencies( if (manifest.errors.len > 0) { const file_path = try directory.join(arena, &.{Manifest.basename}); for (manifest.errors) |msg| { - try Report.addErrorMessage(ast, file_path, error_bundle, 0, msg); + const str = try error_bundle.addString(msg.msg); + try Report.addErrorMessage(&ast, file_path, error_bundle, 0, str, msg.tok, msg.off); } return error.PackageFetchFailed; } @@ -465,20 +466,31 @@ pub const Report = struct { comptime fmt_string: []const u8, fmt_args: anytype, ) error{ PackageFetchFailed, OutOfMemory } { - const ast = report.ast orelse main.fatal(fmt_string, fmt_args); + const msg = try report.error_bundle.printString(fmt_string, fmt_args); + return failMsg(report, tok, msg); + } + + fn failMsg( + report: Report, 
+ tok: std.zig.Ast.TokenIndex, + msg: u32, + ) error{ PackageFetchFailed, OutOfMemory } { const gpa = report.error_bundle.gpa; const file_path = try report.directory.join(gpa, &.{Manifest.basename}); defer gpa.free(file_path); - const msg = try std.fmt.allocPrint(gpa, fmt_string, fmt_args); - defer gpa.free(msg); + const eb = report.error_bundle; - try addErrorMessage(ast.*, file_path, report.error_bundle, 0, .{ - .tok = tok, - .off = 0, - .msg = msg, - }); + if (report.ast) |ast| { + try addErrorMessage(ast, file_path, eb, 0, msg, tok, 0); + } else { + try eb.addRootErrorMessage(.{ + .msg = msg, + .src_loc = .none, + .notes_len = 0, + }); + } return error.PackageFetchFailed; } @@ -488,31 +500,42 @@ pub const Report = struct { notes_len: u32, msg: Manifest.ErrorMessage, ) error{OutOfMemory}!void { - const ast = report.ast orelse main.fatal("{s}", .{msg.msg}); - const gpa = report.error_bundle.gpa; - const file_path = try report.directory.join(gpa, &.{Manifest.basename}); - defer gpa.free(file_path); - return addErrorMessage(ast.*, file_path, report.error_bundle, notes_len, msg); + const eb = report.error_bundle; + const msg_str = try eb.addString(msg.msg); + if (report.ast) |ast| { + const gpa = eb.gpa; + const file_path = try report.directory.join(gpa, &.{Manifest.basename}); + defer gpa.free(file_path); + return addErrorMessage(ast, file_path, eb, notes_len, msg_str, msg.tok, msg.off); + } else { + return eb.addRootErrorMessage(.{ + .msg = msg_str, + .src_loc = .none, + .notes_len = notes_len, + }); + } } fn addErrorMessage( - ast: std.zig.Ast, + ast: *const std.zig.Ast, file_path: []const u8, eb: *std.zig.ErrorBundle.Wip, notes_len: u32, - msg: Manifest.ErrorMessage, + msg_str: u32, + msg_tok: std.zig.Ast.TokenIndex, + msg_off: u32, ) error{OutOfMemory}!void { const token_starts = ast.tokens.items(.start); - const start_loc = ast.tokenLocation(0, msg.tok); + const start_loc = ast.tokenLocation(0, msg_tok); try eb.addRootErrorMessage(.{ - .msg = try 
eb.addString(msg.msg), + .msg = msg_str, .src_loc = try eb.addSourceLocation(.{ .src_path = try eb.addString(file_path), - .span_start = token_starts[msg.tok], - .span_end = @as(u32, @intCast(token_starts[msg.tok] + ast.tokenSlice(msg.tok).len)), - .span_main = token_starts[msg.tok] + msg.off, - .line = @as(u32, @intCast(start_loc.line)), + .span_start = token_starts[msg_tok], + .span_end = @as(u32, @intCast(token_starts[msg_tok] + ast.tokenSlice(msg_tok).len)), + .span_main = token_starts[msg_tok] + msg_off, + .line = @intCast(start_loc.line), .column = @as(u32, @intCast(start_loc.column)), .source_line = try eb.addString(ast.source[start_loc.line_start..start_loc.line_end]), }), @@ -752,9 +775,9 @@ pub const ReadableResource = struct { }; switch (try rr.getFileType(dep_location_tok, report)) { - .tar => try unpackTarball(prog_reader.reader(), tmp_directory.handle), - .@"tar.gz" => try unpackTarballCompressed(allocator, prog_reader, tmp_directory.handle, std.compress.gzip), - .@"tar.xz" => try unpackTarballCompressed(allocator, prog_reader, tmp_directory.handle, std.compress.xz), + .tar => try unpackTarball(allocator, prog_reader.reader(), tmp_directory.handle, dep_location_tok, report), + .@"tar.gz" => try unpackTarballCompressed(allocator, prog_reader, tmp_directory.handle, dep_location_tok, report, std.compress.gzip), + .@"tar.xz" => try unpackTarballCompressed(allocator, prog_reader, tmp_directory.handle, dep_location_tok, report, std.compress.xz), .git_pack => try unpackGitPack(allocator, &prog_reader, git.parseOid(rr.path) catch unreachable, tmp_directory.handle), } } else { @@ -1128,6 +1151,8 @@ fn unpackTarballCompressed( gpa: Allocator, reader: anytype, out_dir: fs.Dir, + dep_location_tok: std.zig.Ast.TokenIndex, + report: Report, comptime Compression: type, ) !void { var br = std.io.bufferedReaderSize(std.crypto.tls.max_ciphertext_record_len, reader); @@ -1135,11 +1160,21 @@ fn unpackTarballCompressed( var decompress = try Compression.decompress(gpa, 
br.reader()); defer decompress.deinit(); - return unpackTarball(decompress.reader(), out_dir); + return unpackTarball(gpa, decompress.reader(), out_dir, dep_location_tok, report); } -fn unpackTarball(reader: anytype, out_dir: fs.Dir) !void { +fn unpackTarball( + gpa: Allocator, + reader: anytype, + out_dir: fs.Dir, + dep_location_tok: std.zig.Ast.TokenIndex, + report: Report, +) !void { + var diagnostics: std.tar.Options.Diagnostics = .{ .allocator = gpa }; + defer diagnostics.deinit(); + try std.tar.pipeToFileSystem(out_dir, reader, .{ + .diagnostics = &diagnostics, .strip_components = 1, // TODO: we would like to set this to executable_bit_only, but two // things need to happen before that: @@ -1148,6 +1183,36 @@ fn unpackTarball(reader: anytype, out_dir: fs.Dir) !void { // bit on Windows from the ACLs (see the isExecutable function). .mode_mode = .ignore, }); + + if (diagnostics.errors.items.len > 0) { + const notes_len: u32 = @intCast(diagnostics.errors.items.len); + try report.addErrorWithNotes(notes_len, .{ + .tok = dep_location_tok, + .off = 0, + .msg = "unable to unpack tarball", + }); + const eb = report.error_bundle; + const notes_start = try eb.reserveNotes(notes_len); + for (diagnostics.errors.items, notes_start..) 
|item, note_i| { + switch (item) { + .unable_to_create_sym_link => |info| { + eb.extra.items[note_i] = @intFromEnum(try eb.addErrorMessage(.{ + .msg = try eb.printString("unable to create symlink from '{s}' to '{s}': {s}", .{ + info.file_name, info.link_name, @errorName(info.code), + }), + })); + }, + .unsupported_file_type => |info| { + eb.extra.items[note_i] = @intFromEnum(try eb.addErrorMessage(.{ + .msg = try eb.printString("file '{s}' has unsupported type '{c}'", .{ + info.file_name, @intFromEnum(info.file_type), + }), + })); + }, + } + } + return error.InvalidTarball; + } } fn unpackGitPack( diff --git a/src/main.zig b/src/main.zig index e26efc325117..30911b57ba6b 100644 --- a/src/main.zig +++ b/src/main.zig @@ -6610,6 +6610,7 @@ fn cmdFetch( arena: Allocator, args: []const []const u8, ) !void { + const color: Color = .auto; var opt_url: ?[]const u8 = null; var override_global_cache_dir: ?[]const u8 = try optionalStringEnvVar(arena, "ZIG_GLOBAL_CACHE_DIR"); @@ -6651,10 +6652,17 @@ fn cmdFetch( const root_prog_node = progress.start("Fetch", 0); defer root_prog_node.end(); + var wip_errors: std.zig.ErrorBundle.Wip = undefined; + try wip_errors.init(gpa); + defer wip_errors.deinit(); + var report: Package.Report = .{ .ast = null, - .directory = undefined, - .error_bundle = undefined, + .directory = .{ + .handle = fs.cwd(), + .path = null, + }, + .error_bundle = &wip_errors, }; var global_cache_directory: Compilation.Directory = l: { @@ -6697,14 +6705,22 @@ fn cmdFetch( }; defer readable_resource.deinit(gpa); - var package_location = try readable_resource.unpack( + var package_location = readable_resource.unpack( gpa, &thread_pool, global_cache_directory, 0, report, root_prog_node, - ); + ) catch |err| { + if (wip_errors.root_list.items.len > 0) { + var errors = try wip_errors.toOwnedBundle(""); + defer errors.deinit(gpa); + errors.renderToStdErr(renderOptions(color)); + process.exit(1); + } + fatal("unable to unpack '{s}': {s}", .{ url, @errorName(err) }); + }; 
defer package_location.deinit(gpa); const hex_digest = Package.Manifest.hexDigest(package_location.hash); From 573a13f8be24276a02761c30396fd75fc10ebdb0 Mon Sep 17 00:00:00 2001 From: Ian Johnson Date: Mon, 2 Oct 2023 20:59:00 -0400 Subject: [PATCH 6/6] Support symlinks for git+http(s) dependencies --- src/Package.zig | 31 +++++++++++++++++++++++++++-- src/git.zig | 53 ++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 79 insertions(+), 5 deletions(-) diff --git a/src/Package.zig b/src/Package.zig index 37e5d2cf305e..14052e3de4be 100644 --- a/src/Package.zig +++ b/src/Package.zig @@ -778,7 +778,7 @@ pub const ReadableResource = struct { .tar => try unpackTarball(allocator, prog_reader.reader(), tmp_directory.handle, dep_location_tok, report), .@"tar.gz" => try unpackTarballCompressed(allocator, prog_reader, tmp_directory.handle, dep_location_tok, report, std.compress.gzip), .@"tar.xz" => try unpackTarballCompressed(allocator, prog_reader, tmp_directory.handle, dep_location_tok, report, std.compress.xz), - .git_pack => try unpackGitPack(allocator, &prog_reader, git.parseOid(rr.path) catch unreachable, tmp_directory.handle), + .git_pack => try unpackGitPack(allocator, &prog_reader, git.parseOid(rr.path) catch unreachable, tmp_directory.handle, dep_location_tok, report), } } else { // Recursive directory copy. 
@@ -1220,6 +1220,8 @@ fn unpackGitPack( reader: anytype, want_oid: git.Oid, out_dir: fs.Dir, + dep_location_tok: std.zig.Ast.TokenIndex, + report: Report, ) !void { // The .git directory is used to store the packfile and associated index, but // we do not attempt to replicate the exact structure of a real .git @@ -1251,7 +1253,32 @@ fn unpackGitPack( checkout_prog_node.activate(); var repository = try git.Repository.init(gpa, pack_file, index_file); defer repository.deinit(); - try repository.checkout(out_dir, want_oid); + var diagnostics: git.Diagnostics = .{ .allocator = gpa }; + defer diagnostics.deinit(); + try repository.checkout(out_dir, want_oid, &diagnostics); + + if (diagnostics.errors.items.len > 0) { + const notes_len: u32 = @intCast(diagnostics.errors.items.len); + try report.addErrorWithNotes(notes_len, .{ + .tok = dep_location_tok, + .off = 0, + .msg = "unable to unpack packfile", + }); + const eb = report.error_bundle; + const notes_start = try eb.reserveNotes(notes_len); + for (diagnostics.errors.items, notes_start..) 
|item, note_i| { + switch (item) { + .unable_to_create_sym_link => |info| { + eb.extra.items[note_i] = @intFromEnum(try eb.addErrorMessage(.{ + .msg = try eb.printString("unable to create symlink from '{s}' to '{s}': {s}", .{ + info.file_name, info.link_name, @errorName(info.code), + }), + })); + }, + } + } + return error.InvalidGitPack; + } } } diff --git a/src/git.zig b/src/git.zig index c8d1040d602e..fd35c5c33e8a 100644 --- a/src/git.zig +++ b/src/git.zig @@ -38,6 +38,32 @@ test parseOid { try testing.expectError(error.InvalidOid, parseOid("HEAD")); } +pub const Diagnostics = struct { + allocator: Allocator, + errors: std.ArrayListUnmanaged(Error) = .{}, + + pub const Error = union(enum) { + unable_to_create_sym_link: struct { + code: anyerror, + file_name: []const u8, + link_name: []const u8, + }, + }; + + pub fn deinit(d: *Diagnostics) void { + for (d.errors.items) |item| { + switch (item) { + .unable_to_create_sym_link => |info| { + d.allocator.free(info.file_name); + d.allocator.free(info.link_name); + }, + } + } + d.errors.deinit(d.allocator); + d.* = undefined; + } +}; + pub const Repository = struct { odb: Odb, @@ -55,6 +81,7 @@ pub const Repository = struct { repository: *Repository, worktree: std.fs.Dir, commit_oid: Oid, + diagnostics: *Diagnostics, ) !void { try repository.odb.seekOid(commit_oid); const tree_oid = tree_oid: { @@ -62,7 +89,7 @@ pub const Repository = struct { if (commit_object.type != .commit) return error.NotACommit; break :tree_oid try getCommitTree(commit_object.data); }; - try repository.checkoutTree(worktree, tree_oid); + try repository.checkoutTree(worktree, tree_oid, "", diagnostics); } /// Checks out the tree at `tree_oid` to `worktree`. 
@@ -70,6 +97,8 @@ pub const Repository = struct { repository: *Repository, dir: std.fs.Dir, tree_oid: Oid, + current_path: []const u8, + diagnostics: *Diagnostics, ) !void { try repository.odb.seekOid(tree_oid); const tree_object = try repository.odb.readObject(); @@ -87,7 +116,9 @@ pub const Repository = struct { try dir.makeDir(entry.name); var subdir = try dir.openDir(entry.name, .{}); defer subdir.close(); - try repository.checkoutTree(subdir, entry.oid); + const sub_path = try std.fs.path.join(repository.odb.allocator, &.{ current_path, entry.name }); + defer repository.odb.allocator.free(sub_path); + try repository.checkoutTree(subdir, entry.oid, sub_path, diagnostics); }, .file => { var file = try dir.createFile(entry.name, .{}); @@ -98,7 +129,23 @@ pub const Repository = struct { try file.writeAll(file_object.data); try file.sync(); }, - .symlink => return error.SymlinkNotSupported, + .symlink => { + try repository.odb.seekOid(entry.oid); + var symlink_object = try repository.odb.readObject(); + if (symlink_object.type != .blob) return error.InvalidFile; + const link_name = symlink_object.data; + dir.symLink(link_name, entry.name, .{}) catch |e| { + const file_name = try std.fs.path.join(diagnostics.allocator, &.{ current_path, entry.name }); + errdefer diagnostics.allocator.free(file_name); + const link_name_dup = try diagnostics.allocator.dupe(u8, link_name); + errdefer diagnostics.allocator.free(link_name_dup); + try diagnostics.errors.append(diagnostics.allocator, .{ .unable_to_create_sym_link = .{ + .code = e, + .file_name = file_name, + .link_name = link_name_dup, + } }); + }; + }, .gitlink => { // Consistent with git archive behavior, create the directory but // do nothing else