From c8862a133f06cac8d89892553935821f6c49c950 Mon Sep 17 00:00:00 2001
From: Jonathan Marler
Date: Sat, 20 Apr 2024 16:52:02 -0600
Subject: [PATCH] add std.zip and support zip files in build.zig.zon

fixes #17408

Improved by helpful reviews from Josh Wolfe and Auguste Rame.

Co-authored-by: Joel Gustafson
---
 lib/std/io.zig        |   4 +
 lib/std/io/Writer.zig |  12 ++
 lib/std/mem.zig       |   7 +-
 lib/std/std.zig       |   1 +
 lib/std/zip.zig       | 438 ++++++++++++++++++++++++++++++++++++++++++
 lib/std/zip/test.zig  | 105 ++++++++++
 src/Package/Fetch.zig | 136 +++++++++++++
 7 files changed, 702 insertions(+), 1 deletion(-)
 create mode 100644 lib/std/zip.zig
 create mode 100644 lib/std/zip/test.zig

diff --git a/lib/std/io.zig b/lib/std/io.zig
index ab891140006d..aa8270698de5 100644
--- a/lib/std/io.zig
+++ b/lib/std/io.zig
@@ -344,6 +344,10 @@ pub fn GenericWriter(
             return @errorCast(self.any().writeStruct(value));
         }
 
+        pub inline fn writeStructEndian(self: Self, value: anytype, endian: std.builtin.Endian) Error!void {
+            return @errorCast(self.any().writeStructEndian(value, endian));
+        }
+
         pub inline fn any(self: *const Self) AnyWriter {
             return .{
                 .context = @ptrCast(&self.context),
diff --git a/lib/std/io/Writer.zig b/lib/std/io/Writer.zig
index dfcae48b1eb8..0c80393e4527 100644
--- a/lib/std/io/Writer.zig
+++ b/lib/std/io/Writer.zig
@@ -1,6 +1,7 @@
 const std = @import("../std.zig");
 const assert = std.debug.assert;
 const mem = std.mem;
+const native_endian = @import("builtin").target.cpu.arch.endian();
 
 context: *const anyopaque,
 writeFn: *const fn (context: *const anyopaque, bytes: []const u8) anyerror!usize,
@@ -59,6 +60,17 @@ pub fn writeStruct(self: Self, value: anytype) anyerror!void {
     return self.writeAll(mem.asBytes(&value));
 }
 
+pub fn writeStructEndian(self: Self, value: anytype, endian: std.builtin.Endian) anyerror!void {
+    // TODO: make sure this value is not a reference type
+    if (native_endian == endian) {
+        return self.writeStruct(value);
+    } else {
+        var copy = value;
+        mem.byteSwapAllFields(@TypeOf(value), &copy);
+        return self.writeStruct(copy);
+    }
+}
+
 pub fn writeFile(self: Self, file: std.fs.File) anyerror!void {
     // TODO: figure out how to adjust std lib abstractions so that this ends up
     // doing sendfile or maybe even copy_file_range under the right conditions.
diff --git a/lib/std/mem.zig b/lib/std/mem.zig
index 1ea67d2cce94..11e9c8571ed7 100644
--- a/lib/std/mem.zig
+++ b/lib/std/mem.zig
@@ -2008,7 +2008,12 @@ pub fn byteSwapAllFields(comptime S: type, ptr: *S) void {
         .Struct => {
             inline for (std.meta.fields(S)) |f| {
                 switch (@typeInfo(f.type)) {
-                    .Struct, .Array => byteSwapAllFields(f.type, &@field(ptr, f.name)),
+                    .Struct => |struct_info| if (struct_info.backing_integer) |Int| {
+                        @field(ptr, f.name) = @bitCast(@byteSwap(@as(Int, @bitCast(@field(ptr, f.name)))));
+                    } else {
+                        byteSwapAllFields(f.type, &@field(ptr, f.name));
+                    },
+                    .Array => byteSwapAllFields(f.type, &@field(ptr, f.name)),
                     .Enum => {
                         @field(ptr, f.name) = @enumFromInt(@byteSwap(@intFromEnum(@field(ptr, f.name))));
                     },
diff --git a/lib/std/std.zig b/lib/std/std.zig
index 8aa12ff31a47..2837d2336010 100644
--- a/lib/std/std.zig
+++ b/lib/std/std.zig
@@ -104,6 +104,7 @@ pub const unicode = @import("unicode.zig");
 pub const valgrind = @import("valgrind.zig");
 pub const wasm = @import("wasm.zig");
 pub const zig = @import("zig.zig");
+pub const zip = @import("zip.zig");
 pub const start = @import("start.zig");
 
 const root = @import("root");
diff --git a/lib/std/zip.zig b/lib/std/zip.zig
new file mode 100644
index 000000000000..60b152263ca8
--- /dev/null
+++ b/lib/std/zip.zig
@@ -0,0 +1,438 @@
+/// The .ZIP File Format Specification is found here:
+/// https://pkwaredownloads.blob.core.windows.net/pem/APPNOTE.txt
+const builtin = @import("builtin");
+const std = @import("std");
+const testing = std.testing;
+
+pub const File = @import("zip/test.zig").File;
+pub const FileCache = @import("zip/test.zig").FileCache;
+pub const writeFile = @import("zip/test.zig").writeFile;
+
+pub const CompressionMethod = enum(u16) {
+    store = 0,
+    deflate = 8,
+    _,
+};
+
+pub const central_file_header_sig = [4]u8{ 'P', 'K', 1, 2 };
+pub const local_file_header_sig = [4]u8{ 'P', 'K', 3, 4 };
+pub const end_of_central_directory_sig = [4]u8{ 'P', 'K', 5, 6 };
+
+const GeneralPurposeFlags = packed struct(u16) {
+    encrypted: bool,
+    _: u15,
+};
+
+pub const LocalFileHeader = extern struct {
+    signature: [4]u8 align(1),
+    minimum_version: u16 align(1),
+    flags: GeneralPurposeFlags align(1),
+    compression_method: CompressionMethod align(1),
+    last_modification_time: u16 align(1),
+    last_modification_date: u16 align(1),
+    crc32: u32 align(1),
+    compressed_size: u32 align(1),
+    uncompressed_size: u32 align(1),
+    filename_len: u16 align(1),
+    extra_len: u16 align(1),
+};
+
+pub const CentralDirectoryFileHeader = extern struct {
+    signature: [4]u8 align(1),
+    version: u16 align(1),
+    minimum_version: u16 align(1),
+    flags: GeneralPurposeFlags align(1),
+    compression_method: CompressionMethod align(1),
+    last_modification_time: u16 align(1),
+    last_modification_date: u16 align(1),
+    crc32: u32 align(1),
+    compressed_size: u32 align(1),
+    uncompressed_size: u32 align(1),
+    filename_len: u16 align(1),
+    extra_len: u16 align(1),
+    comment_len: u16 align(1),
+    disk_number: u16 align(1),
+    internal_file_attributes: u16 align(1),
+    external_file_attributes: u32 align(1),
+    local_file_header_offset: u32 align(1),
+};
+
+pub const EndOfCentralDirectoryRecord = extern struct {
+    signature: [4]u8 align(1),
+    disk_number: u16 align(1),
+    central_directory_disk_number: u16 align(1),
+    record_count_disk: u16 align(1),
+    record_count_total: u16 align(1),
+    central_directory_size: u32 align(1),
+    central_directory_offset: u32 align(1),
+    comment_len: u16 align(1),
+};
+
+pub fn findEocdr(file: std.fs.File) !EndOfCentralDirectoryRecord {
+    // The EOCD record can contain a variable-length comment at the end,
+    // which makes ZIP file parsing ambiguous in general, since a valid
+    // comment could contain the bytes of another valid EOCD record.
+    // Here we just search backwards for the first instance of the EOCD
+    // signature, and return an error if a valid EOCD record doesn't follow.
+
+    // TODO: make this more efficient
+    // we need a backward_buffered_reader
+    const file_size = try file.getEndPos();
+
+    const record_len = @sizeOf(EndOfCentralDirectoryRecord);
+    var record_value: EndOfCentralDirectoryRecord = undefined;
+    const record_bytes: *[record_len]u8 = @ptrCast(&record_value);
+    if (file_size < record_len)
+        return error.ZipTruncated;
+    try file.seekFromEnd(-record_len);
+    {
+        const len = try file.readAll(record_bytes);
+        if (len != record_len)
+            return error.ZipTruncated;
+    }
+
+    var comment_len: u16 = 0;
+    while (true) {
+        if (std.mem.eql(u8, record_bytes[0..4], &end_of_central_directory_sig) and
+            std.mem.readInt(u16, record_bytes[20..22], .little) == comment_len)
+        {
+            break;
+        }
+
+        if (comment_len == std.math.maxInt(u16))
+            return error.ZipMissingEocdr;
+        std.mem.copyBackwards(u8, record_bytes[1..], record_bytes[0 .. record_bytes.len - 1]);
+        comment_len += 1;
+
+        if (@as(u64, record_len) + @as(u64, comment_len) > file_size)
+            return error.ZipMissingEocdr;
+
+        try file.seekFromEnd(-record_len - @as(i64, comment_len));
+        {
+            const len = try file.readAll(record_bytes[0..1]);
+            if (len != 1)
+                return error.ZipTruncated;
+        }
+    }
+
+    if (builtin.target.cpu.arch.endian() != .little) {
+        std.mem.byteSwapAllFields(@TypeOf(record_value), &record_value);
+    }
+    return record_value;
+}
+
+/// `decompress` returns the actual CRC-32 of the decompressed bytes,
+/// which should be validated against the expected entry.crc32 value.
+/// `writer` can be anything with a `writeAll(self: *Self, chunk: []const u8) anyerror!void` method.
+pub fn decompress(
+    method: CompressionMethod,
+    uncompressed_size: u32,
+    reader: anytype,
+    writer: anytype,
+) !u32 {
+    var hash = std.hash.Crc32.init();
+
+    var total_uncompressed: u32 = 0;
+    switch (method) {
+        .store => {
+            var buf: [std.mem.page_size]u8 = undefined;
+            while (true) {
+                const len = try reader.read(&buf);
+                if (len == 0) break;
+                try writer.writeAll(buf[0..len]);
+                hash.update(buf[0..len]);
+                total_uncompressed += @intCast(len);
+            }
+        },
+        .deflate => {
+            var br = std.io.bufferedReader(reader);
+            var decompressor = std.compress.flate.decompressor(br.reader());
+            while (try decompressor.next()) |chunk| {
+                try writer.writeAll(chunk);
+                hash.update(chunk);
+                total_uncompressed += @intCast(chunk.len);
+                if (total_uncompressed > uncompressed_size)
+                    return error.ZipUncompressSizeTooSmall;
+            }
+            if (br.end != br.start)
+                return error.ZipDeflateTruncated;
+        },
+        _ => return error.UnsupportedCompressionMethod,
+    }
+    if (total_uncompressed != uncompressed_size)
+        return error.ZipUncompressSizeMismatch;
+
+    return hash.final();
+}
+
+fn isBadFilename(filename: []const u8) bool {
+    if (filename.len == 0 or filename[0] == '/')
+        return true;
+
+    var it = std.mem.splitScalar(u8, filename, '/');
+    while (it.next()) |part| {
+        if (std.mem.eql(u8, part, ".."))
+            return true;
+    }
+
+    return false;
+}
+
+pub const Iterator = struct {
+    file: std.fs.File,
+    eocdr: EndOfCentralDirectoryRecord,
+    next_central_header_index: u16,
+    next_central_header_offset: u64,
+
+    pub fn init(file: std.fs.File) !Iterator {
+        const eocdr = try findEocdr(file);
+
+        // Don't support multi-disk archives.
+        if (eocdr.disk_number != 0 or
+            eocdr.central_directory_disk_number != 0 or
+            eocdr.record_count_disk != eocdr.record_count_total)
+        {
+            return error.ZipUnsupportedMultiDisk;
+        }
+
+        return .{
+            .file = file,
+            .eocdr = eocdr,
+            .next_central_header_offset = 0,
+            .next_central_header_index = 0,
+        };
+    }
+
+    pub fn next(self: *Iterator) !?Entry {
+        if (self.next_central_header_index >= self.eocdr.record_count_total) {
+            return null;
+        }
+
+        const header_file_offset: u64 = @as(u64, self.eocdr.central_directory_offset) + self.next_central_header_offset;
+        const header = blk: {
+            try self.file.seekTo(header_file_offset);
+            break :blk try self.file.reader().readStructEndian(CentralDirectoryFileHeader, .little);
+        };
+        if (!std.mem.eql(u8, &header.signature, &central_file_header_sig))
+            return error.ZipHeader;
+
+        self.next_central_header_index += 1;
+        // Sum in u64: three u16 lengths plus the header size can exceed maxInt(u16).
+        self.next_central_header_offset += @sizeOf(CentralDirectoryFileHeader) + @as(u64, header.filename_len) + @as(u64, header.extra_len) + @as(u64, header.comment_len);
+        if (header.flags.encrypted)
+            return error.ZipEncryptionUnsupported;
+        if (header.disk_number != 0)
+            return error.ZipUnsupportedMultiDisk;
+        return .{
+            .header_file_offset = header_file_offset,
+            .header = header,
+        };
+    }
+
+    pub const Entry = struct {
+        header_file_offset: u64,
+        header: CentralDirectoryFileHeader,
+
+        pub fn extract(
+            self: Entry,
+            zip_file: std.fs.File,
+            options: ExtractOptions,
+            filename_buf: []u8,
+            dest: std.fs.Dir,
+        ) !u32 {
+            if (filename_buf.len < self.header.filename_len)
+                return error.ZipInsufficientBuffer;
+            const filename = filename_buf[0..self.header.filename_len];
+
+            try zip_file.seekTo(self.header_file_offset + @sizeOf(CentralDirectoryFileHeader));
+
+            {
+                const len = try zip_file.readAll(filename);
+                if (len != filename.len)
+                    return error.ZipTruncated;
+            }
+
+            const local_data_header_offset: u64 = local_data_header_offset: {
+                const local_header = blk: {
+                    try zip_file.seekTo(self.header.local_file_header_offset);
+                    break :blk try zip_file.reader().readStructEndian(LocalFileHeader, .little);
+                };
+                if (!std.mem.eql(u8, &local_header.signature, &local_file_header_sig))
+                    return error.ZipHeader;
+                // TODO: verify minimum_version
+                // TODO: verify flags
+                // TODO: verify compression method
+                // TODO: verify last_mod_time
+                // TODO: verify last_mod_date
+                // TODO: verify filename_len and filename?
+                // TODO: extra?
+
+                if (@as(u16, @bitCast(local_header.flags)) != @as(u16, @bitCast(self.header.flags)))
+                    return error.ZipRedundancyFail;
+                if (local_header.crc32 != 0 and local_header.crc32 != self.header.crc32)
+                    return error.ZipRedundancyFail;
+                if (local_header.compressed_size != 0 and
+                    local_header.compressed_size != self.header.compressed_size)
+                    return error.ZipRedundancyFail;
+                if (local_header.uncompressed_size != 0 and
+                    local_header.uncompressed_size != self.header.uncompressed_size)
+                    return error.ZipRedundancyFail;
+
+                break :local_data_header_offset @as(u64, local_header.filename_len) +
+                    @as(u64, local_header.extra_len);
+            };
+
+            // Normalize (or reject) backslashes BEFORE validating the name, so
+            // that a name like ..\..\x cannot slip past isBadFilename and then
+            // become ../../x (path traversal out of `dest`).
+            if (options.allow_backslashes) {
+                std.mem.replaceScalar(u8, filename, '\\', '/');
+            } else {
+                if (std.mem.indexOfScalar(u8, filename, '\\')) |_|
+                    return error.ZipFilenameHasBackslash;
+            }
+
+            if (isBadFilename(filename))
+                return error.ZipBadFilename;
+
+            // All entries that end in '/' are directories
+            if (filename[filename.len - 1] == '/') {
+                if (self.header.uncompressed_size != 0)
+                    return error.ZipBadDirectorySize;
+                try dest.makePath(filename[0 .. filename.len - 1]);
+                return std.hash.Crc32.hash(&.{});
+            }
+
+            const out_file = blk: {
+                if (std.fs.path.dirname(filename)) |dirname| {
+                    var parent_dir = try dest.makeOpenPath(dirname, .{});
+                    defer parent_dir.close();
+
+                    const basename = std.fs.path.basename(filename);
+                    break :blk try parent_dir.createFile(basename, .{ .exclusive = true });
+                }
+                break :blk try dest.createFile(filename, .{ .exclusive = true });
+            };
+            defer out_file.close();
+            const local_data_file_offset: u64 =
+                @as(u64, self.header.local_file_header_offset) +
+                @as(u64, @sizeOf(LocalFileHeader)) +
+                local_data_header_offset;
+            try zip_file.seekTo(local_data_file_offset);
+            var limited_reader = std.io.limitedReader(zip_file.reader(), self.header.compressed_size);
+            const crc = try decompress(
+                self.header.compression_method,
+                self.header.uncompressed_size,
+                limited_reader.reader(),
+                out_file.writer(),
+            );
+            if (limited_reader.bytes_left != 0)
+                return error.ZipDecompressTruncated;
+            return crc;
+        }
+    };
+};
+
+pub const ExtractOptions = struct {
+    /// Allow filenames within the zip to use backslashes. Back slashes are normalized
+    /// to forward slashes before forwarding them to platform APIs.
+    allow_backslashes: bool = false,
+};
+
+/// Extract the given zip `file` to the given `dest` directory. This function
+/// requires a file rather than a generic `reader` because zip files should
+/// almost always be extracted from a file. This is because they must be
+/// processed back to front and can be too large to reasonably fit in memory.
+pub fn extract(dest: std.fs.Dir, file: std.fs.File, options: ExtractOptions) !void {
+    var iter = try Iterator.init(file);
+
+    var filename_buf: [std.fs.MAX_PATH_BYTES]u8 = undefined;
+    while (try iter.next()) |entry| {
+        const crc32 = try entry.extract(file, options, &filename_buf, dest);
+        if (crc32 != entry.header.crc32)
+            return error.ZipCrcMismatch;
+    }
+}
+
+fn testZip(options: ExtractOptions, comptime files: []const File) !void {
+    var cache: [files.len]FileCache = undefined;
+    try testZipWithCache(options, files, &cache);
+}
+fn testZipWithCache(options: ExtractOptions, files: []const File, cache: []FileCache) !void {
+    var tmp = testing.tmpDir(.{ .no_follow = true });
+    defer tmp.cleanup();
+    const dir = tmp.dir;
+
+    {
+        var file = try dir.createFile("zip", .{});
+        defer file.close();
+        try writeFile(file, files, cache);
+    }
+
+    var zip_file = try dir.openFile("zip", .{});
+    defer zip_file.close();
+    try extract(dir, zip_file, options);
+
+    for (files) |test_file| {
+        var normalized_sub_path_buf: [std.fs.MAX_PATH_BYTES]u8 = undefined;
+        const normalized_sub_path = normalized_sub_path_buf[0..test_file.name.len];
+        @memcpy(normalized_sub_path, test_file.name);
+        std.mem.replaceScalar(u8, normalized_sub_path, '\\', '/');
+        var file = try dir.openFile(normalized_sub_path, .{});
+        defer file.close();
+        var buf: [4096]u8 = undefined;
+        const n = try file.reader().readAll(&buf);
+        try testing.expectEqualStrings(test_file.content, buf[0..n]);
+    }
+}
+fn testZipError(expected_error: anyerror, test_file: File, options: ExtractOptions) !void {
+    var tmp = testing.tmpDir(.{ .no_follow = true });
+    defer tmp.cleanup();
+    const dir = tmp.dir;
+    {
+        var file = try dir.createFile("zip", .{});
+        defer file.close();
+        var cache: [1]FileCache = undefined;
+        try writeFile(file, &[_]File{test_file}, &cache);
+    }
+    var zip_file = try dir.openFile("zip", .{});
+    defer zip_file.close();
+    try testing.expectError(expected_error, extract(dir, zip_file, options));
+}
+
+test "zip one file" {
+    try testZip(.{}, &[_]File{
+        .{ .name = "onefile.txt", .content = "Just a single file\n", .compression = .store },
+    });
+}
+test "zip multiple files" {
+    try testZip(.{ .allow_backslashes = true }, &[_]File{
+        .{ .name = "foo", .content = "a foo file\n", .compression = .store },
+        .{ .name = "subdir/bar", .content = "bar is this right?\nanother newline\n", .compression = .store },
+        .{ .name = "subdir\\whoa", .content = "you can do backslashes", .compression = .store },
+        .{ .name = "subdir/another/baz", .content = "bazzy mc bazzerson", .compression = .store },
+    });
+}
+test "zip deflated" {
+    try testZip(.{}, &[_]File{
+        .{ .name = "deflateme", .content = "This is a deflated file.\nIt should be smaller in the Zip file1\n", .compression = .deflate },
+        // TODO: re-enable this if/when we add support for deflate64
+        //.{ .name = "deflateme64", .content = "The 64k version of deflate!\n", .compression = .deflate64 },
+        .{ .name = "raw", .content = "Not all files need to be deflated in the same Zip.\n", .compression = .store },
+    });
+}
+test "zip verify filenames" {
+    // no empty filenames
+    try testZipError(error.ZipBadFilename, .{ .name = "", .content = "", .compression = .store }, .{});
+    // no absolute paths
+    try testZipError(error.ZipBadFilename, .{ .name = "/", .content = "", .compression = .store }, .{});
+    try testZipError(error.ZipBadFilename, .{ .name = "/foo", .content = "", .compression = .store }, .{});
+    try testZipError(error.ZipBadFilename, .{ .name = "/foo/bar", .content = "", .compression = .store }, .{});
+    // no '..' components
+    try testZipError(error.ZipBadFilename, .{ .name = "..", .content = "", .compression = .store }, .{});
+    try testZipError(error.ZipBadFilename, .{ .name = "foo/..", .content = "", .compression = .store }, .{});
+    try testZipError(error.ZipBadFilename, .{ .name = "foo/bar/..", .content = "", .compression = .store }, .{});
+    try testZipError(error.ZipBadFilename, .{ .name = "foo/bar/../", .content = "", .compression = .store }, .{});
+    // no backslashes
+    try testZipError(error.ZipFilenameHasBackslash, .{ .name = "foo\\bar", .content = "", .compression = .store }, .{});
+}
diff --git a/lib/std/zip/test.zig b/lib/std/zip/test.zig
new file mode 100644
index 000000000000..977fa5e46f85
--- /dev/null
+++ b/lib/std/zip/test.zig
@@ -0,0 +1,105 @@
+const std = @import("std");
+const zip = @import("../zip.zig");
+
+pub const File = struct {
+    name: []const u8,
+    content: []const u8,
+    compression: zip.CompressionMethod,
+};
+pub const FileCache = struct {
+    offset: u32,
+    crc: u32,
+    compressed_size: u32,
+};
+
+pub fn writeFile(
+    out_file: std.fs.File,
+    files: []const File,
+    cache: []FileCache,
+) !void {
+    if (cache.len < files.len) return error.FileCacheTooSmall;
+
+    var bw = std.io.bufferedWriter(out_file.writer());
+    var counting = std.io.countingWriter(bw.writer());
+    const writer = counting.writer();
+
+    for (files, 0..) |file, i| {
+        cache[i].offset = @intCast(counting.bytes_written);
+        cache[i].crc = std.hash.Crc32.hash(file.content);
+
+        {
+            const hdr: zip.LocalFileHeader = .{
+                .signature = zip.local_file_header_sig,
+                .minimum_version = 0,
+                .flags = .{ .encrypted = false, ._ = 0 },
+                .compression_method = file.compression,
+                .last_modification_time = 0,
+                .last_modification_date = 0,
+                .crc32 = cache[i].crc,
+                .compressed_size = 0,
+                .uncompressed_size = @intCast(file.content.len),
+                .filename_len = @intCast(file.name.len),
+                .extra_len = 0,
+            };
+            try writer.writeStructEndian(hdr, .little);
+        }
+        try writer.writeAll(file.name);
+        switch (file.compression) {
+            .store => {
+                try writer.writeAll(file.content);
+                cache[i].compressed_size = @intCast(file.content.len);
+            },
+            .deflate => {
+                const offset = counting.bytes_written;
+                var fbs = std.io.fixedBufferStream(file.content);
+                try std.compress.flate.deflate.compress(.raw, fbs.reader(), writer, .{});
+                std.debug.assert(fbs.pos == file.content.len);
+                cache[i].compressed_size = @intCast(counting.bytes_written - offset);
+            },
+            else => unreachable,
+        }
+    }
+
+    const cd_offset = counting.bytes_written;
+    for (files, 0..) |file, i| {
+        {
+            const hdr: zip.CentralDirectoryFileHeader = .{
+                .signature = zip.central_file_header_sig,
+                .version = 0,
+                .minimum_version = 0,
+                .flags = .{ .encrypted = false, ._ = 0 },
+                .compression_method = file.compression,
+                .last_modification_time = 0,
+                .last_modification_date = 0,
+                .crc32 = cache[i].crc,
+                .compressed_size = cache[i].compressed_size,
+                .uncompressed_size = @intCast(file.content.len),
+                .filename_len = @intCast(file.name.len),
+                .extra_len = 0,
+                .comment_len = 0,
+                .disk_number = 0,
+                .internal_file_attributes = 0,
+                .external_file_attributes = 0,
+                .local_file_header_offset = cache[i].offset,
+            };
+            try writer.writeStructEndian(hdr, .little);
+        }
+        try writer.writeAll(file.name);
+    }
+    const cd_end = counting.bytes_written;
+
+    {
+        const hdr: zip.EndOfCentralDirectoryRecord = .{
+            .signature = zip.end_of_central_directory_sig,
+            .disk_number = 0,
+            .central_directory_disk_number = 0,
+            .record_count_disk = @intCast(files.len),
+            .record_count_total = @intCast(files.len),
+            .central_directory_size = @intCast(cd_end - cd_offset),
+            .central_directory_offset = @intCast(cd_offset),
+            .comment_len = 0,
+        };
+        try writer.writeStructEndian(hdr, .little);
+    }
+    try bw.flush();
+}
diff --git a/src/Package/Fetch.zig b/src/Package/Fetch.zig
index 506075e9213e..4764c1d9610d 100644
--- a/src/Package/Fetch.zig
+++ b/src/Package/Fetch.zig
@@ -840,6 +840,7 @@ const FileType = enum {
     @"tar.xz",
     @"tar.zst",
     git_pack,
+    zip,
 
     fn fromPath(file_path: []const u8) ?FileType {
         if (ascii.endsWithIgnoreCase(file_path, ".tar")) return .tar;
@@ -849,6 +850,7 @@ const FileType = enum {
         if (ascii.endsWithIgnoreCase(file_path, ".tar.xz")) return .@"tar.xz";
         if (ascii.endsWithIgnoreCase(file_path, ".tzst")) return .@"tar.zst";
         if (ascii.endsWithIgnoreCase(file_path, ".tar.zst")) return .@"tar.zst";
+        if (ascii.endsWithIgnoreCase(file_path, ".zip")) return .zip;
         return null;
     }
 
@@ -1157,6 +1159,7 @@ fn unpackResource(
                 .{@errorName(e)},
             )),
         },
+        .zip => return try unzip(f, tmp_directory.handle, resource.reader()),
     }
 }
 
@@ -1190,6 +1193,101 @@ fn unpackTarball(f: *Fetch, out_dir: fs.Dir, reader: anytype) RunError!UnpackRes
     return res;
 }
 
+fn unzip(f: *Fetch, out_dir: fs.Dir, reader: anytype) RunError!UnpackResult {
+    // We write the entire contents to a file first because zip files
+    // must be processed starting from the end.
+    const eb = &f.error_bundle;
+
+    {
+        var zip_file = out_dir.createFile("zip", .{}) catch |err| return f.fail(f.location_tok, try eb.printString(
+            "failed to create zip file in temporary directory: {s}",
+            .{@errorName(err)},
+        ));
+        defer zip_file.close();
+        var buf: [std.mem.page_size]u8 = undefined;
+        while (true) {
+            const len = reader.readAll(&buf) catch |err| return f.fail(f.location_tok, try eb.printString(
+                "read zip stream failed: {s}",
+                .{@errorName(err)},
+            ));
+            if (len == 0) break;
+            zip_file.writer().writeAll(buf[0..len]) catch |err| return f.fail(f.location_tok, try eb.printString(
+                "write temporary zip file failed: {s}",
+                .{@errorName(err)},
+            ));
+        }
+    }
+
+    {
+        out_dir.makeDir("tmp") catch |err| return f.fail(f.location_tok, try eb.printString(
+            "failed to create temporary zip directory: {s}",
+            .{@errorName(err)},
+        ));
+        var tmp_dir = out_dir.openDir("tmp", .{}) catch |err| return f.fail(f.location_tok, try eb.printString(
+            "failed to open temporary zip directory: {s}",
+            .{@errorName(err)},
+        ));
+        defer tmp_dir.close();
+
+        var zip_file = out_dir.openFile("zip", .{}) catch |err| return f.fail(f.location_tok, try eb.printString(
+            "failed to open temporary zip file: {s}",
+            .{@errorName(err)},
+        ));
+        defer zip_file.close();
+
+        std.zip.extract(
+            tmp_dir,
+            zip_file,
+            .{ .allow_backslashes = true },
+        ) catch |err| return f.fail(f.location_tok, try eb.printString(
+            "unable to unzip to temporary directory: {s}",
+            .{@errorName(err)},
+        ));
+    }
+
+    out_dir.deleteFile("zip") catch |err| return f.fail(f.location_tok, try eb.printString(
+        "failed to delete temporary zip file: {s}",
+        .{@errorName(err)},
+    ));
+
+    const tmp_dir_name: []const u8 = "tmp";
+    while (true) {
+        var tmp_dir = out_dir.openDir(tmp_dir_name, .{ .iterate = true }) catch |err| return f.fail(f.location_tok, try eb.printString(
+            "failed to open temporary zip directory: {s}",
+            .{@errorName(err)},
+        ));
+        defer tmp_dir.close();
+
+        var it = tmp_dir.iterate();
+        while (it.next() catch |err| return f.fail(f.location_tok, try eb.printString(
+            "failed to iterate temporary zip directory: {s}",
+            .{@errorName(err)},
+        ))) |entry| {
+            if (std.mem.eql(u8, entry.name, tmp_dir_name)) {
+                return f.fail(f.location_tok, try eb.printString("TODO: zip entry '{s}' conflicts with the temporary directory name", .{tmp_dir_name}));
+            }
+            std.posix.renameat(
+                tmp_dir.fd,
+                entry.name,
+                out_dir.fd,
+                entry.name,
+            ) catch |err| return f.fail(f.location_tok, try eb.printString(
+                "failed to move temporary zip child to parent: {s}",
+                .{@errorName(err)},
+            ));
+        }
+        break;
+    }
+
+    out_dir.deleteDir(tmp_dir_name) catch |err| return f.fail(f.location_tok, try eb.printString(
+        "failed to delete temporary zip directory: {s}",
+        .{@errorName(err)},
+    ));
+
+    const res: UnpackResult = .{};
+    return res;
+}
+
 fn unpackGitPack(f: *Fetch, out_dir: fs.Dir, resource: *Resource) anyerror!UnpackResult {
     const arena = f.arena.allocator();
     const gpa = f.arena.child_allocator;
@@ -1895,6 +1993,44 @@ const UnpackResult = struct {
     }
 };
 
+test "zip" {
+    const gpa = std.testing.allocator;
+    var tmp = std.testing.tmpDir(.{});
+    defer tmp.cleanup();
+
+    const test_files = [_]std.zip.File{
+        .{ .name = "foo", .content = "this is just foo\n", .compression = .store },
+        .{ .name = "bar", .content = "another file\n", .compression = .deflate },
+    };
+
+    {
+        var file = try tmp.dir.createFile("test.zip", .{});
+        defer file.close();
+        var cache: [test_files.len]std.zip.FileCache = undefined;
+        try std.zip.writeFile(file, &test_files, &cache);
+    }
+
+    const zip_path = try std.fmt.allocPrint(gpa, "zig-cache/tmp/{s}/test.zip", .{tmp.sub_path});
+    defer gpa.free(zip_path);
+
+    var fb: TestFetchBuilder = undefined;
+    var fetch = try fb.build(gpa, tmp.dir, zip_path);
+    defer fb.deinit();
+
+    try fetch.run();
+
+    var out = try fb.packageDir();
+    defer out.close();
+
+    for (test_files) |test_file| {
+        var file = try out.openFile(test_file.name, .{});
+        defer file.close();
+        var buf: [4096]u8 = undefined;
+        const n = try file.reader().readAll(&buf);
+        try std.testing.expectEqualStrings(test_file.content, buf[0..n]);
+    }
+}
+
 test "tarball with duplicate paths" {
     // This tarball has duplicate path 'dir1/file1' to simulate case sensitve
     // file system on any file sytstem.