forked from ziglang/zig
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add std.zip and support zip files in build.zig.zon
fixes ziglang#17408 Co-authored-by: Joel Gustafson <joelg@mit.edu>
- Loading branch information
1 parent
c7ffdbc
commit 7f40093
Showing
6 changed files
with
606 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,352 @@ | ||
//! Reading and extracting ZIP archives.
//!
//! The .ZIP File Format Specification is found here:
//! https://pkwaredownloads.blob.core.windows.net/pem/APPNOTE.txt
const builtin = @import("builtin"); | ||
const std = @import("std"); | ||
const testing = std.testing; | ||
|
||
pub const File = @import("zip/test.zig").File; | ||
pub const FileCache = @import("zip/test.zig").FileCache; | ||
pub const writeFile = @import("zip/test.zig").writeFile; | ||
|
||
/// Compression method identifier stored as a 16-bit value in both the local
/// and the central directory file headers.
///
/// The enum is non-exhaustive: any other 16-bit value is representable, and
/// `decompress` rejects it with `error.UnsupportedCompressionMethod`.
pub const CompressionMethod = enum(u16) {
    /// Entry data is copied through unchanged.
    store = 0,
    /// Entry data is a raw DEFLATE stream.
    deflate = 8,
    /// Any method this module does not implement.
    _,
};
|
||
/// Signature expected at the start of every central directory file header.
pub const central_file_header_sig: [4]u8 = .{ 'P', 'K', 1, 2 };
/// Signature expected at the start of every local file header.
pub const local_file_header_sig: [4]u8 = .{ 'P', 'K', 3, 4 };
/// Signature expected at the start of the end-of-central-directory record.
pub const end_of_central_directory_sig: [4]u8 = .{ 'P', 'K', 5, 6 };
|
||
/// Fixed-size portion of the header that precedes each entry's data.
///
/// Every field is declared `align(1)` so the struct has no padding and an
/// alignment of 1, which lets it be read straight from the byte stream.
/// The header is followed by `filename_len` bytes of filename and
/// `extra_len` bytes of extra data; the entry's data starts after those.
pub const LocalFileHeader = extern struct {
    /// Must equal `local_file_header_sig`.
    signature: [4]u8 align(1),
    minimum_version: u16 align(1),
    // Previously the only field without `align(1)`, which raised the
    // alignment of the whole struct to 2.
    flags: u16 align(1),
    compression_method: CompressionMethod align(1),
    last_modification_time: u16 align(1),
    last_modification_date: u16 align(1),
    /// May be zero; otherwise it must match the central directory value.
    crc32: u32 align(1),
    /// May be zero; otherwise it must match the central directory value.
    compressed_size: u32 align(1),
    /// May be zero; otherwise it must match the central directory value.
    uncompressed_size: u32 align(1),
    /// Number of filename bytes that follow this header.
    filename_len: u16 align(1),
    /// Number of extra-field bytes that follow the filename.
    extra_len: u16 align(1),
};
|
||
/// Fixed-size portion of one central directory record.
///
/// All fields are `align(1)`, so the struct is unpadded and can be read
/// directly from the file. Each record is followed by `filename_len`,
/// `extra_len` and `comment_len` bytes of variable-length data, in that
/// order, before the next record begins.
pub const CentralDirectoryFileHeader = extern struct {
    /// Must equal `central_file_header_sig`.
    signature: [4]u8 align(1),
    version: u16 align(1),
    minimum_version: u16 align(1),
    flags: u16 align(1),
    compression_method: CompressionMethod align(1),
    last_modification_time: u16 align(1),
    last_modification_date: u16 align(1),
    /// Expected CRC-32 of the entry's uncompressed bytes.
    crc32: u32 align(1),
    /// Number of bytes of entry data stored in the archive.
    compressed_size: u32 align(1),
    /// Number of bytes the entry occupies once decompressed.
    uncompressed_size: u32 align(1),
    /// Length in bytes of the filename that follows this header.
    filename_len: u16 align(1),
    /// Length in bytes of the extra field that follows the filename.
    extra_len: u16 align(1),
    /// Length in bytes of the comment that follows the extra field.
    comment_len: u16 align(1),
    /// Only 0 is accepted; anything else is treated as a multi-disk archive.
    disk_number: u16 align(1),
    internal_file_attributes: u16 align(1),
    external_file_attributes: u32 align(1),
    /// Absolute file offset of this entry's `LocalFileHeader`.
    local_file_header_offset: u32 align(1),
};
|
||
/// Fixed-size portion of the record that terminates a ZIP archive.
///
/// All fields are `align(1)`, giving an unpadded 22-byte layout. The record
/// is followed by `comment_len` bytes of archive comment, which is why it
/// has to be searched for from the end of the file.
pub const EndOfCentralDirectoryRecord = extern struct {
    /// Must equal `end_of_central_directory_sig`.
    signature: [4]u8 align(1),
    /// Only 0 is accepted (multi-disk archives are unsupported).
    disk_number: u16 align(1),
    /// Only 0 is accepted (multi-disk archives are unsupported).
    central_directory_disk_number: u16 align(1),
    /// Must equal `record_count_total` for a single-disk archive.
    record_count_disk: u16 align(1),
    /// Number of central directory records in the archive.
    record_count_total: u16 align(1),
    central_directory_size: u32 align(1),
    /// Absolute file offset of the first central directory record.
    central_directory_offset: u32 align(1),
    /// Length in bytes of the comment that follows this record.
    comment_len: u16 align(1),
};
|
||
/// Locates and returns the end-of-central-directory record of `file`.
///
/// The record may be followed by a variable-length comment, so its position
/// is not known up front. A record-sized window is slid backwards from the
/// end of the file one byte at a time until it starts with the EOCD
/// signature and its comment-length field equals the number of bytes that
/// trail the window. This is ambiguous in general (a comment may itself
/// contain something that looks like a record); the candidate closest to
/// the end of the file wins.
///
/// Returns `error.ZipTruncated` if the file is shorter than a record or a
/// read comes up short, and `error.ZipMissingEocdr` if no candidate exists
/// within the maximum comment length or within the file.
pub fn findEocdr(file: std.fs.File) !EndOfCentralDirectoryRecord {
    // TODO: make this more efficient
    //       we need a backward_buffered_reader
    const eocdr_size = @sizeOf(EndOfCentralDirectoryRecord);

    const end_pos = try file.getEndPos();
    if (end_pos < eocdr_size) return error.ZipTruncated;

    // The record is filled in through a byte view of its own storage.
    var eocdr: EndOfCentralDirectoryRecord = undefined;
    const window: *[eocdr_size]u8 = @ptrCast(&eocdr);

    try file.seekFromEnd(-eocdr_size);
    const initial_read = try file.readAll(window);
    if (initial_read != eocdr_size) return error.ZipTruncated;

    // Number of bytes between the end of the window and the end of the file.
    var trailing_len: u16 = 0;
    while (!(std.mem.eql(u8, window[0..4], &end_of_central_directory_sig) and
        std.mem.readInt(u16, window[20..22], .little) == trailing_len))
    {
        if (trailing_len == std.math.maxInt(u16)) return error.ZipMissingEocdr;

        // Shift the window contents right by one byte to make room at the
        // front for the byte that precedes the current window.
        std.mem.copyBackwards(u8, window[1..], window[0 .. window.len - 1]);
        trailing_len += 1;

        if (@as(u64, eocdr_size) + @as(u64, trailing_len) > end_pos)
            return error.ZipMissingEocdr;

        try file.seekFromEnd(-eocdr_size - @as(i64, trailing_len));
        const byte_read = try file.readAll(window[0..1]);
        if (byte_read != 1) return error.ZipTruncated;
    }

    // The on-disk fields are little-endian; convert on big-endian hosts.
    if (builtin.target.cpu.arch.endian() != .little) {
        std.mem.byteSwapAllFields(@TypeOf(eocdr), &eocdr);
    }
    return eocdr;
}
|
||
/// `decompress` returns the actual CRC-32 of the decompressed bytes, | ||
/// which should be validated against the expected entry.crc32 value. | ||
/// `writer` can be anything with a `writeAll(self: *Self, chunk: []const u8) anyerror!void` method. | ||
/// Streams one entry's data from `reader` to `writer`, decoding it according
/// to `method`, and returns the actual CRC-32 of the decompressed bytes,
/// which should be validated against the expected entry.crc32 value.
///
/// `reader` must yield exactly the entry's compressed bytes and then report
/// end of stream.
/// `writer` can be anything with a `writeAll(self: *Self, chunk: []const u8) anyerror!void` method.
///
/// Errors:
/// - `error.ZipUncompressSizeMismatch` if the number of bytes produced
///   differs from `uncompressed_size`, for every method. The check is also
///   applied while streaming, so a hostile entry cannot make this function
///   write more than `uncompressed_size` bytes.
/// - `error.ZipDeflateTruncated` if input remains buffered after the
///   DEFLATE stream ends.
/// - `error.UnsupportedCompressionMethod` for any other method.
/// - Any error returned by `reader` or `writer`.
pub fn decompress(
    method: CompressionMethod,
    uncompressed_size: u32,
    reader: anytype,
    writer: anytype,
) !u32 {
    var hash = std.hash.Crc32.init();

    // Counted in 64 bits so that output larger than 4 GiB is detected as a
    // size mismatch instead of overflowing the counter.
    var total_uncompressed: u64 = 0;

    switch (method) {
        .store => {
            var buf: [std.mem.page_size]u8 = undefined;
            while (true) {
                const len = try reader.read(&buf);
                if (len == 0) break;
                total_uncompressed += @intCast(len);
                if (total_uncompressed > uncompressed_size)
                    return error.ZipUncompressSizeMismatch;
                try writer.writeAll(buf[0..len]);
                hash.update(buf[0..len]);
            }
        },
        .deflate => {
            var br = std.io.bufferedReader(reader);
            var decompressor = std.compress.flate.decompressor(br.reader());
            while (try decompressor.next()) |chunk| {
                total_uncompressed += @intCast(chunk.len);
                if (total_uncompressed > uncompressed_size)
                    return error.ZipUncompressSizeMismatch;
                try writer.writeAll(chunk);
                hash.update(chunk);
            }
            // Leftover buffered input means the compressed data extends past
            // the end of the DEFLATE stream.
            if (br.end != br.start)
                return error.ZipDeflateTruncated;
        },
        _ => return error.UnsupportedCompressionMethod,
    }

    if (total_uncompressed != uncompressed_size)
        return error.ZipUncompressSizeMismatch;

    return hash.final();
}
|
||
/// Walks the central directory of a single-disk ZIP archive, yielding one
/// `Entry` per record.
///
/// The iterator does not own `file`; the caller keeps it open for as long
/// as the iterator and any entries it produced are in use. Both `next` and
/// `Entry.extract` reposition the file cursor.
pub const Iterator = struct {
    file: std.fs.File,
    eocdr: EndOfCentralDirectoryRecord,
    /// Index of the record the next call to `next` will return.
    next_central_header_index: u16,
    /// Offset of that record, relative to `eocdr.central_directory_offset`.
    next_central_header_offset: u64,

    /// Locates the end-of-central-directory record of `file` and prepares
    /// to iterate from the first central directory record.
    ///
    /// Returns `error.ZipUnsupportedMultiDisk` for archives that span more
    /// than one disk, plus any error from `findEocdr`.
    pub fn init(file: std.fs.File) !Iterator {
        const eocdr = try findEocdr(file);

        // Don't support multi-disk archives.
        if (eocdr.disk_number != 0 or
            eocdr.central_directory_disk_number != 0 or
            eocdr.record_count_disk != eocdr.record_count_total)
        {
            return error.ZipUnsupportedMultiDisk;
        }

        return .{
            .file = file,
            .eocdr = eocdr,
            .next_central_header_offset = 0,
            .next_central_header_index = 0,
        };
    }

    /// Reads the next central directory record, or returns `null` once
    /// `eocdr.record_count_total` records have been returned.
    ///
    /// Returns `error.ZipHeader` if the record does not start with the
    /// central file header signature and `error.ZipUnsupportedMultiDisk` if
    /// the entry claims to live on another disk.
    pub fn next(self: *Iterator) !?Entry {
        if (self.next_central_header_index >= self.eocdr.record_count_total) {
            return null;
        }

        const header_file_offset: u64 = @as(u64, self.eocdr.central_directory_offset) + self.next_central_header_offset;
        const header = blk: {
            try self.file.seekTo(header_file_offset);
            break :blk try self.file.reader().readStructEndian(CentralDirectoryFileHeader, .little);
        };
        if (!std.mem.eql(u8, &header.signature, &central_file_header_sig))
            return error.ZipHeader;

        self.next_central_header_index += 1;
        // Widen every term before adding: the three lengths are u16 and
        // their sum does not fit in a u16 for a crafted record.
        self.next_central_header_offset += @as(u64, @sizeOf(CentralDirectoryFileHeader)) +
            @as(u64, header.filename_len) +
            @as(u64, header.extra_len) +
            @as(u64, header.comment_len);

        if (header.disk_number != 0)
            return error.ZipUnsupportedMultiDisk;
        return .{
            .header_file_offset = header_file_offset,
            .header = header,
        };
    }

    /// One central directory record together with its position in the file.
    pub const Entry = struct {
        /// Absolute file offset of the central directory record.
        header_file_offset: u64,
        header: CentralDirectoryFileHeader,

        /// Extracts this entry below `dest` and returns the CRC-32 of the
        /// bytes written, which the caller should compare against
        /// `header.crc32`.
        ///
        /// `zip_file` is the archive the entry was read from. `filename_buf`
        /// is scratch space that receives the entry's filename and must be
        /// at least `header.filename_len` bytes long.
        ///
        /// Filenames ending in '/' create a directory; anything else
        /// creates a new file (failing if it already exists) along with any
        /// missing parent directories.
        ///
        /// Errors include:
        /// - `error.ZipInsufficientBuffer` if `filename_buf` is too small.
        /// - `error.ZipTruncated` if the filename cannot be read in full.
        /// - `error.ZipHeader` if the local file header signature is wrong.
        /// - `error.ZipRedundancyFail` if the local header disagrees with
        ///   the central directory.
        /// - `error.ZipBadFilename` if the filename is empty, starts with
        ///   '/', or contains a ".." path component.
        /// - `error.ZipBadDirectorySize` if a directory entry has data.
        /// - `error.ZipDecompressTruncated` if not all compressed bytes
        ///   were consumed.
        /// - Any error from `decompress` or the file system.
        pub fn extract(self: Entry, zip_file: std.fs.File, filename_buf: []u8, dest: std.fs.Dir) !u32 {
            if (filename_buf.len < self.header.filename_len)
                return error.ZipInsufficientBuffer;
            const filename = filename_buf[0..self.header.filename_len];

            // The filename immediately follows the fixed-size central header.
            try zip_file.seekTo(self.header_file_offset + @sizeOf(CentralDirectoryFileHeader));
            {
                const len = try zip_file.readAll(filename);
                if (len != filename.len)
                    return error.ZipTruncated;
            }

            // Number of variable-length bytes between the fixed-size local
            // header and the entry's data.
            const local_data_header_offset: u64 = local_data_header_offset: {
                const local_header = blk: {
                    try zip_file.seekTo(self.header.local_file_header_offset);
                    break :blk try zip_file.reader().readStructEndian(LocalFileHeader, .little);
                };
                if (!std.mem.eql(u8, &local_header.signature, &local_file_header_sig))
                    return error.ZipHeader;
                // TODO: verify minimum_version
                // TODO: verify flags
                // TODO: verify compression method
                // TODO: verify last_mod_time
                // TODO: verify last_mod_date
                // TODO: verify filename_len and filename?
                // TODO: extra?

                // A zero in the local header is accepted as "not recorded
                // here"; any other value must agree with the central one.
                if (local_header.crc32 != 0 and local_header.crc32 != self.header.crc32)
                    return error.ZipRedundancyFail;
                if (local_header.compressed_size != 0 and
                    local_header.compressed_size != self.header.compressed_size)
                    return error.ZipRedundancyFail;
                if (local_header.uncompressed_size != 0 and
                    local_header.uncompressed_size != self.header.uncompressed_size)
                    return error.ZipRedundancyFail;

                break :local_data_header_offset @as(u64, local_header.filename_len) +
                    @as(u64, local_header.extra_len);
            };

            if (filename.len == 0 or filename[0] == '/')
                return error.ZipBadFilename;
            // Archive contents are untrusted: a ".." component would let an
            // entry be created outside of `dest`.
            if (hasParentDirComponent(filename))
                return error.ZipBadFilename;

            // All entries that end in '/' are directories
            if (filename[filename.len - 1] == '/') {
                if (self.header.uncompressed_size != 0)
                    return error.ZipBadDirectorySize;
                try dest.makePath(filename[0 .. filename.len - 1]);
                return std.hash.Crc32.hash(&.{});
            }

            const out_file = blk: {
                if (std.fs.path.dirname(filename)) |dirname| {
                    var parent_dir = try dest.makeOpenPath(dirname, .{});
                    defer parent_dir.close();

                    const basename = std.fs.path.basename(filename);
                    break :blk try parent_dir.createFile(basename, .{ .exclusive = true });
                }
                break :blk try dest.createFile(filename, .{ .exclusive = true });
            };
            defer out_file.close();
            const local_data_file_offset: u64 =
                @as(u64, self.header.local_file_header_offset) +
                @as(u64, @sizeOf(LocalFileHeader)) +
                local_data_header_offset;
            try zip_file.seekTo(local_data_file_offset);
            // Cap reads at the entry's compressed size so decompression
            // cannot run into the data that follows it.
            var limited_reader = std.io.limitedReader(zip_file.reader(), self.header.compressed_size);
            const crc = try decompress(
                self.header.compression_method,
                self.header.uncompressed_size,
                limited_reader.reader(),
                out_file.writer(),
            );
            if (limited_reader.bytes_left != 0)
                return error.ZipDecompressTruncated;
            return crc;
        }

        /// Reports whether any path component of `filename` is "..".
        /// Both '/' and '\\' are treated as separators, because the host
        /// file system may accept either one.
        fn hasParentDirComponent(filename: []const u8) bool {
            var components = std.mem.tokenizeAny(u8, filename, "/\\");
            while (components.next()) |component| {
                if (std.mem.eql(u8, component, "..")) return true;
            }
            return false;
        }
    };
};
|
||
/// Extracts every entry of the ZIP archive `file` into the directory `dest`.
///
/// Each entry's CRC-32 is checked after extraction; the first mismatch
/// aborts with `error.ZipCrcMismatch`. Entries extracted before a failure
/// are left in place. Any error from `Iterator.init`, `Iterator.next` or
/// `Iterator.Entry.extract` is returned unchanged.
pub fn pipeToFileSystem(dest: std.fs.Dir, file: std.fs.File) !void {
    // Scratch space reused for the filename of every entry.
    var name_storage: [std.fs.MAX_PATH_BYTES]u8 = undefined;

    var entries = try Iterator.init(file);
    while (try entries.next()) |entry| {
        const actual_crc32 = try entry.extract(file, &name_storage, dest);
        if (actual_crc32 != entry.header.crc32) return error.ZipCrcMismatch;
    }
}
|
||
/// Round-trips `files` through a ZIP archive, providing one `FileCache`
/// slot per file so callers do not have to allocate them.
fn testZip(comptime files: []const File) !void {
    var file_caches: [files.len]FileCache = undefined;
    try testZipWithCache(files, &file_caches);
}
/// Writes `files` into an archive named "zip" inside a temporary directory,
/// extracts that archive into the same directory, and checks that every
/// extracted file has the expected content.
///
/// `cache` is passed through to `writeFile`.
fn testZipWithCache(files: []const File, cache: []FileCache) !void {
    var tmp = testing.tmpDir(.{ .no_follow = true });
    defer tmp.cleanup();
    const dir = tmp.dir;

    // Build the archive; the handle is closed before it is reopened below.
    {
        const archive = try dir.createFile("zip", .{});
        defer archive.close();
        try writeFile(archive, files, cache);
    }

    const zip_file = try dir.openFile("zip", .{});
    defer zip_file.close();
    try pipeToFileSystem(dir, zip_file);

    for (files) |expected| {
        const extracted = try dir.openFile(expected.name, .{});
        defer extracted.close();

        var content_buf: [4096]u8 = undefined;
        const content_len = try extracted.reader().readAll(&content_buf);
        try testing.expectEqualStrings(expected.content, content_buf[0..content_len]);
    }
}
|
||
// A single stored file at the archive root.
test "zip one file" {
    const files = [_]File{
        .{ .name = "onefile.txt", .content = "Just a single file\n", .compression = .store },
    };
    try testZip(&files);
}
// Several stored files, including ones that need parent directories created.
test "zip multiple files" {
    const files = [_]File{
        .{ .name = "foo", .content = "a foo file\n", .compression = .store },
        .{ .name = "subdir/bar", .content = "bar is this right?\nanother newline\n", .compression = .store },
        .{ .name = "subdir/another/baz", .content = "bazzy mc bazzerson", .compression = .store },
    };
    try testZip(&files);
}
// Deflated and stored entries mixed in one archive.
test "zip deflated" {
    const files = [_]File{
        .{ .name = "deflateme", .content = "This is a deflated file.\nIt should be smaller in the Zip file1\n", .compression = .deflate },
        // TODO: re-enable this if/when we add support for deflate64
        //.{ .name = "deflateme64", .content = "The 64k version of deflate!\n", .compression = .deflate64 },
        .{ .name = "raw", .content = "Not all files need to be deflated in the same Zip.\n", .compression = .store },
    };
    try testZip(&files);
}
Oops, something went wrong.