From deae1ffc3b9a74818ce4f1fbe2f0bfe6945460cf Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Thu, 4 Jul 2024 15:47:47 -0700 Subject: [PATCH] frontend: TrackedInst stores FileIndex instead of path digest The purpose of using path digest was to reference a file in a serializable manner. Now that there is a stable index associated with files, it is a superior way to accomplish that goal, since removes one layer of indirection, and makes TrackedInst 8 bytes instead of 20. The saved Zig Compiler State file for "hello world" goes from 1.3M to 1.2M with this change. --- src/Compilation.zig | 20 +++++----- src/InternPool.zig | 25 +++++++++++-- src/Sema.zig | 17 ++++----- src/Type.zig | 2 +- src/Zcu.zig | 91 ++++++++++++++++++++++----------------------- 5 files changed, 85 insertions(+), 70 deletions(-) diff --git a/src/Compilation.zig b/src/Compilation.zig index cfe7a21041e7..eda5f63a589e 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -2649,7 +2649,7 @@ fn reportMultiModuleErrors(zcu: *Zcu) !void { .import => |import| try Module.ErrorMsg.init( gpa, .{ - .base_node_inst = try ip.trackZir(gpa, zcu.filePathDigest(import.file), .main_struct_inst), + .base_node_inst = try ip.trackZir(gpa, import.file, .main_struct_inst), .offset = .{ .token_abs = import.token }, }, "imported from module {s}", @@ -2658,7 +2658,7 @@ fn reportMultiModuleErrors(zcu: *Zcu) !void { .root => |pkg| try Module.ErrorMsg.init( gpa, .{ - .base_node_inst = try ip.trackZir(gpa, zcu.filePathDigest(file_index), .main_struct_inst), + .base_node_inst = try ip.trackZir(gpa, file_index, .main_struct_inst), .offset = .entire_file, }, "root of module {s}", @@ -2672,7 +2672,7 @@ fn reportMultiModuleErrors(zcu: *Zcu) !void { notes[num_notes] = try Module.ErrorMsg.init( gpa, .{ - .base_node_inst = try ip.trackZir(gpa, zcu.filePathDigest(file_index), .main_struct_inst), + .base_node_inst = try ip.trackZir(gpa, file_index, .main_struct_inst), .offset = .entire_file, }, "{} more references omitted", @@ -2684,7 +2684,7 @@ fn reportMultiModuleErrors(zcu: *Zcu) !void { const err = try Module.ErrorMsg.create( gpa, .{ - .base_node_inst = try ip.trackZir(gpa, zcu.filePathDigest(file_index), .main_struct_inst), + .base_node_inst = try ip.trackZir(gpa, file_index, .main_struct_inst), .offset = .entire_file, }, "file exists in multiple modules", @@ -2786,7 +2786,7 @@ pub fn saveState(comp: *Compilation) !void { .first_dependency_len = @intCast(ip.first_dependency.count()), .dep_entries_len = @intCast(ip.dep_entries.items.len), .free_dep_entries_len = @intCast(ip.free_dep_entries.items.len), - .files_len = @intCast(zcu.files.entries.len), + .files_len = @intCast(ip.files.entries.len), }, }; addBuf(&bufs_list, &bufs_len, mem.asBytes(&header)); @@ -2811,8 +2811,8 @@ pub fn saveState(comp: *Compilation) !void { addBuf(&bufs_list, &bufs_len, mem.sliceAsBytes(ip.dep_entries.items)); addBuf(&bufs_list, &bufs_len, mem.sliceAsBytes(ip.free_dep_entries.items)); - addBuf(&bufs_list, &bufs_len, mem.sliceAsBytes(zcu.files.keys())); - addBuf(&bufs_list, &bufs_len, mem.sliceAsBytes(zcu.files.values())); + addBuf(&bufs_list, &bufs_len, mem.sliceAsBytes(ip.files.keys())); + addBuf(&bufs_list, &bufs_len, mem.sliceAsBytes(ip.files.values())); // TODO: compilation errors // TODO: namespaces @@ -4060,7 +4060,7 @@ fn workerAstGenFile( defer child_prog_node.end(); const zcu = comp.module.?; - zcu.astGenFile(file, path_digest, root_decl) catch |err| switch (err) { + zcu.astGenFile(file, file_index, path_digest, root_decl) catch |err| switch (err) { error.AnalysisFail => return, else => { file.status = .retryable_failure; @@ -4477,11 +4477,11 @@ fn reportRetryableAstGenError( const src_loc: Module.LazySrcLoc = switch (src) { .root => .{ - .base_node_inst = try zcu.intern_pool.trackZir(gpa, zcu.filePathDigest(file_index), .main_struct_inst), + .base_node_inst = try zcu.intern_pool.trackZir(gpa, file_index, .main_struct_inst), .offset = .entire_file, }, .import => |info| .{ - .base_node_inst = try zcu.intern_pool.trackZir(gpa, zcu.filePathDigest(info.importing_file), .main_struct_inst), + .base_node_inst = try zcu.intern_pool.trackZir(gpa, info.importing_file, .main_struct_inst), .offset = .{ .token_abs = info.import_tok }, }, }; diff --git a/src/InternPool.zig b/src/InternPool.zig index e79de2651620..18cd21d08dec 100644 --- a/src/InternPool.zig +++ b/src/InternPool.zig @@ -92,12 +92,27 @@ dep_entries: std.ArrayListUnmanaged(DepEntry) = .{}, /// garbage collection pass. free_dep_entries: std.ArrayListUnmanaged(DepEntry.Index) = .{}, +/// Elements are ordered identically to the `import_table` field of `Zcu`. +/// +/// Unlike `import_table`, this data is serialized as part of incremental +/// compilation state. +/// +/// Key is the hash of the path to this file, used to store +/// `InternPool.TrackedInst`. +/// +/// Value is the `Decl` of the struct that represents this `File`. +files: std.AutoArrayHashMapUnmanaged(Cache.BinDigest, OptionalDeclIndex) = .{}, + +pub const FileIndex = enum(u32) { + _, +}; + pub const TrackedInst = extern struct { - path_digest: Cache.BinDigest, + file: FileIndex, inst: Zir.Inst.Index, comptime { // The fields should be tightly packed. See also serialiation logic in `Compilation.saveState`. - assert(@sizeOf(@This()) == Cache.bin_digest_len + @sizeOf(Zir.Inst.Index)); + assert(@sizeOf(@This()) == @sizeOf(FileIndex) + @sizeOf(Zir.Inst.Index)); } pub const Index = enum(u32) { _, @@ -126,11 +141,11 @@ pub const TrackedInst = extern struct { pub fn trackZir( ip: *InternPool, gpa: Allocator, - path_digest: Cache.BinDigest, + file: FileIndex, inst: Zir.Inst.Index, ) Allocator.Error!TrackedInst.Index { const key: TrackedInst = .{ - .path_digest = path_digest, + .file = file, .inst = inst, }; const gop = try ip.tracked_insts.getOrPut(gpa, key); @@ -4597,6 +4612,8 @@ pub fn deinit(ip: *InternPool, gpa: Allocator) void { ip.dep_entries.deinit(gpa); ip.free_dep_entries.deinit(gpa); + ip.files.deinit(gpa); + ip.* = undefined; } diff --git a/src/Sema.zig b/src/Sema.zig index b2d4fd9a2428..40fe11af3a67 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -839,8 +839,7 @@ pub const Block = struct { const zcu = sema.mod; const ip = &zcu.intern_pool; const file_index = block.getFileScopeIndex(zcu); - const path_digest = zcu.filePathDigest(file_index); - return ip.trackZir(gpa, path_digest, inst); + return ip.trackZir(gpa, file_index, inst); } }; @@ -993,7 +992,7 @@ fn analyzeBodyInner( try sema.inst_map.ensureSpaceForInstructions(sema.gpa, body); - const mod = sema.mod; + const zcu = sema.mod; const map = &sema.inst_map; const tags = sema.code.instructions.items(.tag); const datas = sema.code.instructions.items(.data); @@ -1013,9 +1012,9 @@ fn analyzeBodyInner( // The hashmap lookup in here is a little expensive, and LLVM fails to optimize it away. if (build_options.enable_logging) { std.log.scoped(.sema_zir).debug("sema ZIR {s} %{d}", .{ sub_file_path: { - const path_digest = block.src_base_inst.resolveFull(&mod.intern_pool).path_digest; - const index = mod.files.getIndex(path_digest).?; - break :sub_file_path mod.import_table.values()[index].sub_file_path; + const file_index = block.src_base_inst.resolveFull(&zcu.intern_pool).file; + const file = zcu.fileByIndex(file_index); + break :sub_file_path file.sub_file_path; }, inst }); } @@ -1776,9 +1775,9 @@ fn analyzeBodyInner( const inline_body = sema.code.bodySlice(extra.end, extra.data.body_len); const err_union = try sema.resolveInst(extra.data.operand); const err_union_ty = sema.typeOf(err_union); - if (err_union_ty.zigTypeTag(mod) != .ErrorUnion) { + if (err_union_ty.zigTypeTag(zcu) != .ErrorUnion) { return sema.fail(block, operand_src, "expected error union type, found '{}'", .{ - err_union_ty.fmt(mod), + err_union_ty.fmt(zcu), }); } const is_non_err = try sema.analyzeIsNonErrComptimeOnly(block, operand_src, err_union); @@ -6003,7 +6002,7 @@ fn zirCImport(sema: *Sema, parent_block: *Block, inst: Zir.Inst.Index) CompileEr const path_digest = zcu.filePathDigest(result.file_index); const root_decl = zcu.fileRootDecl(result.file_index); - zcu.astGenFile(result.file, path_digest, root_decl) catch |err| + zcu.astGenFile(result.file, result.file_index, path_digest, root_decl) catch |err| return sema.fail(&child_block, src, "C import failed: {s}", .{@errorName(err)}); try zcu.ensureFileAnalyzed(result.file_index); diff --git a/src/Type.zig b/src/Type.zig index 8c70e34fceb0..49f127607057 100644 --- a/src/Type.zig +++ b/src/Type.zig @@ -3455,7 +3455,7 @@ pub fn typeDeclSrcLine(ty: Type, zcu: *const Zcu) ?u32 { else => return null, }; const info = tracked.resolveFull(&zcu.intern_pool); - const file = zcu.import_table.values()[zcu.files.getIndex(info.path_digest).?]; + const file = zcu.fileByIndex(info.file); assert(file.zir_loaded); const zir = file.zir; const inst = zir.instructions.get(@intFromEnum(info.inst)); diff --git a/src/Zcu.zig b/src/Zcu.zig index 50bbeefdf226..203238c6334a 100644 --- a/src/Zcu.zig +++ b/src/Zcu.zig @@ -105,19 +105,6 @@ multi_exports: std.AutoArrayHashMapUnmanaged(AnalUnit, extern struct { /// Indexes correspond 1:1 to `files`. import_table: std.StringArrayHashMapUnmanaged(*File) = .{}, -/// Elements are ordered identically to `import_table`. -/// -/// Unlike `import_table`, this data is serialized as part of incremental -/// compilation state. -/// -/// Key is the hash of the path to this file, used to store -/// `InternPool.TrackedInst`. -/// -/// Value is the `Decl` of the struct that represents this `File`. -/// -/// Protected by Compilation's mutex. -files: std.AutoArrayHashMapUnmanaged(Cache.BinDigest, Decl.OptionalIndex) = .{}, - /// The set of all the files which have been loaded with `@embedFile` in the Module. /// We keep track of this in order to iterate over it and check which files have been /// modified on the file system when an update is requested, as well as to cache @@ -572,19 +559,20 @@ pub const Decl = struct { } pub fn navSrcLine(decl: Decl, zcu: *Zcu) u32 { + const ip = &zcu.intern_pool; const tracked = decl.zir_decl_index.unwrap() orelse inst: { // generic instantiation assert(decl.has_tv); assert(decl.owns_tv); - const generic_owner_func = switch (zcu.intern_pool.indexToKey(decl.val.toIntern())) { + const generic_owner_func = switch (ip.indexToKey(decl.val.toIntern())) { .func => |func| func.generic_owner, else => return 0, // TODO: this is probably a `variable` or something; figure this out when we finish sorting out `Decl`. }; const generic_owner_decl = zcu.declPtr(zcu.funcInfo(generic_owner_func).owner_decl); break :inst generic_owner_decl.zir_decl_index.unwrap().?; }; - const info = tracked.resolveFull(&zcu.intern_pool); - const file = zcu.import_table.values()[zcu.files.getIndex(info.path_digest).?]; + const info = tracked.resolveFull(ip); + const file = zcu.fileByIndex(info.file); assert(file.zir_loaded); const zir = file.zir; const inst = zir.instructions.get(@intFromEnum(info.inst)); @@ -969,9 +957,7 @@ pub const File = struct { } } - pub const Index = enum(u32) { - _, - }; + pub const Index = InternPool.FileIndex; }; pub const EmbedFile = struct { @@ -2351,14 +2337,12 @@ pub const LazySrcLoc = struct { }; pub fn resolveBaseNode(base_node_inst: InternPool.TrackedInst.Index, zcu: *Zcu) struct { *File, Ast.Node.Index } { - const want_path_digest, const zir_inst = inst: { - const info = base_node_inst.resolveFull(&zcu.intern_pool); - break :inst .{ info.path_digest, info.inst }; - }; - const file = file: { - const index = zcu.files.getIndex(want_path_digest).?; - break :file zcu.import_table.values()[index]; + const ip = &zcu.intern_pool; + const file_index, const zir_inst = inst: { + const info = base_node_inst.resolveFull(ip); + break :inst .{ info.file, info.inst }; }; + const file = zcu.fileByIndex(file_index); assert(file.zir_loaded); const zir = file.zir; @@ -2429,7 +2413,6 @@ pub fn deinit(zcu: *Zcu) void { zcu.destroyFile(file_index); } zcu.import_table.deinit(gpa); - zcu.files.deinit(gpa); for (zcu.embed_table.keys(), zcu.embed_table.values()) |path, embed_file| { gpa.free(path); @@ -2596,7 +2579,16 @@ comptime { } } -pub fn astGenFile(zcu: *Zcu, file: *File, path_digest: Cache.BinDigest, opt_root_decl: Zcu.Decl.OptionalIndex) !void { +pub fn astGenFile( + zcu: *Zcu, + file: *File, + /// This parameter is provided separately from `file` because it is not + /// safe to access `import_table` without a lock, and this index is needed + /// in the call to `updateZirRefs`. + file_index: File.Index, + path_digest: Cache.BinDigest, + opt_root_decl: Zcu.Decl.OptionalIndex, +) !void { assert(!file.mod.isBuiltin()); const tracy = trace(@src()); @@ -2850,7 +2842,7 @@ pub fn astGenFile(zcu: *Zcu, file: *File, path_digest: Cache.BinDigest, opt_root } if (file.prev_zir) |prev_zir| { - try updateZirRefs(zcu, file, prev_zir.*, path_digest); + try updateZirRefs(zcu, file, file_index, prev_zir.*); // No need to keep previous ZIR. prev_zir.deinit(gpa); gpa.destroy(prev_zir); @@ -2939,7 +2931,7 @@ fn loadZirCacheBody(gpa: Allocator, header: Zir.Header, cache_file: std.fs.File) /// This is called from the AstGen thread pool, so must acquire /// the Compilation mutex when acting on shared state. -fn updateZirRefs(zcu: *Module, file: *File, old_zir: Zir, path_digest: Cache.BinDigest) !void { +fn updateZirRefs(zcu: *Module, file: *File, file_index: File.Index, old_zir: Zir) !void { const gpa = zcu.gpa; const new_zir = file.zir; @@ -2955,7 +2947,7 @@ fn updateZirRefs(zcu: *Module, file: *File, old_zir: Zir, path_digest: Cache.Bin // iterating over this full set for every updated file. for (zcu.intern_pool.tracked_insts.keys(), 0..) |*ti, idx_raw| { const ti_idx: InternPool.TrackedInst.Index = @enumFromInt(idx_raw); - if (!std.mem.eql(u8, &ti.path_digest, &path_digest)) continue; + if (ti.file != file_index) continue; const old_inst = ti.inst; ti.inst = inst_map.get(ti.inst) orelse { // Tracking failed for this instruction. Invalidate associated `src_hash` deps. @@ -3849,7 +3841,7 @@ fn getFileRootStruct( const decls = file.zir.bodySlice(extra_index, decls_len); extra_index += decls_len; - const tracked_inst = try ip.trackZir(gpa, zcu.filePathDigest(file_index), .main_struct_inst); + const tracked_inst = try ip.trackZir(gpa, file_index, .main_struct_inst); const wip_ty = switch (try ip.getStructType(gpa, .{ .layout = .auto, .fields_len = fields_len, @@ -4151,7 +4143,7 @@ fn semaDecl(zcu: *Zcu, decl_index: Decl.Index) !SemaDeclResult { // Every Decl (other than file root Decls, which do not have a ZIR index) has a dependency on its own source. try sema.declareDependency(.{ .src_hash = try ip.trackZir( gpa, - zcu.filePathDigest(decl.getFileScopeIndex(zcu)), + decl.getFileScopeIndex(zcu), decl_inst, ) }); @@ -4391,17 +4383,19 @@ pub fn importPkg(zcu: *Zcu, mod: *Package.Module) !ImportFileResult { }; } - try zcu.files.ensureUnusedCapacity(gpa, 1); + const ip = &zcu.intern_pool; + + try ip.files.ensureUnusedCapacity(gpa, 1); if (mod.builtin_file) |builtin_file| { keep_resolved_path = true; // It's now owned by import_table. gop.value_ptr.* = builtin_file; try builtin_file.addReference(zcu.*, .{ .root = mod }); const path_digest = computePathDigest(zcu, mod, builtin_file.sub_file_path); - zcu.files.putAssumeCapacityNoClobber(path_digest, .none); + ip.files.putAssumeCapacityNoClobber(path_digest, .none); return .{ .file = builtin_file, - .file_index = @enumFromInt(zcu.files.entries.len - 1), + .file_index = @enumFromInt(ip.files.entries.len - 1), .is_new = false, .is_pkg = true, }; @@ -4431,10 +4425,10 @@ pub fn importPkg(zcu: *Zcu, mod: *Package.Module) !ImportFileResult { const path_digest = computePathDigest(zcu, mod, sub_file_path); try new_file.addReference(zcu.*, .{ .root = mod }); - zcu.files.putAssumeCapacityNoClobber(path_digest, .none); + ip.files.putAssumeCapacityNoClobber(path_digest, .none); return .{ .file = new_file, - .file_index = @enumFromInt(zcu.files.entries.len - 1), + .file_index = @enumFromInt(ip.files.entries.len - 1), .is_new = true, .is_pkg = true, }; @@ -4486,7 +4480,9 @@ pub fn importFile( .is_pkg = false, }; - try zcu.files.ensureUnusedCapacity(gpa, 1); + const ip = &zcu.intern_pool; + + try ip.files.ensureUnusedCapacity(gpa, 1); const new_file = try gpa.create(File); errdefer gpa.destroy(new_file); @@ -4528,10 +4524,10 @@ pub fn importFile( }; const path_digest = computePathDigest(zcu, mod, sub_file_path); - zcu.files.putAssumeCapacityNoClobber(path_digest, .none); + ip.files.putAssumeCapacityNoClobber(path_digest, .none); return .{ .file = new_file, - .file_index = @enumFromInt(zcu.files.entries.len - 1), + .file_index = @enumFromInt(ip.files.entries.len - 1), .is_new = true, .is_pkg = false, }; @@ -4892,7 +4888,7 @@ fn scanDecl(iter: *ScanDeclIter, decl_inst: Zir.Inst.Index) Allocator.Error!void } const parent_file_scope_index = iter.parent_decl.getFileScopeIndex(zcu); - const tracked_inst = try ip.trackZir(gpa, zcu.filePathDigest(parent_file_scope_index), decl_inst); + const tracked_inst = try ip.trackZir(gpa, parent_file_scope_index, decl_inst); // We create a Decl for it regardless of analysis status. @@ -5743,7 +5739,7 @@ fn reportRetryableFileError( const err_msg = try ErrorMsg.create( gpa, .{ - .base_node_inst = try ip.trackZir(gpa, zcu.filePathDigest(file_index), .main_struct_inst), + .base_node_inst = try ip.trackZir(gpa, file_index, .main_struct_inst), .offset = .entire_file, }, format, @@ -6601,13 +6597,16 @@ pub fn fileByIndex(zcu: *const Zcu, i: File.Index) *File { /// Returns the `Decl` of the struct that represents this `File`. pub fn fileRootDecl(zcu: *const Zcu, i: File.Index) Decl.OptionalIndex { - return zcu.files.values()[@intFromEnum(i)]; + const ip = &zcu.intern_pool; + return ip.files.values()[@intFromEnum(i)]; } pub fn setFileRootDecl(zcu: *Zcu, i: File.Index, root_decl: Decl.OptionalIndex) void { - zcu.files.values()[@intFromEnum(i)] = root_decl; + const ip = &zcu.intern_pool; + ip.files.values()[@intFromEnum(i)] = root_decl; } pub fn filePathDigest(zcu: *const Zcu, i: File.Index) Cache.BinDigest { - return zcu.files.keys()[@intFromEnum(i)]; + const ip = &zcu.intern_pool; + return ip.files.keys()[@intFromEnum(i)]; }