From 5843ac52d5c28084bff3fb7a3b3194745c0724a7 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Mon, 17 Mar 2025 16:54:36 -0400 Subject: [PATCH 01/20] fix _apply_pure deprecation (#57800) Fix typo from #57532 and ensure "inference world" code has the permitted ability to inspect the max world age, since it needs that info in order to cache results correctly. (cherry picked from commit 7ee404ccc5293a10062c876d2e2c87f9591fdeee) --- base/boot.jl | 2 +- src/gf.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/base/boot.jl b/base/boot.jl index 8cd032817cebe..32975e96af583 100644 --- a/base/boot.jl +++ b/base/boot.jl @@ -1017,7 +1017,7 @@ _setparser!(parser) = setglobal!(Core, :_parse, parser) # support for deprecated uses of builtin functions _apply(x...) = _apply_iterate(Main.Base.iterate, x...) -_apply_pure(x...) = invoke_in_world_total(typemax_UInt, x...) +const _apply_pure = _apply const _call_latest = invokelatest const _call_in_world = invoke_in_world diff --git a/src/gf.c b/src/gf.c index cc3966da5f393..f8d88c4e44e38 100644 --- a/src/gf.c +++ b/src/gf.c @@ -532,7 +532,10 @@ JL_DLLEXPORT jl_value_t *jl_call_in_typeinf_world(jl_value_t **args, int nargs) jl_task_t *ct = jl_current_task; size_t last_age = ct->world_age; ct->world_age = jl_typeinf_world; + int last_pure = ct->ptls->in_pure_callback; + ct->ptls->in_pure_callback = 0; jl_value_t *ret = jl_apply(args, nargs); + ct->ptls->in_pure_callback = last_pure; ct->world_age = last_age; return ret; } From 665cf7c7696db8275be6812c6d087cd2fbf44cd5 Mon Sep 17 00:00:00 2001 From: Cody Tapscott <84105208+topolarity@users.noreply.github.com> Date: Wed, 26 Mar 2025 15:40:00 -0400 Subject: [PATCH 02/20] precompile_utils: Don't auto-enqueue `macro` methods for pre-compilation (#57833) Despite disabling these from being compiled in `gf.c` for dynamic invocations, the pre-compilation code was adding `macro` Methods anyway to the workqueue. Replaces https://github.com/JuliaLang/julia/pull/57782 (cherry picked from commit 6ce51d3c1792c7756c88bda26d75388cb7371540) --- src/precompile_utils.c | 4 ++++ test/precompile.jl | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/src/precompile_utils.c b/src/precompile_utils.c index 281dbe0163586..c602a15c1fb74 100644 --- a/src/precompile_utils.c +++ b/src/precompile_utils.c @@ -170,6 +170,10 @@ static void jl_compile_all_defs(jl_array_t *mis, int all) size_t i, l = jl_array_nrows(allmeths); for (i = 0; i < l; i++) { jl_method_t *m = (jl_method_t*)jl_array_ptr_ref(allmeths, i); + int is_macro_method = jl_symbol_name(m->name)[0] == '@'; + if (is_macro_method && !all) + continue; // Avoid inference / pre-compilation for macros + if (jl_is_datatype(m->sig) && jl_isa_compileable_sig((jl_tupletype_t*)m->sig, jl_emptysvec, m)) { // method has a single compilable specialization, e.g. its definition // signature is concrete. in this case we can just hint it. 
diff --git a/test/precompile.jl b/test/precompile.jl index 7c5c63a277e27..f7b31c125014c 100644 --- a/test/precompile.jl +++ b/test/precompile.jl @@ -2416,4 +2416,9 @@ precompile_test_harness("Package top-level load itself") do load_path end end +# Verify that inference / caching was not performed for any macros in the sysimage +let m = only(methods(Base.var"@big_str")) + @test m.specializations === Core.svec() || !isdefined(m.specializations, :cache) +end + finish_precompile_test!() From 383a8b8bd926b4b9a76b51ea73328e598aaac932 Mon Sep 17 00:00:00 2001 From: Neven Sajko <4944410+nsajko@users.noreply.github.com> Date: Thu, 27 Mar 2025 15:12:20 +0100 Subject: [PATCH 03/20] `Compiler`: `abstract_eval_invoke_inst`: type assert `Expr` (#57860) Should make the code less vulnerable to invalidation. (cherry picked from commit ed3fccc0be0f3cf48ba8f0910e5a5faa58280787) --- Compiler/src/ssair/irinterp.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Compiler/src/ssair/irinterp.jl b/Compiler/src/ssair/irinterp.jl index 084f28f0aa523..3d72da72625be 100644 --- a/Compiler/src/ssair/irinterp.jl +++ b/Compiler/src/ssair/irinterp.jl @@ -32,7 +32,7 @@ function concrete_eval_invoke(interp::AbstractInterpreter, ci::CodeInstance, arg end function abstract_eval_invoke_inst(interp::AbstractInterpreter, inst::Instruction, irsv::IRInterpretationState) - stmt = inst[:stmt] + stmt = inst[:stmt]::Expr ci = stmt.args[1] if ci isa MethodInstance world = frame_world(irsv) From 4ffd73574d42defa32d9bdd7dbacfc864ce6c33c Mon Sep 17 00:00:00 2001 From: Neven Sajko <4944410+nsajko@users.noreply.github.com> Date: Thu, 27 Mar 2025 19:29:55 +0100 Subject: [PATCH 04/20] `Compiler`: `walk_to_defs`, `collect_leaves`: specialize for `predecessors` (#57859) Should make the code less vulnerable to invalidation. (cherry picked from commit d9441ac9bc055562ba9009357b465e69c16ade68) --- Compiler/src/ssair/passes.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Compiler/src/ssair/passes.jl b/Compiler/src/ssair/passes.jl index 14fc0ab20913c..46ed299167060 100644 --- a/Compiler/src/ssair/passes.jl +++ b/Compiler/src/ssair/passes.jl @@ -183,7 +183,7 @@ function find_def_for_use( end function collect_leaves(compact::IncrementalCompact, @nospecialize(val), @nospecialize(typeconstraint), 𝕃ₒ::AbstractLattice, - predecessors = ((@nospecialize(def), compact::IncrementalCompact) -> isa(def, PhiNode) ? def.values : nothing)) + predecessors::Pre = ((@nospecialize(def), compact::IncrementalCompact) -> isa(def, PhiNode) ? def.values : nothing)) where {Pre} if isa(val, Union{OldSSAValue, SSAValue}) val, typeconstraint = simple_walk_constraint(compact, val, typeconstraint) end @@ -271,7 +271,7 @@ Starting at `val` walk use-def chains to get all the leaves feeding into this `v `predecessors(def, compact)` is a callback which should return the set of possible predecessors for a "phi-like" node (PhiNode or Core.ifelse) or `nothing` otherwise. 
""" -function walk_to_defs(compact::IncrementalCompact, @nospecialize(defssa), @nospecialize(typeconstraint), predecessors, 𝕃ₒ::AbstractLattice) +function walk_to_defs(compact::IncrementalCompact, @nospecialize(defssa), @nospecialize(typeconstraint), predecessors::Pre, 𝕃ₒ::AbstractLattice) where {Pre} visited_philikes = AnySSAValue[] isa(defssa, AnySSAValue) || return Any[defssa], visited_philikes def = compact[defssa][:stmt] From a2460d320043d7d0afec2fb4915981c465110621 Mon Sep 17 00:00:00 2001 From: Diogo Netto <61364108+d-netto@users.noreply.github.com> Date: Fri, 28 Mar 2025 01:56:26 -0300 Subject: [PATCH 05/20] only update fragmentation data for pages that are not lazily freed (#57907) We are including data from lazily freed pages into this metric, which makes it inaccurate. Thanks @qinsoon for spotting it. (cherry picked from commit d6fdbf5212aaf270749138db2e285141e4de1dc6) --- src/gc-stock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gc-stock.c b/src/gc-stock.c index 3b49b82caf530..66a1724ecf4ce 100644 --- a/src/gc-stock.c +++ b/src/gc-stock.c @@ -948,6 +948,7 @@ static void gc_sweep_page(gc_page_profiler_serializer_t *s, jl_gc_pool_t *p, jl_ done: if (re_use_page) { + gc_update_page_fragmentation_data(pg); push_lf_back(allocd, pg); } else { @@ -956,7 +957,6 @@ static void gc_sweep_page(gc_page_profiler_serializer_t *s, jl_gc_pool_t *p, jl_ push_lf_back(&global_page_pool_lazily_freed, pg); } gc_page_profile_write_to_file(s); - gc_update_page_fragmentation_data(pg); gc_time_count_page(freedall, pg_skpd); jl_ptls_t ptls = jl_current_task->ptls; // Note that we aggregate the `pool_live_bytes` over all threads before returning this From 19abebf9dea7f5ddc4914da3b087da69aeb87154 Mon Sep 17 00:00:00 2001 From: Neven Sajko <4944410+nsajko@users.noreply.github.com> Date: Fri, 28 Mar 2025 12:26:13 +0100 Subject: [PATCH 06/20] `Random`: `show` method for `MersenneTwister`: invalidation resistance (#57913) Avoid using or constructing the vector with nonconcrete element type. Should make the sysimage more resistant to method invalidation. (cherry picked from commit 8e03cb11c8460b14ccb1553cb11343c3f39a6774) --- stdlib/Random/src/RNGs.jl | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/stdlib/Random/src/RNGs.jl b/stdlib/Random/src/RNGs.jl index 7782de88ba537..2ea2fb3a684df 100644 --- a/stdlib/Random/src/RNGs.jl +++ b/stdlib/Random/src/RNGs.jl @@ -147,21 +147,26 @@ function show(io::IO, rng::MersenneTwister) end print(io, MersenneTwister, "(", repr(rng.seed), ", (") # state - adv = Integer[rng.adv_jump, rng.adv] + sep = ", " + show(io, rng.adv_jump) + print(io, sep) + show(io, rng.adv) if rng.adv_vals != -1 || rng.adv_ints != -1 - if rng.adv_vals == -1 - @assert rng.idxF == MT_CACHE_F - push!(adv, 0, 0) # "(0, 0)" is nicer on the eyes than (-1, 1002) - else - push!(adv, rng.adv_vals, rng.idxF) - end + # "(0, 0)" is nicer on the eyes than (-1, 1002) + s = rng.adv_vals != -1 + print(io, sep) + show(io, s ? rng.adv_vals : zero(rng.adv_vals)) + print(io, sep) + show(io, s ? 
rng.idxF : zero(rng.idxF)) end if rng.adv_ints != -1 idxI = (length(rng.ints)*16 - rng.idxI) / 8 # 8 represents one Int64 idxI = Int(idxI) # idxI should always be an integer when using public APIs - push!(adv, rng.adv_ints, idxI) + print(io, sep) + show(io, rng.adv_ints) + print(io, sep) + show(io, idxI) end - join(io, adv, ", ") print(io, "))") end From 5a7f1b5ae823c109b3689fc1d9f603916d231471 Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Fri, 28 Mar 2025 11:36:50 -0300 Subject: [PATCH 07/20] Teach alloc-opt to handle atomics a bit better (#57208) Fixes https://github.com/JuliaLang/julia/issues/57190 The fact that this passes thinks memcpy is a potential issue is quite annoying so it deserves a decent refactor, which flows through the type information from julia instead of trying to regenerate it on site, specially given that opaque pointers means we can't really instrospect into pointers at all (cherry picked from commit 2c7527b1aa27b8bf523af95289767b66eac5796e) --- src/llvm-alloc-helpers.cpp | 1 + src/llvm-alloc-opt.cpp | 5 +- test/atomics.jl | 11 ++ test/llvmpasses/alloc-opt-pass.ll | 180 +++++++++++++++++++++++++----- 4 files changed, 171 insertions(+), 26 deletions(-) diff --git a/src/llvm-alloc-helpers.cpp b/src/llvm-alloc-helpers.cpp index 194c6837860ca..a1ed66a190190 100644 --- a/src/llvm-alloc-helpers.cpp +++ b/src/llvm-alloc-helpers.cpp @@ -214,6 +214,7 @@ void jl_alloc::runEscapeAnalysis(llvm::CallInst *I, EscapeAnalysisRequiredArgs r } if (auto call = dyn_cast(inst)) { // TODO handle `memcmp` + // TODO handle `memcpy` which is used a lot more often since opaque pointers // None of the intrinsics should care if the memory is stack or heap allocated. auto callee = call->getCalledOperand(); if (auto II = dyn_cast(call)) { diff --git a/src/llvm-alloc-opt.cpp b/src/llvm-alloc-opt.cpp index 7dd794a4d8847..ce1d22f42d0ae 100644 --- a/src/llvm-alloc-opt.cpp +++ b/src/llvm-alloc-opt.cpp @@ -742,7 +742,9 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref, AllocF auto replace_inst = [&] (Instruction *user) { Instruction *orig_i = cur.orig_i; Instruction *new_i = cur.new_i; - if (isa(user) || isa(user)) { + if (isa(user) || isa(user) || + isa(user) || isa(user)) { + // TODO: these atomics are likely removable if the user is the first argument user->replaceUsesOfWith(orig_i, new_i); } else if (auto call = dyn_cast(user)) { @@ -1111,6 +1113,7 @@ void Optimizer::splitOnStack(CallInst *orig_inst) return; } else if (isa(user) || isa(user)) { + // TODO: Downgrade atomics here potentially auto slot_idx = find_slot(offset); auto &slot = slots[slot_idx]; assert(slot.offset <= offset && slot.offset + slot.size >= offset); diff --git a/test/atomics.jl b/test/atomics.jl index 7e9f29c23ca10..2d4a713b1d30d 100644 --- a/test/atomics.jl +++ b/test/atomics.jl @@ -1099,3 +1099,14 @@ test_once_undef(Any) test_once_undef(Union{Nothing,Integer}) test_once_undef(UndefComplex{Any}) test_once_undef(UndefComplex{UndefComplex{Any}}) + +mutable struct Atomic57190 + @atomic x::Int +end + + +function add_one57190!() + @atomic (Atomic57190(0).x) += 1 +end + +@test add_one57190!() == 1 diff --git a/test/llvmpasses/alloc-opt-pass.ll b/test/llvmpasses/alloc-opt-pass.ll index 665687e86835d..83f2118412cc1 100644 --- a/test/llvmpasses/alloc-opt-pass.ll +++ b/test/llvmpasses/alloc-opt-pass.ll @@ -73,6 +73,11 @@ L3: ; preds = %L2, %L1, %0 } ; CHECK-LABEL: }{{$}} +declare void @external_function() + +declare ptr addrspace(10) @external_function2() + + ; CHECK-LABEL: @legal_int_types ; CHECK: 
alloca [12 x i8] ; CHECK-NOT: alloca i96 @@ -89,21 +94,6 @@ define void @legal_int_types() { } ; CHECK-LABEL: }{{$}} -declare void @external_function() - -declare ptr addrspace(10) @external_function2() - -declare ptr @julia.ptls_states() - -declare ptr @julia.get_pgcstack() - -declare noalias ptr addrspace(10) @julia.gc_alloc_obj(ptr, i64, ptr addrspace(10)) - -declare ptr @julia.pointer_from_objref(ptr addrspace(11)) - -declare token @llvm.julia.gc_preserve_begin(...) - -declare void @llvm.julia.gc_preserve_end(token) ; CHECK-LABEL: @memref_collision ; OPAQUE: call ptr @julia.ptls_states() @@ -171,13 +161,13 @@ define void @initializers() { %pgcstack = call ptr @julia.get_pgcstack() %ptls = call ptr @julia.ptls_states() %ptls_i8 = bitcast ptr %ptls to ptr - %var1 = call ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 1, ptr addrspace(10) @tag) #1 + %var1 = call ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 1, ptr addrspace(10) @tag) #4 %var2 = addrspacecast ptr addrspace(10) %var1 to ptr addrspace(11) %var3 = call ptr @julia.pointer_from_objref(ptr addrspace(11) %var2) - %var4 = call ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 2, ptr addrspace(10) @tag) #2 + %var4 = call ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 2, ptr addrspace(10) @tag) #7 %var5 = addrspacecast ptr addrspace(10) %var4 to ptr addrspace(11) %var6 = call ptr @julia.pointer_from_objref(ptr addrspace(11) %var5) - %var7 = call ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 3, ptr addrspace(10) @tag) #3 + %var7 = call ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 3, ptr addrspace(10) @tag) #1 %var8 = addrspacecast ptr addrspace(10) %var7 to ptr addrspace(11) %var9 = call ptr @julia.pointer_from_objref(ptr addrspace(11) %var8) ret void @@ -203,14 +193,154 @@ union_move9: ; No predecessors! 
} ; CHECK-LABEL: }{{$}} +@0 = private unnamed_addr constant ptr inttoptr (i64 4373799056 to ptr), !julia.constgv !0 +@1 = private unnamed_addr constant i64 0, align 8 + +; CHECK-LABEL: @cmpxchg +; CHECK: alloca +; CHECK: alloca +; CHECK: %20 = cmpxchg ptr %2, +define swiftcc i64 @"cmpxchg"(ptr nonnull swiftself %0) #0 { + %2 = alloca i64, align 16 + %3 = call ptr @julia.get_pgcstack() + %4 = getelementptr inbounds i8, ptr %3, i32 -152 + %5 = getelementptr inbounds i8, ptr %4, i32 168 + %6 = load ptr, ptr %5, align 8, !tbaa !4 + %7 = getelementptr inbounds i8, ptr %6, i32 16 + %8 = load ptr, ptr %7, align 8, !tbaa !8, !invariant.load !0 + fence syncscope("singlethread") seq_cst + call void @julia.safepoint(ptr %8) + fence syncscope("singlethread") seq_cst + %9 = load ptr, ptr @0, align 8, !tbaa !8, !invariant.load !0, !alias.scope !10, !noalias !13, !nonnull !0, !dereferenceable !18, !align !19 + %10 = ptrtoint ptr %9 to i64 + %11 = inttoptr i64 %10 to ptr + %12 = getelementptr inbounds i8, ptr %3, i32 -152 + %13 = addrspacecast ptr %11 to ptr addrspace(10) + call void @llvm.lifetime.start.p0(i64 8, ptr %2) + %14 = call noalias nonnull align 8 dereferenceable(8) ptr addrspace(10) @julia.gc_alloc_obj(ptr %12, i64 8, ptr addrspace(10) %13) #7 + %15 = addrspacecast ptr addrspace(10) %14 to ptr addrspace(11) + call void @llvm.memcpy.p11.p0.i64(ptr addrspace(11) align 8 %15, ptr align 8 @1, i64 8, i1 false), !tbaa !20, !alias.scope !23, !noalias !24 + %16 = addrspacecast ptr addrspace(10) %14 to ptr addrspace(11) + %17 = load atomic i64, ptr addrspace(11) %16 monotonic, align 8, !tbaa !25, !alias.scope !23, !noalias !24 + br label %19 + +18: ; preds = %19 + ret i64 %21 + +19: ; preds = %19, %1 + %20 = phi i64 [ %17, %1 ], [ %23, %19 ] + %21 = call swiftcc i64 @"jlsys_+_47"(ptr nonnull swiftself %3, i64 signext %20, i64 signext 1) + %22 = cmpxchg ptr addrspace(11) %16, i64 %20, i64 %21 seq_cst monotonic, align 8, !tbaa !25, !alias.scope !23, !noalias !24 + %23 = extractvalue { i64, i1 } %22, 0 + %24 = extractvalue { i64, i1 } %22, 1 + br i1 %24, label %18, label %19 +} + +; CHECK-LABEL: }{{$}} +; CHECK-LABEL: @atomicrmw +; CHECK: alloca +; CHECK: alloca +; CHECK: atomicrmw xchg ptr %2, +define swiftcc i64 @"atomicrmw"(ptr nonnull swiftself %0) #0 { + %2 = alloca i64, align 16 + %3 = call ptr @julia.get_pgcstack() + %4 = getelementptr inbounds i8, ptr %3, i32 -152 + %5 = getelementptr inbounds i8, ptr %4, i32 168 + %6 = load ptr, ptr %5, align 8, !tbaa !4 + %7 = getelementptr inbounds i8, ptr %6, i32 16 + %8 = load ptr, ptr %7, align 8, !tbaa !8, !invariant.load !0 + fence syncscope("singlethread") seq_cst + call void @julia.safepoint(ptr %8) + fence syncscope("singlethread") seq_cst + %9 = load ptr, ptr @0, align 8, !tbaa !8, !invariant.load !0, !alias.scope !10, !noalias !13, !nonnull !0, !dereferenceable !18, !align !19 + %10 = ptrtoint ptr %9 to i64 + %11 = inttoptr i64 %10 to ptr + %12 = getelementptr inbounds i8, ptr %3, i32 -152 + %13 = addrspacecast ptr %11 to ptr addrspace(10) + call void @llvm.lifetime.start.p0(i64 8, ptr %2) + %14 = call noalias nonnull align 8 dereferenceable(8) ptr addrspace(10) @julia.gc_alloc_obj(ptr %12, i64 8, ptr addrspace(10) %13) #7 + %15 = addrspacecast ptr addrspace(10) %14 to ptr addrspace(11) + call void @llvm.memcpy.p11.p0.i64(ptr addrspace(11) align 8 %15, ptr align 8 @1, i64 8, i1 false), !tbaa !20, !alias.scope !23, !noalias !24 + %16 = addrspacecast ptr addrspace(10) %14 to ptr addrspace(11) + %17 = load atomic i64, ptr addrspace(11) %16 monotonic, 
align 8, !tbaa !25, !alias.scope !23, !noalias !24 + %18 = call swiftcc i64 @"jlsys_+_47"(ptr nonnull swiftself %3, i64 signext %17, i64 signext 1) + %19 = atomicrmw xchg ptr addrspace(11) %16, i64 %18 seq_cst, align 8, !tbaa !25, !alias.scope !23, !noalias !24 ; preds = %19 + ret i64 %19 +} + +declare ptr @julia.ptls_states() + +declare ptr @julia.pointer_from_objref(ptr addrspace(11)) + +declare token @llvm.julia.gc_preserve_begin(...) + +declare void @llvm.julia.gc_preserve_end(token) + +declare ptr @julia.get_pgcstack() + +; Function Attrs: mustprogress nounwind willreturn memory(inaccessiblemem: readwrite) +declare nonnull align 8 dereferenceable(8) ptr addrspace(10) @ijl_box_int64(i64 signext) #2 + +; Function Attrs: memory(argmem: readwrite, inaccessiblemem: readwrite) +declare void @julia.safepoint(ptr) #3 + +; Function Attrs: mustprogress nounwind willreturn allockind("alloc") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite) +declare noalias nonnull ptr addrspace(10) @julia.gc_alloc_obj(ptr, i64, ptr addrspace(10)) #4 + ; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite) -declare void @llvm.memcpy.p11.p0.i64(ptr addrspace(11) noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #0 +declare void @llvm.memcpy.p11.p0.i64(ptr addrspace(11) noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #5 + ; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite) -declare void @llvm.memcpy.p0.p11.i64(ptr noalias nocapture writeonly, ptr addrspace(11) noalias nocapture readonly, i64, i1 immarg) #0 +declare void @llvm.memcpy.p0.p11.i64(ptr noalias nocapture writeonly, ptr addrspace(11) noalias nocapture readonly, i64, i1 immarg) #5 + ; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite) -declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #0 +declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #5 + +declare swiftcc i64 @"jlsys_+_47"(ptr nonnull swiftself, i64 signext, i64 signext) #0 + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #6 + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #6 + +attributes #0 = { "probe-stack"="inline-asm" } +attributes #1 = { nounwind willreturn allockind("alloc,zeroed") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite) } +attributes #2 = { mustprogress nounwind willreturn memory(inaccessiblemem: readwrite) } +attributes #3 = { memory(argmem: readwrite, inaccessiblemem: readwrite) } +attributes #4 = { mustprogress nounwind willreturn allockind("alloc") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite) } +attributes #5 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } +attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +attributes #7 = { nounwind willreturn allockind("alloc,uninitialized") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite) } +attributes #8 = { nounwind willreturn memory(inaccessiblemem: readwrite) } + +!llvm.module.flags = !{!1, !2, !3} + +!0 = !{} +!1 = !{i32 2, !"Dwarf Version", i32 4} +!2 = !{i32 2, !"Debug Info Version", i32 3} +!3 = !{i32 2, !"julia.optlevel", i32 2} +!4 = 
!{!5, !5, i64 0}
+!5 = !{!"jtbaa_gcframe", !6, i64 0}
+!6 = !{!"jtbaa", !7, i64 0}
+!7 = !{!"jtbaa"}
+!8 = !{!9, !9, i64 0, i64 1}
+!9 = !{!"jtbaa_const", !6, i64 0}
+!10 = !{!11}
+!11 = !{!"jnoalias_const", !12}
+!12 = !{!"jnoalias"}
+!13 = !{!14, !15, !16, !17}
+!14 = !{!"jnoalias_gcframe", !12}
+!15 = !{!"jnoalias_stack", !12}
+!16 = !{!"jnoalias_data", !12}
+!17 = !{!"jnoalias_typemd", !12}
+!18 = !{i64 56}
+!19 = !{i64 16}
+!20 = !{!21, !21, i64 0}
+!21 = !{!"jtbaa_value", !22, i64 0}
+!22 = !{!"jtbaa_data", !6, i64 0}
+!23 = !{!16}
+!24 = !{!14, !15, !17, !11}
+!25 = !{!26, !26, i64 0}
+!26 = !{!"jtbaa_mutab", !21, i64 0}
-
-attributes #0 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
-attributes #1 = { allockind("alloc") }
-attributes #2 = { allockind("alloc,uninitialized") }
-attributes #3 = { allockind("alloc,zeroed") }

From 42e5e4724b27ef5e66e4dfae1e3ed8eef3293b38 Mon Sep 17 00:00:00 2001
From: Jakob Nybo Nissen
Date: Fri, 28 Mar 2025 19:07:23 +0100
Subject: [PATCH 08/20] Refactor IOBuffer code (#57570)

I've been a little frustrated with the IOBuffer code. It contains a whole bunch of implicit invariants, and is poorly commented. It also has several bugs that ultimately stem from the code being unclear about its own assumptions.

This is a refactoring of IOBuffer. The primary goals are:

* Comment the code more heavily
* Test the code more thoroughly

The secondary goals are:

* Fix a few outstanding bugs
* Add some minor performance improvements

This is a purely internal refactoring with no change in behaviour of `IOBuffer`, except straight-up bugfixes. However, note that previous code may have relied on buggy behaviour. Fixing bugs may therefore cause breakage.

## Current changes

### **BEHAVIOUR CHANGES**

* The following code used to not throw an error, but now does: `IOBuffer(b"abc"; maxsize=2)`. I consider this a bugfix. It should not be possible to construct an IOBuffer with a buffer size larger than `maxsize`.
* It used to be possible to write to indices higher than `maxindex`, which could trigger a bug causing data loss. The bug has been fixed, but as a result, some IOBuffers may reach full capacity faster (really: reach it at the correct point), changing writing behaviour.

### Bugfixes

* Do not corrupt data on `copyline` on a non-appending buffer
* Respect `maxsize` even after `take!` (fix #57549)
* Fix bug when copying from an appending iobuffer to itself
* Fix bug where re-allocating the buffer may cause it to shrink, discarding data
* Fix bug where `truncate` may throw a wrong BoundsError
* Fix a bug where truncating a buffer may not correctly remove a mark at a position that has been deleted
* Fix a bug where initializing an IOBuffer without an explicit buffer and with `truncate=false` makes it contain the full buffer
* Current behaviour for `reset` and `position` did not work for `PipeBuffer`; fix that

### Changes to brittle code

* Removed some tests that explicitly tested internal code and internal behaviour. Some of that behaviour has changed.
* Changed some internal `PipeBuffer` behaviour, which did not respect a writable IOBuffer's ownership of its buffer and therefore failed spuriously

### Performance improvements

* Writing to a dense IOBuffer now uses memmove and is up to 10x faster for long writes.
* Minor optimisations (about ten percent) for writing to IOBuffers in general Closes #57549 Co-authored-by: Jameson Nash (cherry picked from commit 54197132f530bd7e7dbc84cfe56daa0b43bcfb0c) --- base/iobuffer.jl | 770 +++++++++++++++++++++++++++++++++-------------- base/stream.jl | 13 +- test/iobuffer.jl | 450 ++++++++++++++++++++++----- 3 files changed, 925 insertions(+), 308 deletions(-) diff --git a/base/iobuffer.jl b/base/iobuffer.jl index 144b0a20568e9..5e08a21d53186 100644 --- a/base/iobuffer.jl +++ b/base/iobuffer.jl @@ -1,45 +1,168 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license -## work with AbstractVector{UInt8} via I/O primitives ## +# IOBuffer is a Memory{UInt8} backed IO type for in-memory IO. + +# Here, u represents used bytes (already read), X represents bytes still to read, +# - represents bytes uninitialized data but which can be written to later. +# . represents bytes before offset, which the buffer will not touch, until +# a write operation happens. + +# .....uuuuuuuuuuuuuXXXXXXXXXXXXX------------ +# | | | | | | +# | offset ptr size | maxsize +# 1 lastindex(data) + +# N.B: `mark` does not correspond to any index in the buffer. Instead, it stores +# the mark at virtual offset in the buffer. + +# AFTER COMPACTION + +# XXXXXXXXXXXXX-------------------------- +# || | | | | +# |1 ptr size | maxsize +# | lastindex(data) +# offset (set to zero) + +# * The underlying array is always 1-indexed +# * The IOBuffer has full control (ownership) of the underlying array, only when +# buffer.write == true. +# * Unreachable data can be deleted in the buffer's data, shifting the whole thing to the left +# to make room for more data, without replacing or resizing data. +# This can be done only if the buffer is not seekable -# Stateful string mutable struct GenericIOBuffer{T<:AbstractVector{UInt8}} <: IO - data::T # T should support: getindex, setindex!, length, copyto!, similar, and (optionally) resize! - reinit::Bool # if true, data needs to be re-allocated (after take!) + # T should support: getindex, setindex!, length, copyto!, similar, size and (optionally) resize! + data::T + + # The user can take control of `data` out of this struct. When that happens, instead of eagerly allocating + # a new array, we set `.reinit` to true, and then allocate a new one when needed. + # If reinit is true, the buffer is writable, and offset_or_compacted and size is zero. See `take!` + reinit::Bool readable::Bool writable::Bool - seekable::Bool # if not seekable, implementation is free to destroy (compact) past read data - append::Bool # add data at end instead of at pointer - size::Int # end pointer (and write pointer if append == true) + offset - maxsize::Int # fixed array size (typically pre-allocated) - ptr::Int # read (and maybe write) pointer + offset - offset::Int # offset of ptr and size from actual start of data and actual size - mark::Int # reset mark location for ptr (or <0 for no mark) - function GenericIOBuffer{T}(data::T, readable::Bool, writable::Bool, seekable::Bool, append::Bool, - maxsize::Integer) where T<:AbstractVector{UInt8} - require_one_based_indexing(data) - return new(data, false, readable, writable, seekable, append, length(data), maxsize, 1, 0, -1) - end + # If not seekable, implementation is free to destroy (compact) data before ptr, unless + # it can be recovered using the mark by using `reset`. + # If it IS seekable, the user may always recover any data in 1:size by seeking, + # so no data can be destroyed. 
+ # Non-seekable IOBuffers can only be constructed with `PipeBuffer`, which are writable, + # readable and append. + seekable::Bool + + # If true, write new data to the index size+1 instead of the index ptr. + append::Bool + + # Last index of `data` that has been written to. Data in size+1:end has not yet been used, + # and may contain arbitrary values. + # This value is always in 0 : lastindex(data) + size::Int + + # When the buffer is resized, or a new buffer allocated, this is the maximum size of the buffer. + # A new GenericIOBuffer may be constructed with an existing data larger than `maxsize`. + # When that happensm we must make sure to not have more than `maxsize` bytes in the buffer, + # else reallocating will lose data. So, never write to indices > `maxsize + get_offset(io)` + # This value is always in 0:typemax(Int). + maxsize::Int + + # Data is read/written from/to ptr, except in situations where append is true, in which case + # data is still read from ptr, but written to size+1. + # This value is always in offset + 1 : size+1 + ptr::Int + + # This field has two distinct meanings: + # If the value is positive, it encodes an offset of the start of the data in `data`. + # This is used if the buffer is instantiated from a Vector with non-zero memory offset. + # Then, the IOBuffer stores the underlying memory, and so the first data in the buffer + # is not at index 1. + # If the value is negative, then `-io.offset_or_compacted` gets the number of compacted + # bytes. That's the number of unused bytes deleted from a non-seekable stream to make space. + # We need to keep track of it in order to make `mark` and `position` etc work, that is, + # we need to know the virtual position of the mark even when an arbitrary number + # of unused bytes has been deleted due to compaction. + # Since compaction will move data in the buffer and thereby zero the offset, either the + # offset or the number of compacted bytes will be zero at any point, so both can be + # stored in one field. + # If offset: Value is always in 0:lastindex(data) + # If compacted: Value is in typemin(Int):0 + offset_or_compacted::Int + + # The mark is -1 if not set, else the zero-indexed virtual position of ptr in the buffer. + # Due to compaction and offset, this value is not an index into the buffer, but may be translated + # to an index. 
+ # This value is in -1:typemax(Int) + mark::Int + + # Unsafe constructor which does not do any checking + global function _new_generic_iobuffer( + ::Type{T}, + data::T, + readable::Bool, + writable::Bool, + seekable::Bool, + append::Bool, + maxsize::Int, + ) where T<:AbstractVector{UInt8} + len = Int(length(data))::Int + return new{T}(data, false, readable, writable, seekable, append, len, maxsize, 1, 0, -1) + end +end + +function GenericIOBuffer{T}( + data::T, + readable::Bool, + writable::Bool, + seekable::Bool, + append::Bool, + maxsize::Integer, + truncate::Bool, + ) where T<:AbstractVector{UInt8} + require_one_based_indexing(data) + mz = Int(maxsize)::Int + len = Int(length(data))::Int + if !truncate && mz < len + throw(ArgumentError("maxsize must not be smaller than data length")) + end + buf = _new_generic_iobuffer(T, data, readable, writable, seekable, append, mz) + if truncate + buf.size = buf.offset_or_compacted + end + buf end const IOBuffer = GenericIOBuffer{Memory{UInt8}} function GenericIOBuffer(data::T, readable::Bool, writable::Bool, seekable::Bool, append::Bool, - maxsize::Integer) where T<:AbstractVector{UInt8} - GenericIOBuffer{T}(data, readable, writable, seekable, append, maxsize) + maxsize::Integer, truncate::Bool) where T<:AbstractVector{UInt8} + GenericIOBuffer{T}(data, readable, writable, seekable, append, maxsize, truncate) end + +# For this method, we use the underlying Memory of the vector. Therefore, we need to set the, +# ptr and size accordingly, so the buffer only uses the part of the memory that the vector does. function GenericIOBuffer(data::Vector{UInt8}, readable::Bool, writable::Bool, seekable::Bool, append::Bool, - maxsize::Integer) + maxsize::Integer, truncate::Bool) ref = data.ref - buf = GenericIOBuffer(ref.mem, readable, writable, seekable, append, maxsize) + mem = ref.mem offset = memoryrefoffset(ref) - 1 - buf.ptr += offset - buf.size = length(data) + offset - buf.offset = offset + # The user may pass a vector of length <= maxsize, but where the underlying memory + # is larger than maxsize. Don't throw an error in that case. 
+ mz = Int(maxsize)::Int + if !truncate && mz < length(data) + throw(ArgumentError("maxsize must not be smaller than data length")) + end + buf = _new_generic_iobuffer(Memory{UInt8}, mem, readable, writable, seekable, append, mz) + buf.offset_or_compacted = offset + buf.ptr = offset + 1 + if truncate + buf.size = offset + else + buf.size = length(data) + offset + end return buf end +get_offset(io::GenericIOBuffer) = max(0, io.offset_or_compacted) +get_compacted(io::GenericIOBuffer) = max(0, -io.offset_or_compacted) + # allocate Vector{UInt8}s for IOBuffer storage that can efficiently become Strings StringMemory(n::Integer) = unsafe_wrap(Memory{UInt8}, _string_n(n)) StringVector(n::Integer) = wrap(Array, StringMemory(n)) @@ -111,17 +234,11 @@ function IOBuffer( truncate::Union{Bool,Nothing}=nothing, maxsize::Integer=typemax(Int), sizehint::Union{Integer,Nothing}=nothing) - if maxsize < 0 - throw(ArgumentError("negative maxsize")) - end if sizehint !== nothing sizehint!(data, sizehint) end flags = open_flags(read=read, write=write, append=append, truncate=truncate) - buf = GenericIOBuffer(data, flags.read, flags.write, true, flags.append, Int(maxsize)) - if flags.truncate - buf.size = buf.offset - end + buf = GenericIOBuffer(data, flags.read, flags.write, true, flags.append, maxsize, flags.truncate) return buf end @@ -131,17 +248,23 @@ function IOBuffer(; append::Union{Bool,Nothing}=nothing, truncate::Union{Bool,Nothing}=true, maxsize::Integer=typemax(Int), - sizehint::Union{Integer,Nothing}=nothing) - size = sizehint !== nothing ? Int(sizehint) : maxsize != typemax(Int) ? Int(maxsize) : 32 + sizehint::Union{Integer,Nothing}=nothing, + ) + mz = Int(maxsize)::Int + if mz < 0 + throw(ArgumentError("negative maxsize")) + end + size = if sizehint !== nothing + # Allow negative sizehint, just like `sizehint!` does + min(mz, max(0, Int(sizehint)::Int)) + else + min(mz, 32) + end flags = open_flags(read=read, write=write, append=append, truncate=truncate) - buf = IOBuffer( - StringMemory(size), - read=flags.read, - write=flags.write, - append=flags.append, - truncate=flags.truncate, - maxsize=maxsize) - fill!(buf.data, 0) + # A common usecase of IOBuffer is to incrementally construct strings. By using StringMemory + # as the default storage, we can turn the result into a string without copying. + buf = _new_generic_iobuffer(Memory{UInt8}, StringMemory(size), flags.read, flags.write, true, flags.append, mz) + buf.size = 0 return buf end @@ -158,21 +281,53 @@ If `data` is given, creates a `PipeBuffer` to operate on a data vector, optionally specifying a size beyond which the underlying `Array` may not be grown. """ PipeBuffer(data::AbstractVector{UInt8}=Memory{UInt8}(); maxsize::Int = typemax(Int)) = - GenericIOBuffer(data, true, true, false, true, maxsize) + GenericIOBuffer(data, true, true, false, true, maxsize, false) PipeBuffer(maxsize::Integer) = (x = PipeBuffer(StringMemory(maxsize), maxsize = maxsize); x.size = 0; x) +# Internal method where truncation IS supported +function _truncated_pipebuffer(data::AbstractVector{UInt8}=Memory{UInt8}(); maxsize::Int = typemax(Int)) + buf = PipeBuffer(data) + buf.size = get_offset(buf) + buf.maxsize = maxsize + buf +end + _similar_data(b::GenericIOBuffer, len::Int) = similar(b.data, len) _similar_data(b::IOBuffer, len::Int) = StringMemory(len) -function copy(b::GenericIOBuffer) - ret = typeof(b)(b.reinit ? _similar_data(b, 0) : b.writable ? 
- copyto!(_similar_data(b, length(b.data)), b.data) : b.data, - b.readable, b.writable, b.seekable, b.append, b.maxsize) - ret.size = b.size - ret.ptr = b.ptr - ret.mark = b.mark - ret.offset = b.offset - return ret +# Note: Copying may change the value of the position (and mark) for un-seekable streams. +# However, these values are not stable anyway due to compaction. + +function copy(b::GenericIOBuffer{T}) where T + if b.reinit + # If buffer is used up, allocate a new size-zero buffer + # Reinit implies writable, and that ptr, size, offset and mark are already the default values + return typeof(b)(_similar_data(b, 0), b.readable, b.writable, b.seekable, b.append, b.maxsize, false) + elseif b.writable + # Else, we just copy the reachable bytes. If buffer is seekable, all bytes + # after offset are reachable, since they can be seeked to + used_span = get_used_span(b) + compacted = first(used_span) - get_offset(b) - 1 + len = length(used_span) + data = copyto!(_similar_data(b, len), view(b.data, used_span)) + ret = typeof(b)(data, b.readable, b.writable, b.seekable, b.append, b.maxsize, false) + ret.size = len + # Copying data over implicitly compacts, and may add compaction + ret.offset_or_compacted = -get_compacted(b) - compacted + ret.ptr = b.ptr - first(used_span) + 1 + ret.mark = b.mark + return ret + else + # When the buffer is just readable, they can share the same data, so we just make + # a shallow copy of the IOBuffer struct. + # Use internal constructor because we want to allow b.maxsize to be larger than data, + # in case that is the case for `b`. + ret = _new_generic_iobuffer(T, b.data, b.readable, b.writable, b.seekable, b.append, b.maxsize) + ret.offset_or_compacted = b.offset_or_compacted + ret.ptr = b.ptr + ret.mark = b.mark + return ret + end end show(io::IO, b::GenericIOBuffer) = print(io, "IOBuffer(data=UInt8[...], ", @@ -180,9 +335,9 @@ show(io::IO, b::GenericIOBuffer) = print(io, "IOBuffer(data=UInt8[...], ", "writable=", b.writable, ", ", "seekable=", b.seekable, ", ", "append=", b.append, ", ", - "size=", b.size - b.offset, ", ", + "size=", b.size - get_offset(b), ", ", "maxsize=", b.maxsize == typemax(Int) ? 
"Inf" : b.maxsize, ", ", - "ptr=", b.ptr - b.offset, ", ", + "ptr=", b.ptr - get_offset(b), ", ", "mark=", b.mark, ")") @noinline function _throw_not_readable() @@ -192,7 +347,7 @@ end function unsafe_read(from::GenericIOBuffer, p::Ptr{UInt8}, nb::UInt) from.readable || _throw_not_readable() - avail = bytesavailable(from) + avail = bytesavailable(from) % UInt adv = min(avail, nb) unsafe_read!(p, from.data, from.ptr, adv) from.ptr += adv @@ -221,7 +376,45 @@ function unsafe_read!(dest::Ptr{UInt8}, src::DenseBytes, so::Integer, nbytes::UI nothing end -function peek(from::GenericIOBuffer, T::Union{Type{Int16},Type{UInt16},Type{Int32},Type{UInt32},Type{Int64},Type{UInt64},Type{Int128},Type{UInt128},Type{Float16},Type{Float32},Type{Float64}}) +const MultiByteBitNumberType = Union{ + Type{UInt16}, + Type{Int16}, + Type{UInt32}, + Type{Int32}, + Type{UInt64}, + Type{Int64}, + Type{UInt128}, + Type{Int128}, + Type{Float16}, + Type{Float32}, + Type{Float64}, +} + +function load_from_array(T::MultiByteBitNumberType, data::AbstractArray{UInt8}, from::Int) + x = if T <: AbstractFloat + uinttype(T)(0) + else + unsigned(T)(0) + end + for i in 0:sizeof(x)-1 + x |= typeof(x)(data[from + i]) << (8 * i) + end + reinterpret(T, ltoh(x)) +end + +function peek(from::GenericIOBuffer, T::MultiByteBitNumberType) + from.readable || _throw_not_readable() + avail = bytesavailable(from) + nb = sizeof(T) + if nb > avail + throw(EOFError()) + end + return load_from_array(T, from.data, from.ptr) +end + +# This method can use a pointer, since the underlying buffer is dense +# and memory backed +function peek(from::GenericIOBuffer{<:MutableDenseArrayType}, T::MultiByteBitNumberType) from.readable || _throw_not_readable() avail = bytesavailable(from) nb = sizeof(T) @@ -235,29 +428,12 @@ function peek(from::GenericIOBuffer, T::Union{Type{Int16},Type{UInt16},Type{Int3 return x end -function read(from::GenericIOBuffer, T::Union{Type{Int16},Type{UInt16},Type{Int32},Type{UInt32},Type{Int64},Type{UInt64},Type{Int128},Type{UInt128},Type{Float16},Type{Float32},Type{Float64}}) +function read(from::GenericIOBuffer, T::MultiByteBitNumberType) x = peek(from, T) from.ptr += sizeof(T) return x end -function read_sub(from::GenericIOBuffer, a::AbstractArray{T}, offs, nel) where T - require_one_based_indexing(a) - from.readable || _throw_not_readable() - if offs+nel-1 > length(a) || offs < 1 || nel < 0 - throw(BoundsError()) - end - if isa(a, MutableDenseArrayType{UInt8}) - nb = UInt(nel * sizeof(T)) - GC.@preserve a unsafe_read(from, pointer(a, offs), nb) - else - for i = offs:offs+nel-1 - a[i] = read(from, T) - end - end - return a -end - @inline function read(from::GenericIOBuffer, ::Type{UInt8}) from.readable || _throw_not_readable() ptr = from.ptr @@ -283,20 +459,35 @@ read(from::GenericIOBuffer, ::Type{Ptr{T}}) where {T} = convert(Ptr{T}, read(fro isreadable(io::GenericIOBuffer) = io.readable iswritable(io::GenericIOBuffer) = io.writable -filesize(io::GenericIOBuffer) = (io.seekable ? io.size - io.offset : bytesavailable(io)) +# Number of bytes that can be read from the buffer, if you seek to the start first. +filesize(io::GenericIOBuffer) = (io.seekable ? io.size - get_offset(io) : bytesavailable(io)) + +# Number of bytes that can be read from the buffer. bytesavailable(io::GenericIOBuffer) = io.size - io.ptr + 1 -position(io::GenericIOBuffer) = io.ptr - io.offset - 1 + +# TODO: Document that position for an unmarked and unseekable stream is invalid (and make it error?) 
+function position(io::GenericIOBuffer) + # Position is zero-indexed, but ptr is one-indexed, hence the -1 + io.ptr - io.offset_or_compacted - 1 +end function skip(io::GenericIOBuffer, n::Integer) skip(io, clamp(n, Int)) end + function skip(io::GenericIOBuffer, n::Int) + # In both cases, the result will never go to before the first position, + # nor beyond the last position, and will not throw an error unless the stream + # is not seekable and try to skip a negative number of bytes. if signbit(n) + # Skipping a negative number of bytes is equivalent to seeking backwards. seekto = clamp(widen(position(io)) + widen(n), Int) seek(io, seekto) # Does error checking else - n_max = io.size + 1 - io.ptr - io.ptr += min(n, n_max) + # Don't use seek in order to allow a non-seekable IO to still skip bytes. + # Handle overflow. + maxptr = io.size + 1 + io.ptr = n > maxptr || io.ptr - n > maxptr ? maxptr : io.ptr + n io end end @@ -304,16 +495,30 @@ end function seek(io::GenericIOBuffer, n::Integer) seek(io, clamp(n, Int)) end + +function translate_seek_position(io::GenericIOBuffer, n::Int) + # If there is an offset (the field F is positive), then there are F unused bytes at the beginning + # of the data, and we need to seek to n + F + 1. (Also compensate for `seek` being zero- + # indexed) + + # If bytes has been compacted (field F is negative), then F bytes has been deleted from + # the buffer, and a virtual position n means a position n + F in the data. + # Remember that F is negative, so n + F is subtracting from n. So we also end up with + # n + F + 1. + clamp(widen(n) + widen(io.offset_or_compacted) + widen(1), Int) +end + function seek(io::GenericIOBuffer, n::Int) if !io.seekable ismarked(io) || throw(ArgumentError("seek failed, IOBuffer is not seekable and is not marked")) n == io.mark || throw(ArgumentError("seek failed, IOBuffer is not seekable and n != mark")) end + # TODO: REPL.jl relies on the fact that this does not throw (by seeking past the beginning or end # of an GenericIOBuffer), so that would need to be fixed in order to throw an error here - #(n < 0 || n > io.size - io.offset) && throw(ArgumentError("Attempted to seek outside IOBuffer boundaries.")) - #io.ptr = n + io.offset + 1 - io.ptr = clamp(n, 0, io.size - io.offset) + io.offset + 1 + max_ptr = io.size + 1 + min_ptr = get_offset(io) + 1 + io.ptr = clamp(translate_seek_position(io, n), min_ptr, max_ptr) return io end @@ -322,113 +527,163 @@ function seekend(io::GenericIOBuffer) return io end -# choose a resize strategy based on whether `resize!` is defined: -# for a Vector, we use `resize!`, but for most other types, -# this calls `similar`+copy -function _resize!(io::GenericIOBuffer, sz::Int) - a = io.data - offset = io.offset - if applicable(resize!, a, sz) - if offset != 0 - size = io.size - size > offset && copyto!(a, 1, a, offset + 1, min(sz, size - offset)) - io.ptr -= offset - io.size -= offset - io.offset = 0 - end - resize!(a, sz) +# Resize the io's data to `new_size`, which must not be > io.maxsize. +# Use `resize!` if the data supports it, else reallocate a new one and +# copy the old data over. +# If not `exact` and resizing is not supported, overallocate in order to +# prevent excessive resizing. +function _resize!(io::GenericIOBuffer, new_size::Int, exact::Bool) + old_data = io.data + if applicable(resize!, old_data, new_size) + resize!(old_data, new_size) else - size = io.size - if size >= sz && sz != 0 - b = a - else - b = _similar_data(io, sz == 0 ? 
0 : max(overallocation(size - io.offset), sz)) - end - size > offset && copyto!(b, 1, a, offset + 1, min(sz, size - offset)) - io.data = b - io.ptr -= offset - io.size -= offset - io.offset = 0 + new_size = exact ? new_size : min(io.maxsize, overallocation(new_size)) + used_span = get_used_span(io) + deleted = first(used_span) - 1 + compacted = deleted - get_offset(io) + new_data = _similar_data(io, new_size) + io.data = new_data + iszero(new_size) && return io + len_used = length(used_span) + iszero(len_used) || copyto!(new_data, 1, old_data, first(used_span), len_used) + # Copying will implicitly compact, and so compaction must be updated + io.offset_or_compacted = -get_compacted(io) - compacted + io.ptr -= deleted + io.size = len_used end return io end function truncate(io::GenericIOBuffer, n::Integer) io.writable || throw(ArgumentError("truncate failed, IOBuffer is not writeable")) + # Non-seekable buffers can only be constructed with `PipeBuffer`, which is explicitly + # documented to not be truncatable. io.seekable || throw(ArgumentError("truncate failed, IOBuffer is not seekable")) n < 0 && throw(ArgumentError("truncate failed, n bytes must be ≥ 0, got $n")) n > io.maxsize && throw(ArgumentError("truncate failed, $(n) bytes is exceeds IOBuffer maxsize $(io.maxsize)")) - n = Int(n) + n = Int(n)::Int + offset = get_offset(io) + current_size = io.size - offset if io.reinit - io.data = _similar_data(io, n) + # If reinit, we don't need to truncate anything but just reinitializes + # the buffer with zeros. Mark, ptr and offset has already been reset. + io.data = fill!(_similar_data(io, n), 0x00) io.reinit = false - elseif n > length(io.data) + io.offset - _resize!(io, n) - end - ismarked(io) && io.mark > n && unmark(io) - n += io.offset - io.data[io.size+1:n] .= 0 - io.size = n - io.ptr = min(io.ptr, n+1) + io.size = n + elseif n < current_size + # Else, if we need to shrink the iobuffer, we simply change the pointers without + # actually shrinking the underlying storage, or copying data. + + # Clear the mark if it points to data that has now been deleted. + if translate_seek_position(io, io.mark) > n+offset + io.mark = -1 + end + io.size = n + offset + io.ptr = min(io.ptr, n + offset + 1) + elseif n > current_size + if n + offset > io.maxsize + compact!(io) + end + _resize!(io, n + get_offset(io), false) + fill!(view(io.data, io.size + 1:min(length(io.data), n + get_offset(io))), 0x00) + io.size = min(length(io.data), n + get_offset(io)) + end return io end -function compact(io::GenericIOBuffer) - io.writable || throw(ArgumentError("compact failed, IOBuffer is not writeable")) - io.seekable && throw(ArgumentError("compact failed, IOBuffer is seekable")) - io.reinit && return - local ptr::Int, bytes_to_move::Int - if ismarked(io) && io.mark < position(io) - io.mark == 0 && return - ptr = io.mark + io.offset - bytes_to_move = bytesavailable(io) + (io.ptr - ptr) - else - ptr = io.ptr - bytes_to_move = bytesavailable(io) +# Ensure that the buffer has room for at least `nshort` more bytes, except when +# doing that would exceed maxsize. +@inline ensureroom(io::GenericIOBuffer, nshort::Int) = ensureroom(io, UInt(nshort)) + +@inline function ensureroom(io::GenericIOBuffer, nshort::UInt) + # If the IO is not writable, we call the slow path only to error. + # If reinit, the data has been handed out to the user, and the IOBuffer + # no longer controls it, so we need to allocate a new one. 
+ if !io.writable || io.reinit + return ensureroom_reallocate(io, nshort) + end + # The fast path here usually checks there is already room, then does nothing. + # When append is true, new data is added after io.size, not io.ptr + existing_space = min(lastindex(io.data), io.maxsize + get_offset(io)) - (io.append ? io.size : io.ptr - 1) + if existing_space < nshort % Int + # Outline this function to make it more likely that ensureroom inlines itself + return ensureroom_slowpath(io, nshort, existing_space) end - copyto!(io.data, 1, io.data, ptr, bytes_to_move) - io.size -= ptr - 1 - io.ptr -= ptr - 1 - io.offset = 0 - return + return io end -@noinline function ensureroom_slowpath(io::GenericIOBuffer, nshort::UInt) +# Throw error (placed in this function to outline it) or reinit the buffer +@noinline function ensureroom_reallocate(io::GenericIOBuffer, nshort::UInt) io.writable || throw(ArgumentError("ensureroom failed, IOBuffer is not writeable")) - if io.reinit - io.data = _similar_data(io, nshort % Int) - io.reinit = false - end - if !io.seekable - if !ismarked(io) && io.ptr > io.offset+1 && io.size <= io.ptr - 1 - io.ptr = 1 - io.size = 0 - io.offset = 0 - else - datastart = (ismarked(io) ? io.mark : io.ptr - io.offset) - if (io.size-io.offset+nshort > io.maxsize) || - (datastart > 4096 && datastart > io.size - io.ptr) || - (datastart > 262144) - # apply somewhat arbitrary heuristics to decide when to destroy - # old, read data to make more room for new data - compact(io) - end + io.data = _similar_data(io, min(io.maxsize, nshort % Int)) + io.reinit = false + io.offset_or_compacted = -get_compacted(io) + return io +end + +# Here, we already know there is not enough room at the end of the io's data. +@noinline function ensureroom_slowpath(io::GenericIOBuffer, nshort::UInt, available_bytes::Int) + reclaimable_bytes = first(get_used_span(io)) - 1 + # Avoid resizing and instead compact the buffer, only if we gain enough bytes from + # doing so (at least 32 bytes and 1/8th of the data length). Also, if we would have + # to resize anyway, there would be no point in compacting, so also check that. + if ( + reclaimable_bytes ≥ 32 && + reclaimable_bytes ≥ length(io.data) >>> 3 && + (reclaimable_bytes + available_bytes) % UInt ≥ nshort + ) + compact!(io) + return io + end + + desired_size = length(io.data) + Int(nshort) - available_bytes + if desired_size > io.maxsize + # If we can't fit all the requested data in the new buffer, we need to + # fit as much as possible, so we must compact + if !iszero(reclaimable_bytes) + desired_size -= compact!(io) + end + # Max out the buffer size if we want more than the buffer size + if length(io.data) < io.maxsize + _resize!(io, io.maxsize, true) end + else + # Else, we request only the requested size, but set `exact` to `false`, + # in order to overallocate to avoid growing the buffer by too little + _resize!(io, desired_size, false) end - return + + return io end -@inline ensureroom(io::GenericIOBuffer, nshort::Int) = ensureroom(io, UInt(nshort)) -@inline function ensureroom(io::GenericIOBuffer, nshort::UInt) - if !io.writable || (!io.seekable && io.ptr > io.offset+1) || io.reinit - ensureroom_slowpath(io, nshort) - end - n = min((nshort % Int) + (io.append ? 
io.size : io.ptr-1) - io.offset, io.maxsize) - l = length(io.data) + io.offset - if n > l - _resize!(io, Int(n)) +# Get the indices in data which cannot be deleted +function get_used_span(io::IOBuffer) + # A seekable buffer can recover data before ptr + return if io.seekable + get_offset(io) + 1 : io.size + # If non-seekable, the mark can be used to recover data before ptr, + # so data at the mark and after must also be saved + elseif io.mark > -1 + min(io.ptr, translate_seek_position(io, io.mark)) : io.size + else + io.ptr : io.size end - return io +end + +# Delete any offset, and also compact data if buffer is not seekable. +# Return the number of bytes deleted +function compact!(io::GenericIOBuffer)::Int + offset = get_offset(io) + used_span = get_used_span(io) + deleted = first(used_span) - 1 + compacted = deleted - offset + iszero(deleted) && return 0 + data = io.data + copyto!(data, 1, data, deleted + 1, length(used_span)) + io.offset_or_compacted = -get_compacted(io) - compacted + io.ptr -= deleted + io.size -= deleted + return deleted end eof(io::GenericIOBuffer) = (io.ptr - 1 >= io.size) @@ -439,17 +694,17 @@ function closewrite(io::GenericIOBuffer) end @noinline function close(io::GenericIOBuffer{T}) where T + if io.writable && !io.reinit + _resize!(io, 0, true) + end io.readable = false io.writable = false io.seekable = false io.size = 0 - io.offset = 0 io.maxsize = 0 io.ptr = 1 io.mark = -1 - if io.writable && !io.reinit - io.data = _resize!(io, 0) - end + io.offset_or_compacted = -get_compacted(io) nothing end @@ -472,31 +727,42 @@ julia> String(take!(io)) ``` """ function take!(io::GenericIOBuffer) - ismarked(io) && unmark(io) + io.mark = -1 if io.seekable - nbytes = io.size - io.offset - data = copyto!(StringVector(nbytes), 1, io.data, io.offset + 1, nbytes) + # If the buffer is seekable, then the previously consumed bytes from ptr+1:size + # must still be output, as they are not truly gone. + # Hence, we output all bytes from 1:io.size + offset = get_offset(io) + nbytes = io.size - offset + data = copyto!(StringVector(nbytes), 1, io.data, offset + 1, nbytes) else + # Else, if not seekable, bytes from 1:ptr-1 are truly gone and should not + # be output. Hence, we output `bytesavailable`, which is ptr:size nbytes = bytesavailable(io) data = read!(io, StringVector(nbytes)) end if io.writable + io.reinit = true io.ptr = 1 io.size = 0 - io.offset = 0 + io.offset_or_compacted = 0 end return data end + +# This method is specialized because we know the underlying data is a Memory, so we can +# e.g. wrap directly in an array without copying. Otherwise the logic is the same as +# the generic method function take!(io::IOBuffer) - ismarked(io) && unmark(io) + io.mark = -1 if io.seekable nbytes = filesize(io) if nbytes == 0 || io.reinit data = StringVector(0) elseif io.writable - data = wrap(Array, memoryref(io.data, io.offset + 1), nbytes) + data = wrap(Array, memoryref(io.data, get_offset(io) + 1), nbytes) else - data = copyto!(StringVector(nbytes), 1, io.data, io.offset + 1, nbytes) + data = copyto!(StringVector(nbytes), 1, io.data, get_offset(io) + 1, nbytes) end else nbytes = bytesavailable(io) @@ -512,7 +778,7 @@ function take!(io::IOBuffer) io.reinit = true io.ptr = 1 io.size = 0 - io.offset = 0 + io.offset_or_compacted = 0 end return data end @@ -529,46 +795,79 @@ state. This should only be used internally for performance-critical It might save an allocation compared to `take!` (if the compiler elides the Array allocation), as well as omits some checks. 
""" -_unsafe_take!(io::IOBuffer) = - wrap(Array, io.size == io.offset ? - memoryref(Memory{UInt8}()) : - memoryref(io.data, io.offset + 1), - io.size - io.offset) +function _unsafe_take!(io::IOBuffer) + offset = get_offset(io) + mem = if io.size == offset + memoryref(Memory{UInt8}()) + else + memoryref(io.data, offset + 1) + end + wrap(Array, mem, io.size - offset) +end function write(to::IO, from::GenericIOBuffer) - written::Int = bytesavailable(from) + # This would cause an infinite loop, as it should read until the end, but more + # data is being written into it continuously. if to === from - from.ptr = from.size + 1 + throw(ArgumentError("Writing all content fron an IOBuffer into itself in invalid")) else - written = GC.@preserve from unsafe_write(to, pointer(from.data, from.ptr), UInt(written)) - from.ptr += written + available = bytesavailable(from) + written = GC.@preserve from unsafe_write(to, pointer(from.data, from.ptr), UInt(available)) + from.ptr = from.size + 1 end return written end function unsafe_write(to::GenericIOBuffer, p::Ptr{UInt8}, nb::UInt) ensureroom(to, nb) - ptr = (to.append ? to.size+1 : to.ptr) - written = Int(min(nb, Int(length(to.data))::Int - ptr + 1)) - towrite = written - d = to.data - while towrite > 0 - @inbounds d[ptr] = unsafe_load(p) - ptr += 1 + size = to.size + append = to.append + ptr = append ? size+1 : to.ptr + data = to.data + to_write = min(nb, (min(Int(length(data))::Int, to.maxsize + get_offset(to)) - ptr + 1) % UInt) % Int + # Dispatch based on the type of data, to possibly allow using memcpy + _unsafe_write(data, p, ptr, to_write % UInt) + # Update to.size only if the ptr has advanced to higher than + # the previous size. Otherwise, we just overwrote existing data + to.size = max(size, ptr + to_write - 1) + # If to.append, we only update size, not ptr. + if !append + to.ptr = ptr + to_write + end + return to_write +end + +@inline function _unsafe_write(data::AbstractVector{UInt8}, p::Ptr{UInt8}, from::Int, nb::UInt) + for i in 0:nb-1 + data[from + i] = unsafe_load(p) p += 1 - towrite -= 1 end - to.size = max(to.size, ptr - 1) - if !to.append - to.ptr += written +end + +@inline function _unsafe_write(data::MutableDenseArrayType{UInt8}, p::Ptr{UInt8}, from::Int, nb::UInt) + # Calling `unsafe_copyto!` is very efficient for large arrays, but has some overhead + # for small (< 5 bytes) arrays. + # Since a common use case of IOBuffer is to construct strings incrementally, often + # one char at a time, it's crucial to be fast in the case of small arrays. + # This optimization only gives a minor 10% speed boost in the best case. + if nb < 5 + @inbounds for i in UInt(1):nb + data[from + (i % Int) - 1] = unsafe_load(p, i) + end + else + GC.@preserve data begin + ptr = Ptr{UInt8}(pointer(data, from))::Ptr{UInt8} + @inline unsafe_copyto!(ptr, p, nb) + end end - return written end @inline function write(to::GenericIOBuffer, a::UInt8) ensureroom(to, UInt(1)) ptr = (to.append ? to.size+1 : to.ptr) - if ptr > to.maxsize + # We have just ensured there is room for 1 byte, EXCEPT if we were to exceed + # maxsize. So, we just need to check that here. 
+ if ptr > to.maxsize + get_offset(to) return 0 else to.data[ptr] = a @@ -581,31 +880,26 @@ end end readbytes!(io::GenericIOBuffer, b::MutableDenseArrayType{UInt8}, nb=length(b)) = readbytes!(io, b, Int(nb)) + function readbytes!(io::GenericIOBuffer, b::MutableDenseArrayType{UInt8}, nb::Int) - nr = min(nb, bytesavailable(io)) - if length(b) < nr - resize!(b, nr) + io.readable || _throw_not_readable() + to_read = min(nb, bytesavailable(io)) + if length(b) < to_read + resize!(b, to_read) end - read_sub(io, b, 1, nr) - return nr + checkbounds(b, 1:to_read) + GC.@preserve b unsafe_read(io, pointer(b), to_read) + to_read end read(io::GenericIOBuffer) = read!(io, StringVector(bytesavailable(io))) + +# For IO buffers, all the data is immediately available. readavailable(io::GenericIOBuffer) = read(io) -read(io::GenericIOBuffer, nb::Integer) = read!(io, StringVector(min(nb, bytesavailable(io)))) -function occursin(delim::UInt8, buf::IOBuffer) - p = pointer(buf.data, buf.ptr) - q = GC.@preserve buf ccall(:memchr, Ptr{UInt8}, (Ptr{UInt8}, Int32, Csize_t), p, delim, bytesavailable(buf)) - return q != C_NULL -end +read(io::GenericIOBuffer, nb::Integer) = read!(io, StringVector(min(nb, bytesavailable(io)))) function occursin(delim::UInt8, buf::GenericIOBuffer) - data = buf.data - for i = buf.ptr:buf.size - @inbounds b = data[i] - b == delim && return true - end - return false + return in(delim, view(buf.data, buf.ptr:buf.size)) end function copyuntil(out::IO, io::GenericIOBuffer, delim::UInt8; keep::Bool=false) @@ -622,21 +916,45 @@ function copyuntil(out::IO, io::GenericIOBuffer, delim::UInt8; keep::Bool=false) end function copyline(out::GenericIOBuffer, s::IO; keep::Bool=false) - copyuntil(out, s, 0x0a, keep=true) - line = out.data - i = out.size # XXX: this is only correct for appended data. if the data was inserted, only ptr should change - if keep || i == out.offset || line[i] != 0x0a + # If the data is copied into the middle of the buffer of `out` instead of appended to the end, + # and !keep, and the line copied ends with \r\n, then the copyuntil (even if keep=false) + # will overwrite one too many bytes with the new \r byte. + # Work around this by making a new temporary buffer. + # Could perhaps be done better + if !out.append && out.ptr < out.size + 1 + newbuf = IOBuffer() + copyuntil(newbuf, s, 0x0a, keep=true) + v = take!(newbuf) + # Remove \r\n or \n if present + if !keep + if length(v) > 1 && last(v) == UInt8('\n') + pop!(v) + end + if length(v) > 1 && last(v) == UInt8('\r') + pop!(v) + end + end + write(out, v) return out - elseif i < 2 || line[i-1] != 0x0d - i -= 1 else - i -= 2 - end - out.size = i - if !out.append - out.ptr = i+1 + # Else, we can just copy the data directly into the buffer, and then + # subtract the last one or two bytes depending on `keep`. + copyuntil(out, s, 0x0a, keep=true) + line = out.data + i = out.size + if keep || i == out.offset_or_compacted || line[i] != 0x0a + return out + elseif i < 2 || line[i-1] != 0x0d + i -= 1 + else + i -= 2 + end + out.size = i + if !out.append + out.ptr = i+1 + end + return out end - return out end function _copyline(out::IO, io::GenericIOBuffer; keep::Bool=false) @@ -644,6 +962,7 @@ function _copyline(out::IO, io::GenericIOBuffer; keep::Bool=false) # note: findfirst + copyto! is much faster than a single loop # except for nout ≲ 20. A single loop is 2x faster for nout=5. 
nout = nread = something(findfirst(==(0x0a), data), length(data))::Int + # Remove the 0x0a (newline) if not keep, and also remove the 0x0d (\r) if it is there if !keep && nout > 0 && data[nout] == 0x0a nout -= 1 nout > 0 && data[nout] == 0x0d && (nout -= 1) @@ -652,6 +971,7 @@ function _copyline(out::IO, io::GenericIOBuffer; keep::Bool=false) io.ptr += nread return out end + copyline(out::IO, io::GenericIOBuffer; keep::Bool=false) = _copyline(out, io; keep) copyline(out::GenericIOBuffer, io::GenericIOBuffer; keep::Bool=false) = _copyline(out, io; keep) diff --git a/base/stream.jl b/base/stream.jl index 33d884018d5ad..5732a62c2153b 100644 --- a/base/stream.jl +++ b/base/stream.jl @@ -615,9 +615,9 @@ end ## BUFFER ## ## Allocate space in buffer (for immediate use) function alloc_request(buffer::IOBuffer, recommended_size::UInt) - ensureroom(buffer, Int(recommended_size)) + ensureroom(buffer, recommended_size) ptr = buffer.append ? buffer.size + 1 : buffer.ptr - nb = min(length(buffer.data)-buffer.offset, buffer.maxsize) + buffer.offset - ptr + 1 + nb = min(length(buffer.data), buffer.maxsize + get_offset(buffer)) - ptr + 1 return (Ptr{Cvoid}(pointer(buffer.data, ptr)), nb) end @@ -942,8 +942,7 @@ function readbytes!(s::LibuvStream, a::Vector{UInt8}, nb::Int) nread = readbytes!(sbuf, a, nb) else initsize = length(a) - newbuf = PipeBuffer(a, maxsize=nb) - newbuf.size = newbuf.offset # reset the write pointer to the beginning + newbuf = _truncated_pipebuffer(a; maxsize=nb) nread = try s.buffer = newbuf write(newbuf, sbuf) @@ -990,8 +989,7 @@ function unsafe_read(s::LibuvStream, p::Ptr{UInt8}, nb::UInt) if bytesavailable(sbuf) >= nb unsafe_read(sbuf, p, nb) else - newbuf = PipeBuffer(unsafe_wrap(Array, p, nb), maxsize=Int(nb)) - newbuf.size = newbuf.offset # reset the write pointer to the beginning + newbuf = _truncated_pipebuffer(unsafe_wrap(Array, p, nb); maxsize=Int(nb)) try s.buffer = newbuf write(newbuf, sbuf) @@ -1599,8 +1597,7 @@ function readbytes!(s::BufferStream, a::Vector{UInt8}, nb::Int) nread = readbytes!(sbuf, a, nb) else initsize = length(a) - newbuf = PipeBuffer(a, maxsize=nb) - newbuf.size = newbuf.offset # reset the write pointer to the beginning + newbuf = _truncated_pipebuffer(a; maxsize=nb) nread = try s.buffer = newbuf write(newbuf, sbuf) diff --git a/test/iobuffer.jl b/test/iobuffer.jl index a9d58f4b7871e..7ed5c1f5b3ed6 100644 --- a/test/iobuffer.jl +++ b/test/iobuffer.jl @@ -6,6 +6,267 @@ ioslength(io::IOBuffer) = (io.seekable ? io.size : bytesavailable(io)) bufcontents(io::Base.GenericIOBuffer) = unsafe_string(pointer(io.data), io.size) +# Julia Base's internals uses the PipeBuffer, which is an unseekable IOBuffer. +# There are no public constructors to build such a buffer, but we need to test +# it anyway. +# I make a new method here such that if the implementation of Base.PipeBuffer +# changes, these tests will still work. 
+new_unseekable_buffer() = Base.GenericIOBuffer(Memory{UInt8}(), true, true, false, true, typemax(Int), false) + +@testset "Basic tests" begin + @test_throws ArgumentError IOBuffer(;maxsize=-1) + @test_throws ArgumentError IOBuffer([0x01]; maxsize=-1) + + # Test that sizehint actually will sizehint the vector, + v = UInt8[] + buf = IOBuffer(v; sizehint=64, write=true) + @test length(v.ref.mem) >= 64 + + # Test that you can't make an IOBuffer with a maxsize + # smaller than the size you actually give it + @test_throws ArgumentError IOBuffer([0x01, 0x02]; maxsize=1) + @test_throws ArgumentError IOBuffer(b"abcdefghij"; maxsize=8) +end + +@testset "Basic reading" begin + # Readavailable is equal to read + buf = IOBuffer("abcdef") + @test read(buf, UInt8) == UInt8('a') + @test bytesavailable(buf) == 5 + @test readavailable(buf) == b"bcdef" + + # Reading less than all the bytes + buf = IOBuffer(b"ABCDEFGHIJ") + @test read(buf, 1) == b"A" + @test read(buf, 3) == b"BCD" + + # Reading more bytes than available will not error + @test read(buf, 100) == b"EFGHIJ" + + # Passing truncate=false will still truncate an IOBuffer with no + # initialized data + @test isempty(read(IOBuffer(;sizehint=34, truncate=false))) +end + +@testset "Byte occursin GenericIOBuffer" begin + buf = IOBuffer(@view(collect(0x1f:0x3d)[1:end])) + @test occursin(0x1f, buf) + @test occursin(0x3d, buf) + @test occursin(0x2a, buf) + + @test !occursin(0xff, buf) + @test !occursin(0x00, buf) + + v = Vector{UInt8}("bcdefg") + pushfirst!(v, UInt8('a')) + buf = IOBuffer(v) + @test occursin(UInt8('a'), buf) + read(buf, UInt8) + @test !occursin(UInt8('a'), buf) + @test !occursin(0x00, buf) + + buf = IOBuffer("abcdefg") + @test occursin(UInt8('a'), buf) +end + +@testset "Non-Memory backed IOBuffer" begin + buf = IOBuffer(Test.GenericArray(collect(0x02:0x0d)), read=true) + @test read(buf) == 0x02:0x0d + + buf = IOBuffer(Test.GenericArray(collect(0x02:0x0d)), read=true) + @test read(buf, UInt8) == 0x02 + @test read(buf) == 0x03:0x0d + + v = view(collect(UInt8('a'):UInt8('z')), 4:10) + buf = IOBuffer(v, read=true, write=true) + @test read(buf, UInt8) == UInt8('d') + @test read(buf) == UInt8('e'):UInt8('j') + seekstart(buf) + @test read(buf, UInt8) == UInt8('d') + write(buf, UInt8('x')) + write(buf, "ABC") + seekstart(buf) + @test read(buf) == b"dxABCij" +end + +@testset "Copying" begin + # Test offset is preserved when copying + v = UInt8[] + pushfirst!(v, UInt8('a'), UInt8('b'), UInt8('c')) + buf = IOBuffer(v; write=true, read=true, append=true) + write(buf, "def") + read(buf, UInt16) + buf2 = copy(buf) + @test String(read(buf)) == "cdef" + @test String(read(buf2)) == "cdef" + + # Test copying with non-Memory backed GenericIOBuffer + buf = IOBuffer(Test.GenericArray(collect(0x02:0x0d)), read=true) + @test read(buf, UInt16) == 0x0302 + buf2 = copy(buf) + @test isreadable(buf2) + @test !iswritable(buf2) + @test read(buf2) == 0x04:0x0d + + # Test copying a non-seekable stream + buf = new_unseekable_buffer() + write(buf, "abcdef") + read(buf, UInt16) + mark(buf) + read(buf, UInt16) + buf2 = copy(buf) + @test read(buf2) == b"ef" + reset(buf2) + @test read(buf2) == b"cdef" + + # Test copying seekable stream + buf = IOBuffer() + write(buf, "abcdef") + seekstart(buf) + read(buf) + mark(buf) + buf2 = copy(buf) + @test reset(buf2) == 6 + seekstart(buf2) + @test read(buf2) == b"abcdef" + + # Test copying a taken buffer + buf = IOBuffer() + write(buf, "abcdef") + take!(buf) + buf2 = copy(buf) + @test eof(buf2) + seekstart(buf2) + @test eof(buf2) +end + 
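# A minimal sketch of the mark/reset behaviour the "Copying" tests above rely on:
# for an unseekable buffer, a mark keeps already-consumed bytes recoverable (they
# may not be compacted away), so `reset` can rewind to them. This uses the
# `new_unseekable_buffer` helper defined at the top of this file and is only an
# illustration, not part of the test suite.
buf = new_unseekable_buffer()
write(buf, "abcdef")
read(buf, UInt16)            # consume "ab"; these bytes may now be discarded
mark(buf)                    # from here on, consumed data must stay recoverable
read(buf, UInt16)            # consume "cd"
reset(buf)                   # rewind to the mark
@assert read(buf) == b"cdef" # the marked region is still available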
+@testset "copyuntil" begin + a = IOBuffer(b"abcdeajdgabdfg") + b = IOBuffer(collect(b"xx"); write=true, read=true, append=true) + copyuntil(b, a, UInt8('a')) + @test read(b) == b"xx" + seekstart(b) + copyuntil(b, a, UInt8('a'); keep=true) + @test read(b) == b"xxbcdea" + seekstart(b) + copyuntil(b, a, UInt('w')) + @test read(b) == b"xxbcdeajdgabdfg" +end + +@testset "copyline" begin + a = IOBuffer(b"abcde\nabc\r\nabc\n\r\nac") + b = IOBuffer() + copyline(b, a) + @test take!(copy(b)) == b"abcde" + copyline(b, a) + @test take!(copy(b)) == b"abcdeabc" + copyline(b, a; keep=true) + @test take!(copy(b)) == b"abcdeabcabc\n" + copyline(b, a; keep=false) + @test take!(copy(b)) == b"abcdeabcabc\n" + copyline(b, a; keep=false) + @test take!(copy(b)) == b"abcdeabcabc\nac" + + # Test a current bug in copyline + a = Base.SecretBuffer("abcde\r\n") + b = IOBuffer() + write(b, "xxxxxxxxxx") + seek(b, 2) + copyline(b, a; keep=false) + Base.shred!(a) + @test take!(b) == b"xxabcdexxx" +end + +@testset "take!" begin + a = IOBuffer("abc") + @test take!(a) == b"abc" + + v = UInt8[] + pushfirst!(v, 0x0a) + buf = IOBuffer(v; write=true, append=true) + write(buf, "def") + @test take!(buf) == b"\ndef" + + v = view(collect(b"abcdefghij"), 3:9) + buf = IOBuffer(v; write=true, read=true) + read(buf, UInt8) + write(buf, "xxy") + @test take!(buf) == b"cxxyghi" + + v = view(collect(b"abcdefghij"), 3:9) + buf = IOBuffer(v; write=true, read=true) + + # Take on unseekable buffer does not return used bytes. + buf = new_unseekable_buffer() + write(buf, 0x61) + write(buf, "bcd") + @test read(buf, UInt8) == 0x61 + @test take!(buf) == b"bcd" + + # Compaction is reset after take! + buf = Base.GenericIOBuffer(Memory{UInt8}(), true, true, false, true, 100, false) + write(buf, rand(UInt8, 50)) + read(buf, 40) + write(buf, rand(UInt8, 100)) + mark(buf) + read(buf, 70) + @test position(buf) == 110 + @test length(buf.data) <= 100 + v = take!(buf) + write(buf, 0xf1) + @test position(buf) == 0 + @test !ismarked(buf) +end + +@testset "maxsize is preserved" begin + # After take! 
+ buf = IOBuffer(; maxsize=3) + print(buf, "abcdef") + @test take!(buf) == b"abc" + print(buf, "abcdef") + @test take!(buf) == b"abc" + + # After resizing + buf = IOBuffer(;maxsize=128) + write(buf, collect(0x00:0x10)) + write(buf, collect(0x11:0x30)) + write(buf, collect(0x31:0x98)) + write(buf, collect(0x99:0xff)) + seekstart(buf) + @test read(buf) == 0x00:UInt8(127) + + # Edge case: When passing a Vector, does not error if the + # underlying mem is larger than maxsize + v = pushfirst!([0x01], 0x02) + io = IOBuffer(v; maxsize=2) + @test read(io) == b"\x02\x01" + + # Buffer will not write past maxsize, even if given a larger buffer + # And also even if the data is taken and replaced + v = sizehint!(UInt8[], 128) + io = IOBuffer(v; write=true, read=true, maxsize=12) + write(io, 0x01:0x0f) + seekstart(io) + @test read(io) == 0x01:0x0c + @test write(io, 0x01) == 0 + @test write(io, "abc") == 0 + @test take!(io).ref.mem === v.ref.mem + write(io, 0x01:0x0f) + @test take!(io) == 0x01:0x0c +end + +@testset "Write to self" begin + buffer = IOBuffer() + @test_throws ArgumentError write(buffer, buffer) + + # Write to another IOBuffer with limited size + to = IOBuffer(;maxsize=4) + from = IOBuffer(collect(b"abcdefghi")) + write(to, from) + @test String(take!(to)) == "abcd" + @test eof(from) +end + @testset "Read/write empty IOBuffer" begin io = IOBuffer() @test eof(io) @@ -33,7 +294,7 @@ bufcontents(io::Base.GenericIOBuffer) = unsafe_string(pointer(io.data), io.size) @test position(io) == 0 truncate(io, 10) @test position(io) == 0 - @test all(io.data .== 0) + @test all(view(io.data, 1:10) .== 0) @test write(io, Int16[1, 2, 3, 4, 5, 6]) === 12 seek(io, 2) truncate(io, 10) @@ -67,22 +328,89 @@ end @test_throws ArgumentError write(io,UInt8[0]) @test String(take!(io)) == "hamster\nguinea pig\nturtle" @test String(take!(io)) == "hamster\nguinea pig\nturtle" #should be unchanged - @test_throws ArgumentError Base.compact(io) # not writeable close(io) end +@testset "Truncate" begin + # Fails for non-writable and non-seekable + @test_throws ArgumentError truncate(PipeBuffer(), 0) + @test_throws ArgumentError truncate(IOBuffer(b"abcde"), 3) + + # Standard use + buf = IOBuffer(collect(b"abcdef"); write=true, read=true) + truncate(buf, 4) + @test read(buf) == b"abcd" + @test take!(buf) == b"abcd" + + # Mark is removed if beyond the size + buf = IOBuffer() + write(buf, "abcde") + seek(buf, 4) + mark(buf) + truncate(buf, 4) + @test !ismarked(buf) + + # Making it larger + buf = IOBuffer(collect(b"abcdef"); write=true, read=true) + seek(buf, 3) + truncate(buf, 3) + write(buf, 'X') + mark(buf) + truncate(buf, 5) + @test ismarked(buf) + @test reset(buf) == 4 + @test take!(buf) == b"abcX\0" + + # With offset + v = pushfirst!(UInt8[0x62, 0x63, 0x64], 0x61) + buf = IOBuffer(v; write=true, read=true) + seekstart(buf) + read(buf, UInt8) + mark(buf) + truncate(buf, 7) + @test reset(buf) == 1 + @test take!(buf) == b"abcd\0\0\0" +end + +@testset "Position of compactable buffer" begin + # Set maxsize, because otherwise compaction it too hard to reason about, + # and this test will be brittle + io = Base.GenericIOBuffer(Memory{UInt8}(), true, true, false, true, 100, false) + write(io, "abcd") + read(io, UInt16) + @test position(io) == 2 + write(io, "abcde"^80) + @test position(io) == 2 + read(io, 60) + @test position(io) == 62 + mark(io) + # Trigger compaction + write(io, rand(UInt8, 50)) + @test position(io) == 62 + v1 = read(io, 20) + @test position(io) == 82 + @test reset(io) == 62 + @test position(io) == 62 + v2 = read(io, 
20) + @test v1 == v2 +end + @testset "PipeBuffer" begin - io = PipeBuffer() + io = new_unseekable_buffer() @test_throws EOFError read(io,UInt8) @test write(io,"pancakes\nwaffles\nblueberries\n") > 0 + + # PipeBuffer is append, so writing to it does not advance the position @test position(io) == 0 @test readline(io) == "pancakes" - Base.compact(io) @test readline(io) == "waffles" @test write(io,"whipped cream\n") > 0 @test readline(io) == "blueberries" + + # Pipebuffers do not support seeking, and therefore do not support truncation. @test_throws ArgumentError seek(io,0) @test_throws ArgumentError truncate(io,0) + @test readline(io) == "whipped cream" @test write(io,"pancakes\nwaffles\nblueberries\n") > 0 @test readlines(io) == String["pancakes", "waffles", "blueberries"] @@ -116,58 +444,6 @@ end end rm(fname) end - - Base.compact(io) - @test position(io) == 0 - @test ioslength(io) == 0 - Base._resize!(io,0) - Base.ensureroom(io,50) - @test position(io) == 0 - @test ioslength(io) == 0 - @test length(io.data) == 50 - Base.ensureroom(io,10) - @test ioslength(io) == 0 - @test length(io.data) == 50 - io.maxsize = 75 - Base.ensureroom(io,100) - @test ioslength(io) == 0 - @test length(io.data) == 75 - seekend(io) - @test ioslength(io) == 0 - @test position(io) == 0 - write(io,zeros(UInt8,200)) - @test ioslength(io) == 75 - @test length(io.data) == 75 - write(io,1) - @test ioslength(io) == 75 - @test length(io.data) == 75 - write(io,[1,2,3]) - @test ioslength(io) == 75 - @test length(io.data) == 75 - skip(io,1) - @test write(io,UInt8(104)) === 1 - skip(io,3) - @test write(io,b"apples") === 3 - skip(io,71) - @test write(io,'y') === 1 - @test read(io, String) == "happy" - @test eof(io) - write(io,zeros(UInt8,73)) - write(io,'a') - write(io,'b') - write(io,'c') - write(io,'d') - write(io,'e') - @test ioslength(io) == 75 - @test length(io.data) == 75 - @test position(io) == 0 - skip(io,72) - @test String(take!(io)) == "\0ab" - @test String(take!(io)) == "" - - # issues 4021 - print(io, true) - close(io) end @testset "issue 5453" begin @@ -248,9 +524,6 @@ end truncate(io2, io2.size - 2) @test read(io2, String) == "goodnightmoonhelloworld" seek(io2, 0) - write(io2, io2) - @test read(io2, String) == "" - @test bufcontents(io2) == "goodnightmoonhelloworld" end # issue #11917 @@ -347,24 +620,42 @@ end @test n == 5 end -@testset "Base.compact" begin - a = Base.GenericIOBuffer(UInt8[], true, true, false, true, typemax(Int)) - mark(a) # mark at position 0 - write(a, "Hello!") - @test Base.compact(a) === nothing # because pointer > mark - close(a) - b = Base.GenericIOBuffer(UInt8[], true, true, false, true, typemax(Int)) - write(b, "Hello!") - read(b) - mark(b) # mark at position 6 - write(b, "Goodbye!") # now pointer is > mark but mark is > 0 - Base.compact(b) - @test readline(b) == "Goodbye!" - close(b) +@testset "Compacting" begin + # Compacting works + buf = Base.GenericIOBuffer(UInt8[], true, true, false, true, 20, false) + mark(buf) + write(buf, "Hello"^5) + reset(buf) + unmark(buf) + read(buf, UInt8) + read(buf, UInt8) + write(buf, "a!") + @test length(buf.data) == 20 + @test String(take!(buf)) == "llo" * "Hello"^3 * "a!" 
+ + # Compacting does not do anything when mark == 0 + buf = Base.GenericIOBuffer(UInt8[], true, true, false, true, 5, false) + mark(buf) + write(buf, "Hello") + reset(buf) + mark(buf) + read(buf, UInt8) + read(buf, UInt8) + @test write(buf, "a!") == 0 + @test take!(buf) == b"llo" + + # Compacting without maxsize still works + buf = new_unseekable_buffer() + data = repeat(b"abcdefg", 100) + write(buf, data) + read(buf, 600) + data_len = length(buf.data) + write(buf, view(data, 1:500)) + @test length(buf.data) == data_len end @testset "peek(::GenericIOBuffer)" begin - io = Base.GenericIOBuffer(UInt8[], true, true, false, true, typemax(Int)) + io = Base.GenericIOBuffer(UInt8[], true, true, false, true, typemax(Int), false) write(io, "こんにちは") @test peek(io) == 0xe3 @test peek(io, Char) == 'こ' @@ -381,13 +672,22 @@ end v = @view a[1:2] io = IOBuffer() write(io,1) + write(io,0) seek(io,0) - @test Base.read_sub(io,v,1,1) == [1,0] + @test read!(io, v) == [1, 0] end @testset "with offset" begin b = pushfirst!([0x02], 0x01) @test take!(IOBuffer(b)) == [0x01, 0x02] + + # Read-only buffer does not take control of underlying buffer + v = pushfirst!([0x62, 0x63], 0x61) + buf = IOBuffer(v; write=false) + @test read(buf) == b"abc" + @test v == b"abc" # v is unchanged + + # Truncate end @testset "#54636 reading from non-dense vectors" begin From efc1ba908b5bfd914e61aaa1740b5ae6aa08bac0 Mon Sep 17 00:00:00 2001 From: Cody Tapscott <84105208+topolarity@users.noreply.github.com> Date: Sat, 29 Mar 2025 12:36:42 -0400 Subject: [PATCH 09/20] Move `eachregion(::AnnotatedString)` implementation to `Base` (#57912) Excising part of https://github.com/JuliaLang/julia/pull/56194/ on the way to reviving that PR. (cherry picked from commit a5e0eabfdbecc40ff9c3d4ba86a4bc76036f352e) --- base/strings/annotated.jl | 258 +++++++++++------------------------ base/strings/annotated_io.jl | 201 +++++++++++++++++++++++++++ base/strings/strings.jl | 1 + test/strings/annotated.jl | 48 +++++++ 4 files changed, 333 insertions(+), 175 deletions(-) create mode 100644 base/strings/annotated_io.jl diff --git a/base/strings/annotated.jl b/base/strings/annotated.jl index 0dcac0bf2de3b..1fbbdc1dc44e9 100644 --- a/base/strings/annotated.jl +++ b/base/strings/annotated.jl @@ -460,201 +460,109 @@ function annotated_chartransform(f::Function, str::AnnotatedString, state=nothin AnnotatedString(String(take!(outstr)), annots) end -## AnnotatedIOBuffer - -struct AnnotatedIOBuffer <: AbstractPipe - io::IOBuffer - annotations::Vector{RegionAnnotation} -end - -AnnotatedIOBuffer(io::IOBuffer) = AnnotatedIOBuffer(io, Vector{RegionAnnotation}()) -AnnotatedIOBuffer() = AnnotatedIOBuffer(IOBuffer()) - -function show(io::IO, aio::AnnotatedIOBuffer) - show(io, AnnotatedIOBuffer) - size = filesize(aio.io) - print(io, '(', size, " byte", ifelse(size == 1, "", "s"), ", ", - length(aio.annotations), " annotation", ifelse(length(aio.annotations) == 1, "", "s"), ")") +struct RegionIterator{S <: AbstractString} + str::S + regions::Vector{UnitRange{Int}} + annotations::Vector{Vector{Annotation}} end -pipe_reader(io::AnnotatedIOBuffer) = io.io -pipe_writer(io::AnnotatedIOBuffer) = io.io - -# Useful `IOBuffer` methods that we don't get from `AbstractPipe` -position(io::AnnotatedIOBuffer) = position(io.io) -seek(io::AnnotatedIOBuffer, n::Integer) = (seek(io.io, n); io) -seekend(io::AnnotatedIOBuffer) = (seekend(io.io); io) -skip(io::AnnotatedIOBuffer, n::Integer) = (skip(io.io, n); io) -copy(io::AnnotatedIOBuffer) = AnnotatedIOBuffer(copy(io.io), copy(io.annotations)) 
- -annotations(io::AnnotatedIOBuffer) = io.annotations - -annotate!(io::AnnotatedIOBuffer, range::UnitRange{Int}, label::Symbol, @nospecialize(val::Any)) = - (_annotate!(io.annotations, range, label, val); io) - -function write(io::AnnotatedIOBuffer, astr::Union{AnnotatedString, SubString{<:AnnotatedString}}) - astr = AnnotatedString(astr) - offset = position(io.io) - eof(io) || _clear_annotations_in_region!(io.annotations, offset+1:offset+ncodeunits(astr)) - _insert_annotations!(io, astr.annotations) - write(io.io, String(astr)) -end +Base.length(si::RegionIterator) = length(si.regions) -write(io::AnnotatedIOBuffer, c::AnnotatedChar) = - write(io, AnnotatedString(string(c), [(region=1:ncodeunits(c), a...) for a in c.annotations])) -write(io::AnnotatedIOBuffer, x::AbstractString) = write(io.io, x) -write(io::AnnotatedIOBuffer, s::Union{SubString{String}, String}) = write(io.io, s) -write(io::AnnotatedIOBuffer, b::UInt8) = write(io.io, b) - -function write(dest::AnnotatedIOBuffer, src::AnnotatedIOBuffer) - destpos = position(dest) - isappending = eof(dest) - srcpos = position(src) - nb = write(dest.io, src.io) - isappending || _clear_annotations_in_region!(dest.annotations, destpos:destpos+nb) - srcannots = [setindex(annot, max(1 + srcpos, first(annot.region)):last(annot.region), :region) - for annot in src.annotations if first(annot.region) >= srcpos] - _insert_annotations!(dest, srcannots, destpos - srcpos) - nb +Base.@propagate_inbounds function Base.iterate(si::RegionIterator, i::Integer=1) + if i <= length(si.regions) + @inbounds ((SubString(si.str, si.regions[i]), si.annotations[i]), i+1) + end end -# So that read/writes with `IOContext` (and any similar `AbstractPipe` wrappers) -# work as expected. -function write(io::AbstractPipe, s::Union{AnnotatedString, SubString{<:AnnotatedString}}) - if pipe_writer(io) isa AnnotatedIOBuffer - write(pipe_writer(io), s) - else - invoke(write, Tuple{IO, typeof(s)}, io, s) - end::Int -end -# Can't be part of the `Union` above because it introduces method ambiguities -function write(io::AbstractPipe, c::AnnotatedChar) - if pipe_writer(io) isa AnnotatedIOBuffer - write(pipe_writer(io), c) - else - invoke(write, Tuple{IO, typeof(c)}, io, c) - end::Int -end +Base.eltype(::RegionIterator{S}) where { S <: AbstractString} = + Tuple{SubString{S}, Vector{Annotation}} """ - _clear_annotations_in_region!(annotations::Vector{$RegionAnnotation}, span::UnitRange{Int}) + eachregion(s::AnnotatedString{S}) + eachregion(s::SubString{AnnotatedString{S}}) -Erase the presence of `annotations` within a certain `span`. +Identify the contiguous substrings of `s` with a constant annotations, and return +an iterator which provides each substring and the applicable annotations as a +`Tuple{SubString{S}, Vector{$Annotation}}`. -This operates by removing all elements of `annotations` that are entirely -contained in `span`, truncating ranges that partially overlap, and splitting -annotations that subsume `span` to just exist either side of `span`. +# Examples + +```jldoctest; setup=:(using Base: AnnotatedString, eachregion) +julia> collect(eachregion(AnnotatedString( + "hey there", [(1:3, :face, :bold), + (5:9, :face, :italic)]))) +3-element Vector{Tuple{SubString{String}, Vector{$Annotation}}}: + ("hey", [$Annotation((:face, :bold))]) + (" ", []) + ("there", [$Annotation((:face, :italic))]) +``` """ -function _clear_annotations_in_region!(annotations::Vector{RegionAnnotation}, span::UnitRange{Int}) - # Clear out any overlapping pre-existing annotations. 
- filter!(ann -> first(ann.region) < first(span) || last(ann.region) > last(span), annotations) - extras = Tuple{Int, RegionAnnotation}[] - for i in eachindex(annotations) - annot = annotations[i] - region = annot.region - # Test for partial overlap - if first(region) <= first(span) <= last(region) || first(region) <= last(span) <= last(region) - annotations[i] = - setindex(annot, - if first(region) < first(span) - first(region):first(span)-1 - else - last(span)+1:last(region) - end, - :region) - # If `span` fits exactly within `region`, then we've only copied over - # the beginning overhang, but also need to conserve the end overhang. - if first(region) < first(span) && last(span) < last(region) - push!(extras, (i, setindex(annot, last(span)+1:last(region), :region))) - end +function eachregion(s::AnnotatedString, subregion::UnitRange{Int}=firstindex(s):lastindex(s)) + isempty(s) || isempty(subregion) && + return RegionIterator(s.string, UnitRange{Int}[], Vector{Annotation}[]) + events = annotation_events(s, subregion) + isempty(events) && return RegionIterator(s.string, [subregion], [Annotation[]]) + annotvals = Annotation[ + (; label, value) for (; label, value) in annotations(s)] + regions = Vector{UnitRange{Int}}() + annots = Vector{Vector{Annotation}}() + pos = first(events).pos + if pos > first(subregion) + push!(regions, thisind(s, first(subregion)):prevind(s, pos)) + push!(annots, []) + end + activelist = Int[] + for event in events + if event.pos != pos + push!(regions, pos:prevind(s, event.pos)) + push!(annots, annotvals[activelist]) + pos = event.pos + end + if event.active + insert!(activelist, searchsortedfirst(activelist, event.index), event.index) + else + deleteat!(activelist, searchsortedfirst(activelist, event.index)) end end - # Insert any extra entries in the appropriate position - for (offset, (i, entry)) in enumerate(extras) - insert!(annotations, i + offset, entry) + if last(events).pos < nextind(s, last(subregion)) + push!(regions, last(events).pos:thisind(s, last(subregion))) + push!(annots, []) end - annotations + RegionIterator(s.string, regions, annots) end -""" - _insert_annotations!(io::AnnotatedIOBuffer, annotations::Vector{$RegionAnnotation}, offset::Int = position(io)) +function eachregion(s::SubString{<:AnnotatedString}, pos::UnitRange{Int}=firstindex(s):lastindex(s)) + if isempty(s) + RegionIterator(s.string, Vector{UnitRange{Int}}(), Vector{Vector{Annotation}}()) + else + eachregion(s.string, first(pos)+s.offset:last(pos)+s.offset) + end +end -Register new `annotations` in `io`, applying an `offset` to their regions. +""" + annotation_events(string::AbstractString, annots::Vector{$RegionAnnotation}, subregion::UnitRange{Int}) + annotation_events(string::AnnotatedString, subregion::UnitRange{Int}) -The largely consists of simply shifting the regions of `annotations` by `offset` -and pushing them onto `io`'s annotations. However, when it is possible to merge -the new annotations with recent annotations in accordance with the semantics -outlined in [`AnnotatedString`](@ref), we do so. More specifically, when there -is a run of the most recent annotations that are also present as the first -`annotations`, with the same value and adjacent regions, the new annotations are -merged into the existing recent annotations by simply extending their range. +Find all annotation "change events" that occur within a `subregion` of `annots`, +with respect to `string`. When `string` is styled, `annots` is inferred. 
-This is implemented so that one can say write an `AnnotatedString` to an -`AnnotatedIOBuffer` one character at a time without needlessly producing a -new annotation for each character. +Each change event is given in the form of a `@NamedTuple{pos::Int, active::Bool, +index::Int}` where `pos` is the position of the event, `active` is a boolean +indicating whether the annotation is being activated or deactivated, and `index` +is the index of the annotation in question. """ -function _insert_annotations!(io::AnnotatedIOBuffer, annotations::Vector{RegionAnnotation}, offset::Int = position(io)) - run = 0 - if !isempty(io.annotations) && last(last(io.annotations).region) == offset - for i in reverse(axes(annotations, 1)) - annot = annotations[i] - first(annot.region) == 1 || continue - i <= length(io.annotations) || continue - if annot.label == last(io.annotations).label && annot.value == last(io.annotations).value - valid_run = true - for runlen in 1:i - new = annotations[begin+runlen-1] - old = io.annotations[end-i+runlen] - if last(old.region) != offset || first(new.region) != 1 || old.label != new.label || old.value != new.value - valid_run = false - break - end - end - if valid_run - run = i - break - end - end +function annotation_events(s::AbstractString, annots::Vector{RegionAnnotation}, subregion::UnitRange{Int}) + events = Vector{NamedTuple{(:pos, :active, :index), Tuple{Int, Bool, Int}}}() # Position, Active?, Annotation index + for (i, (; region)) in enumerate(annots) + if !isempty(intersect(subregion, region)) + start, stop = max(first(subregion), first(region)), min(last(subregion), last(region)) + start <= stop || continue # Currently can't handle empty regions + push!(events, (pos=thisind(s, start), active=true, index=i)) + push!(events, (pos=nextind(s, stop), active=false, index=i)) end end - for runindex in 0:run-1 - old_index = lastindex(io.annotations) - run + 1 + runindex - old = io.annotations[old_index] - new = annotations[begin+runindex] - io.annotations[old_index] = setindex(old, first(old.region):last(new.region)+offset, :region) - end - for index in run+1:lastindex(annotations) - annot = annotations[index] - start, stop = first(annot.region), last(annot.region) - push!(io.annotations, setindex(annotations[index], start+offset:stop+offset, :region)) - end + sort(events, by=e -> e.pos) end -function read(io::AnnotatedIOBuffer, ::Type{AnnotatedString{T}}) where {T <: AbstractString} - if (start = position(io)) == 0 - AnnotatedString(read(io.io, T), copy(io.annotations)) - else - annots = [setindex(annot, UnitRange{Int}(max(1, first(annot.region) - start), last(annot.region)-start), :region) - for annot in io.annotations if last(annot.region) > start] - AnnotatedString(read(io.io, T), annots) - end -end -read(io::AnnotatedIOBuffer, ::Type{AnnotatedString{AbstractString}}) = read(io, AnnotatedString{String}) -read(io::AnnotatedIOBuffer, ::Type{AnnotatedString}) = read(io, AnnotatedString{String}) - -function read(io::AnnotatedIOBuffer, ::Type{AnnotatedChar{T}}) where {T <: AbstractChar} - pos = position(io) - char = read(io.io, T) - annots = [NamedTuple{(:label, :value)}(annot) for annot in io.annotations if pos+1 in annot.region] - AnnotatedChar(char, annots) -end -read(io::AnnotatedIOBuffer, ::Type{AnnotatedChar{AbstractChar}}) = read(io, AnnotatedChar{Char}) -read(io::AnnotatedIOBuffer, ::Type{AnnotatedChar}) = read(io, AnnotatedChar{Char}) - -function truncate(io::AnnotatedIOBuffer, size::Integer) - truncate(io.io, size) - filter!(ann -> first(ann.region) <= size, 
io.annotations) - map!(ann -> setindex(ann, first(ann.region):min(size, last(ann.region)), :region), - io.annotations, io.annotations) - io -end +annotation_events(s::AnnotatedString, subregion::UnitRange{Int}) = + annotation_events(s.string, annotations(s), subregion) diff --git a/base/strings/annotated_io.jl b/base/strings/annotated_io.jl new file mode 100644 index 0000000000000..87db57b8030c9 --- /dev/null +++ b/base/strings/annotated_io.jl @@ -0,0 +1,201 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +## AnnotatedIOBuffer + +struct AnnotatedIOBuffer <: AbstractPipe + io::IOBuffer + annotations::Vector{RegionAnnotation} +end + +AnnotatedIOBuffer(io::IOBuffer) = AnnotatedIOBuffer(io, Vector{RegionAnnotation}()) +AnnotatedIOBuffer() = AnnotatedIOBuffer(IOBuffer()) + +function show(io::IO, aio::AnnotatedIOBuffer) + show(io, AnnotatedIOBuffer) + size = filesize(aio.io) + print(io, '(', size, " byte", ifelse(size == 1, "", "s"), ", ", + length(aio.annotations), " annotation", ifelse(length(aio.annotations) == 1, "", "s"), ")") +end + +pipe_reader(io::AnnotatedIOBuffer) = io.io +pipe_writer(io::AnnotatedIOBuffer) = io.io + +# Useful `IOBuffer` methods that we don't get from `AbstractPipe` +position(io::AnnotatedIOBuffer) = position(io.io) +seek(io::AnnotatedIOBuffer, n::Integer) = (seek(io.io, n); io) +seekend(io::AnnotatedIOBuffer) = (seekend(io.io); io) +skip(io::AnnotatedIOBuffer, n::Integer) = (skip(io.io, n); io) +copy(io::AnnotatedIOBuffer) = AnnotatedIOBuffer(copy(io.io), copy(io.annotations)) + +annotations(io::AnnotatedIOBuffer) = io.annotations + +annotate!(io::AnnotatedIOBuffer, range::UnitRange{Int}, label::Symbol, @nospecialize(val::Any)) = + (_annotate!(io.annotations, range, label, val); io) + +function write(io::AnnotatedIOBuffer, astr::Union{AnnotatedString, SubString{<:AnnotatedString}}) + astr = AnnotatedString(astr) + offset = position(io.io) + eof(io) || _clear_annotations_in_region!(io.annotations, offset+1:offset+ncodeunits(astr)) + _insert_annotations!(io, astr.annotations) + write(io.io, String(astr)) +end + +write(io::AnnotatedIOBuffer, c::AnnotatedChar) = + write(io, AnnotatedString(string(c), [(region=1:ncodeunits(c), a...) for a in c.annotations])) +write(io::AnnotatedIOBuffer, x::AbstractString) = write(io.io, x) +write(io::AnnotatedIOBuffer, s::Union{SubString{String}, String}) = write(io.io, s) +write(io::AnnotatedIOBuffer, b::UInt8) = write(io.io, b) + +function write(dest::AnnotatedIOBuffer, src::AnnotatedIOBuffer) + destpos = position(dest) + isappending = eof(dest) + srcpos = position(src) + nb = write(dest.io, src.io) + isappending || _clear_annotations_in_region!(dest.annotations, destpos:destpos+nb) + srcannots = [setindex(annot, max(1 + srcpos, first(annot.region)):last(annot.region), :region) + for annot in src.annotations if first(annot.region) >= srcpos] + _insert_annotations!(dest, srcannots, destpos - srcpos) + nb +end + +# So that read/writes with `IOContext` (and any similar `AbstractPipe` wrappers) +# work as expected. 
+function write(io::AbstractPipe, s::Union{AnnotatedString, SubString{<:AnnotatedString}}) + if pipe_writer(io) isa AnnotatedIOBuffer + write(pipe_writer(io), s) + else + invoke(write, Tuple{IO, typeof(s)}, io, s) + end::Int +end + +# Can't be part of the `Union` above because it introduces method ambiguities +function write(io::AbstractPipe, c::AnnotatedChar) + if pipe_writer(io) isa AnnotatedIOBuffer + write(pipe_writer(io), c) + else + invoke(write, Tuple{IO, typeof(c)}, io, c) + end::Int +end + +function read(io::AnnotatedIOBuffer, ::Type{AnnotatedString{T}}) where {T <: AbstractString} + if (start = position(io)) == 0 + AnnotatedString(read(io.io, T), copy(io.annotations)) + else + annots = [setindex(annot, UnitRange{Int}(max(1, first(annot.region) - start), last(annot.region)-start), :region) + for annot in io.annotations if last(annot.region) > start] + AnnotatedString(read(io.io, T), annots) + end +end +read(io::AnnotatedIOBuffer, ::Type{AnnotatedString{AbstractString}}) = read(io, AnnotatedString{String}) +read(io::AnnotatedIOBuffer, ::Type{AnnotatedString}) = read(io, AnnotatedString{String}) + +function read(io::AnnotatedIOBuffer, ::Type{AnnotatedChar{T}}) where {T <: AbstractChar} + pos = position(io) + char = read(io.io, T) + annots = [NamedTuple{(:label, :value)}(annot) for annot in io.annotations if pos+1 in annot.region] + AnnotatedChar(char, annots) +end +read(io::AnnotatedIOBuffer, ::Type{AnnotatedChar{AbstractChar}}) = read(io, AnnotatedChar{Char}) +read(io::AnnotatedIOBuffer, ::Type{AnnotatedChar}) = read(io, AnnotatedChar{Char}) + +function truncate(io::AnnotatedIOBuffer, size::Integer) + truncate(io.io, size) + filter!(ann -> first(ann.region) <= size, io.annotations) + map!(ann -> setindex(ann, first(ann.region):min(size, last(ann.region)), :region), + io.annotations, io.annotations) + io +end + +""" + _clear_annotations_in_region!(annotations::Vector{$RegionAnnotation}, span::UnitRange{Int}) + +Erase the presence of `annotations` within a certain `span`. + +This operates by removing all elements of `annotations` that are entirely +contained in `span`, truncating ranges that partially overlap, and splitting +annotations that subsume `span` to just exist either side of `span`. +""" +function _clear_annotations_in_region!(annotations::Vector{RegionAnnotation}, span::UnitRange{Int}) + # Clear out any overlapping pre-existing annotations. + filter!(ann -> first(ann.region) < first(span) || last(ann.region) > last(span), annotations) + extras = Tuple{Int, RegionAnnotation}[] + for i in eachindex(annotations) + annot = annotations[i] + region = annot.region + # Test for partial overlap + if first(region) <= first(span) <= last(region) || first(region) <= last(span) <= last(region) + annotations[i] = + setindex(annot, + if first(region) < first(span) + first(region):first(span)-1 + else + last(span)+1:last(region) + end, + :region) + # If `span` fits exactly within `region`, then we've only copied over + # the beginning overhang, but also need to conserve the end overhang. 
+ if first(region) < first(span) && last(span) < last(region) + push!(extras, (i, setindex(annot, last(span)+1:last(region), :region))) + end + end + end + # Insert any extra entries in the appropriate position + for (offset, (i, entry)) in enumerate(extras) + insert!(annotations, i + offset, entry) + end + annotations +end + +""" + _insert_annotations!(io::AnnotatedIOBuffer, annotations::Vector{$RegionAnnotation}, offset::Int = position(io)) + +Register new `annotations` in `io`, applying an `offset` to their regions. + +The largely consists of simply shifting the regions of `annotations` by `offset` +and pushing them onto `io`'s annotations. However, when it is possible to merge +the new annotations with recent annotations in accordance with the semantics +outlined in [`AnnotatedString`](@ref), we do so. More specifically, when there +is a run of the most recent annotations that are also present as the first +`annotations`, with the same value and adjacent regions, the new annotations are +merged into the existing recent annotations by simply extending their range. + +This is implemented so that one can say write an `AnnotatedString` to an +`AnnotatedIOBuffer` one character at a time without needlessly producing a +new annotation for each character. +""" +function _insert_annotations!(io::AnnotatedIOBuffer, annotations::Vector{RegionAnnotation}, offset::Int = position(io)) + run = 0 + if !isempty(io.annotations) && last(last(io.annotations).region) == offset + for i in reverse(axes(annotations, 1)) + annot = annotations[i] + first(annot.region) == 1 || continue + i <= length(io.annotations) || continue + if annot.label == last(io.annotations).label && annot.value == last(io.annotations).value + valid_run = true + for runlen in 1:i + new = annotations[begin+runlen-1] + old = io.annotations[end-i+runlen] + if last(old.region) != offset || first(new.region) != 1 || old.label != new.label || old.value != new.value + valid_run = false + break + end + end + if valid_run + run = i + break + end + end + end + end + for runindex in 0:run-1 + old_index = lastindex(io.annotations) - run + 1 + runindex + old = io.annotations[old_index] + new = annotations[begin+runindex] + io.annotations[old_index] = setindex(old, first(old.region):last(new.region)+offset, :region) + end + for index in run+1:lastindex(annotations) + annot = annotations[index] + start, stop = first(annot.region), last(annot.region) + push!(io.annotations, setindex(annotations[index], start+offset:stop+offset, :region)) + end +end diff --git a/base/strings/strings.jl b/base/strings/strings.jl index 8dae311f475b4..32975b6ea3fc7 100644 --- a/base/strings/strings.jl +++ b/base/strings/strings.jl @@ -11,3 +11,4 @@ import .Iterators: PartitionIterator include("strings/util.jl") include("strings/io.jl") +include("strings/annotated_io.jl") diff --git a/test/strings/annotated.jl b/test/strings/annotated.jl index 8658c1b52a2ab..7f53740b9eec1 100644 --- a/test/strings/annotated.jl +++ b/test/strings/annotated.jl @@ -258,3 +258,51 @@ end write(aio, Base.AnnotatedString("hello", [(1:5, :tag, 1)])) @test sprint(show, aio) == "Base.AnnotatedIOBuffer(5 bytes, 1 annotation)" end + +@testset "Eachregion" begin + annregions(str::String, annots::Vector{<:Tuple{UnitRange{Int}, Symbol, <:Any}}) = + [(s, Tuple.(a)) for (s, a) in Base.eachregion(Base.AnnotatedString(str, annots))] + # Regions that do/don't extend to the left/right edges + @test annregions(" abc ", [(2:4, :face, :bold)]) == + [(" ", []), + ("abc", [(:face, :bold)]), + (" ", [])] + @test 
annregions(" x ", [(2:2, :face, :bold)]) == + [(" ", []), + ("x", [(:face, :bold)]), + (" ", [])] + @test annregions(" x", [(2:2, :face, :bold)]) == + [(" ", []), + ("x", [(:face, :bold)])] + @test annregions("x ", [(1:1, :face, :bold)]) == + [("x", [(:face, :bold)]), + (" ", [])] + @test annregions("x", [(1:1, :face, :bold)]) == + [("x", [(:face, :bold)])] + # Overlapping/nested regions + @test annregions(" abc ", [(2:4, :face, :bold), (3:3, :face, :italic)]) == + [(" ", []), + ("a", [(:face, :bold)]), + ("b", [(:face, :bold), (:face, :italic)]), + ("c", [(:face, :bold)]), + (" ", [])] + @test annregions("abc-xyz", [(1:7, :face, :bold), (1:3, :face, :green), (4:4, :face, :yellow), (4:7, :face, :italic)]) == + [("abc", [(:face, :bold), (:face, :green)]), + ("-", [(:face, :bold), (:face, :yellow), (:face, :italic)]), + ("xyz", [(:face, :bold), (:face, :italic)])] + # Preserving annotation order + @test annregions("abcd", [(1:3, :face, :red), (2:2, :face, :yellow), (2:3, :face, :green), (2:4, :face, :blue)]) == + [("a", [(:face, :red)]), + ("b", [(:face, :red), (:face, :yellow), (:face, :green), (:face, :blue)]), + ("c", [(:face, :red), (:face, :green), (:face, :blue)]), + ("d", [(:face, :blue)])] + @test annregions("abcd", [(2:4, :face, :blue), (1:3, :face, :red), (2:3, :face, :green), (2:2, :face, :yellow)]) == + [("a", [(:face, :red)]), + ("b", [(:face, :blue), (:face, :red), (:face, :green), (:face, :yellow)]), + ("c", [(:face, :blue), (:face, :red), (:face, :green)]), + ("d", [(:face, :blue)])] + # Region starting after a character spanning multiple codepoints. + @test annregions("𝟏x", [(1:4, :face, :red)]) == + [("𝟏", [(:face, :red)]), + ("x", [])] +end From 57b1cd54adbfdd7c9862901e80f75f9886137df0 Mon Sep 17 00:00:00 2001 From: Neven Sajko <4944410+nsajko@users.noreply.github.com> Date: Sun, 30 Mar 2025 08:55:46 +0200 Subject: [PATCH 10/20] `Base`: shell escaping: inference improvement to prevent invalidation (#57915) (cherry picked from commit 4d2a350870b97059432ac8fa8de92b0b161c335b) --- base/shell.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/base/shell.jl b/base/shell.jl index e07fff128acfe..68925cbd5d5af 100644 --- a/base/shell.jl +++ b/base/shell.jl @@ -344,7 +344,7 @@ function shell_escape_csh(io::IO, args::AbstractString...) end shell_escape_csh(args::AbstractString...) = sprint(shell_escape_csh, args...; - sizehint = sum(sizeof.(args)) + length(args) * 3) + sizehint = sum(sizeof, args) + length(args) * 3) """ shell_escape_wincmd(s::AbstractString) @@ -494,4 +494,4 @@ function escape_microsoft_c_args(io::IO, args::AbstractString...) end escape_microsoft_c_args(args::AbstractString...) = sprint(escape_microsoft_c_args, args...; - sizehint = (sum(sizeof.(args)) + 3*length(args))) + sizehint = (sum(sizeof, args) + 3*length(args))) From 4183b2c2e8cb3a3cc120da3b633d8c46dc155046 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Belmant?= Date: Tue, 18 Feb 2025 00:53:00 +0100 Subject: [PATCH 11/20] Support adding `CodeInstance`s to JIT for interpreters defining a codegen cache (#57272) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements a way to add `CodeInstance`s compiled by external interpreters to JIT, such that they become legal targets for `invoke` calls. Based on a design proposed by @Keno, the `AbstractInterpreter` interface is extended to support providing a codegen cache that is filled during inference for future use with `add_codeinsts_to_jit!`. 
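A minimal sketch of the intended usage, condensed from the `CompilerDevTools` and
`Compiler/test/AbstractInterpreter.jl` changes below (the `JITCacheInterp` and
`JITCacheOwner` names are placeholders, and the `Compiler` stdlib is assumed to be
loadable via `using Compiler`):

    using Compiler
    using Core: CodeInstance, CodeInfo

    struct JITCacheOwner end
    struct JITCacheInterp <: Compiler.AbstractInterpreter
        world::UInt
        inf_params::Compiler.InferenceParams
        opt_params::Compiler.OptimizationParams
        inf_cache::Vector{Compiler.InferenceResult}
        codegen_cache::IdDict{CodeInstance,CodeInfo}
        JITCacheInterp(; world::UInt = Base.get_world_counter()) =
            new(world, Compiler.InferenceParams(), Compiler.OptimizationParams(),
                Compiler.InferenceResult[], IdDict{CodeInstance,CodeInfo}())
    end
    Compiler.InferenceParams(interp::JITCacheInterp) = interp.inf_params
    Compiler.OptimizationParams(interp::JITCacheInterp) = interp.opt_params
    Compiler.get_inference_world(interp::JITCacheInterp) = interp.world
    Compiler.get_inference_cache(interp::JITCacheInterp) = interp.inf_cache
    Compiler.cache_owner(::JITCacheInterp) = JITCacheOwner()
    # The new interface hook: expose a codegen cache so inferred sources can reach the JIT.
    Compiler.codegen_cache(interp::JITCacheInterp) = interp.codegen_cache

    # Infer with SOURCE_MODE_ABI so the resulting CodeInstance (and its callees)
    # are added to the JIT, then use it as an `invoke` target.
    f, args = (+), (1, 1)
    mi = @ccall jl_method_lookup(Any[f, args...]::Ptr{Any}, (1 + length(args))::Csize_t,
                                 Base.tls_world_age()::Csize_t)::Ref{Core.MethodInstance}
    ci = Compiler.typeinf_ext_toplevel(JITCacheInterp(), mi, Compiler.SOURCE_MODE_ABI)
    invoke(f, ci, args...)  # == 2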
This allows `invoke(f, ::CodeInstance, args...)` to work on external interpreters, which is currently failing on `master` (see #57193). --------- Co-authored-by: Cédric Belmant (cherry picked from commit 9d2e9ed8a275f770994488ec3e4bd3f04b9a229e) --- .../CompilerDevTools/src/CompilerDevTools.jl | 6 +- Compiler/src/typeinfer.jl | 86 +++++++++++-------- Compiler/src/types.jl | 17 ++++ Compiler/test/AbstractInterpreter.jl | 14 +++ 4 files changed, 86 insertions(+), 37 deletions(-) diff --git a/Compiler/extras/CompilerDevTools/src/CompilerDevTools.jl b/Compiler/extras/CompilerDevTools/src/CompilerDevTools.jl index 5d0df5ccaa4e4..dd32564d7fa8d 100644 --- a/Compiler/extras/CompilerDevTools/src/CompilerDevTools.jl +++ b/Compiler/extras/CompilerDevTools/src/CompilerDevTools.jl @@ -9,12 +9,13 @@ struct SplitCacheInterp <: Compiler.AbstractInterpreter inf_params::Compiler.InferenceParams opt_params::Compiler.OptimizationParams inf_cache::Vector{Compiler.InferenceResult} + codegen_cache::IdDict{CodeInstance,CodeInfo} function SplitCacheInterp(; world::UInt = Base.get_world_counter(), inf_params::Compiler.InferenceParams = Compiler.InferenceParams(), opt_params::Compiler.OptimizationParams = Compiler.OptimizationParams(), inf_cache::Vector{Compiler.InferenceResult} = Compiler.InferenceResult[]) - new(world, inf_params, opt_params, inf_cache) + new(world, inf_params, opt_params, inf_cache, IdDict{CodeInstance,CodeInfo}()) end end @@ -23,10 +24,11 @@ Compiler.OptimizationParams(interp::SplitCacheInterp) = interp.opt_params Compiler.get_inference_world(interp::SplitCacheInterp) = interp.world Compiler.get_inference_cache(interp::SplitCacheInterp) = interp.inf_cache Compiler.cache_owner(::SplitCacheInterp) = SplitCacheOwner() +Compiler.codegen_cache(interp::SplitCacheInterp) = interp.codegen_cache import Core.OptimizedGenerics.CompilerPlugins: typeinf, typeinf_edge @eval @noinline typeinf(::SplitCacheOwner, mi::MethodInstance, source_mode::UInt8) = - Base.invoke_in_world(which(typeinf, Tuple{SplitCacheOwner, MethodInstance, UInt8}).primary_world, Compiler.typeinf_ext, SplitCacheInterp(; world=Base.tls_world_age()), mi, source_mode) + Base.invoke_in_world(which(typeinf, Tuple{SplitCacheOwner, MethodInstance, UInt8}).primary_world, Compiler.typeinf_ext_toplevel, SplitCacheInterp(; world=Base.tls_world_age()), mi, source_mode) @eval @noinline function typeinf_edge(::SplitCacheOwner, mi::MethodInstance, parent_frame::Compiler.InferenceState, world::UInt, source_mode::UInt8) # TODO: This isn't quite right, we're just sketching things for now diff --git a/Compiler/src/typeinfer.jl b/Compiler/src/typeinfer.jl index e07ff4a842e3c..d3c8e65e214a2 100644 --- a/Compiler/src/typeinfer.jl +++ b/Compiler/src/typeinfer.jl @@ -144,9 +144,10 @@ function finish!(interp::AbstractInterpreter, caller::InferenceState, validation ci, inferred_result, const_flag, first(result.valid_worlds), last(result.valid_worlds), encode_effects(result.ipo_effects), result.analysis_results, time_total, caller.time_caches, time_self_ns * 1e-9, di, edges) engine_reject(interp, ci) - if !discard_src && isdefined(interp, :codegen) && uncompressed isa CodeInfo + codegen = codegen_cache(interp) + if !discard_src && codegen !== nothing && uncompressed isa CodeInfo # record that the caller could use this result to generate code when required, if desired, to avoid repeating n^2 work - interp.codegen[ci] = uncompressed + codegen[ci] = uncompressed if bootstrapping_compiler && inferred_result == nothing # This is necessary to get decent bootstrapping 
performance # when compiling the compiler to inject everything eagerly @@ -186,8 +187,9 @@ function finish!(interp::AbstractInterpreter, mi::MethodInstance, ci::CodeInstan ccall(:jl_update_codeinst, Cvoid, (Any, Any, Int32, UInt, UInt, UInt32, Any, Float64, Float64, Float64, Any, Any), ci, nothing, const_flag, min_world, max_world, ipo_effects, nothing, 0.0, 0.0, 0.0, di, edges) code_cache(interp)[mi] = ci - if isdefined(interp, :codegen) - interp.codegen[ci] = src + codegen = codegen_cache(interp) + if codegen !== nothing + codegen[ci] = src end engine_reject(interp, ci) return nothing @@ -1195,7 +1197,10 @@ function typeinf_ext(interp::AbstractInterpreter, mi::MethodInstance, source_mod ci = result.ci # reload from result in case it changed @assert frame.cache_mode != CACHE_MODE_NULL - @assert is_result_constabi_eligible(result) || (!isdefined(interp, :codegen) || haskey(interp.codegen, ci)) + @assert is_result_constabi_eligible(result) || begin + codegen = codegen_cache(interp) + codegen === nothing || haskey(codegen, ci) + end @assert is_result_constabi_eligible(result) == use_const_api(ci) @assert isdefined(ci, :inferred) "interpreter did not fulfill our expectations" if !is_cached(frame) && source_mode == SOURCE_MODE_ABI @@ -1261,44 +1266,55 @@ function collectinvokes!(wq::Vector{CodeInstance}, ci::CodeInfo) end end -# This is a bridge for the C code calling `jl_typeinf_func()` on a single Method match -function typeinf_ext_toplevel(mi::MethodInstance, world::UInt, source_mode::UInt8) - interp = NativeInterpreter(world) - ci = typeinf_ext(interp, mi, source_mode) - if source_mode == SOURCE_MODE_ABI && ci isa CodeInstance && !ci_has_invoke(ci) - inspected = IdSet{CodeInstance}() - tocompile = Vector{CodeInstance}() - push!(tocompile, ci) - while !isempty(tocompile) - # ci_has_real_invoke(ci) && return ci # optimization: cease looping if ci happens to get compiled (not just jl_fptr_wait_for_compiled, but fully jl_is_compiled_codeinst) - callee = pop!(tocompile) - ci_has_invoke(callee) && continue - callee in inspected && continue - src = get(interp.codegen, callee, nothing) +function add_codeinsts_to_jit!(interp::AbstractInterpreter, ci, source_mode::UInt8) + source_mode == SOURCE_MODE_ABI || return ci + ci isa CodeInstance && !ci_has_invoke(ci) || return ci + codegen = codegen_cache(interp) + codegen === nothing && return ci + inspected = IdSet{CodeInstance}() + tocompile = Vector{CodeInstance}() + push!(tocompile, ci) + while !isempty(tocompile) + # ci_has_real_invoke(ci) && return ci # optimization: cease looping if ci happens to get compiled (not just jl_fptr_wait_for_compiled, but fully jl_is_compiled_codeinst) + callee = pop!(tocompile) + ci_has_invoke(callee) && continue + callee in inspected && continue + src = get(codegen, callee, nothing) + if !isa(src, CodeInfo) + src = @atomic :monotonic callee.inferred + if isa(src, String) + src = _uncompressed_ir(callee, src) + end if !isa(src, CodeInfo) - src = @atomic :monotonic callee.inferred - if isa(src, String) - src = _uncompressed_ir(callee, src) - end - if !isa(src, CodeInfo) - newcallee = typeinf_ext(interp, callee.def, source_mode) - if newcallee isa CodeInstance - callee === ci && (ci = newcallee) # ci stopped meeting the requirements after typeinf_ext last checked, try again with newcallee - push!(tocompile, newcallee) - #else - # println("warning: could not get source code for ", callee.def) - end - continue + newcallee = typeinf_ext(interp, callee.def, source_mode) + if newcallee isa CodeInstance + callee === ci && (ci = 
newcallee) # ci stopped meeting the requirements after typeinf_ext last checked, try again with newcallee + push!(tocompile, newcallee) + #else + # println("warning: could not get source code for ", callee.def) end + continue end - push!(inspected, callee) - collectinvokes!(tocompile, src) - ccall(:jl_add_codeinst_to_jit, Cvoid, (Any, Any), callee, src) end + push!(inspected, callee) + collectinvokes!(tocompile, src) + ccall(:jl_add_codeinst_to_jit, Cvoid, (Any, Any), callee, src) end return ci end +function typeinf_ext_toplevel(interp::AbstractInterpreter, mi::MethodInstance, source_mode::UInt8) + ci = typeinf_ext(interp, mi, source_mode) + ci = add_codeinsts_to_jit!(interp, ci, source_mode) + return ci +end + +# This is a bridge for the C code calling `jl_typeinf_func()` on a single Method match +function typeinf_ext_toplevel(mi::MethodInstance, world::UInt, source_mode::UInt8) + interp = NativeInterpreter(world) + return typeinf_ext_toplevel(interp, mi, source_mode) +end + # This is a bridge for the C code calling `jl_typeinf_func()` on set of Method matches # The trim_mode can be any of: const TRIM_NO = 0 diff --git a/Compiler/src/types.jl b/Compiler/src/types.jl index eb05ba2b8daa6..a04c9e70174fe 100644 --- a/Compiler/src/types.jl +++ b/Compiler/src/types.jl @@ -23,6 +23,10 @@ the following methods to satisfy the `AbstractInterpreter` API requirement: - `get_inference_world(interp::NewInterpreter)` - return the world age for this interpreter - `get_inference_cache(interp::NewInterpreter)` - return the local inference cache - `cache_owner(interp::NewInterpreter)` - return the owner of any new cache entries + +If `CodeInstance`s compiled using `interp::NewInterpreter` are meant to be executed with `invoke`, +a method `codegen_cache(interp::NewInterpreter) -> IdDict{CodeInstance, CodeInfo}` must be defined, +and inference must be triggered via `typeinf_ext_toplevel` with source mode `SOURCE_MODE_ABI`. """ abstract type AbstractInterpreter end @@ -446,6 +450,19 @@ to incorporate customized dispatches for the overridden methods. method_table(interp::AbstractInterpreter) = InternalMethodTable(get_inference_world(interp)) method_table(interp::NativeInterpreter) = interp.method_table +""" + codegen_cache(interp::AbstractInterpreter) -> Union{Nothing, IdDict{CodeInstance, CodeInfo}} + +Optionally return a cache associating a `CodeInfo` to a `CodeInstance` that should be added to the JIT +for future execution via `invoke(f, ::CodeInstance, args...)`. This cache is used during `typeinf_ext_toplevel`, +and may be safely discarded between calls to this function. + +By default, a value of `nothing` is returned indicating that `CodeInstance`s should not be added to the JIT. +Attempting to execute them via `invoke` will result in an error. 
+""" +codegen_cache(interp::AbstractInterpreter) = nothing +codegen_cache(interp::NativeInterpreter) = interp.codegen + """ By default `AbstractInterpreter` implements the following inference bail out logic: - `bail_out_toplevel_call(::AbstractInterpreter, sig, ::InferenceState)`: bail out from diff --git a/Compiler/test/AbstractInterpreter.jl b/Compiler/test/AbstractInterpreter.jl index 533eaf93937a3..83218d73cad69 100644 --- a/Compiler/test/AbstractInterpreter.jl +++ b/Compiler/test/AbstractInterpreter.jl @@ -534,3 +534,17 @@ let interp = DebugInterp() end @test found end + +@newinterp InvokeInterp +struct InvokeOwner end +codegen = IdDict{CodeInstance, CodeInfo}() +Compiler.cache_owner(::InvokeInterp) = InvokeOwner() +Compiler.codegen_cache(::InvokeInterp) = codegen +let interp = InvokeInterp() + source_mode = Compiler.SOURCE_MODE_ABI + f = (+) + args = (1, 1) + mi = @ccall jl_method_lookup(Any[f, args...]::Ptr{Any}, (1+length(args))::Csize_t, Base.tls_world_age()::Csize_t)::Ref{Core.MethodInstance} + ci = Compiler.typeinf_ext_toplevel(interp, mi, source_mode) + @test invoke(f, ci, args...) == 2 +end From bea6fd124f2035a2583ce41802f89928d4d7a5ad Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Fri, 28 Mar 2025 10:34:48 -0400 Subject: [PATCH 12/20] inference: add internal SOURCE_MODE_GET_SOURCE mode (#57878) Helps avoids some code duplication and divergence of inference behaviors in some edge cases, also slightly more correct caching in some edge cases. (cherry picked from commit e7ff95d643fbbe5445beda7b1d859d327d34f729) --- Compiler/src/bootstrap.jl | 9 +- Compiler/src/typeinfer.jl | 167 ++++++++++++++++++++---------------- Compiler/src/verifytrim.jl | 17 +++- Compiler/test/verifytrim.jl | 2 +- src/aotcompile.cpp | 14 +++ src/gf.c | 23 +++-- src/jitlayers.cpp | 4 +- src/julia_internal.h | 3 +- 8 files changed, 141 insertions(+), 98 deletions(-) diff --git a/Compiler/src/bootstrap.jl b/Compiler/src/bootstrap.jl index 2671ea114e818..a847d1fb835c7 100644 --- a/Compiler/src/bootstrap.jl +++ b/Compiler/src/bootstrap.jl @@ -67,17 +67,10 @@ function bootstrap!() end mi = specialize_method(m.method, Tuple{params...}, m.sparams) #isa_compileable_sig(mi) || println(stderr, "WARNING: inferring `", mi, "` which isn't expected to be called.") - push!(methods, mi) + typeinf_ext_toplevel(mi, world, isa_compileable_sig(mi) ? 
SOURCE_MODE_ABI : SOURCE_MODE_NOT_REQUIRED) end end end - codeinfos = typeinf_ext_toplevel(methods, [world], TRIM_NO) - for i = 1:2:length(codeinfos) - ci = codeinfos[i]::CodeInstance - src = codeinfos[i + 1]::CodeInfo - isa_compileable_sig(ci.def) || continue # println(stderr, "WARNING: compiling `", ci.def, "` which isn't expected to be called.") - ccall(:jl_add_codeinst_to_jit, Cvoid, (Any, Any), ci, src) - end endtime = time() println("Base.Compiler ──── ", sub_float(endtime,starttime), " seconds") end diff --git a/Compiler/src/typeinfer.jl b/Compiler/src/typeinfer.jl index d3c8e65e214a2..59e0fbd8262e5 100644 --- a/Compiler/src/typeinfer.jl +++ b/Compiler/src/typeinfer.jl @@ -153,7 +153,7 @@ function finish!(interp::AbstractInterpreter, caller::InferenceState, validation # when compiling the compiler to inject everything eagerly # where codegen can start finding and using it right away mi = result.linfo - if mi.def isa Method && isa_compileable_sig(mi) + if mi.def isa Method && isa_compileable_sig(mi) && is_cached(caller) ccall(:jl_add_codeinst_to_jit, Cvoid, (Any, Any), ci, uncompressed) end end @@ -1099,10 +1099,10 @@ end """ SOURCE_MODE_NOT_REQUIRED -Indicates to inference that the source is not required and the only fields -of the resulting `CodeInstance` that the caller is interested in are types -and effects. Inference is still free to create a CodeInstance with source, -but is not required to do so. +Indicates to inference that the source is not required and the only fields of +the resulting `CodeInstance` that the caller is interested in are return or +exception types and IPO effects. Inference is still free to create source for +it or add it to the JIT even, but is not required or expected to do so. """ const SOURCE_MODE_NOT_REQUIRED = 0x0 @@ -1110,28 +1110,51 @@ const SOURCE_MODE_NOT_REQUIRED = 0x0 SOURCE_MODE_ABI Indicates to inference that it should return a CodeInstance that can -either be `->invoke`'d (because it has already been compiled or because -it has constabi) or one that can be made so by compiling its `->inferred` -field. - -N.B.: The `->inferred` field is volatile and the compiler may delete it. +be `->invoke`'d (because it has already been compiled). """ const SOURCE_MODE_ABI = 0x1 """ - ci_has_abi(code::CodeInstance) + SOURCE_MODE_GET_SOURCE + +Indicates to inference that it should return a CodeInstance after it has +prepared interp to be able to provide source code for it. +""" +const SOURCE_MODE_GET_SOURCE = 0xf -Determine whether this CodeInstance is something that could be invoked if we gave it -to the runtime system (either because it already has an ->invoke ptr, or -because it has source that could be compiled). Note that this information may -be stale by the time the user see it, so the user will need to perform their -own checks if they actually need the abi from it. """ -function ci_has_abi(code::CodeInstance) + ci_has_abi(interp::AbstractInterpreter, code::CodeInstance) + +Determine whether this CodeInstance is something that could be invoked if +interp gave it to the runtime system (either because it already has an ->invoke +ptr, or because interp has source that could be compiled). +""" +function ci_has_abi(interp::AbstractInterpreter, code::CodeInstance) (@atomic :acquire code.invoke) !== C_NULL && return true + return ci_has_source(interp, code) +end + +""" + ci_has_source(interp::AbstractInterpreter, code::CodeInstance) + +Determine whether this CodeInstance is something that could be compiled from +source that interp has. 
+""" +function ci_has_source(interp::AbstractInterpreter, code::CodeInstance) + codegen = codegen_cache(interp) + codegen === nothing && return false + use_const_api(code) && return true + haskey(codegen, code) && return true inf = @atomic :monotonic code.inferred - if code.owner === nothing ? (isa(inf, CodeInfo) || isa(inf, String)) : inf !== nothing - # interp.codegen[code] = maybe_uncompress(code, inf) # TODO: the correct way to ensure this information doesn't become stale would be to push it into the stable codegen cache + if isa(inf, String) + inf = _uncompressed_ir(code, inf) + end + if code.owner === nothing + if isa(inf, CodeInfo) + codegen[code] = inf + return true + end + elseif inf !== nothing return true end return false @@ -1141,9 +1164,10 @@ function ci_has_invoke(code::CodeInstance) return (@atomic :monotonic code.invoke) !== C_NULL end -function ci_meets_requirement(code::CodeInstance, source_mode::UInt8) +function ci_meets_requirement(interp::AbstractInterpreter, code::CodeInstance, source_mode::UInt8) source_mode == SOURCE_MODE_NOT_REQUIRED && return true - source_mode == SOURCE_MODE_ABI && return ci_has_abi(code) + source_mode == SOURCE_MODE_ABI && return ci_has_abi(interp, code) + source_mode == SOURCE_MODE_GET_SOURCE && return ci_has_source(interp, code) return false end @@ -1153,7 +1177,7 @@ function typeinf_ext(interp::AbstractInterpreter, mi::MethodInstance, source_mod let code = get(code_cache(interp), mi, nothing) if code isa CodeInstance # see if this code already exists in the cache - if ci_meets_requirement(code, source_mode) + if ci_meets_requirement(interp, code, source_mode) ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time) return code end @@ -1165,7 +1189,7 @@ function typeinf_ext(interp::AbstractInterpreter, mi::MethodInstance, source_mod let code = get(code_cache(interp), mi, nothing) if code isa CodeInstance # see if this code already exists in the cache - if ci_meets_requirement(code, source_mode) + if ci_meets_requirement(interp, code, source_mode) engine_reject(interp, ci) ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time) return code @@ -1196,18 +1220,11 @@ function typeinf_ext(interp::AbstractInterpreter, mi::MethodInstance, source_mod ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time) ci = result.ci # reload from result in case it changed + codegen = codegen_cache(interp) @assert frame.cache_mode != CACHE_MODE_NULL - @assert is_result_constabi_eligible(result) || begin - codegen = codegen_cache(interp) - codegen === nothing || haskey(codegen, ci) - end + @assert is_result_constabi_eligible(result) || codegen === nothing || haskey(codegen, ci) @assert is_result_constabi_eligible(result) == use_const_api(ci) @assert isdefined(ci, :inferred) "interpreter did not fulfill our expectations" - if !is_cached(frame) && source_mode == SOURCE_MODE_ABI - # XXX: jl_type_infer somewhat ambiguously assumes this must be cached - # XXX: this should be using the CI from the cache, if possible instead: haskey(cache, mi) && (ci = cache[mi]) - code_cache(interp)[mi] = ci - end return ci end @@ -1221,35 +1238,9 @@ end typeinf_type(interp::AbstractInterpreter, match::MethodMatch) = typeinf_type(interp, specialize_method(match)) function typeinf_type(interp::AbstractInterpreter, mi::MethodInstance) - # n.b.: this could be replaced with @something(typeinf_ext(interp, mi, SOURCE_MODE_NOT_REQUIRED), return nothing).rettype - start_time = ccall(:jl_typeinf_timing_begin, UInt64, ()) - let code = get(code_cache(interp), mi, nothing) - if code isa 
CodeInstance - # see if this rettype already exists in the cache - ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time) - return code.rettype - end - end - ci = engine_reserve(interp, mi) - let code = get(code_cache(interp), mi, nothing) - if code isa CodeInstance - engine_reject(interp, ci) - # see if this rettype already exists in the cache - ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time) - return code.rettype - end - end - result = InferenceResult(mi, typeinf_lattice(interp)) - result.ci = ci - frame = InferenceState(result, #=cache_mode=#:global, interp) - if frame === nothing - engine_reject(interp, ci) - return nothing - end - typeinf(interp, frame) - ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time) - is_inferred(result) || return nothing - return widenconst(ignorelimited(result.result)) + ci = typeinf_ext(interp, mi, SOURCE_MODE_NOT_REQUIRED) + ci isa CodeInstance || return nothing + return ci.rettype end # collect a list of all code that is needed along with CodeInstance to codegen it fully @@ -1286,18 +1277,31 @@ function add_codeinsts_to_jit!(interp::AbstractInterpreter, ci, source_mode::UIn src = _uncompressed_ir(callee, src) end if !isa(src, CodeInfo) - newcallee = typeinf_ext(interp, callee.def, source_mode) + newcallee = typeinf_ext(interp, callee.def, source_mode) # always SOURCE_MODE_ABI if newcallee isa CodeInstance callee === ci && (ci = newcallee) # ci stopped meeting the requirements after typeinf_ext last checked, try again with newcallee push!(tocompile, newcallee) - #else - # println("warning: could not get source code for ", callee.def) + end + if newcallee !== callee + push!(inspected, callee) end continue end end push!(inspected, callee) collectinvokes!(tocompile, src) + mi = get_ci_mi(callee) + if iszero(ccall(:jl_mi_cache_has_ci, Cint, (Any, Any), mi, callee)) + cached = ccall(:jl_get_ci_equiv, Any, (Any, UInt), callee, get_inference_world(interp))::CodeInstance + if cached === callee + # make sure callee is gc-rooted and cached, as required by jl_add_codeinst_to_jit + code_cache(interp)[mi] = callee + else + # use an existing CI from the cache, if there is available one that is compatible + callee === ci && (ci = cached) + callee = cached + end + end ccall(:jl_add_codeinst_to_jit, Cvoid, (Any, Any), callee, src) end return ci @@ -1341,7 +1345,7 @@ function typeinf_ext_toplevel(methods::Vector{Any}, worlds::Vector{UInt}, trim_m # and this is either the primary world, or not applicable in the primary world # then we want to compile and emit this if item.def.primary_world <= this_world <= item.def.deleted_world - ci = typeinf_ext(interp, item, SOURCE_MODE_NOT_REQUIRED) + ci = typeinf_ext(interp, item, SOURCE_MODE_GET_SOURCE) ci isa CodeInstance && push!(tocompile, ci) end elseif item isa SimpleVector && latest @@ -1352,7 +1356,7 @@ function typeinf_ext_toplevel(methods::Vector{Any}, worlds::Vector{UInt}, trim_m sig, this_world, #= mt_cache =# 0) if ptr !== C_NULL mi = unsafe_pointer_to_objref(ptr)::MethodInstance - ci = typeinf_ext(interp, mi, SOURCE_MODE_NOT_REQUIRED) + ci = typeinf_ext(interp, mi, SOURCE_MODE_GET_SOURCE) ci isa CodeInstance && push!(tocompile, ci) end # additionally enqueue the ccallable entrypoint / adapter, which implicitly @@ -1364,26 +1368,37 @@ function typeinf_ext_toplevel(methods::Vector{Any}, worlds::Vector{UInt}, trim_m while !isempty(tocompile) callee = pop!(tocompile) callee in inspected && continue - push!(inspected, callee) # now make sure everything has source code, if desired mi = get_ci_mi(callee) 
def = mi.def if use_const_api(callee) src = codeinfo_for_const(interp, mi, callee.rettype_const) - elseif haskey(interp.codegen, callee) - src = interp.codegen[callee] - elseif isa(def, Method) && !InferenceParams(interp).force_enable_inference && ccall(:jl_get_module_infer, Cint, (Any,), def.module) == 0 - src = retrieve_code_info(mi, get_inference_world(interp)) else - # TODO: typeinf_code could return something with different edges/ages/owner/abi (needing an update to callee), which we don't handle here - src = typeinf_code(interp, mi, true) + src = get(interp.codegen, callee, nothing) + if src === nothing + newcallee = typeinf_ext(interp, mi, SOURCE_MODE_GET_SOURCE) + if newcallee isa CodeInstance + @assert use_const_api(newcallee) || haskey(interp.codegen, newcallee) + push!(tocompile, newcallee) + end + if newcallee !== callee + push!(inspected, callee) + end + continue + end end + push!(inspected, callee) if src isa CodeInfo collectinvokes!(tocompile, src) - # It is somewhat ambiguous if typeinf_ext might have callee in the caches, - # but for the purpose of native compile, we always want them put there. + # try to reuse an existing CodeInstance from before to avoid making duplicates in the cache if iszero(ccall(:jl_mi_cache_has_ci, Cint, (Any, Any), mi, callee)) - code_cache(interp)[mi] = callee + cached = ccall(:jl_get_ci_equiv, Any, (Any, UInt), callee, this_world)::CodeInstance + if cached === callee + code_cache(interp)[mi] = callee + else + # Use an existing CI from the cache, if there is available one that is compatible + callee = cached + end end push!(codeinfos, callee) push!(codeinfos, src) diff --git a/Compiler/src/verifytrim.jl b/Compiler/src/verifytrim.jl index 5a80082c63330..2365d885efd79 100644 --- a/Compiler/src/verifytrim.jl +++ b/Compiler/src/verifytrim.jl @@ -110,7 +110,7 @@ end function verify_print_error(io::IOContext{IO}, desc::CallMissing, parents::ParentMap) (; codeinst, codeinfo, sptypes, stmtidx, desc) = desc frames = verify_create_stackframes(codeinst, stmtidx, parents) - print(io, desc, " from ") + print(io, desc, " from statement ") verify_print_stmt(io, codeinfo, sptypes, stmtidx) Base.show_backtrace(io, frames) print(io, "\n\n") @@ -181,6 +181,11 @@ function verify_codeinstance!(codeinst::CodeInstance, codeinfo::CodeInfo, inspec if edge isa CodeInstance haskey(parents, edge) || (parents[edge] = (codeinst, i)) edge in inspected && continue + edge_mi = get_ci_mi(edge) + if edge_mi === edge.def + ci = get(caches, edge_mi, nothing) + ci isa CodeInstance && continue # assume that only this_world matters for trim + end end # TODO: check for calls to Base.atexit? elseif isexpr(stmt, :call) @@ -287,7 +292,7 @@ function get_verify_typeinf_trim(codeinfos::Vector{Any}) # TODO: should we find a way to indicate to the user that this gets called via ccallable? # parent[ci] = something asrt = ci.rettype - ci in inspected + true else false end @@ -326,6 +331,14 @@ function verify_typeinf_trim(io::IO, codeinfos::Vector{Any}, onlywarn::Bool) verify_print_error(io, desc, parents) end + ## TODO: compute and display the minimum and/or full call graph instead of merely the first parent stacktrace? 
+ #for i = 1:length(codeinfos) + # item = codeinfos[i] + # if item isa CodeInstance + # println(item, "::", item.rettype) + # end + #end + let severity = 0 if counts[1] > 0 || counts[2] > 0 print("Trim verify finished with ") diff --git a/Compiler/test/verifytrim.jl b/Compiler/test/verifytrim.jl index a03804a94cb62..0e9d040ef0c9b 100644 --- a/Compiler/test/verifytrim.jl +++ b/Compiler/test/verifytrim.jl @@ -33,7 +33,7 @@ let infos = typeinf_ext_toplevel(Any[Core.svec(Base.SecretBuffer, Tuple{Type{Bas @test occursin("finalizer", desc.desc) repr = sprint(verify_print_error, desc, parents) @test occursin( - r"""^unresolved finalizer registered from \(Core.finalizer\)\(Base.final_shred!, %new\(\)::Base.SecretBuffer\)::Nothing + r"""^unresolved finalizer registered from statement \(Core.finalizer\)\(Base.final_shred!, %new\(\)::Base.SecretBuffer\)::Nothing Stacktrace: \[1\] finalizer\(f::typeof\(Base.final_shred!\), o::Base.SecretBuffer\) @ Base gcutils.jl:(\d+) \[inlined\] diff --git a/src/aotcompile.cpp b/src/aotcompile.cpp index 688aeca1b242c..b1d925d89c7ce 100644 --- a/src/aotcompile.cpp +++ b/src/aotcompile.cpp @@ -675,6 +675,20 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm fargs[0] = (jl_value_t*)codeinfos; void *data = jl_emit_native(codeinfos, llvmmod, &cgparams, external_linkage); + // examine everything just emitted and save it to the caches + if (!external_linkage) { + for (size_t i = 0, l = jl_array_nrows(codeinfos); i < l; i++) { + jl_value_t *item = jl_array_ptr_ref(codeinfos, i); + if (jl_is_code_instance(item)) { + // now add it to our compilation results + jl_code_instance_t *codeinst = (jl_code_instance_t*)item; + jl_code_info_t *src = (jl_code_info_t*)jl_array_ptr_ref(codeinfos, ++i); + assert(jl_is_code_info(src)); + jl_add_codeinst_to_cache(codeinst, src); + } + } + } + // move everything inside, now that we've merged everything // (before adding the exported headers) ((jl_native_code_desc_t*)data)->M.withModuleDo([&](Module &M) { diff --git a/src/gf.c b/src/gf.c index f8d88c4e44e38..860e5e5aa1247 100644 --- a/src/gf.c +++ b/src/gf.c @@ -585,8 +585,8 @@ JL_DLLEXPORT int jl_mi_cache_has_ci(jl_method_instance_t *mi, return 0; } -// look for something with an egal ABI and properties that is already in the JIT (compiled=true) or simply in the cache (compiled=false) -JL_DLLEXPORT jl_code_instance_t *jl_get_ci_equiv(jl_code_instance_t *ci JL_PROPAGATES_ROOT, int compiled) JL_NOTSAFEPOINT +// look for something with an egal ABI and properties that is already in the JIT for a whole edge (target_world=0) or can be added to the JIT with new source just for target_world. +JL_DLLEXPORT jl_code_instance_t *jl_get_ci_equiv(jl_code_instance_t *ci JL_PROPAGATES_ROOT, size_t target_world) JL_NOTSAFEPOINT { jl_value_t *def = ci->def; jl_method_instance_t *mi = jl_get_ci_mi(ci); @@ -598,9 +598,9 @@ JL_DLLEXPORT jl_code_instance_t *jl_get_ci_equiv(jl_code_instance_t *ci JL_PROPA while (codeinst) { if (codeinst != ci && jl_atomic_load_relaxed(&codeinst->inferred) != NULL && - (!compiled || jl_atomic_load_relaxed(&codeinst->invoke) != NULL) && - jl_atomic_load_relaxed(&codeinst->min_world) <= min_world && - jl_atomic_load_relaxed(&codeinst->max_world) >= max_world && + (target_world ? 1 : jl_atomic_load_relaxed(&codeinst->invoke) != NULL) && + jl_atomic_load_relaxed(&codeinst->min_world) <= (target_world ? target_world : min_world) && + jl_atomic_load_relaxed(&codeinst->max_world) >= (target_world ? 
target_world : max_world) && jl_egal(codeinst->def, def) && jl_egal(codeinst->owner, owner) && jl_egal(codeinst->rettype, rettype)) { @@ -608,7 +608,7 @@ JL_DLLEXPORT jl_code_instance_t *jl_get_ci_equiv(jl_code_instance_t *ci JL_PROPA } codeinst = jl_atomic_load_relaxed(&codeinst->next); } - return (jl_code_instance_t*)jl_nothing; + return ci; } @@ -2795,10 +2795,9 @@ void jl_read_codeinst_invoke(jl_code_instance_t *ci, uint8_t *specsigflags, jl_c jl_method_instance_t *jl_normalize_to_compilable_mi(jl_method_instance_t *mi JL_PROPAGATES_ROOT); -JL_DLLEXPORT void jl_add_codeinst_to_jit(jl_code_instance_t *codeinst, jl_code_info_t *src) +JL_DLLEXPORT void jl_add_codeinst_to_cache(jl_code_instance_t *codeinst, jl_code_info_t *src) { assert(jl_is_code_info(src)); - jl_emit_codeinst_to_jit(codeinst, src); jl_method_instance_t *mi = jl_get_ci_mi(codeinst); if (jl_generating_output() && jl_is_method(mi->def.method) && jl_atomic_load_relaxed(&codeinst->inferred) == jl_nothing) { jl_value_t *compressed = jl_compress_ir(mi->def.method, src); @@ -2814,6 +2813,14 @@ JL_DLLEXPORT void jl_add_codeinst_to_jit(jl_code_instance_t *codeinst, jl_code_i } } + +JL_DLLEXPORT void jl_add_codeinst_to_jit(jl_code_instance_t *codeinst, jl_code_info_t *src) +{ + assert(jl_is_code_info(src)); + jl_emit_codeinst_to_jit(codeinst, src); + jl_add_codeinst_to_cache(codeinst, src); +} + jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t world) { // quick check if we already have a compiled result diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp index 695953b602653..b8781d2bfe898 100644 --- a/src/jitlayers.cpp +++ b/src/jitlayers.cpp @@ -383,8 +383,8 @@ static int jl_analyze_workqueue(jl_code_instance_t *callee, jl_codegen_params_t } if (preal_decl.empty()) { // there may be an equivalent method already compiled (or at least registered with the JIT to compile), in which case we should be using that instead - jl_code_instance_t *compiled_ci = jl_get_ci_equiv(codeinst, 1); - if ((jl_value_t*)compiled_ci != jl_nothing) { + jl_code_instance_t *compiled_ci = jl_get_ci_equiv(codeinst, 0); + if (compiled_ci != codeinst) { codeinst = compiled_ci; uint8_t specsigflags; void *fptr; diff --git a/src/julia_internal.h b/src/julia_internal.h index 479ccbf961e71..bdcc816cbdd1b 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -685,6 +685,7 @@ JL_DLLEXPORT jl_method_instance_t *jl_get_unspecialized(jl_method_t *def JL_PROP JL_DLLEXPORT void jl_read_codeinst_invoke(jl_code_instance_t *ci, uint8_t *specsigflags, jl_callptr_t *invoke, void **specptr, int waitcompile); JL_DLLEXPORT jl_method_instance_t *jl_method_match_to_mi(jl_method_match_t *match, size_t world, size_t min_valid, size_t max_valid, int mt_cache); JL_DLLEXPORT void jl_add_codeinst_to_jit(jl_code_instance_t *codeinst, jl_code_info_t *src); +JL_DLLEXPORT void jl_add_codeinst_to_cache(jl_code_instance_t *codeinst, jl_code_info_t *src); JL_DLLEXPORT jl_code_instance_t *jl_new_codeinst_uninit(jl_method_instance_t *mi, jl_value_t *owner); JL_DLLEXPORT jl_code_instance_t *jl_new_codeinst( @@ -694,7 +695,7 @@ JL_DLLEXPORT jl_code_instance_t *jl_new_codeinst( int32_t const_flags, size_t min_world, size_t max_world, uint32_t effects, jl_value_t *analysis_results, jl_debuginfo_t *di, jl_svec_t *edges /* , int absolute_max*/); -JL_DLLEXPORT jl_code_instance_t *jl_get_ci_equiv(jl_code_instance_t *ci JL_PROPAGATES_ROOT, int compiled) JL_NOTSAFEPOINT; +JL_DLLEXPORT jl_code_instance_t *jl_get_ci_equiv(jl_code_instance_t *ci JL_PROPAGATES_ROOT, 
size_t target_world) JL_NOTSAFEPOINT; STATIC_INLINE jl_method_instance_t *jl_get_ci_mi(jl_code_instance_t *ci JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT { From f70dfcf60f34c277b29dfd8f8fa6ac7db97b3718 Mon Sep 17 00:00:00 2001 From: Neven Sajko <4944410+nsajko@users.noreply.github.com> Date: Mon, 31 Mar 2025 10:00:17 +0200 Subject: [PATCH 13/20] `_precompilepkgs`: interactive progress display: fix unintended capture (#57932) The variable `str` also exists in one of the enclosing closures. Use a new variable, as was surely intended, instead of capturing and mutating the `str`. Improves the sysimage's resistance to invalidation. (cherry picked from commit fcf492d4b39c2becde6cd5ca3bb63fcbc7a308d3) --- base/precompilation.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/base/precompilation.jl b/base/precompilation.jl index 5392e119d25a2..3ab9fcad5aee6 100644 --- a/base/precompilation.jl +++ b/base/precompilation.jl @@ -835,8 +835,8 @@ function _precompilepkgs(pkgs::Vector{String}, # window between print cycles termwidth = displaysize(io)[2] - 4 if !final_loop - str = sprint(io -> show_progress(io, bar; termwidth, carriagereturn=false); context=io) - print(iostr, Base._truncate_at_width_or_chars(true, str, termwidth), "\n") + s = sprint(io -> show_progress(io, bar; termwidth, carriagereturn=false); context=io) + print(iostr, Base._truncate_at_width_or_chars(true, s, termwidth), "\n") end for pkg_config in pkg_queue_show dep, config = pkg_config From 594b17d99ddd02b8416b1949c44428d014c05efc Mon Sep 17 00:00:00 2001 From: Cody Tapscott <84105208+topolarity@users.noreply.github.com> Date: Mon, 31 Mar 2025 04:03:19 -0400 Subject: [PATCH 14/20] staticdata: Memoize `type_in_worklist` query (#57917) When pre-compiling `stdlib/` this cache has a 91% hit rate, so this seems fairly profitable. It also dramatically improves some pathological cases, a few of which have been hit in the wild (arguably due to inference bugs) Without this PR, this package takes exponentially long to pre-compile: ```julia function BigType(N) (N == 0) && return Nothing T = BigType(N-1) return Pair{T,T} end foo(::Type{T}) where T = T precompile(foo, (Type{BigType(40)},)) ``` For an in-the-wild test case hit by a customer, this reduces pre-compilation time from over an hour to just ~two and a half minutes. Resolves #53331. (cherry picked from commit 89271dc5a8a3236a148413409abb019bf973f6de) --- src/staticdata.c | 76 +++++++++++++++++++++++++++--------------- src/staticdata_utils.c | 70 +++++++++++++++++++++++--------------- 2 files changed, 94 insertions(+), 52 deletions(-) diff --git a/src/staticdata.c b/src/staticdata.c index 62f3feeaa2159..cab6e6be510b0 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -92,6 +92,22 @@ static const size_t WORLD_AGE_REVALIDATION_SENTINEL = 0x1; JL_DLLEXPORT size_t jl_require_world = ~(size_t)0; JL_DLLEXPORT _Atomic(size_t) jl_first_image_replacement_world = ~(size_t)0; +// This structure is used to store hash tables for the memoization +// of queries in staticdata.c (currently only `type_in_worklist`). 
+typedef struct { + htable_t type_in_worklist; +} jl_query_cache; + +static void init_query_cache(jl_query_cache *cache) +{ + htable_new(&cache->type_in_worklist, 0); +} + +static void destroy_query_cache(jl_query_cache *cache) +{ + htable_free(&cache->type_in_worklist); +} + #include "staticdata_utils.c" #include "precompile_utils.c" @@ -555,6 +571,7 @@ typedef struct { jl_array_t *method_roots_list; htable_t method_roots_index; uint64_t worklist_key; + jl_query_cache *query_cache; jl_ptls_t ptls; jl_image_t *image; int8_t incremental; @@ -702,14 +719,13 @@ static int jl_needs_serialization(jl_serializer_state *s, jl_value_t *v) JL_NOTS return 1; } - -static int caching_tag(jl_value_t *v) JL_NOTSAFEPOINT +static int caching_tag(jl_value_t *v, jl_query_cache *query_cache) JL_NOTSAFEPOINT { if (jl_is_method_instance(v)) { jl_method_instance_t *mi = (jl_method_instance_t*)v; jl_value_t *m = mi->def.value; if (jl_is_method(m) && jl_object_in_image(m)) - return 1 + type_in_worklist(mi->specTypes); + return 1 + type_in_worklist(mi->specTypes, query_cache); } if (jl_is_binding(v)) { jl_globalref_t *gr = ((jl_binding_t*)v)->globalref; @@ -724,24 +740,24 @@ static int caching_tag(jl_value_t *v) JL_NOTSAFEPOINT if (jl_is_tuple_type(dt) ? !dt->isconcretetype : dt->hasfreetypevars) return 0; // aka !is_cacheable from jltypes.c if (jl_object_in_image((jl_value_t*)dt->name)) - return 1 + type_in_worklist(v); + return 1 + type_in_worklist(v, query_cache); } jl_value_t *dtv = jl_typeof(v); if (jl_is_datatype_singleton((jl_datatype_t*)dtv)) { - return 1 - type_in_worklist(dtv); // these are already recached in the datatype in the image + return 1 - type_in_worklist(dtv, query_cache); // these are already recached in the datatype in the image } return 0; } -static int needs_recaching(jl_value_t *v) JL_NOTSAFEPOINT +static int needs_recaching(jl_value_t *v, jl_query_cache *query_cache) JL_NOTSAFEPOINT { - return caching_tag(v) == 2; + return caching_tag(v, query_cache) == 2; } -static int needs_uniquing(jl_value_t *v) JL_NOTSAFEPOINT +static int needs_uniquing(jl_value_t *v, jl_query_cache *query_cache) JL_NOTSAFEPOINT { assert(!jl_object_in_image(v)); - return caching_tag(v) == 1; + return caching_tag(v, query_cache) == 1; } static void record_field_change(jl_value_t **addr, jl_value_t *newval) JL_NOTSAFEPOINT @@ -861,7 +877,7 @@ static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_ jl_datatype_t *dt = (jl_datatype_t*)v; // ensure all type parameters are recached jl_queue_for_serialization_(s, (jl_value_t*)dt->parameters, 1, 1); - if (jl_is_datatype_singleton(dt) && needs_uniquing(dt->instance)) { + if (jl_is_datatype_singleton(dt) && needs_uniquing(dt->instance, s->query_cache)) { assert(jl_needs_serialization(s, dt->instance)); // should be true, since we visited dt // do not visit dt->instance for our template object as it leads to unwanted cycles here // (it may get serialized from elsewhere though) @@ -872,7 +888,7 @@ static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_ if (s->incremental && jl_is_method_instance(v)) { jl_method_instance_t *mi = (jl_method_instance_t*)v; jl_value_t *def = mi->def.value; - if (needs_uniquing(v)) { + if (needs_uniquing(v, s->query_cache)) { // we only need 3 specific fields of this (the rest are not used) jl_queue_for_serialization(s, mi->def.value); jl_queue_for_serialization(s, mi->specTypes); @@ -887,7 +903,7 @@ static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_ 
record_field_change((jl_value_t**)&mi->cache, NULL); } else { - assert(!needs_recaching(v)); + assert(!needs_recaching(v, s->query_cache)); } // n.b. opaque closures cannot be inspected and relied upon like a // normal method since they can get improperly introduced by generated @@ -897,7 +913,7 @@ static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_ // error now. } if (s->incremental && jl_is_binding(v)) { - if (needs_uniquing(v)) { + if (needs_uniquing(v, s->query_cache)) { jl_binding_t *b = (jl_binding_t*)v; jl_queue_for_serialization(s, b->globalref->mod); jl_queue_for_serialization(s, b->globalref->name); @@ -1121,9 +1137,9 @@ static void jl_queue_for_serialization_(jl_serializer_state *s, jl_value_t *v, i // Items that require postorder traversal must visit their children prior to insertion into // the worklist/serialization_order (and also before their first use) if (s->incremental && !immediate) { - if (jl_is_datatype(t) && needs_uniquing(v)) + if (jl_is_datatype(t) && needs_uniquing(v, s->query_cache)) immediate = 1; - if (jl_is_datatype_singleton((jl_datatype_t*)t) && needs_uniquing(v)) + if (jl_is_datatype_singleton((jl_datatype_t*)t) && needs_uniquing(v, s->query_cache)) immediate = 1; } @@ -1286,7 +1302,7 @@ static uintptr_t _backref_id(jl_serializer_state *s, jl_value_t *v, jl_array_t * static void record_uniquing(jl_serializer_state *s, jl_value_t *fld, uintptr_t offset) JL_NOTSAFEPOINT { - if (s->incremental && jl_needs_serialization(s, fld) && needs_uniquing(fld)) { + if (s->incremental && jl_needs_serialization(s, fld) && needs_uniquing(fld, s->query_cache)) { if (jl_is_datatype(fld) || jl_is_datatype_singleton((jl_datatype_t*)jl_typeof(fld))) arraylist_push(&s->uniquing_types, (void*)(uintptr_t)offset); else if (jl_is_method_instance(fld) || jl_is_binding(fld)) @@ -1510,7 +1526,7 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED // write header if (object_id_expected) write_uint(f, jl_object_id(v)); - if (s->incremental && jl_needs_serialization(s, (jl_value_t*)t) && needs_uniquing((jl_value_t*)t)) + if (s->incremental && jl_needs_serialization(s, (jl_value_t*)t) && needs_uniquing((jl_value_t*)t, s->query_cache)) arraylist_push(&s->uniquing_types, (void*)(uintptr_t)(ios_pos(f)|1)); if (f == s->const_data) write_uint(s->const_data, ((uintptr_t)t->smalltag << 4) | GC_OLD_MARKED | GC_IN_IMAGE); @@ -1521,7 +1537,7 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED layout_table.items[item] = (void*)(reloc_offset | (f == s->const_data)); // store the inverse mapping of `serialization_order` (`id` => object-as-streampos) if (s->incremental) { - if (needs_uniquing(v)) { + if (needs_uniquing(v, s->query_cache)) { if (jl_typetagis(v, jl_binding_type)) { jl_binding_t *b = (jl_binding_t*)v; if (b->globalref == NULL) @@ -1550,7 +1566,7 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED assert(jl_is_datatype_singleton(t) && "unreachable"); } } - else if (needs_recaching(v)) { + else if (needs_recaching(v, s->query_cache)) { arraylist_push(jl_is_datatype(v) ? 
&s->fixup_types : &s->fixup_objs, (void*)reloc_offset); } } @@ -1985,7 +2001,7 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED } } void *superidx = ptrhash_get(&serialization_order, dt->super); - if (s->incremental && superidx != HT_NOTFOUND && from_seroder_entry(superidx) > item && needs_uniquing((jl_value_t*)dt->super)) + if (s->incremental && superidx != HT_NOTFOUND && from_seroder_entry(superidx) > item && needs_uniquing((jl_value_t*)dt->super, s->query_cache)) arraylist_push(&s->uniquing_super, dt->super); } else if (jl_is_typename(v)) { @@ -2919,13 +2935,14 @@ JL_DLLEXPORT jl_value_t *jl_as_global_root(jl_value_t *val, int insert) static void jl_prepare_serialization_data(jl_array_t *mod_array, jl_array_t *newly_inferred, /* outputs */ jl_array_t **extext_methods JL_REQUIRE_ROOTED_SLOT, jl_array_t **new_ext_cis JL_REQUIRE_ROOTED_SLOT, - jl_array_t **edges JL_REQUIRE_ROOTED_SLOT) + jl_array_t **edges JL_REQUIRE_ROOTED_SLOT, + jl_query_cache *query_cache) { // extext_methods: [method1, ...], worklist-owned "extending external" methods added to functions owned by modules outside the worklist // edges: [caller1, ext_targets, ...] for worklist-owned methods calling external methods // Save the inferred code from newly inferred, external methods - *new_ext_cis = queue_external_cis(newly_inferred); + *new_ext_cis = queue_external_cis(newly_inferred, query_cache); // Collect method extensions and edges data *extext_methods = jl_alloc_vec_any(0); @@ -2955,7 +2972,8 @@ static void jl_prepare_serialization_data(jl_array_t *mod_array, jl_array_t *new // In addition to the system image (where `worklist = NULL`), this can also save incremental images with external linkage static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array, jl_array_t *worklist, jl_array_t *extext_methods, - jl_array_t *new_ext_cis, jl_array_t *edges) + jl_array_t *new_ext_cis, jl_array_t *edges, + jl_query_cache *query_cache) { htable_new(&field_replace, 0); htable_new(&bits_replace, 0); @@ -3062,6 +3080,7 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array, ios_mem(&gvar_record, 0); ios_mem(&fptr_record, 0); jl_serializer_state s = {0}; + s.query_cache = query_cache; s.incremental = !(worklist == NULL); s.s = &sysimg; s.const_data = &const_data; @@ -3422,11 +3441,14 @@ JL_DLLEXPORT void jl_create_system_image(void **_native_data, jl_array_t *workli int64_t datastartpos = 0; JL_GC_PUSH4(&mod_array, &extext_methods, &new_ext_cis, &edges); + jl_query_cache query_cache; + init_query_cache(&query_cache); + if (worklist) { mod_array = jl_get_loaded_modules(); // __toplevel__ modules loaded in this session (from Base.loaded_modules_array) // Generate _native_data` if (_native_data != NULL) { - jl_prepare_serialization_data(mod_array, newly_inferred, &extext_methods, &new_ext_cis, NULL); + jl_prepare_serialization_data(mod_array, newly_inferred, &extext_methods, &new_ext_cis, NULL, &query_cache); jl_precompile_toplevel_module = (jl_module_t*)jl_array_ptr_ref(worklist, jl_array_len(worklist)-1); *_native_data = jl_precompile_worklist(worklist, extext_methods, new_ext_cis); jl_precompile_toplevel_module = NULL; @@ -3457,7 +3479,7 @@ JL_DLLEXPORT void jl_create_system_image(void **_native_data, jl_array_t *workli assert((ct->reentrant_timing & 0b1110) == 0); ct->reentrant_timing |= 0b1000; if (worklist) { - jl_prepare_serialization_data(mod_array, newly_inferred, &extext_methods, &new_ext_cis, &edges); + jl_prepare_serialization_data(mod_array, newly_inferred, 
&extext_methods, &new_ext_cis, &edges, &query_cache); if (!emit_split) { write_int32(f, 0); // No clone_targets write_padding(f, LLT_ALIGN(ios_pos(f), JL_CACHE_BYTE_ALIGNMENT) - ios_pos(f)); @@ -3469,7 +3491,7 @@ JL_DLLEXPORT void jl_create_system_image(void **_native_data, jl_array_t *workli } if (_native_data != NULL) native_functions = *_native_data; - jl_save_system_image_to_stream(ff, mod_array, worklist, extext_methods, new_ext_cis, edges); + jl_save_system_image_to_stream(ff, mod_array, worklist, extext_methods, new_ext_cis, edges, &query_cache); if (_native_data != NULL) native_functions = NULL; // make sure we don't run any Julia code concurrently before this point @@ -3498,6 +3520,8 @@ JL_DLLEXPORT void jl_create_system_image(void **_native_data, jl_array_t *workli } } + destroy_query_cache(&query_cache); + JL_GC_POP(); *s = f; if (emit_split) diff --git a/src/staticdata_utils.c b/src/staticdata_utils.c index e9f464b64470e..9bfd4c355efe6 100644 --- a/src/staticdata_utils.c +++ b/src/staticdata_utils.c @@ -131,63 +131,81 @@ JL_DLLEXPORT void jl_push_newly_inferred(jl_value_t* ci) JL_UNLOCK(&newly_inferred_mutex); } - // compute whether a type references something internal to worklist // and thus could not have existed before deserialize // and thus does not need delayed unique-ing -static int type_in_worklist(jl_value_t *v) JL_NOTSAFEPOINT +static int type_in_worklist(jl_value_t *v, jl_query_cache *cache) JL_NOTSAFEPOINT { if (jl_object_in_image(v)) return 0; // fast-path for rejection + + void *cached = HT_NOTFOUND; + if (cache != NULL) + cached = ptrhash_get(&cache->type_in_worklist, v); + + // fast-path for memoized results + if (cached != HT_NOTFOUND) + return cached == v; + + int result = 0; if (jl_is_uniontype(v)) { jl_uniontype_t *u = (jl_uniontype_t*)v; - return type_in_worklist(u->a) || - type_in_worklist(u->b); + result = type_in_worklist(u->a, cache) || + type_in_worklist(u->b, cache); } else if (jl_is_unionall(v)) { jl_unionall_t *ua = (jl_unionall_t*)v; - return type_in_worklist((jl_value_t*)ua->var) || - type_in_worklist(ua->body); + result = type_in_worklist((jl_value_t*)ua->var, cache) || + type_in_worklist(ua->body, cache); } else if (jl_is_typevar(v)) { jl_tvar_t *tv = (jl_tvar_t*)v; - return type_in_worklist(tv->lb) || - type_in_worklist(tv->ub); + result = type_in_worklist(tv->lb, cache) || + type_in_worklist(tv->ub, cache); } else if (jl_is_vararg(v)) { jl_vararg_t *tv = (jl_vararg_t*)v; - if (tv->T && type_in_worklist(tv->T)) - return 1; - if (tv->N && type_in_worklist(tv->N)) - return 1; + result = ((tv->T && type_in_worklist(tv->T, cache)) || + (tv->N && type_in_worklist(tv->N, cache))); } else if (jl_is_datatype(v)) { jl_datatype_t *dt = (jl_datatype_t*)v; - if (!jl_object_in_image((jl_value_t*)dt->name)) - return 1; - jl_svec_t *tt = dt->parameters; - size_t i, l = jl_svec_len(tt); - for (i = 0; i < l; i++) - if (type_in_worklist(jl_tparam(dt, i))) - return 1; + if (!jl_object_in_image((jl_value_t*)dt->name)) { + result = 1; + } + else { + jl_svec_t *tt = dt->parameters; + size_t i, l = jl_svec_len(tt); + for (i = 0; i < l; i++) { + if (type_in_worklist(jl_tparam(dt, i), cache)) { + result = 1; + break; + } + } + } } else { - return type_in_worklist(jl_typeof(v)); + return type_in_worklist(jl_typeof(v), cache); } - return 0; + + // Memoize result + if (cache != NULL) + ptrhash_put(&cache->type_in_worklist, (void*)v, result ? (void*)v : NULL); + + return result; } // When we infer external method instances, ensure they link back to the // package. 
Otherwise they might be, e.g., for external macros. // Implements Tarjan's SCC (strongly connected components) algorithm, simplified to remove the count variable -static int has_backedge_to_worklist(jl_method_instance_t *mi, htable_t *visited, arraylist_t *stack) +static int has_backedge_to_worklist(jl_method_instance_t *mi, htable_t *visited, arraylist_t *stack, jl_query_cache *query_cache) { jl_module_t *mod = mi->def.module; if (jl_is_method(mod)) mod = ((jl_method_t*)mod)->module; assert(jl_is_module(mod)); uint8_t is_precompiled = jl_atomic_load_relaxed(&mi->flags) & JL_MI_FLAGS_MASK_PRECOMPILED; - if (is_precompiled || !jl_object_in_image((jl_value_t*)mod) || type_in_worklist(mi->specTypes)) { + if (is_precompiled || !jl_object_in_image((jl_value_t*)mod) || type_in_worklist(mi->specTypes, query_cache)) { return 1; } if (!mi->backedges) { @@ -211,7 +229,7 @@ static int has_backedge_to_worklist(jl_method_instance_t *mi, htable_t *visited, jl_code_instance_t *be; i = get_next_edge(mi->backedges, i, NULL, &be); JL_GC_PROMISE_ROOTED(be); // get_next_edge propagates the edge for us here - int child_found = has_backedge_to_worklist(jl_get_ci_mi(be), visited, stack); + int child_found = has_backedge_to_worklist(jl_get_ci_mi(be), visited, stack, query_cache); if (child_found == 1 || child_found == 2) { // found what we were looking for, so terminate early found = 1; @@ -243,7 +261,7 @@ static int has_backedge_to_worklist(jl_method_instance_t *mi, htable_t *visited, // from the worklist or explicitly added by a `precompile` statement, and // (4) are the most recently computed result for that method. // These will be preserved in the image. -static jl_array_t *queue_external_cis(jl_array_t *list) +static jl_array_t *queue_external_cis(jl_array_t *list, jl_query_cache *query_cache) { if (list == NULL) return NULL; @@ -262,7 +280,7 @@ static jl_array_t *queue_external_cis(jl_array_t *list) jl_method_instance_t *mi = jl_get_ci_mi(ci); jl_method_t *m = mi->def.method; if (ci->owner == jl_nothing && jl_atomic_load_relaxed(&ci->inferred) && jl_is_method(m) && jl_object_in_image((jl_value_t*)m->module)) { - int found = has_backedge_to_worklist(mi, &visited, &stack); + int found = has_backedge_to_worklist(mi, &visited, &stack, query_cache); assert(found == 0 || found == 1 || found == 2); assert(stack.len == 0); if (found == 1 && jl_atomic_load_relaxed(&ci->max_world) == ~(size_t)0) { From 318028a91464d1b167d0817d2aec80424a4f5479 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Mon, 31 Mar 2025 04:04:21 -0400 Subject: [PATCH 15/20] fix opaque_closure sparam capture (#57928) Fix #54236 Fix #54357 (cherry picked from commit 3a3926eba563e147d7d6a48c44fb7f47088d24c5) --- src/julia-syntax.scm | 1 + test/opaque_closure.jl | 17 +++++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/src/julia-syntax.scm b/src/julia-syntax.scm index 739fa45e088ca..b5d90a24ea13f 100644 --- a/src/julia-syntax.scm +++ b/src/julia-syntax.scm @@ -4105,6 +4105,7 @@ f(x) = yt(x) (capt-var-access v fname opaq) v))) cvs))) + (set-car! 
(cdddr (lam:vinfo lam2)) '()) ;; must capture static_parameters as values inside opaque_closure `(new_opaque_closure ,(cadr e) ,(or (caddr e) '(call (core apply_type) (core Union))) ,(or (cadddr e) '(core Any)) ,allow-partial (opaque_closure_method (null) ,nargs ,isva ,functionloc ,(convert-lambda lam2 (car (lam:args lam2)) #f '() (symbol-to-idx-map cvs) parsed-method-stack)) diff --git a/test/opaque_closure.jl b/test/opaque_closure.jl index 6c988b068a668..7b02578a86621 100644 --- a/test/opaque_closure.jl +++ b/test/opaque_closure.jl @@ -390,3 +390,20 @@ let ir = first(only(Base.code_ircode(sin, (Int,)))) oc = Core.OpaqueClosure(ir; do_compile=false) @test oc(1) == sin(1) end + +function typed_add54236(::Type{T}) where T + return @opaque (x::Int)->T(x) + T(1) +end +let f = typed_add54236(Float64) + @test f isa Core.OpaqueClosure + @test f(32) === 33.0 +end + +f54357(g, ::Type{AT}) where {AT} = Base.Experimental.@opaque AT->_ (args...) -> g((args::AT)...) +let f = f54357(+, Tuple{Int,Int}) + @test f isa Core.OpaqueClosure + @test f(32, 34) === 66 + g = f54357(+, Tuple{Float64,Float64}) + @test g isa Core.OpaqueClosure + @test g(32.0, 34.0) === 66.0 +end From 7a9c684135b3fd6e8b1a93d5cd86edc547e5fff2 Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Thu, 27 Mar 2025 18:57:51 +0900 Subject: [PATCH 16/20] inference: fix exct modeling of `setglobal!` (#57896) (cherry picked from commit 626d541de6129179ff3934d6a4f95ef05c4c232c) --- Compiler/src/abstractinterpretation.jl | 7 +++---- Compiler/test/inference.jl | 5 +++++ 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/Compiler/src/abstractinterpretation.jl b/Compiler/src/abstractinterpretation.jl index 1c12bbf0c5e64..2a535cb20b4d0 100644 --- a/Compiler/src/abstractinterpretation.jl +++ b/Compiler/src/abstractinterpretation.jl @@ -2561,7 +2561,6 @@ function abstract_eval_setglobalonce!(interp::AbstractInterpreter, sv::AbsIntSta end end - function abstract_eval_replaceglobal!(interp::AbstractInterpreter, sv::AbsIntState, saw_latestworld::Bool, argtypes::Vector{Any}) if length(argtypes) in (5, 6, 7) (M, s, x, v) = argtypes[2], argtypes[3], argtypes[4], argtypes[5] @@ -3624,7 +3623,7 @@ end function global_assignment_rt_exct(interp::AbstractInterpreter, sv::AbsIntState, saw_latestworld::Bool, g::GlobalRef, @nospecialize(newty)) if saw_latestworld - return Pair{Any,Any}(newty, Union{ErrorException, TypeError}) + return Pair{Any,Any}(newty, ErrorException) end (valid_worlds, ret) = scan_partitions((interp, _, partition)->global_assignment_binding_rt_exct(interp, partition, newty), interp, g, sv.world) update_valid_age!(sv, valid_worlds) @@ -3641,10 +3640,10 @@ function global_assignment_binding_rt_exct(interp::AbstractInterpreter, partitio ty = kind == PARTITION_KIND_DECLARED ? Any : partition_restriction(partition) wnewty = widenconst(newty) if !hasintersect(wnewty, ty) - return Pair{Any,Any}(Bottom, TypeError) + return Pair{Any,Any}(Bottom, ErrorException) elseif !(wnewty <: ty) retty = tmeet(typeinf_lattice(interp), newty, ty) - return Pair{Any,Any}(retty, TypeError) + return Pair{Any,Any}(retty, ErrorException) end return Pair{Any,Any}(newty, Bottom) end diff --git a/Compiler/test/inference.jl b/Compiler/test/inference.jl index b77c99513a8b6..ec569a0ba04b5 100644 --- a/Compiler/test/inference.jl +++ b/Compiler/test/inference.jl @@ -6194,3 +6194,8 @@ f57292(xs::Union{Tuple{String}, Int}...) = getfield(xs...) g57292(xs::String...) = getfield(("abc",), 1, :not_atomic, xs...) 
@test Base.infer_return_type(f57292) == String @test Base.infer_return_type(g57292) == String + +global invalid_setglobal!_exct_modeling::Int +@test Base.infer_exception_type((Float64,)) do x + setglobal!(@__MODULE__, :invalid_setglobal!_exct_modeling, x) +end == ErrorException From fceace7cb912af3c11d2cadf6e8f3b1924e34bbc Mon Sep 17 00:00:00 2001 From: Jeff Bezanson Date: Mon, 31 Mar 2025 04:01:04 -0400 Subject: [PATCH 17/20] fix trimming size regression due to handling binding backedges in the wrong place (#57927) (cherry picked from commit fe613d4129f38efccb7821f0d37523abc649cb91) --- src/staticdata.c | 4 +++- test/trimming/trimming.jl | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/staticdata.c b/src/staticdata.c index cab6e6be510b0..2aa2180fc6923 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -841,7 +841,6 @@ static void jl_queue_module_for_serialization(jl_serializer_state *s, jl_module_ // ... or point to Base functions accessed by the runtime (m == jl_base_module && (!strcmp(jl_symbol_name(b->globalref->name), "wait") || !strcmp(jl_symbol_name(b->globalref->name), "task_done_hook"))))) { - record_field_change((jl_value_t**)&b->backedges, NULL); jl_queue_for_serialization(s, b); } } @@ -1081,6 +1080,9 @@ static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_ record_field_change((jl_value_t **)&tn->mt, NULL); } } + else if (jl_is_binding(v)) { + record_field_change((jl_value_t**)&((jl_binding_t*)v)->backedges, NULL); + } } char *data = (char*)jl_data_ptr(v); size_t i, np = layout->npointers; diff --git a/test/trimming/trimming.jl b/test/trimming/trimming.jl index 0c5226cba01fe..69d9adf9b003f 100644 --- a/test/trimming/trimming.jl +++ b/test/trimming/trimming.jl @@ -4,7 +4,7 @@ let exe_suffix = splitext(Base.julia_exename())[2] hello_exe = joinpath(@__DIR__, "hello" * exe_suffix) @test readchomp(`$hello_exe`) == "Hello, world!" - @test filesize(hello_exe) < filesize(unsafe_string(Base.JLOptions().image_file))/10 + @test filesize(hello_exe) < 2_000_000 basic_jll_exe = joinpath(@__DIR__, "basic_jll" * exe_suffix) lines = split(readchomp(`$basic_jll_exe`), "\n") From 189359e83ac3c5eb78834e15a6c35329fd2632b7 Mon Sep 17 00:00:00 2001 From: Tommy Hofmann Date: Fri, 21 Feb 2025 19:41:27 +0100 Subject: [PATCH 18/20] Clarify disabling of tab-completion hinting in the documentation (#57493) (cherry picked from commit e2cc68ced04dbfa4ea2005a4a44e05fe1e6d6336) --- stdlib/REPL/docs/src/index.md | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/stdlib/REPL/docs/src/index.md b/stdlib/REPL/docs/src/index.md index eabd7e729280e..ddd0a0953fcfc 100644 --- a/stdlib/REPL/docs/src/index.md +++ b/stdlib/REPL/docs/src/index.md @@ -343,7 +343,15 @@ mapfoldl mapfoldr When a single complete tab-complete result is available at the end of an input line and 2 or more characters have been typed, a hint of the completion will show in a lighter color. -This can be disabled via `Base.active_repl.options.hint_tab_completes = false`. +This can be disabled via `Base.active_repl.options.hint_tab_completes = false` or by adding +``` +atreplinit() do repl + if VERSION >= v"1.11.0-0" + repl.options.hint_tab_completes = false + end +end +``` +to your `~/.julia/config/startup.jl`. !!! 
compat "Julia 1.11" Tab-complete hinting was added in Julia 1.11 From 4bf5a26baa07e93d311d12afcd35bdb72ecf8be9 Mon Sep 17 00:00:00 2001 From: Kristoffer Date: Mon, 31 Mar 2025 19:51:25 +0200 Subject: [PATCH 19/20] change file size test for trimming --- test/trimming/trimming.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/trimming/trimming.jl b/test/trimming/trimming.jl index 69d9adf9b003f..4a0bb14ceebe2 100644 --- a/test/trimming/trimming.jl +++ b/test/trimming/trimming.jl @@ -4,7 +4,7 @@ let exe_suffix = splitext(Base.julia_exename())[2] hello_exe = joinpath(@__DIR__, "hello" * exe_suffix) @test readchomp(`$hello_exe`) == "Hello, world!" - @test filesize(hello_exe) < 2_000_000 + @test filesize(hello_exe) < 20_000_000 basic_jll_exe = joinpath(@__DIR__, "basic_jll" * exe_suffix) lines = split(readchomp(`$basic_jll_exe`), "\n") From a3b193c48aaf4e6000049b9df375ae0c6fdcf815 Mon Sep 17 00:00:00 2001 From: Jeff Bezanson Date: Wed, 26 Mar 2025 18:50:45 -0400 Subject: [PATCH 20/20] add trimming of new usings_backedges and scanned_methods fields (#57879) (cherry picked from commit 8fd3fb118222f3cbdb78c53c685e5ec994fca125) --- src/staticdata.c | 16 +++++++++++----- test/trimming/trimming.jl | 2 +- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/src/staticdata.c b/src/staticdata.c index 2aa2180fc6923..b51013e7e0563 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -851,8 +851,14 @@ static void jl_queue_module_for_serialization(jl_serializer_state *s, jl_module_ jl_queue_for_serialization(s, module_usings_getmod(m, i)); } - jl_queue_for_serialization(s, m->usings_backedges); - jl_queue_for_serialization(s, m->scanned_methods); + if (jl_options.trim || jl_options.strip_ir) { + record_field_change((jl_value_t**)&m->usings_backedges, jl_nothing); + record_field_change((jl_value_t**)&m->scanned_methods, jl_nothing); + } + else { + jl_queue_for_serialization(s, m->usings_backedges); + jl_queue_for_serialization(s, m->scanned_methods); + } } // Anything that requires uniquing or fixing during deserialization needs to be "toplevel" @@ -1367,10 +1373,10 @@ static void jl_write_module(jl_serializer_state *s, uintptr_t item, jl_module_t newm->line = 0; newm->usings_backedges = NULL; arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_module_t, usings_backedges))); - arraylist_push(&s->relocs_list, (void*)backref_id(s, m->usings_backedges, s->link_ids_relocs)); + arraylist_push(&s->relocs_list, (void*)backref_id(s, get_replaceable_field(&m->usings_backedges, 1), s->link_ids_relocs)); newm->scanned_methods = NULL; arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_module_t, scanned_methods))); - arraylist_push(&s->relocs_list, (void*)backref_id(s, m->scanned_methods, s->link_ids_relocs)); + arraylist_push(&s->relocs_list, (void*)backref_id(s, get_replaceable_field(&m->scanned_methods, 1), s->link_ids_relocs)); // After reload, everything that has happened in this process happened semantically at // (for .incremental) or before jl_require_world, so reset this flag. 
@@ -3635,7 +3641,7 @@ static int jl_validate_binding_partition(jl_binding_t *b, jl_binding_partition_t jl_sym_t *name = b->globalref->name; JL_LOCK(&mod->lock); jl_atomic_store_release(&mod->export_set_changed_since_require_world, 1); - if (mod->usings_backedges) { + if (mod->usings_backedges != jl_nothing) { for (size_t i = 0; i < jl_array_len(mod->usings_backedges); i++) { jl_module_t *edge = (jl_module_t*)jl_array_ptr_ref(mod->usings_backedges, i); jl_binding_t *importee = jl_get_module_binding(edge, name, 0); diff --git a/test/trimming/trimming.jl b/test/trimming/trimming.jl index 4a0bb14ceebe2..a752c69460ad4 100644 --- a/test/trimming/trimming.jl +++ b/test/trimming/trimming.jl @@ -4,7 +4,7 @@ let exe_suffix = splitext(Base.julia_exename())[2] hello_exe = joinpath(@__DIR__, "hello" * exe_suffix) @test readchomp(`$hello_exe`) == "Hello, world!" - @test filesize(hello_exe) < 20_000_000 + @test filesize(hello_exe) < 2000000 basic_jll_exe = joinpath(@__DIR__, "basic_jll" * exe_suffix) lines = split(readchomp(`$basic_jll_exe`), "\n")
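
A short illustration of the unintended-capture pattern addressed by the `_precompilepkgs` progress-display fix earlier in this series: when a nested closure assigns to a name that already exists in an enclosing scope, Julia rebinds the enclosing variable and has to box it, which degrades inference and makes the surrounding code more prone to invalidation. The sketch below is not the precompilation.jl code itself; the function names `capture_demo` and `no_capture_demo` are made up purely for illustration.

```julia
# Assigning to `str` inside the closure rebinds the enclosing `str`,
# so `str` becomes a Core.Box and its concrete String type is lost to inference.
function capture_demo()
    str = "outer"
    render = () -> (str = "inner"; str)
    return render(), str
end

# Using a fresh local (`s`) inside the closure avoids the capture entirely,
# mirroring the fix of introducing a new variable instead of reusing `str`.
function no_capture_demo()
    str = "outer"
    render = () -> (s = "inner"; s)
    return render(), str
end

# Compare the inferred code, e.g. with:
#   code_warntype(capture_demo, ())     # shows Core.Box for `str`
#   code_warntype(no_capture_demo, ())  # everything stays a concrete String
```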