From 5843ac52d5c28084bff3fb7a3b3194745c0724a7 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Mon, 17 Mar 2025 16:54:36 -0400 Subject: [PATCH 01/20] fix _apply_pure deprecation (#57800) Fix typo from #57532 and ensure "inference world" code has the permitted ability to inspect the max world age, since it needs that info in order to cache results correctly. (cherry picked from commit 7ee404ccc5293a10062c876d2e2c87f9591fdeee) --- base/boot.jl | 2 +- src/gf.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/base/boot.jl b/base/boot.jl index 8cd032817cebe..32975e96af583 100644 --- a/base/boot.jl +++ b/base/boot.jl @@ -1017,7 +1017,7 @@ _setparser!(parser) = setglobal!(Core, :_parse, parser) # support for deprecated uses of builtin functions _apply(x...) = _apply_iterate(Main.Base.iterate, x...) -_apply_pure(x...) = invoke_in_world_total(typemax_UInt, x...) +const _apply_pure = _apply const _call_latest = invokelatest const _call_in_world = invoke_in_world diff --git a/src/gf.c b/src/gf.c index cc3966da5f393..f8d88c4e44e38 100644 --- a/src/gf.c +++ b/src/gf.c @@ -532,7 +532,10 @@ JL_DLLEXPORT jl_value_t *jl_call_in_typeinf_world(jl_value_t **args, int nargs) jl_task_t *ct = jl_current_task; size_t last_age = ct->world_age; ct->world_age = jl_typeinf_world; + int last_pure = ct->ptls->in_pure_callback; + ct->ptls->in_pure_callback = 0; jl_value_t *ret = jl_apply(args, nargs); + ct->ptls->in_pure_callback = last_pure; ct->world_age = last_age; return ret; } From 665cf7c7696db8275be6812c6d087cd2fbf44cd5 Mon Sep 17 00:00:00 2001 From: Cody Tapscott <84105208+topolarity@users.noreply.github.com> Date: Wed, 26 Mar 2025 15:40:00 -0400 Subject: [PATCH 02/20] precompile_utils: Don't auto-enqueue `macro` methods for pre-compilation (#57833) Despite disabling these from being compiled in `gf.c` for dynamic invocations, the pre-compilation code was adding `macro` Methods anyway to the workqueue. Replaces https://github.com/JuliaLang/julia/pull/57782 (cherry picked from commit 6ce51d3c1792c7756c88bda26d75388cb7371540) --- src/precompile_utils.c | 4 ++++ test/precompile.jl | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/src/precompile_utils.c b/src/precompile_utils.c index 281dbe0163586..c602a15c1fb74 100644 --- a/src/precompile_utils.c +++ b/src/precompile_utils.c @@ -170,6 +170,10 @@ static void jl_compile_all_defs(jl_array_t *mis, int all) size_t i, l = jl_array_nrows(allmeths); for (i = 0; i < l; i++) { jl_method_t *m = (jl_method_t*)jl_array_ptr_ref(allmeths, i); + int is_macro_method = jl_symbol_name(m->name)[0] == '@'; + if (is_macro_method && !all) + continue; // Avoid inference / pre-compilation for macros + if (jl_is_datatype(m->sig) && jl_isa_compileable_sig((jl_tupletype_t*)m->sig, jl_emptysvec, m)) { // method has a single compilable specialization, e.g. its definition // signature is concrete. in this case we can just hint it. 
diff --git a/test/precompile.jl b/test/precompile.jl index 7c5c63a277e27..f7b31c125014c 100644 --- a/test/precompile.jl +++ b/test/precompile.jl @@ -2416,4 +2416,9 @@ precompile_test_harness("Package top-level load itself") do load_path end end +# Verify that inference / caching was not performed for any macros in the sysimage +let m = only(methods(Base.var"@big_str")) + @test m.specializations === Core.svec() || !isdefined(m.specializations, :cache) +end + finish_precompile_test!() From 383a8b8bd926b4b9a76b51ea73328e598aaac932 Mon Sep 17 00:00:00 2001 From: Neven Sajko <4944410+nsajko@users.noreply.github.com> Date: Thu, 27 Mar 2025 15:12:20 +0100 Subject: [PATCH 03/20] `Compiler`: `abstract_eval_invoke_inst`: type assert `Expr` (#57860) Should make the code less vulnerable to invalidation. (cherry picked from commit ed3fccc0be0f3cf48ba8f0910e5a5faa58280787) --- Compiler/src/ssair/irinterp.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Compiler/src/ssair/irinterp.jl b/Compiler/src/ssair/irinterp.jl index 084f28f0aa523..3d72da72625be 100644 --- a/Compiler/src/ssair/irinterp.jl +++ b/Compiler/src/ssair/irinterp.jl @@ -32,7 +32,7 @@ function concrete_eval_invoke(interp::AbstractInterpreter, ci::CodeInstance, arg end function abstract_eval_invoke_inst(interp::AbstractInterpreter, inst::Instruction, irsv::IRInterpretationState) - stmt = inst[:stmt] + stmt = inst[:stmt]::Expr ci = stmt.args[1] if ci isa MethodInstance world = frame_world(irsv) From 4ffd73574d42defa32d9bdd7dbacfc864ce6c33c Mon Sep 17 00:00:00 2001 From: Neven Sajko <4944410+nsajko@users.noreply.github.com> Date: Thu, 27 Mar 2025 19:29:55 +0100 Subject: [PATCH 04/20] `Compiler`: `walk_to_defs`, `collect_leaves`: specialize for `predecessors` (#57859) Should make the code less vulnerable to invalidation. (cherry picked from commit d9441ac9bc055562ba9009357b465e69c16ade68) --- Compiler/src/ssair/passes.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Compiler/src/ssair/passes.jl b/Compiler/src/ssair/passes.jl index 14fc0ab20913c..46ed299167060 100644 --- a/Compiler/src/ssair/passes.jl +++ b/Compiler/src/ssair/passes.jl @@ -183,7 +183,7 @@ function find_def_for_use( end function collect_leaves(compact::IncrementalCompact, @nospecialize(val), @nospecialize(typeconstraint), 𝕃ₒ::AbstractLattice, - predecessors = ((@nospecialize(def), compact::IncrementalCompact) -> isa(def, PhiNode) ? def.values : nothing)) + predecessors::Pre = ((@nospecialize(def), compact::IncrementalCompact) -> isa(def, PhiNode) ? def.values : nothing)) where {Pre} if isa(val, Union{OldSSAValue, SSAValue}) val, typeconstraint = simple_walk_constraint(compact, val, typeconstraint) end @@ -271,7 +271,7 @@ Starting at `val` walk use-def chains to get all the leaves feeding into this `v `predecessors(def, compact)` is a callback which should return the set of possible predecessors for a "phi-like" node (PhiNode or Core.ifelse) or `nothing` otherwise. 
""" -function walk_to_defs(compact::IncrementalCompact, @nospecialize(defssa), @nospecialize(typeconstraint), predecessors, 𝕃ₒ::AbstractLattice) +function walk_to_defs(compact::IncrementalCompact, @nospecialize(defssa), @nospecialize(typeconstraint), predecessors::Pre, 𝕃ₒ::AbstractLattice) where {Pre} visited_philikes = AnySSAValue[] isa(defssa, AnySSAValue) || return Any[defssa], visited_philikes def = compact[defssa][:stmt] From a2460d320043d7d0afec2fb4915981c465110621 Mon Sep 17 00:00:00 2001 From: Diogo Netto <61364108+d-netto@users.noreply.github.com> Date: Fri, 28 Mar 2025 01:56:26 -0300 Subject: [PATCH 05/20] only update fragmentation data for pages that are not lazily freed (#57907) We are including data from lazily freed pages into this metric, which makes it inaccurate. Thanks @qinsoon for spotting it. (cherry picked from commit d6fdbf5212aaf270749138db2e285141e4de1dc6) --- src/gc-stock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gc-stock.c b/src/gc-stock.c index 3b49b82caf530..66a1724ecf4ce 100644 --- a/src/gc-stock.c +++ b/src/gc-stock.c @@ -948,6 +948,7 @@ static void gc_sweep_page(gc_page_profiler_serializer_t *s, jl_gc_pool_t *p, jl_ done: if (re_use_page) { + gc_update_page_fragmentation_data(pg); push_lf_back(allocd, pg); } else { @@ -956,7 +957,6 @@ static void gc_sweep_page(gc_page_profiler_serializer_t *s, jl_gc_pool_t *p, jl_ push_lf_back(&global_page_pool_lazily_freed, pg); } gc_page_profile_write_to_file(s); - gc_update_page_fragmentation_data(pg); gc_time_count_page(freedall, pg_skpd); jl_ptls_t ptls = jl_current_task->ptls; // Note that we aggregate the `pool_live_bytes` over all threads before returning this From 19abebf9dea7f5ddc4914da3b087da69aeb87154 Mon Sep 17 00:00:00 2001 From: Neven Sajko <4944410+nsajko@users.noreply.github.com> Date: Fri, 28 Mar 2025 12:26:13 +0100 Subject: [PATCH 06/20] `Random`: `show` method for `MersenneTwister`: invalidation resistance (#57913) Avoid using or constructing the vector with nonconcrete element type. Should make the sysimage more resistant to method invalidation. (cherry picked from commit 8e03cb11c8460b14ccb1553cb11343c3f39a6774) --- stdlib/Random/src/RNGs.jl | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/stdlib/Random/src/RNGs.jl b/stdlib/Random/src/RNGs.jl index 7782de88ba537..2ea2fb3a684df 100644 --- a/stdlib/Random/src/RNGs.jl +++ b/stdlib/Random/src/RNGs.jl @@ -147,21 +147,26 @@ function show(io::IO, rng::MersenneTwister) end print(io, MersenneTwister, "(", repr(rng.seed), ", (") # state - adv = Integer[rng.adv_jump, rng.adv] + sep = ", " + show(io, rng.adv_jump) + print(io, sep) + show(io, rng.adv) if rng.adv_vals != -1 || rng.adv_ints != -1 - if rng.adv_vals == -1 - @assert rng.idxF == MT_CACHE_F - push!(adv, 0, 0) # "(0, 0)" is nicer on the eyes than (-1, 1002) - else - push!(adv, rng.adv_vals, rng.idxF) - end + # "(0, 0)" is nicer on the eyes than (-1, 1002) + s = rng.adv_vals != -1 + print(io, sep) + show(io, s ? rng.adv_vals : zero(rng.adv_vals)) + print(io, sep) + show(io, s ? 
rng.idxF : zero(rng.idxF)) end if rng.adv_ints != -1 idxI = (length(rng.ints)*16 - rng.idxI) / 8 # 8 represents one Int64 idxI = Int(idxI) # idxI should always be an integer when using public APIs - push!(adv, rng.adv_ints, idxI) + print(io, sep) + show(io, rng.adv_ints) + print(io, sep) + show(io, idxI) end - join(io, adv, ", ") print(io, "))") end From 5a7f1b5ae823c109b3689fc1d9f603916d231471 Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Fri, 28 Mar 2025 11:36:50 -0300 Subject: [PATCH 07/20] Teach alloc-opt to handle atomics a bit better (#57208) Fixes https://github.com/JuliaLang/julia/issues/57190 The fact that this passes thinks memcpy is a potential issue is quite annoying so it deserves a decent refactor, which flows through the type information from julia instead of trying to regenerate it on site, specially given that opaque pointers means we can't really instrospect into pointers at all (cherry picked from commit 2c7527b1aa27b8bf523af95289767b66eac5796e) --- src/llvm-alloc-helpers.cpp | 1 + src/llvm-alloc-opt.cpp | 5 +- test/atomics.jl | 11 ++ test/llvmpasses/alloc-opt-pass.ll | 180 +++++++++++++++++++++++++----- 4 files changed, 171 insertions(+), 26 deletions(-) diff --git a/src/llvm-alloc-helpers.cpp b/src/llvm-alloc-helpers.cpp index 194c6837860ca..a1ed66a190190 100644 --- a/src/llvm-alloc-helpers.cpp +++ b/src/llvm-alloc-helpers.cpp @@ -214,6 +214,7 @@ void jl_alloc::runEscapeAnalysis(llvm::CallInst *I, EscapeAnalysisRequiredArgs r } if (auto call = dyn_cast(inst)) { // TODO handle `memcmp` + // TODO handle `memcpy` which is used a lot more often since opaque pointers // None of the intrinsics should care if the memory is stack or heap allocated. auto callee = call->getCalledOperand(); if (auto II = dyn_cast(call)) { diff --git a/src/llvm-alloc-opt.cpp b/src/llvm-alloc-opt.cpp index 7dd794a4d8847..ce1d22f42d0ae 100644 --- a/src/llvm-alloc-opt.cpp +++ b/src/llvm-alloc-opt.cpp @@ -742,7 +742,9 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref, AllocF auto replace_inst = [&] (Instruction *user) { Instruction *orig_i = cur.orig_i; Instruction *new_i = cur.new_i; - if (isa(user) || isa(user)) { + if (isa(user) || isa(user) || + isa(user) || isa(user)) { + // TODO: these atomics are likely removable if the user is the first argument user->replaceUsesOfWith(orig_i, new_i); } else if (auto call = dyn_cast(user)) { @@ -1111,6 +1113,7 @@ void Optimizer::splitOnStack(CallInst *orig_inst) return; } else if (isa(user) || isa(user)) { + // TODO: Downgrade atomics here potentially auto slot_idx = find_slot(offset); auto &slot = slots[slot_idx]; assert(slot.offset <= offset && slot.offset + slot.size >= offset); diff --git a/test/atomics.jl b/test/atomics.jl index 7e9f29c23ca10..2d4a713b1d30d 100644 --- a/test/atomics.jl +++ b/test/atomics.jl @@ -1099,3 +1099,14 @@ test_once_undef(Any) test_once_undef(Union{Nothing,Integer}) test_once_undef(UndefComplex{Any}) test_once_undef(UndefComplex{UndefComplex{Any}}) + +mutable struct Atomic57190 + @atomic x::Int +end + + +function add_one57190!() + @atomic (Atomic57190(0).x) += 1 +end + +@test add_one57190!() == 1 diff --git a/test/llvmpasses/alloc-opt-pass.ll b/test/llvmpasses/alloc-opt-pass.ll index 665687e86835d..83f2118412cc1 100644 --- a/test/llvmpasses/alloc-opt-pass.ll +++ b/test/llvmpasses/alloc-opt-pass.ll @@ -73,6 +73,11 @@ L3: ; preds = %L2, %L1, %0 } ; CHECK-LABEL: }{{$}} +declare void @external_function() + +declare ptr addrspace(10) @external_function2() + + ; CHECK-LABEL: @legal_int_types ; CHECK: 
alloca [12 x i8] ; CHECK-NOT: alloca i96 @@ -89,21 +94,6 @@ define void @legal_int_types() { } ; CHECK-LABEL: }{{$}} -declare void @external_function() - -declare ptr addrspace(10) @external_function2() - -declare ptr @julia.ptls_states() - -declare ptr @julia.get_pgcstack() - -declare noalias ptr addrspace(10) @julia.gc_alloc_obj(ptr, i64, ptr addrspace(10)) - -declare ptr @julia.pointer_from_objref(ptr addrspace(11)) - -declare token @llvm.julia.gc_preserve_begin(...) - -declare void @llvm.julia.gc_preserve_end(token) ; CHECK-LABEL: @memref_collision ; OPAQUE: call ptr @julia.ptls_states() @@ -171,13 +161,13 @@ define void @initializers() { %pgcstack = call ptr @julia.get_pgcstack() %ptls = call ptr @julia.ptls_states() %ptls_i8 = bitcast ptr %ptls to ptr - %var1 = call ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 1, ptr addrspace(10) @tag) #1 + %var1 = call ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 1, ptr addrspace(10) @tag) #4 %var2 = addrspacecast ptr addrspace(10) %var1 to ptr addrspace(11) %var3 = call ptr @julia.pointer_from_objref(ptr addrspace(11) %var2) - %var4 = call ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 2, ptr addrspace(10) @tag) #2 + %var4 = call ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 2, ptr addrspace(10) @tag) #7 %var5 = addrspacecast ptr addrspace(10) %var4 to ptr addrspace(11) %var6 = call ptr @julia.pointer_from_objref(ptr addrspace(11) %var5) - %var7 = call ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 3, ptr addrspace(10) @tag) #3 + %var7 = call ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 3, ptr addrspace(10) @tag) #1 %var8 = addrspacecast ptr addrspace(10) %var7 to ptr addrspace(11) %var9 = call ptr @julia.pointer_from_objref(ptr addrspace(11) %var8) ret void @@ -203,14 +193,154 @@ union_move9: ; No predecessors! 
} ; CHECK-LABEL: }{{$}} +@0 = private unnamed_addr constant ptr inttoptr (i64 4373799056 to ptr), !julia.constgv !0 +@1 = private unnamed_addr constant i64 0, align 8 + +; CHECK-LABEL: @cmpxchg +; CHECK: alloca +; CHECK: alloca +; CHECK: %20 = cmpxchg ptr %2, +define swiftcc i64 @"cmpxchg"(ptr nonnull swiftself %0) #0 { + %2 = alloca i64, align 16 + %3 = call ptr @julia.get_pgcstack() + %4 = getelementptr inbounds i8, ptr %3, i32 -152 + %5 = getelementptr inbounds i8, ptr %4, i32 168 + %6 = load ptr, ptr %5, align 8, !tbaa !4 + %7 = getelementptr inbounds i8, ptr %6, i32 16 + %8 = load ptr, ptr %7, align 8, !tbaa !8, !invariant.load !0 + fence syncscope("singlethread") seq_cst + call void @julia.safepoint(ptr %8) + fence syncscope("singlethread") seq_cst + %9 = load ptr, ptr @0, align 8, !tbaa !8, !invariant.load !0, !alias.scope !10, !noalias !13, !nonnull !0, !dereferenceable !18, !align !19 + %10 = ptrtoint ptr %9 to i64 + %11 = inttoptr i64 %10 to ptr + %12 = getelementptr inbounds i8, ptr %3, i32 -152 + %13 = addrspacecast ptr %11 to ptr addrspace(10) + call void @llvm.lifetime.start.p0(i64 8, ptr %2) + %14 = call noalias nonnull align 8 dereferenceable(8) ptr addrspace(10) @julia.gc_alloc_obj(ptr %12, i64 8, ptr addrspace(10) %13) #7 + %15 = addrspacecast ptr addrspace(10) %14 to ptr addrspace(11) + call void @llvm.memcpy.p11.p0.i64(ptr addrspace(11) align 8 %15, ptr align 8 @1, i64 8, i1 false), !tbaa !20, !alias.scope !23, !noalias !24 + %16 = addrspacecast ptr addrspace(10) %14 to ptr addrspace(11) + %17 = load atomic i64, ptr addrspace(11) %16 monotonic, align 8, !tbaa !25, !alias.scope !23, !noalias !24 + br label %19 + +18: ; preds = %19 + ret i64 %21 + +19: ; preds = %19, %1 + %20 = phi i64 [ %17, %1 ], [ %23, %19 ] + %21 = call swiftcc i64 @"jlsys_+_47"(ptr nonnull swiftself %3, i64 signext %20, i64 signext 1) + %22 = cmpxchg ptr addrspace(11) %16, i64 %20, i64 %21 seq_cst monotonic, align 8, !tbaa !25, !alias.scope !23, !noalias !24 + %23 = extractvalue { i64, i1 } %22, 0 + %24 = extractvalue { i64, i1 } %22, 1 + br i1 %24, label %18, label %19 +} + +; CHECK-LABEL: }{{$}} +; CHECK-LABEL: @atomicrmw +; CHECK: alloca +; CHECK: alloca +; CHECK: atomicrmw xchg ptr %2, +define swiftcc i64 @"atomicrmw"(ptr nonnull swiftself %0) #0 { + %2 = alloca i64, align 16 + %3 = call ptr @julia.get_pgcstack() + %4 = getelementptr inbounds i8, ptr %3, i32 -152 + %5 = getelementptr inbounds i8, ptr %4, i32 168 + %6 = load ptr, ptr %5, align 8, !tbaa !4 + %7 = getelementptr inbounds i8, ptr %6, i32 16 + %8 = load ptr, ptr %7, align 8, !tbaa !8, !invariant.load !0 + fence syncscope("singlethread") seq_cst + call void @julia.safepoint(ptr %8) + fence syncscope("singlethread") seq_cst + %9 = load ptr, ptr @0, align 8, !tbaa !8, !invariant.load !0, !alias.scope !10, !noalias !13, !nonnull !0, !dereferenceable !18, !align !19 + %10 = ptrtoint ptr %9 to i64 + %11 = inttoptr i64 %10 to ptr + %12 = getelementptr inbounds i8, ptr %3, i32 -152 + %13 = addrspacecast ptr %11 to ptr addrspace(10) + call void @llvm.lifetime.start.p0(i64 8, ptr %2) + %14 = call noalias nonnull align 8 dereferenceable(8) ptr addrspace(10) @julia.gc_alloc_obj(ptr %12, i64 8, ptr addrspace(10) %13) #7 + %15 = addrspacecast ptr addrspace(10) %14 to ptr addrspace(11) + call void @llvm.memcpy.p11.p0.i64(ptr addrspace(11) align 8 %15, ptr align 8 @1, i64 8, i1 false), !tbaa !20, !alias.scope !23, !noalias !24 + %16 = addrspacecast ptr addrspace(10) %14 to ptr addrspace(11) + %17 = load atomic i64, ptr addrspace(11) %16 monotonic, 
align 8, !tbaa !25, !alias.scope !23, !noalias !24 + %18 = call swiftcc i64 @"jlsys_+_47"(ptr nonnull swiftself %3, i64 signext %17, i64 signext 1) + %19 = atomicrmw xchg ptr addrspace(11) %16, i64 %18 seq_cst, align 8, !tbaa !25, !alias.scope !23, !noalias !24 ; preds = %19 + ret i64 %19 +} + +declare ptr @julia.ptls_states() + +declare ptr @julia.pointer_from_objref(ptr addrspace(11)) + +declare token @llvm.julia.gc_preserve_begin(...) + +declare void @llvm.julia.gc_preserve_end(token) + +declare ptr @julia.get_pgcstack() + +; Function Attrs: mustprogress nounwind willreturn memory(inaccessiblemem: readwrite) +declare nonnull align 8 dereferenceable(8) ptr addrspace(10) @ijl_box_int64(i64 signext) #2 + +; Function Attrs: memory(argmem: readwrite, inaccessiblemem: readwrite) +declare void @julia.safepoint(ptr) #3 + +; Function Attrs: mustprogress nounwind willreturn allockind("alloc") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite) +declare noalias nonnull ptr addrspace(10) @julia.gc_alloc_obj(ptr, i64, ptr addrspace(10)) #4 + ; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite) -declare void @llvm.memcpy.p11.p0.i64(ptr addrspace(11) noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #0 +declare void @llvm.memcpy.p11.p0.i64(ptr addrspace(11) noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #5 + ; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite) -declare void @llvm.memcpy.p0.p11.i64(ptr noalias nocapture writeonly, ptr addrspace(11) noalias nocapture readonly, i64, i1 immarg) #0 +declare void @llvm.memcpy.p0.p11.i64(ptr noalias nocapture writeonly, ptr addrspace(11) noalias nocapture readonly, i64, i1 immarg) #5 + ; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite) -declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #0 +declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #5 + +declare swiftcc i64 @"jlsys_+_47"(ptr nonnull swiftself, i64 signext, i64 signext) #0 + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #6 + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #6 + +attributes #0 = { "probe-stack"="inline-asm" } +attributes #1 = { nounwind willreturn allockind("alloc,zeroed") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite) } +attributes #2 = { mustprogress nounwind willreturn memory(inaccessiblemem: readwrite) } +attributes #3 = { memory(argmem: readwrite, inaccessiblemem: readwrite) } +attributes #4 = { mustprogress nounwind willreturn allockind("alloc") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite) } +attributes #5 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } +attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +attributes #7 = { nounwind willreturn allockind("alloc,uninitialized") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite) } +attributes #8 = { nounwind willreturn memory(inaccessiblemem: readwrite) } + +!llvm.module.flags = !{!1, !2, !3} + +!0 = !{} +!1 = !{i32 2, !"Dwarf Version", i32 4} +!2 = !{i32 2, !"Debug Info Version", i32 3} +!3 = !{i32 2, !"julia.optlevel", i32 2} +!4 = 
!{!5, !5, i64 0}
+!5 = !{!"jtbaa_gcframe", !6, i64 0}
+!6 = !{!"jtbaa", !7, i64 0}
+!7 = !{!"jtbaa"}
+!8 = !{!9, !9, i64 0, i64 1}
+!9 = !{!"jtbaa_const", !6, i64 0}
+!10 = !{!11}
+!11 = !{!"jnoalias_const", !12}
+!12 = !{!"jnoalias"}
+!13 = !{!14, !15, !16, !17}
+!14 = !{!"jnoalias_gcframe", !12}
+!15 = !{!"jnoalias_stack", !12}
+!16 = !{!"jnoalias_data", !12}
+!17 = !{!"jnoalias_typemd", !12}
+!18 = !{i64 56}
+!19 = !{i64 16}
+!20 = !{!21, !21, i64 0}
+!21 = !{!"jtbaa_value", !22, i64 0}
+!22 = !{!"jtbaa_data", !6, i64 0}
+!23 = !{!16}
+!24 = !{!14, !15, !17, !11}
+!25 = !{!26, !26, i64 0}
+!26 = !{!"jtbaa_mutab", !21, i64 0}
-
-attributes #0 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
-attributes #1 = { allockind("alloc") }
-attributes #2 = { allockind("alloc,uninitialized") }
-attributes #3 = { allockind("alloc,zeroed") }

From 42e5e4724b27ef5e66e4dfae1e3ed8eef3293b38 Mon Sep 17 00:00:00 2001
From: Jakob Nybo Nissen
Date: Fri, 28 Mar 2025 19:07:23 +0100
Subject: [PATCH 08/20] Refactor IOBuffer code (#57570)

I've been a little frustrated with the IOBuffer code. It contains a whole bunch of implicit invariants, and is poorly commented. It also has several bugs that ultimately stem from the code being unclear about its own assumptions.

This is a refactoring of IOBuffer. The primary goals are:

* Comment the code more heavily
* Test the code more thoroughly

The secondary goals are:

* Fix a few outstanding bugs
* Add some minor performance improvements

This is a purely internal refactoring with no change in behaviour of `IOBuffer`, except straight-up bugfixes. However, note that previous code may have relied on buggy behaviour. Fixing bugs may therefore cause breakage.

## Current changes

### **BEHAVIOUR CHANGES**

* The following code used to not throw an error, but now does: `IOBuffer(b"abc"; maxsize=2)`. I consider this a bugfix. It should not be possible to construct an IOBuffer with a buffer size larger than `maxsize`.
* It used to be possible to write to indices higher than `maxindex`, which could trigger a bug causing data loss. The bug has been fixed, but as a result, some IOBuffers may reach full capacity faster (really: reach it at the correct point), changing writing behaviour.

### Bugfixes

* Do not corrupt data on `copyline` on a non-appending buffer
* Respect `maxsize` even after `take!` (fix #57549)
* Fix bug when copying from an appending iobuffer to itself
* Fix bug where re-allocating the buffer may cause it to shrink, discarding data
* Fix bug where `truncate` may throw a wrong BoundsError
* Fix a bug where truncating a buffer may not correctly remove a mark at a position that has been deleted
* Fix a bug where initializing an IOBuffer without an explicit buffer and with `truncate=false` makes it contain the full buffer
* Current behaviour for `reset` and `position` did not work for `PipeBuffer`; fix that

### Changes to brittle code

* Removed some tests that explicitly tested internal code and internal behaviour. Some of that behaviour has changed.
* Changed some internal `PipeBuffer` behaviour, which did not respect a writable IOBuffer's ownership of its buffer and therefore failed spuriously

### Performance improvements

* Writing to a dense IOBuffer now uses memmove and is up to 10x faster for long writes.
* Minor optimisations (about ten percent) for writing to IOBuffers in general Closes #57549 Co-authored-by: Jameson Nash (cherry picked from commit 54197132f530bd7e7dbc84cfe56daa0b43bcfb0c) --- base/iobuffer.jl | 770 +++++++++++++++++++++++++++++++++-------------- base/stream.jl | 13 +- test/iobuffer.jl | 450 ++++++++++++++++++++++----- 3 files changed, 925 insertions(+), 308 deletions(-) diff --git a/base/iobuffer.jl b/base/iobuffer.jl index 144b0a20568e9..5e08a21d53186 100644 --- a/base/iobuffer.jl +++ b/base/iobuffer.jl @@ -1,45 +1,168 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license -## work with AbstractVector{UInt8} via I/O primitives ## +# IOBuffer is a Memory{UInt8} backed IO type for in-memory IO. + +# Here, u represents used bytes (already read), X represents bytes still to read, +# - represents bytes uninitialized data but which can be written to later. +# . represents bytes before offset, which the buffer will not touch, until +# a write operation happens. + +# .....uuuuuuuuuuuuuXXXXXXXXXXXXX------------ +# | | | | | | +# | offset ptr size | maxsize +# 1 lastindex(data) + +# N.B: `mark` does not correspond to any index in the buffer. Instead, it stores +# the mark at virtual offset in the buffer. + +# AFTER COMPACTION + +# XXXXXXXXXXXXX-------------------------- +# || | | | | +# |1 ptr size | maxsize +# | lastindex(data) +# offset (set to zero) + +# * The underlying array is always 1-indexed +# * The IOBuffer has full control (ownership) of the underlying array, only when +# buffer.write == true. +# * Unreachable data can be deleted in the buffer's data, shifting the whole thing to the left +# to make room for more data, without replacing or resizing data. +# This can be done only if the buffer is not seekable -# Stateful string mutable struct GenericIOBuffer{T<:AbstractVector{UInt8}} <: IO - data::T # T should support: getindex, setindex!, length, copyto!, similar, and (optionally) resize! - reinit::Bool # if true, data needs to be re-allocated (after take!) + # T should support: getindex, setindex!, length, copyto!, similar, size and (optionally) resize! + data::T + + # The user can take control of `data` out of this struct. When that happens, instead of eagerly allocating + # a new array, we set `.reinit` to true, and then allocate a new one when needed. + # If reinit is true, the buffer is writable, and offset_or_compacted and size is zero. See `take!` + reinit::Bool readable::Bool writable::Bool - seekable::Bool # if not seekable, implementation is free to destroy (compact) past read data - append::Bool # add data at end instead of at pointer - size::Int # end pointer (and write pointer if append == true) + offset - maxsize::Int # fixed array size (typically pre-allocated) - ptr::Int # read (and maybe write) pointer + offset - offset::Int # offset of ptr and size from actual start of data and actual size - mark::Int # reset mark location for ptr (or <0 for no mark) - function GenericIOBuffer{T}(data::T, readable::Bool, writable::Bool, seekable::Bool, append::Bool, - maxsize::Integer) where T<:AbstractVector{UInt8} - require_one_based_indexing(data) - return new(data, false, readable, writable, seekable, append, length(data), maxsize, 1, 0, -1) - end + # If not seekable, implementation is free to destroy (compact) data before ptr, unless + # it can be recovered using the mark by using `reset`. + # If it IS seekable, the user may always recover any data in 1:size by seeking, + # so no data can be destroyed. 
+ # Non-seekable IOBuffers can only be constructed with `PipeBuffer`, which are writable, + # readable and append. + seekable::Bool + + # If true, write new data to the index size+1 instead of the index ptr. + append::Bool + + # Last index of `data` that has been written to. Data in size+1:end has not yet been used, + # and may contain arbitrary values. + # This value is always in 0 : lastindex(data) + size::Int + + # When the buffer is resized, or a new buffer allocated, this is the maximum size of the buffer. + # A new GenericIOBuffer may be constructed with an existing data larger than `maxsize`. + # When that happensm we must make sure to not have more than `maxsize` bytes in the buffer, + # else reallocating will lose data. So, never write to indices > `maxsize + get_offset(io)` + # This value is always in 0:typemax(Int). + maxsize::Int + + # Data is read/written from/to ptr, except in situations where append is true, in which case + # data is still read from ptr, but written to size+1. + # This value is always in offset + 1 : size+1 + ptr::Int + + # This field has two distinct meanings: + # If the value is positive, it encodes an offset of the start of the data in `data`. + # This is used if the buffer is instantiated from a Vector with non-zero memory offset. + # Then, the IOBuffer stores the underlying memory, and so the first data in the buffer + # is not at index 1. + # If the value is negative, then `-io.offset_or_compacted` gets the number of compacted + # bytes. That's the number of unused bytes deleted from a non-seekable stream to make space. + # We need to keep track of it in order to make `mark` and `position` etc work, that is, + # we need to know the virtual position of the mark even when an arbitrary number + # of unused bytes has been deleted due to compaction. + # Since compaction will move data in the buffer and thereby zero the offset, either the + # offset or the number of compacted bytes will be zero at any point, so both can be + # stored in one field. + # If offset: Value is always in 0:lastindex(data) + # If compacted: Value is in typemin(Int):0 + offset_or_compacted::Int + + # The mark is -1 if not set, else the zero-indexed virtual position of ptr in the buffer. + # Due to compaction and offset, this value is not an index into the buffer, but may be translated + # to an index. 
+ # This value is in -1:typemax(Int) + mark::Int + + # Unsafe constructor which does not do any checking + global function _new_generic_iobuffer( + ::Type{T}, + data::T, + readable::Bool, + writable::Bool, + seekable::Bool, + append::Bool, + maxsize::Int, + ) where T<:AbstractVector{UInt8} + len = Int(length(data))::Int + return new{T}(data, false, readable, writable, seekable, append, len, maxsize, 1, 0, -1) + end +end + +function GenericIOBuffer{T}( + data::T, + readable::Bool, + writable::Bool, + seekable::Bool, + append::Bool, + maxsize::Integer, + truncate::Bool, + ) where T<:AbstractVector{UInt8} + require_one_based_indexing(data) + mz = Int(maxsize)::Int + len = Int(length(data))::Int + if !truncate && mz < len + throw(ArgumentError("maxsize must not be smaller than data length")) + end + buf = _new_generic_iobuffer(T, data, readable, writable, seekable, append, mz) + if truncate + buf.size = buf.offset_or_compacted + end + buf end const IOBuffer = GenericIOBuffer{Memory{UInt8}} function GenericIOBuffer(data::T, readable::Bool, writable::Bool, seekable::Bool, append::Bool, - maxsize::Integer) where T<:AbstractVector{UInt8} - GenericIOBuffer{T}(data, readable, writable, seekable, append, maxsize) + maxsize::Integer, truncate::Bool) where T<:AbstractVector{UInt8} + GenericIOBuffer{T}(data, readable, writable, seekable, append, maxsize, truncate) end + +# For this method, we use the underlying Memory of the vector. Therefore, we need to set the, +# ptr and size accordingly, so the buffer only uses the part of the memory that the vector does. function GenericIOBuffer(data::Vector{UInt8}, readable::Bool, writable::Bool, seekable::Bool, append::Bool, - maxsize::Integer) + maxsize::Integer, truncate::Bool) ref = data.ref - buf = GenericIOBuffer(ref.mem, readable, writable, seekable, append, maxsize) + mem = ref.mem offset = memoryrefoffset(ref) - 1 - buf.ptr += offset - buf.size = length(data) + offset - buf.offset = offset + # The user may pass a vector of length <= maxsize, but where the underlying memory + # is larger than maxsize. Don't throw an error in that case. 
+ mz = Int(maxsize)::Int + if !truncate && mz < length(data) + throw(ArgumentError("maxsize must not be smaller than data length")) + end + buf = _new_generic_iobuffer(Memory{UInt8}, mem, readable, writable, seekable, append, mz) + buf.offset_or_compacted = offset + buf.ptr = offset + 1 + if truncate + buf.size = offset + else + buf.size = length(data) + offset + end return buf end +get_offset(io::GenericIOBuffer) = max(0, io.offset_or_compacted) +get_compacted(io::GenericIOBuffer) = max(0, -io.offset_or_compacted) + # allocate Vector{UInt8}s for IOBuffer storage that can efficiently become Strings StringMemory(n::Integer) = unsafe_wrap(Memory{UInt8}, _string_n(n)) StringVector(n::Integer) = wrap(Array, StringMemory(n)) @@ -111,17 +234,11 @@ function IOBuffer( truncate::Union{Bool,Nothing}=nothing, maxsize::Integer=typemax(Int), sizehint::Union{Integer,Nothing}=nothing) - if maxsize < 0 - throw(ArgumentError("negative maxsize")) - end if sizehint !== nothing sizehint!(data, sizehint) end flags = open_flags(read=read, write=write, append=append, truncate=truncate) - buf = GenericIOBuffer(data, flags.read, flags.write, true, flags.append, Int(maxsize)) - if flags.truncate - buf.size = buf.offset - end + buf = GenericIOBuffer(data, flags.read, flags.write, true, flags.append, maxsize, flags.truncate) return buf end @@ -131,17 +248,23 @@ function IOBuffer(; append::Union{Bool,Nothing}=nothing, truncate::Union{Bool,Nothing}=true, maxsize::Integer=typemax(Int), - sizehint::Union{Integer,Nothing}=nothing) - size = sizehint !== nothing ? Int(sizehint) : maxsize != typemax(Int) ? Int(maxsize) : 32 + sizehint::Union{Integer,Nothing}=nothing, + ) + mz = Int(maxsize)::Int + if mz < 0 + throw(ArgumentError("negative maxsize")) + end + size = if sizehint !== nothing + # Allow negative sizehint, just like `sizehint!` does + min(mz, max(0, Int(sizehint)::Int)) + else + min(mz, 32) + end flags = open_flags(read=read, write=write, append=append, truncate=truncate) - buf = IOBuffer( - StringMemory(size), - read=flags.read, - write=flags.write, - append=flags.append, - truncate=flags.truncate, - maxsize=maxsize) - fill!(buf.data, 0) + # A common usecase of IOBuffer is to incrementally construct strings. By using StringMemory + # as the default storage, we can turn the result into a string without copying. + buf = _new_generic_iobuffer(Memory{UInt8}, StringMemory(size), flags.read, flags.write, true, flags.append, mz) + buf.size = 0 return buf end @@ -158,21 +281,53 @@ If `data` is given, creates a `PipeBuffer` to operate on a data vector, optionally specifying a size beyond which the underlying `Array` may not be grown. """ PipeBuffer(data::AbstractVector{UInt8}=Memory{UInt8}(); maxsize::Int = typemax(Int)) = - GenericIOBuffer(data, true, true, false, true, maxsize) + GenericIOBuffer(data, true, true, false, true, maxsize, false) PipeBuffer(maxsize::Integer) = (x = PipeBuffer(StringMemory(maxsize), maxsize = maxsize); x.size = 0; x) +# Internal method where truncation IS supported +function _truncated_pipebuffer(data::AbstractVector{UInt8}=Memory{UInt8}(); maxsize::Int = typemax(Int)) + buf = PipeBuffer(data) + buf.size = get_offset(buf) + buf.maxsize = maxsize + buf +end + _similar_data(b::GenericIOBuffer, len::Int) = similar(b.data, len) _similar_data(b::IOBuffer, len::Int) = StringMemory(len) -function copy(b::GenericIOBuffer) - ret = typeof(b)(b.reinit ? _similar_data(b, 0) : b.writable ? 
- copyto!(_similar_data(b, length(b.data)), b.data) : b.data, - b.readable, b.writable, b.seekable, b.append, b.maxsize) - ret.size = b.size - ret.ptr = b.ptr - ret.mark = b.mark - ret.offset = b.offset - return ret +# Note: Copying may change the value of the position (and mark) for un-seekable streams. +# However, these values are not stable anyway due to compaction. + +function copy(b::GenericIOBuffer{T}) where T + if b.reinit + # If buffer is used up, allocate a new size-zero buffer + # Reinit implies writable, and that ptr, size, offset and mark are already the default values + return typeof(b)(_similar_data(b, 0), b.readable, b.writable, b.seekable, b.append, b.maxsize, false) + elseif b.writable + # Else, we just copy the reachable bytes. If buffer is seekable, all bytes + # after offset are reachable, since they can be seeked to + used_span = get_used_span(b) + compacted = first(used_span) - get_offset(b) - 1 + len = length(used_span) + data = copyto!(_similar_data(b, len), view(b.data, used_span)) + ret = typeof(b)(data, b.readable, b.writable, b.seekable, b.append, b.maxsize, false) + ret.size = len + # Copying data over implicitly compacts, and may add compaction + ret.offset_or_compacted = -get_compacted(b) - compacted + ret.ptr = b.ptr - first(used_span) + 1 + ret.mark = b.mark + return ret + else + # When the buffer is just readable, they can share the same data, so we just make + # a shallow copy of the IOBuffer struct. + # Use internal constructor because we want to allow b.maxsize to be larger than data, + # in case that is the case for `b`. + ret = _new_generic_iobuffer(T, b.data, b.readable, b.writable, b.seekable, b.append, b.maxsize) + ret.offset_or_compacted = b.offset_or_compacted + ret.ptr = b.ptr + ret.mark = b.mark + return ret + end end show(io::IO, b::GenericIOBuffer) = print(io, "IOBuffer(data=UInt8[...], ", @@ -180,9 +335,9 @@ show(io::IO, b::GenericIOBuffer) = print(io, "IOBuffer(data=UInt8[...], ", "writable=", b.writable, ", ", "seekable=", b.seekable, ", ", "append=", b.append, ", ", - "size=", b.size - b.offset, ", ", + "size=", b.size - get_offset(b), ", ", "maxsize=", b.maxsize == typemax(Int) ? 
"Inf" : b.maxsize, ", ", - "ptr=", b.ptr - b.offset, ", ", + "ptr=", b.ptr - get_offset(b), ", ", "mark=", b.mark, ")") @noinline function _throw_not_readable() @@ -192,7 +347,7 @@ end function unsafe_read(from::GenericIOBuffer, p::Ptr{UInt8}, nb::UInt) from.readable || _throw_not_readable() - avail = bytesavailable(from) + avail = bytesavailable(from) % UInt adv = min(avail, nb) unsafe_read!(p, from.data, from.ptr, adv) from.ptr += adv @@ -221,7 +376,45 @@ function unsafe_read!(dest::Ptr{UInt8}, src::DenseBytes, so::Integer, nbytes::UI nothing end -function peek(from::GenericIOBuffer, T::Union{Type{Int16},Type{UInt16},Type{Int32},Type{UInt32},Type{Int64},Type{UInt64},Type{Int128},Type{UInt128},Type{Float16},Type{Float32},Type{Float64}}) +const MultiByteBitNumberType = Union{ + Type{UInt16}, + Type{Int16}, + Type{UInt32}, + Type{Int32}, + Type{UInt64}, + Type{Int64}, + Type{UInt128}, + Type{Int128}, + Type{Float16}, + Type{Float32}, + Type{Float64}, +} + +function load_from_array(T::MultiByteBitNumberType, data::AbstractArray{UInt8}, from::Int) + x = if T <: AbstractFloat + uinttype(T)(0) + else + unsigned(T)(0) + end + for i in 0:sizeof(x)-1 + x |= typeof(x)(data[from + i]) << (8 * i) + end + reinterpret(T, ltoh(x)) +end + +function peek(from::GenericIOBuffer, T::MultiByteBitNumberType) + from.readable || _throw_not_readable() + avail = bytesavailable(from) + nb = sizeof(T) + if nb > avail + throw(EOFError()) + end + return load_from_array(T, from.data, from.ptr) +end + +# This method can use a pointer, since the underlying buffer is dense +# and memory backed +function peek(from::GenericIOBuffer{<:MutableDenseArrayType}, T::MultiByteBitNumberType) from.readable || _throw_not_readable() avail = bytesavailable(from) nb = sizeof(T) @@ -235,29 +428,12 @@ function peek(from::GenericIOBuffer, T::Union{Type{Int16},Type{UInt16},Type{Int3 return x end -function read(from::GenericIOBuffer, T::Union{Type{Int16},Type{UInt16},Type{Int32},Type{UInt32},Type{Int64},Type{UInt64},Type{Int128},Type{UInt128},Type{Float16},Type{Float32},Type{Float64}}) +function read(from::GenericIOBuffer, T::MultiByteBitNumberType) x = peek(from, T) from.ptr += sizeof(T) return x end -function read_sub(from::GenericIOBuffer, a::AbstractArray{T}, offs, nel) where T - require_one_based_indexing(a) - from.readable || _throw_not_readable() - if offs+nel-1 > length(a) || offs < 1 || nel < 0 - throw(BoundsError()) - end - if isa(a, MutableDenseArrayType{UInt8}) - nb = UInt(nel * sizeof(T)) - GC.@preserve a unsafe_read(from, pointer(a, offs), nb) - else - for i = offs:offs+nel-1 - a[i] = read(from, T) - end - end - return a -end - @inline function read(from::GenericIOBuffer, ::Type{UInt8}) from.readable || _throw_not_readable() ptr = from.ptr @@ -283,20 +459,35 @@ read(from::GenericIOBuffer, ::Type{Ptr{T}}) where {T} = convert(Ptr{T}, read(fro isreadable(io::GenericIOBuffer) = io.readable iswritable(io::GenericIOBuffer) = io.writable -filesize(io::GenericIOBuffer) = (io.seekable ? io.size - io.offset : bytesavailable(io)) +# Number of bytes that can be read from the buffer, if you seek to the start first. +filesize(io::GenericIOBuffer) = (io.seekable ? io.size - get_offset(io) : bytesavailable(io)) + +# Number of bytes that can be read from the buffer. bytesavailable(io::GenericIOBuffer) = io.size - io.ptr + 1 -position(io::GenericIOBuffer) = io.ptr - io.offset - 1 + +# TODO: Document that position for an unmarked and unseekable stream is invalid (and make it error?) 
+function position(io::GenericIOBuffer) + # Position is zero-indexed, but ptr is one-indexed, hence the -1 + io.ptr - io.offset_or_compacted - 1 +end function skip(io::GenericIOBuffer, n::Integer) skip(io, clamp(n, Int)) end + function skip(io::GenericIOBuffer, n::Int) + # In both cases, the result will never go to before the first position, + # nor beyond the last position, and will not throw an error unless the stream + # is not seekable and try to skip a negative number of bytes. if signbit(n) + # Skipping a negative number of bytes is equivalent to seeking backwards. seekto = clamp(widen(position(io)) + widen(n), Int) seek(io, seekto) # Does error checking else - n_max = io.size + 1 - io.ptr - io.ptr += min(n, n_max) + # Don't use seek in order to allow a non-seekable IO to still skip bytes. + # Handle overflow. + maxptr = io.size + 1 + io.ptr = n > maxptr || io.ptr - n > maxptr ? maxptr : io.ptr + n io end end @@ -304,16 +495,30 @@ end function seek(io::GenericIOBuffer, n::Integer) seek(io, clamp(n, Int)) end + +function translate_seek_position(io::GenericIOBuffer, n::Int) + # If there is an offset (the field F is positive), then there are F unused bytes at the beginning + # of the data, and we need to seek to n + F + 1. (Also compensate for `seek` being zero- + # indexed) + + # If bytes has been compacted (field F is negative), then F bytes has been deleted from + # the buffer, and a virtual position n means a position n + F in the data. + # Remember that F is negative, so n + F is subtracting from n. So we also end up with + # n + F + 1. + clamp(widen(n) + widen(io.offset_or_compacted) + widen(1), Int) +end + function seek(io::GenericIOBuffer, n::Int) if !io.seekable ismarked(io) || throw(ArgumentError("seek failed, IOBuffer is not seekable and is not marked")) n == io.mark || throw(ArgumentError("seek failed, IOBuffer is not seekable and n != mark")) end + # TODO: REPL.jl relies on the fact that this does not throw (by seeking past the beginning or end # of an GenericIOBuffer), so that would need to be fixed in order to throw an error here - #(n < 0 || n > io.size - io.offset) && throw(ArgumentError("Attempted to seek outside IOBuffer boundaries.")) - #io.ptr = n + io.offset + 1 - io.ptr = clamp(n, 0, io.size - io.offset) + io.offset + 1 + max_ptr = io.size + 1 + min_ptr = get_offset(io) + 1 + io.ptr = clamp(translate_seek_position(io, n), min_ptr, max_ptr) return io end @@ -322,113 +527,163 @@ function seekend(io::GenericIOBuffer) return io end -# choose a resize strategy based on whether `resize!` is defined: -# for a Vector, we use `resize!`, but for most other types, -# this calls `similar`+copy -function _resize!(io::GenericIOBuffer, sz::Int) - a = io.data - offset = io.offset - if applicable(resize!, a, sz) - if offset != 0 - size = io.size - size > offset && copyto!(a, 1, a, offset + 1, min(sz, size - offset)) - io.ptr -= offset - io.size -= offset - io.offset = 0 - end - resize!(a, sz) +# Resize the io's data to `new_size`, which must not be > io.maxsize. +# Use `resize!` if the data supports it, else reallocate a new one and +# copy the old data over. +# If not `exact` and resizing is not supported, overallocate in order to +# prevent excessive resizing. +function _resize!(io::GenericIOBuffer, new_size::Int, exact::Bool) + old_data = io.data + if applicable(resize!, old_data, new_size) + resize!(old_data, new_size) else - size = io.size - if size >= sz && sz != 0 - b = a - else - b = _similar_data(io, sz == 0 ? 
0 : max(overallocation(size - io.offset), sz)) - end - size > offset && copyto!(b, 1, a, offset + 1, min(sz, size - offset)) - io.data = b - io.ptr -= offset - io.size -= offset - io.offset = 0 + new_size = exact ? new_size : min(io.maxsize, overallocation(new_size)) + used_span = get_used_span(io) + deleted = first(used_span) - 1 + compacted = deleted - get_offset(io) + new_data = _similar_data(io, new_size) + io.data = new_data + iszero(new_size) && return io + len_used = length(used_span) + iszero(len_used) || copyto!(new_data, 1, old_data, first(used_span), len_used) + # Copying will implicitly compact, and so compaction must be updated + io.offset_or_compacted = -get_compacted(io) - compacted + io.ptr -= deleted + io.size = len_used end return io end function truncate(io::GenericIOBuffer, n::Integer) io.writable || throw(ArgumentError("truncate failed, IOBuffer is not writeable")) + # Non-seekable buffers can only be constructed with `PipeBuffer`, which is explicitly + # documented to not be truncatable. io.seekable || throw(ArgumentError("truncate failed, IOBuffer is not seekable")) n < 0 && throw(ArgumentError("truncate failed, n bytes must be ≥ 0, got $n")) n > io.maxsize && throw(ArgumentError("truncate failed, $(n) bytes is exceeds IOBuffer maxsize $(io.maxsize)")) - n = Int(n) + n = Int(n)::Int + offset = get_offset(io) + current_size = io.size - offset if io.reinit - io.data = _similar_data(io, n) + # If reinit, we don't need to truncate anything but just reinitializes + # the buffer with zeros. Mark, ptr and offset has already been reset. + io.data = fill!(_similar_data(io, n), 0x00) io.reinit = false - elseif n > length(io.data) + io.offset - _resize!(io, n) - end - ismarked(io) && io.mark > n && unmark(io) - n += io.offset - io.data[io.size+1:n] .= 0 - io.size = n - io.ptr = min(io.ptr, n+1) + io.size = n + elseif n < current_size + # Else, if we need to shrink the iobuffer, we simply change the pointers without + # actually shrinking the underlying storage, or copying data. + + # Clear the mark if it points to data that has now been deleted. + if translate_seek_position(io, io.mark) > n+offset + io.mark = -1 + end + io.size = n + offset + io.ptr = min(io.ptr, n + offset + 1) + elseif n > current_size + if n + offset > io.maxsize + compact!(io) + end + _resize!(io, n + get_offset(io), false) + fill!(view(io.data, io.size + 1:min(length(io.data), n + get_offset(io))), 0x00) + io.size = min(length(io.data), n + get_offset(io)) + end return io end -function compact(io::GenericIOBuffer) - io.writable || throw(ArgumentError("compact failed, IOBuffer is not writeable")) - io.seekable && throw(ArgumentError("compact failed, IOBuffer is seekable")) - io.reinit && return - local ptr::Int, bytes_to_move::Int - if ismarked(io) && io.mark < position(io) - io.mark == 0 && return - ptr = io.mark + io.offset - bytes_to_move = bytesavailable(io) + (io.ptr - ptr) - else - ptr = io.ptr - bytes_to_move = bytesavailable(io) +# Ensure that the buffer has room for at least `nshort` more bytes, except when +# doing that would exceed maxsize. +@inline ensureroom(io::GenericIOBuffer, nshort::Int) = ensureroom(io, UInt(nshort)) + +@inline function ensureroom(io::GenericIOBuffer, nshort::UInt) + # If the IO is not writable, we call the slow path only to error. + # If reinit, the data has been handed out to the user, and the IOBuffer + # no longer controls it, so we need to allocate a new one. 
+ if !io.writable || io.reinit + return ensureroom_reallocate(io, nshort) + end + # The fast path here usually checks there is already room, then does nothing. + # When append is true, new data is added after io.size, not io.ptr + existing_space = min(lastindex(io.data), io.maxsize + get_offset(io)) - (io.append ? io.size : io.ptr - 1) + if existing_space < nshort % Int + # Outline this function to make it more likely that ensureroom inlines itself + return ensureroom_slowpath(io, nshort, existing_space) end - copyto!(io.data, 1, io.data, ptr, bytes_to_move) - io.size -= ptr - 1 - io.ptr -= ptr - 1 - io.offset = 0 - return + return io end -@noinline function ensureroom_slowpath(io::GenericIOBuffer, nshort::UInt) +# Throw error (placed in this function to outline it) or reinit the buffer +@noinline function ensureroom_reallocate(io::GenericIOBuffer, nshort::UInt) io.writable || throw(ArgumentError("ensureroom failed, IOBuffer is not writeable")) - if io.reinit - io.data = _similar_data(io, nshort % Int) - io.reinit = false - end - if !io.seekable - if !ismarked(io) && io.ptr > io.offset+1 && io.size <= io.ptr - 1 - io.ptr = 1 - io.size = 0 - io.offset = 0 - else - datastart = (ismarked(io) ? io.mark : io.ptr - io.offset) - if (io.size-io.offset+nshort > io.maxsize) || - (datastart > 4096 && datastart > io.size - io.ptr) || - (datastart > 262144) - # apply somewhat arbitrary heuristics to decide when to destroy - # old, read data to make more room for new data - compact(io) - end + io.data = _similar_data(io, min(io.maxsize, nshort % Int)) + io.reinit = false + io.offset_or_compacted = -get_compacted(io) + return io +end + +# Here, we already know there is not enough room at the end of the io's data. +@noinline function ensureroom_slowpath(io::GenericIOBuffer, nshort::UInt, available_bytes::Int) + reclaimable_bytes = first(get_used_span(io)) - 1 + # Avoid resizing and instead compact the buffer, only if we gain enough bytes from + # doing so (at least 32 bytes and 1/8th of the data length). Also, if we would have + # to resize anyway, there would be no point in compacting, so also check that. + if ( + reclaimable_bytes ≥ 32 && + reclaimable_bytes ≥ length(io.data) >>> 3 && + (reclaimable_bytes + available_bytes) % UInt ≥ nshort + ) + compact!(io) + return io + end + + desired_size = length(io.data) + Int(nshort) - available_bytes + if desired_size > io.maxsize + # If we can't fit all the requested data in the new buffer, we need to + # fit as much as possible, so we must compact + if !iszero(reclaimable_bytes) + desired_size -= compact!(io) + end + # Max out the buffer size if we want more than the buffer size + if length(io.data) < io.maxsize + _resize!(io, io.maxsize, true) end + else + # Else, we request only the requested size, but set `exact` to `false`, + # in order to overallocate to avoid growing the buffer by too little + _resize!(io, desired_size, false) end - return + + return io end -@inline ensureroom(io::GenericIOBuffer, nshort::Int) = ensureroom(io, UInt(nshort)) -@inline function ensureroom(io::GenericIOBuffer, nshort::UInt) - if !io.writable || (!io.seekable && io.ptr > io.offset+1) || io.reinit - ensureroom_slowpath(io, nshort) - end - n = min((nshort % Int) + (io.append ? 
io.size : io.ptr-1) - io.offset, io.maxsize) - l = length(io.data) + io.offset - if n > l - _resize!(io, Int(n)) +# Get the indices in data which cannot be deleted +function get_used_span(io::IOBuffer) + # A seekable buffer can recover data before ptr + return if io.seekable + get_offset(io) + 1 : io.size + # If non-seekable, the mark can be used to recover data before ptr, + # so data at the mark and after must also be saved + elseif io.mark > -1 + min(io.ptr, translate_seek_position(io, io.mark)) : io.size + else + io.ptr : io.size end - return io +end + +# Delete any offset, and also compact data if buffer is not seekable. +# Return the number of bytes deleted +function compact!(io::GenericIOBuffer)::Int + offset = get_offset(io) + used_span = get_used_span(io) + deleted = first(used_span) - 1 + compacted = deleted - offset + iszero(deleted) && return 0 + data = io.data + copyto!(data, 1, data, deleted + 1, length(used_span)) + io.offset_or_compacted = -get_compacted(io) - compacted + io.ptr -= deleted + io.size -= deleted + return deleted end eof(io::GenericIOBuffer) = (io.ptr - 1 >= io.size) @@ -439,17 +694,17 @@ function closewrite(io::GenericIOBuffer) end @noinline function close(io::GenericIOBuffer{T}) where T + if io.writable && !io.reinit + _resize!(io, 0, true) + end io.readable = false io.writable = false io.seekable = false io.size = 0 - io.offset = 0 io.maxsize = 0 io.ptr = 1 io.mark = -1 - if io.writable && !io.reinit - io.data = _resize!(io, 0) - end + io.offset_or_compacted = -get_compacted(io) nothing end @@ -472,31 +727,42 @@ julia> String(take!(io)) ``` """ function take!(io::GenericIOBuffer) - ismarked(io) && unmark(io) + io.mark = -1 if io.seekable - nbytes = io.size - io.offset - data = copyto!(StringVector(nbytes), 1, io.data, io.offset + 1, nbytes) + # If the buffer is seekable, then the previously consumed bytes from ptr+1:size + # must still be output, as they are not truly gone. + # Hence, we output all bytes from 1:io.size + offset = get_offset(io) + nbytes = io.size - offset + data = copyto!(StringVector(nbytes), 1, io.data, offset + 1, nbytes) else + # Else, if not seekable, bytes from 1:ptr-1 are truly gone and should not + # be output. Hence, we output `bytesavailable`, which is ptr:size nbytes = bytesavailable(io) data = read!(io, StringVector(nbytes)) end if io.writable + io.reinit = true io.ptr = 1 io.size = 0 - io.offset = 0 + io.offset_or_compacted = 0 end return data end + +# This method is specialized because we know the underlying data is a Memory, so we can +# e.g. wrap directly in an array without copying. Otherwise the logic is the same as +# the generic method function take!(io::IOBuffer) - ismarked(io) && unmark(io) + io.mark = -1 if io.seekable nbytes = filesize(io) if nbytes == 0 || io.reinit data = StringVector(0) elseif io.writable - data = wrap(Array, memoryref(io.data, io.offset + 1), nbytes) + data = wrap(Array, memoryref(io.data, get_offset(io) + 1), nbytes) else - data = copyto!(StringVector(nbytes), 1, io.data, io.offset + 1, nbytes) + data = copyto!(StringVector(nbytes), 1, io.data, get_offset(io) + 1, nbytes) end else nbytes = bytesavailable(io) @@ -512,7 +778,7 @@ function take!(io::IOBuffer) io.reinit = true io.ptr = 1 io.size = 0 - io.offset = 0 + io.offset_or_compacted = 0 end return data end @@ -529,46 +795,79 @@ state. This should only be used internally for performance-critical It might save an allocation compared to `take!` (if the compiler elides the Array allocation), as well as omits some checks. 
""" -_unsafe_take!(io::IOBuffer) = - wrap(Array, io.size == io.offset ? - memoryref(Memory{UInt8}()) : - memoryref(io.data, io.offset + 1), - io.size - io.offset) +function _unsafe_take!(io::IOBuffer) + offset = get_offset(io) + mem = if io.size == offset + memoryref(Memory{UInt8}()) + else + memoryref(io.data, offset + 1) + end + wrap(Array, mem, io.size - offset) +end function write(to::IO, from::GenericIOBuffer) - written::Int = bytesavailable(from) + # This would cause an infinite loop, as it should read until the end, but more + # data is being written into it continuously. if to === from - from.ptr = from.size + 1 + throw(ArgumentError("Writing all content fron an IOBuffer into itself in invalid")) else - written = GC.@preserve from unsafe_write(to, pointer(from.data, from.ptr), UInt(written)) - from.ptr += written + available = bytesavailable(from) + written = GC.@preserve from unsafe_write(to, pointer(from.data, from.ptr), UInt(available)) + from.ptr = from.size + 1 end return written end function unsafe_write(to::GenericIOBuffer, p::Ptr{UInt8}, nb::UInt) ensureroom(to, nb) - ptr = (to.append ? to.size+1 : to.ptr) - written = Int(min(nb, Int(length(to.data))::Int - ptr + 1)) - towrite = written - d = to.data - while towrite > 0 - @inbounds d[ptr] = unsafe_load(p) - ptr += 1 + size = to.size + append = to.append + ptr = append ? size+1 : to.ptr + data = to.data + to_write = min(nb, (min(Int(length(data))::Int, to.maxsize + get_offset(to)) - ptr + 1) % UInt) % Int + # Dispatch based on the type of data, to possibly allow using memcpy + _unsafe_write(data, p, ptr, to_write % UInt) + # Update to.size only if the ptr has advanced to higher than + # the previous size. Otherwise, we just overwrote existing data + to.size = max(size, ptr + to_write - 1) + # If to.append, we only update size, not ptr. + if !append + to.ptr = ptr + to_write + end + return to_write +end + +@inline function _unsafe_write(data::AbstractVector{UInt8}, p::Ptr{UInt8}, from::Int, nb::UInt) + for i in 0:nb-1 + data[from + i] = unsafe_load(p) p += 1 - towrite -= 1 end - to.size = max(to.size, ptr - 1) - if !to.append - to.ptr += written +end + +@inline function _unsafe_write(data::MutableDenseArrayType{UInt8}, p::Ptr{UInt8}, from::Int, nb::UInt) + # Calling `unsafe_copyto!` is very efficient for large arrays, but has some overhead + # for small (< 5 bytes) arrays. + # Since a common use case of IOBuffer is to construct strings incrementally, often + # one char at a time, it's crucial to be fast in the case of small arrays. + # This optimization only gives a minor 10% speed boost in the best case. + if nb < 5 + @inbounds for i in UInt(1):nb + data[from + (i % Int) - 1] = unsafe_load(p, i) + end + else + GC.@preserve data begin + ptr = Ptr{UInt8}(pointer(data, from))::Ptr{UInt8} + @inline unsafe_copyto!(ptr, p, nb) + end end - return written end @inline function write(to::GenericIOBuffer, a::UInt8) ensureroom(to, UInt(1)) ptr = (to.append ? to.size+1 : to.ptr) - if ptr > to.maxsize + # We have just ensured there is room for 1 byte, EXCEPT if we were to exceed + # maxsize. So, we just need to check that here. 
+ if ptr > to.maxsize + get_offset(to) return 0 else to.data[ptr] = a @@ -581,31 +880,26 @@ end end readbytes!(io::GenericIOBuffer, b::MutableDenseArrayType{UInt8}, nb=length(b)) = readbytes!(io, b, Int(nb)) + function readbytes!(io::GenericIOBuffer, b::MutableDenseArrayType{UInt8}, nb::Int) - nr = min(nb, bytesavailable(io)) - if length(b) < nr - resize!(b, nr) + io.readable || _throw_not_readable() + to_read = min(nb, bytesavailable(io)) + if length(b) < to_read + resize!(b, to_read) end - read_sub(io, b, 1, nr) - return nr + checkbounds(b, 1:to_read) + GC.@preserve b unsafe_read(io, pointer(b), to_read) + to_read end read(io::GenericIOBuffer) = read!(io, StringVector(bytesavailable(io))) + +# For IO buffers, all the data is immediately available. readavailable(io::GenericIOBuffer) = read(io) -read(io::GenericIOBuffer, nb::Integer) = read!(io, StringVector(min(nb, bytesavailable(io)))) -function occursin(delim::UInt8, buf::IOBuffer) - p = pointer(buf.data, buf.ptr) - q = GC.@preserve buf ccall(:memchr, Ptr{UInt8}, (Ptr{UInt8}, Int32, Csize_t), p, delim, bytesavailable(buf)) - return q != C_NULL -end +read(io::GenericIOBuffer, nb::Integer) = read!(io, StringVector(min(nb, bytesavailable(io)))) function occursin(delim::UInt8, buf::GenericIOBuffer) - data = buf.data - for i = buf.ptr:buf.size - @inbounds b = data[i] - b == delim && return true - end - return false + return in(delim, view(buf.data, buf.ptr:buf.size)) end function copyuntil(out::IO, io::GenericIOBuffer, delim::UInt8; keep::Bool=false) @@ -622,21 +916,45 @@ function copyuntil(out::IO, io::GenericIOBuffer, delim::UInt8; keep::Bool=false) end function copyline(out::GenericIOBuffer, s::IO; keep::Bool=false) - copyuntil(out, s, 0x0a, keep=true) - line = out.data - i = out.size # XXX: this is only correct for appended data. if the data was inserted, only ptr should change - if keep || i == out.offset || line[i] != 0x0a + # If the data is copied into the middle of the buffer of `out` instead of appended to the end, + # and !keep, and the line copied ends with \r\n, then the copyuntil (even if keep=false) + # will overwrite one too many bytes with the new \r byte. + # Work around this by making a new temporary buffer. + # Could perhaps be done better + if !out.append && out.ptr < out.size + 1 + newbuf = IOBuffer() + copyuntil(newbuf, s, 0x0a, keep=true) + v = take!(newbuf) + # Remove \r\n or \n if present + if !keep + if length(v) > 1 && last(v) == UInt8('\n') + pop!(v) + end + if length(v) > 1 && last(v) == UInt8('\r') + pop!(v) + end + end + write(out, v) return out - elseif i < 2 || line[i-1] != 0x0d - i -= 1 else - i -= 2 - end - out.size = i - if !out.append - out.ptr = i+1 + # Else, we can just copy the data directly into the buffer, and then + # subtract the last one or two bytes depending on `keep`. + copyuntil(out, s, 0x0a, keep=true) + line = out.data + i = out.size + if keep || i == out.offset_or_compacted || line[i] != 0x0a + return out + elseif i < 2 || line[i-1] != 0x0d + i -= 1 + else + i -= 2 + end + out.size = i + if !out.append + out.ptr = i+1 + end + return out end - return out end function _copyline(out::IO, io::GenericIOBuffer; keep::Bool=false) @@ -644,6 +962,7 @@ function _copyline(out::IO, io::GenericIOBuffer; keep::Bool=false) # note: findfirst + copyto! is much faster than a single loop # except for nout ≲ 20. A single loop is 2x faster for nout=5. 
nout = nread = something(findfirst(==(0x0a), data), length(data))::Int + # Remove the 0x0a (newline) if not keep, and also remove the 0x0d (\r) if it is there if !keep && nout > 0 && data[nout] == 0x0a nout -= 1 nout > 0 && data[nout] == 0x0d && (nout -= 1) @@ -652,6 +971,7 @@ function _copyline(out::IO, io::GenericIOBuffer; keep::Bool=false) io.ptr += nread return out end + copyline(out::IO, io::GenericIOBuffer; keep::Bool=false) = _copyline(out, io; keep) copyline(out::GenericIOBuffer, io::GenericIOBuffer; keep::Bool=false) = _copyline(out, io; keep) diff --git a/base/stream.jl b/base/stream.jl index 33d884018d5ad..5732a62c2153b 100644 --- a/base/stream.jl +++ b/base/stream.jl @@ -615,9 +615,9 @@ end ## BUFFER ## ## Allocate space in buffer (for immediate use) function alloc_request(buffer::IOBuffer, recommended_size::UInt) - ensureroom(buffer, Int(recommended_size)) + ensureroom(buffer, recommended_size) ptr = buffer.append ? buffer.size + 1 : buffer.ptr - nb = min(length(buffer.data)-buffer.offset, buffer.maxsize) + buffer.offset - ptr + 1 + nb = min(length(buffer.data), buffer.maxsize + get_offset(buffer)) - ptr + 1 return (Ptr{Cvoid}(pointer(buffer.data, ptr)), nb) end @@ -942,8 +942,7 @@ function readbytes!(s::LibuvStream, a::Vector{UInt8}, nb::Int) nread = readbytes!(sbuf, a, nb) else initsize = length(a) - newbuf = PipeBuffer(a, maxsize=nb) - newbuf.size = newbuf.offset # reset the write pointer to the beginning + newbuf = _truncated_pipebuffer(a; maxsize=nb) nread = try s.buffer = newbuf write(newbuf, sbuf) @@ -990,8 +989,7 @@ function unsafe_read(s::LibuvStream, p::Ptr{UInt8}, nb::UInt) if bytesavailable(sbuf) >= nb unsafe_read(sbuf, p, nb) else - newbuf = PipeBuffer(unsafe_wrap(Array, p, nb), maxsize=Int(nb)) - newbuf.size = newbuf.offset # reset the write pointer to the beginning + newbuf = _truncated_pipebuffer(unsafe_wrap(Array, p, nb); maxsize=Int(nb)) try s.buffer = newbuf write(newbuf, sbuf) @@ -1599,8 +1597,7 @@ function readbytes!(s::BufferStream, a::Vector{UInt8}, nb::Int) nread = readbytes!(sbuf, a, nb) else initsize = length(a) - newbuf = PipeBuffer(a, maxsize=nb) - newbuf.size = newbuf.offset # reset the write pointer to the beginning + newbuf = _truncated_pipebuffer(a; maxsize=nb) nread = try s.buffer = newbuf write(newbuf, sbuf) diff --git a/test/iobuffer.jl b/test/iobuffer.jl index a9d58f4b7871e..7ed5c1f5b3ed6 100644 --- a/test/iobuffer.jl +++ b/test/iobuffer.jl @@ -6,6 +6,267 @@ ioslength(io::IOBuffer) = (io.seekable ? io.size : bytesavailable(io)) bufcontents(io::Base.GenericIOBuffer) = unsafe_string(pointer(io.data), io.size) +# Julia Base's internals uses the PipeBuffer, which is an unseekable IOBuffer. +# There are no public constructors to build such a buffer, but we need to test +# it anyway. +# I make a new method here such that if the implementation of Base.PipeBuffer +# changes, these tests will still work. 
+new_unseekable_buffer() = Base.GenericIOBuffer(Memory{UInt8}(), true, true, false, true, typemax(Int), false) + +@testset "Basic tests" begin + @test_throws ArgumentError IOBuffer(;maxsize=-1) + @test_throws ArgumentError IOBuffer([0x01]; maxsize=-1) + + # Test that sizehint actually will sizehint the vector, + v = UInt8[] + buf = IOBuffer(v; sizehint=64, write=true) + @test length(v.ref.mem) >= 64 + + # Test that you can't make an IOBuffer with a maxsize + # smaller than the size you actually give it + @test_throws ArgumentError IOBuffer([0x01, 0x02]; maxsize=1) + @test_throws ArgumentError IOBuffer(b"abcdefghij"; maxsize=8) +end + +@testset "Basic reading" begin + # Readavailable is equal to read + buf = IOBuffer("abcdef") + @test read(buf, UInt8) == UInt8('a') + @test bytesavailable(buf) == 5 + @test readavailable(buf) == b"bcdef" + + # Reading less than all the bytes + buf = IOBuffer(b"ABCDEFGHIJ") + @test read(buf, 1) == b"A" + @test read(buf, 3) == b"BCD" + + # Reading more bytes than available will not error + @test read(buf, 100) == b"EFGHIJ" + + # Passing truncate=false will still truncate an IOBuffer with no + # initialized data + @test isempty(read(IOBuffer(;sizehint=34, truncate=false))) +end + +@testset "Byte occursin GenericIOBuffer" begin + buf = IOBuffer(@view(collect(0x1f:0x3d)[1:end])) + @test occursin(0x1f, buf) + @test occursin(0x3d, buf) + @test occursin(0x2a, buf) + + @test !occursin(0xff, buf) + @test !occursin(0x00, buf) + + v = Vector{UInt8}("bcdefg") + pushfirst!(v, UInt8('a')) + buf = IOBuffer(v) + @test occursin(UInt8('a'), buf) + read(buf, UInt8) + @test !occursin(UInt8('a'), buf) + @test !occursin(0x00, buf) + + buf = IOBuffer("abcdefg") + @test occursin(UInt8('a'), buf) +end + +@testset "Non-Memory backed IOBuffer" begin + buf = IOBuffer(Test.GenericArray(collect(0x02:0x0d)), read=true) + @test read(buf) == 0x02:0x0d + + buf = IOBuffer(Test.GenericArray(collect(0x02:0x0d)), read=true) + @test read(buf, UInt8) == 0x02 + @test read(buf) == 0x03:0x0d + + v = view(collect(UInt8('a'):UInt8('z')), 4:10) + buf = IOBuffer(v, read=true, write=true) + @test read(buf, UInt8) == UInt8('d') + @test read(buf) == UInt8('e'):UInt8('j') + seekstart(buf) + @test read(buf, UInt8) == UInt8('d') + write(buf, UInt8('x')) + write(buf, "ABC") + seekstart(buf) + @test read(buf) == b"dxABCij" +end + +@testset "Copying" begin + # Test offset is preserved when copying + v = UInt8[] + pushfirst!(v, UInt8('a'), UInt8('b'), UInt8('c')) + buf = IOBuffer(v; write=true, read=true, append=true) + write(buf, "def") + read(buf, UInt16) + buf2 = copy(buf) + @test String(read(buf)) == "cdef" + @test String(read(buf2)) == "cdef" + + # Test copying with non-Memory backed GenericIOBuffer + buf = IOBuffer(Test.GenericArray(collect(0x02:0x0d)), read=true) + @test read(buf, UInt16) == 0x0302 + buf2 = copy(buf) + @test isreadable(buf2) + @test !iswritable(buf2) + @test read(buf2) == 0x04:0x0d + + # Test copying a non-seekable stream + buf = new_unseekable_buffer() + write(buf, "abcdef") + read(buf, UInt16) + mark(buf) + read(buf, UInt16) + buf2 = copy(buf) + @test read(buf2) == b"ef" + reset(buf2) + @test read(buf2) == b"cdef" + + # Test copying seekable stream + buf = IOBuffer() + write(buf, "abcdef") + seekstart(buf) + read(buf) + mark(buf) + buf2 = copy(buf) + @test reset(buf2) == 6 + seekstart(buf2) + @test read(buf2) == b"abcdef" + + # Test copying a taken buffer + buf = IOBuffer() + write(buf, "abcdef") + take!(buf) + buf2 = copy(buf) + @test eof(buf2) + seekstart(buf2) + @test eof(buf2) +end + 
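# A minimal sketch of the mark/reset behaviour the "Copying" tests above rely on:
# for an unseekable buffer, a mark keeps already-consumed bytes recoverable (they
# may not be compacted away), so `reset` can rewind to them. This uses the
# `new_unseekable_buffer` helper defined at the top of this file and is only an
# illustration, not part of the test suite.
buf = new_unseekable_buffer()
write(buf, "abcdef")
read(buf, UInt16)            # consume "ab"; these bytes may now be discarded
mark(buf)                    # from here on, consumed data must stay recoverable
read(buf, UInt16)            # consume "cd"
reset(buf)                   # rewind to the mark
@assert read(buf) == b"cdef" # the marked region is still available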
+@testset "copyuntil" begin + a = IOBuffer(b"abcdeajdgabdfg") + b = IOBuffer(collect(b"xx"); write=true, read=true, append=true) + copyuntil(b, a, UInt8('a')) + @test read(b) == b"xx" + seekstart(b) + copyuntil(b, a, UInt8('a'); keep=true) + @test read(b) == b"xxbcdea" + seekstart(b) + copyuntil(b, a, UInt('w')) + @test read(b) == b"xxbcdeajdgabdfg" +end + +@testset "copyline" begin + a = IOBuffer(b"abcde\nabc\r\nabc\n\r\nac") + b = IOBuffer() + copyline(b, a) + @test take!(copy(b)) == b"abcde" + copyline(b, a) + @test take!(copy(b)) == b"abcdeabc" + copyline(b, a; keep=true) + @test take!(copy(b)) == b"abcdeabcabc\n" + copyline(b, a; keep=false) + @test take!(copy(b)) == b"abcdeabcabc\n" + copyline(b, a; keep=false) + @test take!(copy(b)) == b"abcdeabcabc\nac" + + # Test a current bug in copyline + a = Base.SecretBuffer("abcde\r\n") + b = IOBuffer() + write(b, "xxxxxxxxxx") + seek(b, 2) + copyline(b, a; keep=false) + Base.shred!(a) + @test take!(b) == b"xxabcdexxx" +end + +@testset "take!" begin + a = IOBuffer("abc") + @test take!(a) == b"abc" + + v = UInt8[] + pushfirst!(v, 0x0a) + buf = IOBuffer(v; write=true, append=true) + write(buf, "def") + @test take!(buf) == b"\ndef" + + v = view(collect(b"abcdefghij"), 3:9) + buf = IOBuffer(v; write=true, read=true) + read(buf, UInt8) + write(buf, "xxy") + @test take!(buf) == b"cxxyghi" + + v = view(collect(b"abcdefghij"), 3:9) + buf = IOBuffer(v; write=true, read=true) + + # Take on unseekable buffer does not return used bytes. + buf = new_unseekable_buffer() + write(buf, 0x61) + write(buf, "bcd") + @test read(buf, UInt8) == 0x61 + @test take!(buf) == b"bcd" + + # Compaction is reset after take! + buf = Base.GenericIOBuffer(Memory{UInt8}(), true, true, false, true, 100, false) + write(buf, rand(UInt8, 50)) + read(buf, 40) + write(buf, rand(UInt8, 100)) + mark(buf) + read(buf, 70) + @test position(buf) == 110 + @test length(buf.data) <= 100 + v = take!(buf) + write(buf, 0xf1) + @test position(buf) == 0 + @test !ismarked(buf) +end + +@testset "maxsize is preserved" begin + # After take! 
+ buf = IOBuffer(; maxsize=3) + print(buf, "abcdef") + @test take!(buf) == b"abc" + print(buf, "abcdef") + @test take!(buf) == b"abc" + + # After resizing + buf = IOBuffer(;maxsize=128) + write(buf, collect(0x00:0x10)) + write(buf, collect(0x11:0x30)) + write(buf, collect(0x31:0x98)) + write(buf, collect(0x99:0xff)) + seekstart(buf) + @test read(buf) == 0x00:UInt8(127) + + # Edge case: When passing a Vector, does not error if the + # underlying mem is larger than maxsize + v = pushfirst!([0x01], 0x02) + io = IOBuffer(v; maxsize=2) + @test read(io) == b"\x02\x01" + + # Buffer will not write past maxsize, even if given a larger buffer + # And also even if the data is taken and replaced + v = sizehint!(UInt8[], 128) + io = IOBuffer(v; write=true, read=true, maxsize=12) + write(io, 0x01:0x0f) + seekstart(io) + @test read(io) == 0x01:0x0c + @test write(io, 0x01) == 0 + @test write(io, "abc") == 0 + @test take!(io).ref.mem === v.ref.mem + write(io, 0x01:0x0f) + @test take!(io) == 0x01:0x0c +end + +@testset "Write to self" begin + buffer = IOBuffer() + @test_throws ArgumentError write(buffer, buffer) + + # Write to another IOBuffer with limited size + to = IOBuffer(;maxsize=4) + from = IOBuffer(collect(b"abcdefghi")) + write(to, from) + @test String(take!(to)) == "abcd" + @test eof(from) +end + @testset "Read/write empty IOBuffer" begin io = IOBuffer() @test eof(io) @@ -33,7 +294,7 @@ bufcontents(io::Base.GenericIOBuffer) = unsafe_string(pointer(io.data), io.size) @test position(io) == 0 truncate(io, 10) @test position(io) == 0 - @test all(io.data .== 0) + @test all(view(io.data, 1:10) .== 0) @test write(io, Int16[1, 2, 3, 4, 5, 6]) === 12 seek(io, 2) truncate(io, 10) @@ -67,22 +328,89 @@ end @test_throws ArgumentError write(io,UInt8[0]) @test String(take!(io)) == "hamster\nguinea pig\nturtle" @test String(take!(io)) == "hamster\nguinea pig\nturtle" #should be unchanged - @test_throws ArgumentError Base.compact(io) # not writeable close(io) end +@testset "Truncate" begin + # Fails for non-writable and non-seekable + @test_throws ArgumentError truncate(PipeBuffer(), 0) + @test_throws ArgumentError truncate(IOBuffer(b"abcde"), 3) + + # Standard use + buf = IOBuffer(collect(b"abcdef"); write=true, read=true) + truncate(buf, 4) + @test read(buf) == b"abcd" + @test take!(buf) == b"abcd" + + # Mark is removed if beyond the size + buf = IOBuffer() + write(buf, "abcde") + seek(buf, 4) + mark(buf) + truncate(buf, 4) + @test !ismarked(buf) + + # Making it larger + buf = IOBuffer(collect(b"abcdef"); write=true, read=true) + seek(buf, 3) + truncate(buf, 3) + write(buf, 'X') + mark(buf) + truncate(buf, 5) + @test ismarked(buf) + @test reset(buf) == 4 + @test take!(buf) == b"abcX\0" + + # With offset + v = pushfirst!(UInt8[0x62, 0x63, 0x64], 0x61) + buf = IOBuffer(v; write=true, read=true) + seekstart(buf) + read(buf, UInt8) + mark(buf) + truncate(buf, 7) + @test reset(buf) == 1 + @test take!(buf) == b"abcd\0\0\0" +end + +@testset "Position of compactable buffer" begin + # Set maxsize, because otherwise compaction it too hard to reason about, + # and this test will be brittle + io = Base.GenericIOBuffer(Memory{UInt8}(), true, true, false, true, 100, false) + write(io, "abcd") + read(io, UInt16) + @test position(io) == 2 + write(io, "abcde"^80) + @test position(io) == 2 + read(io, 60) + @test position(io) == 62 + mark(io) + # Trigger compaction + write(io, rand(UInt8, 50)) + @test position(io) == 62 + v1 = read(io, 20) + @test position(io) == 82 + @test reset(io) == 62 + @test position(io) == 62 + v2 = read(io, 
20) + @test v1 == v2 +end + @testset "PipeBuffer" begin - io = PipeBuffer() + io = new_unseekable_buffer() @test_throws EOFError read(io,UInt8) @test write(io,"pancakes\nwaffles\nblueberries\n") > 0 + + # PipeBuffer is append, so writing to it does not advance the position @test position(io) == 0 @test readline(io) == "pancakes" - Base.compact(io) @test readline(io) == "waffles" @test write(io,"whipped cream\n") > 0 @test readline(io) == "blueberries" + + # Pipebuffers do not support seeking, and therefore do not support truncation. @test_throws ArgumentError seek(io,0) @test_throws ArgumentError truncate(io,0) + @test readline(io) == "whipped cream" @test write(io,"pancakes\nwaffles\nblueberries\n") > 0 @test readlines(io) == String["pancakes", "waffles", "blueberries"] @@ -116,58 +444,6 @@ end end rm(fname) end - - Base.compact(io) - @test position(io) == 0 - @test ioslength(io) == 0 - Base._resize!(io,0) - Base.ensureroom(io,50) - @test position(io) == 0 - @test ioslength(io) == 0 - @test length(io.data) == 50 - Base.ensureroom(io,10) - @test ioslength(io) == 0 - @test length(io.data) == 50 - io.maxsize = 75 - Base.ensureroom(io,100) - @test ioslength(io) == 0 - @test length(io.data) == 75 - seekend(io) - @test ioslength(io) == 0 - @test position(io) == 0 - write(io,zeros(UInt8,200)) - @test ioslength(io) == 75 - @test length(io.data) == 75 - write(io,1) - @test ioslength(io) == 75 - @test length(io.data) == 75 - write(io,[1,2,3]) - @test ioslength(io) == 75 - @test length(io.data) == 75 - skip(io,1) - @test write(io,UInt8(104)) === 1 - skip(io,3) - @test write(io,b"apples") === 3 - skip(io,71) - @test write(io,'y') === 1 - @test read(io, String) == "happy" - @test eof(io) - write(io,zeros(UInt8,73)) - write(io,'a') - write(io,'b') - write(io,'c') - write(io,'d') - write(io,'e') - @test ioslength(io) == 75 - @test length(io.data) == 75 - @test position(io) == 0 - skip(io,72) - @test String(take!(io)) == "\0ab" - @test String(take!(io)) == "" - - # issues 4021 - print(io, true) - close(io) end @testset "issue 5453" begin @@ -248,9 +524,6 @@ end truncate(io2, io2.size - 2) @test read(io2, String) == "goodnightmoonhelloworld" seek(io2, 0) - write(io2, io2) - @test read(io2, String) == "" - @test bufcontents(io2) == "goodnightmoonhelloworld" end # issue #11917 @@ -347,24 +620,42 @@ end @test n == 5 end -@testset "Base.compact" begin - a = Base.GenericIOBuffer(UInt8[], true, true, false, true, typemax(Int)) - mark(a) # mark at position 0 - write(a, "Hello!") - @test Base.compact(a) === nothing # because pointer > mark - close(a) - b = Base.GenericIOBuffer(UInt8[], true, true, false, true, typemax(Int)) - write(b, "Hello!") - read(b) - mark(b) # mark at position 6 - write(b, "Goodbye!") # now pointer is > mark but mark is > 0 - Base.compact(b) - @test readline(b) == "Goodbye!" - close(b) +@testset "Compacting" begin + # Compacting works + buf = Base.GenericIOBuffer(UInt8[], true, true, false, true, 20, false) + mark(buf) + write(buf, "Hello"^5) + reset(buf) + unmark(buf) + read(buf, UInt8) + read(buf, UInt8) + write(buf, "a!") + @test length(buf.data) == 20 + @test String(take!(buf)) == "llo" * "Hello"^3 * "a!" 
+ + # Compacting does not do anything when mark == 0 + buf = Base.GenericIOBuffer(UInt8[], true, true, false, true, 5, false) + mark(buf) + write(buf, "Hello") + reset(buf) + mark(buf) + read(buf, UInt8) + read(buf, UInt8) + @test write(buf, "a!") == 0 + @test take!(buf) == b"llo" + + # Compacting without maxsize still works + buf = new_unseekable_buffer() + data = repeat(b"abcdefg", 100) + write(buf, data) + read(buf, 600) + data_len = length(buf.data) + write(buf, view(data, 1:500)) + @test length(buf.data) == data_len end @testset "peek(::GenericIOBuffer)" begin - io = Base.GenericIOBuffer(UInt8[], true, true, false, true, typemax(Int)) + io = Base.GenericIOBuffer(UInt8[], true, true, false, true, typemax(Int), false) write(io, "こんにちは") @test peek(io) == 0xe3 @test peek(io, Char) == 'こ' @@ -381,13 +672,22 @@ end v = @view a[1:2] io = IOBuffer() write(io,1) + write(io,0) seek(io,0) - @test Base.read_sub(io,v,1,1) == [1,0] + @test read!(io, v) == [1, 0] end @testset "with offset" begin b = pushfirst!([0x02], 0x01) @test take!(IOBuffer(b)) == [0x01, 0x02] + + # Read-only buffer does not take control of underlying buffer + v = pushfirst!([0x62, 0x63], 0x61) + buf = IOBuffer(v; write=false) + @test read(buf) == b"abc" + @test v == b"abc" # v is unchanged + + # Truncate end @testset "#54636 reading from non-dense vectors" begin From efc1ba908b5bfd914e61aaa1740b5ae6aa08bac0 Mon Sep 17 00:00:00 2001 From: Cody Tapscott <84105208+topolarity@users.noreply.github.com> Date: Sat, 29 Mar 2025 12:36:42 -0400 Subject: [PATCH 09/20] Move `eachregion(::AnnotatedString)` implementation to `Base` (#57912) Excising part of https://github.com/JuliaLang/julia/pull/56194/ on the way to reviving that PR. (cherry picked from commit a5e0eabfdbecc40ff9c3d4ba86a4bc76036f352e) --- base/strings/annotated.jl | 258 +++++++++++------------------------ base/strings/annotated_io.jl | 201 +++++++++++++++++++++++++++ base/strings/strings.jl | 1 + test/strings/annotated.jl | 48 +++++++ 4 files changed, 333 insertions(+), 175 deletions(-) create mode 100644 base/strings/annotated_io.jl diff --git a/base/strings/annotated.jl b/base/strings/annotated.jl index 0dcac0bf2de3b..1fbbdc1dc44e9 100644 --- a/base/strings/annotated.jl +++ b/base/strings/annotated.jl @@ -460,201 +460,109 @@ function annotated_chartransform(f::Function, str::AnnotatedString, state=nothin AnnotatedString(String(take!(outstr)), annots) end -## AnnotatedIOBuffer - -struct AnnotatedIOBuffer <: AbstractPipe - io::IOBuffer - annotations::Vector{RegionAnnotation} -end - -AnnotatedIOBuffer(io::IOBuffer) = AnnotatedIOBuffer(io, Vector{RegionAnnotation}()) -AnnotatedIOBuffer() = AnnotatedIOBuffer(IOBuffer()) - -function show(io::IO, aio::AnnotatedIOBuffer) - show(io, AnnotatedIOBuffer) - size = filesize(aio.io) - print(io, '(', size, " byte", ifelse(size == 1, "", "s"), ", ", - length(aio.annotations), " annotation", ifelse(length(aio.annotations) == 1, "", "s"), ")") +struct RegionIterator{S <: AbstractString} + str::S + regions::Vector{UnitRange{Int}} + annotations::Vector{Vector{Annotation}} end -pipe_reader(io::AnnotatedIOBuffer) = io.io -pipe_writer(io::AnnotatedIOBuffer) = io.io - -# Useful `IOBuffer` methods that we don't get from `AbstractPipe` -position(io::AnnotatedIOBuffer) = position(io.io) -seek(io::AnnotatedIOBuffer, n::Integer) = (seek(io.io, n); io) -seekend(io::AnnotatedIOBuffer) = (seekend(io.io); io) -skip(io::AnnotatedIOBuffer, n::Integer) = (skip(io.io, n); io) -copy(io::AnnotatedIOBuffer) = AnnotatedIOBuffer(copy(io.io), copy(io.annotations)) 
- -annotations(io::AnnotatedIOBuffer) = io.annotations - -annotate!(io::AnnotatedIOBuffer, range::UnitRange{Int}, label::Symbol, @nospecialize(val::Any)) = - (_annotate!(io.annotations, range, label, val); io) - -function write(io::AnnotatedIOBuffer, astr::Union{AnnotatedString, SubString{<:AnnotatedString}}) - astr = AnnotatedString(astr) - offset = position(io.io) - eof(io) || _clear_annotations_in_region!(io.annotations, offset+1:offset+ncodeunits(astr)) - _insert_annotations!(io, astr.annotations) - write(io.io, String(astr)) -end +Base.length(si::RegionIterator) = length(si.regions) -write(io::AnnotatedIOBuffer, c::AnnotatedChar) = - write(io, AnnotatedString(string(c), [(region=1:ncodeunits(c), a...) for a in c.annotations])) -write(io::AnnotatedIOBuffer, x::AbstractString) = write(io.io, x) -write(io::AnnotatedIOBuffer, s::Union{SubString{String}, String}) = write(io.io, s) -write(io::AnnotatedIOBuffer, b::UInt8) = write(io.io, b) - -function write(dest::AnnotatedIOBuffer, src::AnnotatedIOBuffer) - destpos = position(dest) - isappending = eof(dest) - srcpos = position(src) - nb = write(dest.io, src.io) - isappending || _clear_annotations_in_region!(dest.annotations, destpos:destpos+nb) - srcannots = [setindex(annot, max(1 + srcpos, first(annot.region)):last(annot.region), :region) - for annot in src.annotations if first(annot.region) >= srcpos] - _insert_annotations!(dest, srcannots, destpos - srcpos) - nb +Base.@propagate_inbounds function Base.iterate(si::RegionIterator, i::Integer=1) + if i <= length(si.regions) + @inbounds ((SubString(si.str, si.regions[i]), si.annotations[i]), i+1) + end end -# So that read/writes with `IOContext` (and any similar `AbstractPipe` wrappers) -# work as expected. -function write(io::AbstractPipe, s::Union{AnnotatedString, SubString{<:AnnotatedString}}) - if pipe_writer(io) isa AnnotatedIOBuffer - write(pipe_writer(io), s) - else - invoke(write, Tuple{IO, typeof(s)}, io, s) - end::Int -end -# Can't be part of the `Union` above because it introduces method ambiguities -function write(io::AbstractPipe, c::AnnotatedChar) - if pipe_writer(io) isa AnnotatedIOBuffer - write(pipe_writer(io), c) - else - invoke(write, Tuple{IO, typeof(c)}, io, c) - end::Int -end +Base.eltype(::RegionIterator{S}) where { S <: AbstractString} = + Tuple{SubString{S}, Vector{Annotation}} """ - _clear_annotations_in_region!(annotations::Vector{$RegionAnnotation}, span::UnitRange{Int}) + eachregion(s::AnnotatedString{S}) + eachregion(s::SubString{AnnotatedString{S}}) -Erase the presence of `annotations` within a certain `span`. +Identify the contiguous substrings of `s` with a constant annotations, and return +an iterator which provides each substring and the applicable annotations as a +`Tuple{SubString{S}, Vector{$Annotation}}`. -This operates by removing all elements of `annotations` that are entirely -contained in `span`, truncating ranges that partially overlap, and splitting -annotations that subsume `span` to just exist either side of `span`. +# Examples + +```jldoctest; setup=:(using Base: AnnotatedString, eachregion) +julia> collect(eachregion(AnnotatedString( + "hey there", [(1:3, :face, :bold), + (5:9, :face, :italic)]))) +3-element Vector{Tuple{SubString{String}, Vector{$Annotation}}}: + ("hey", [$Annotation((:face, :bold))]) + (" ", []) + ("there", [$Annotation((:face, :italic))]) +``` """ -function _clear_annotations_in_region!(annotations::Vector{RegionAnnotation}, span::UnitRange{Int}) - # Clear out any overlapping pre-existing annotations. 
- filter!(ann -> first(ann.region) < first(span) || last(ann.region) > last(span), annotations) - extras = Tuple{Int, RegionAnnotation}[] - for i in eachindex(annotations) - annot = annotations[i] - region = annot.region - # Test for partial overlap - if first(region) <= first(span) <= last(region) || first(region) <= last(span) <= last(region) - annotations[i] = - setindex(annot, - if first(region) < first(span) - first(region):first(span)-1 - else - last(span)+1:last(region) - end, - :region) - # If `span` fits exactly within `region`, then we've only copied over - # the beginning overhang, but also need to conserve the end overhang. - if first(region) < first(span) && last(span) < last(region) - push!(extras, (i, setindex(annot, last(span)+1:last(region), :region))) - end +function eachregion(s::AnnotatedString, subregion::UnitRange{Int}=firstindex(s):lastindex(s)) + isempty(s) || isempty(subregion) && + return RegionIterator(s.string, UnitRange{Int}[], Vector{Annotation}[]) + events = annotation_events(s, subregion) + isempty(events) && return RegionIterator(s.string, [subregion], [Annotation[]]) + annotvals = Annotation[ + (; label, value) for (; label, value) in annotations(s)] + regions = Vector{UnitRange{Int}}() + annots = Vector{Vector{Annotation}}() + pos = first(events).pos + if pos > first(subregion) + push!(regions, thisind(s, first(subregion)):prevind(s, pos)) + push!(annots, []) + end + activelist = Int[] + for event in events + if event.pos != pos + push!(regions, pos:prevind(s, event.pos)) + push!(annots, annotvals[activelist]) + pos = event.pos + end + if event.active + insert!(activelist, searchsortedfirst(activelist, event.index), event.index) + else + deleteat!(activelist, searchsortedfirst(activelist, event.index)) end end - # Insert any extra entries in the appropriate position - for (offset, (i, entry)) in enumerate(extras) - insert!(annotations, i + offset, entry) + if last(events).pos < nextind(s, last(subregion)) + push!(regions, last(events).pos:thisind(s, last(subregion))) + push!(annots, []) end - annotations + RegionIterator(s.string, regions, annots) end -""" - _insert_annotations!(io::AnnotatedIOBuffer, annotations::Vector{$RegionAnnotation}, offset::Int = position(io)) +function eachregion(s::SubString{<:AnnotatedString}, pos::UnitRange{Int}=firstindex(s):lastindex(s)) + if isempty(s) + RegionIterator(s.string, Vector{UnitRange{Int}}(), Vector{Vector{Annotation}}()) + else + eachregion(s.string, first(pos)+s.offset:last(pos)+s.offset) + end +end -Register new `annotations` in `io`, applying an `offset` to their regions. +""" + annotation_events(string::AbstractString, annots::Vector{$RegionAnnotation}, subregion::UnitRange{Int}) + annotation_events(string::AnnotatedString, subregion::UnitRange{Int}) -The largely consists of simply shifting the regions of `annotations` by `offset` -and pushing them onto `io`'s annotations. However, when it is possible to merge -the new annotations with recent annotations in accordance with the semantics -outlined in [`AnnotatedString`](@ref), we do so. More specifically, when there -is a run of the most recent annotations that are also present as the first -`annotations`, with the same value and adjacent regions, the new annotations are -merged into the existing recent annotations by simply extending their range. +Find all annotation "change events" that occur within a `subregion` of `annots`, +with respect to `string`. When `string` is styled, `annots` is inferred. 
-This is implemented so that one can say write an `AnnotatedString` to an -`AnnotatedIOBuffer` one character at a time without needlessly producing a -new annotation for each character. +Each change event is given in the form of a `@NamedTuple{pos::Int, active::Bool, +index::Int}` where `pos` is the position of the event, `active` is a boolean +indicating whether the annotation is being activated or deactivated, and `index` +is the index of the annotation in question. """ -function _insert_annotations!(io::AnnotatedIOBuffer, annotations::Vector{RegionAnnotation}, offset::Int = position(io)) - run = 0 - if !isempty(io.annotations) && last(last(io.annotations).region) == offset - for i in reverse(axes(annotations, 1)) - annot = annotations[i] - first(annot.region) == 1 || continue - i <= length(io.annotations) || continue - if annot.label == last(io.annotations).label && annot.value == last(io.annotations).value - valid_run = true - for runlen in 1:i - new = annotations[begin+runlen-1] - old = io.annotations[end-i+runlen] - if last(old.region) != offset || first(new.region) != 1 || old.label != new.label || old.value != new.value - valid_run = false - break - end - end - if valid_run - run = i - break - end - end +function annotation_events(s::AbstractString, annots::Vector{RegionAnnotation}, subregion::UnitRange{Int}) + events = Vector{NamedTuple{(:pos, :active, :index), Tuple{Int, Bool, Int}}}() # Position, Active?, Annotation index + for (i, (; region)) in enumerate(annots) + if !isempty(intersect(subregion, region)) + start, stop = max(first(subregion), first(region)), min(last(subregion), last(region)) + start <= stop || continue # Currently can't handle empty regions + push!(events, (pos=thisind(s, start), active=true, index=i)) + push!(events, (pos=nextind(s, stop), active=false, index=i)) end end - for runindex in 0:run-1 - old_index = lastindex(io.annotations) - run + 1 + runindex - old = io.annotations[old_index] - new = annotations[begin+runindex] - io.annotations[old_index] = setindex(old, first(old.region):last(new.region)+offset, :region) - end - for index in run+1:lastindex(annotations) - annot = annotations[index] - start, stop = first(annot.region), last(annot.region) - push!(io.annotations, setindex(annotations[index], start+offset:stop+offset, :region)) - end + sort(events, by=e -> e.pos) end -function read(io::AnnotatedIOBuffer, ::Type{AnnotatedString{T}}) where {T <: AbstractString} - if (start = position(io)) == 0 - AnnotatedString(read(io.io, T), copy(io.annotations)) - else - annots = [setindex(annot, UnitRange{Int}(max(1, first(annot.region) - start), last(annot.region)-start), :region) - for annot in io.annotations if last(annot.region) > start] - AnnotatedString(read(io.io, T), annots) - end -end -read(io::AnnotatedIOBuffer, ::Type{AnnotatedString{AbstractString}}) = read(io, AnnotatedString{String}) -read(io::AnnotatedIOBuffer, ::Type{AnnotatedString}) = read(io, AnnotatedString{String}) - -function read(io::AnnotatedIOBuffer, ::Type{AnnotatedChar{T}}) where {T <: AbstractChar} - pos = position(io) - char = read(io.io, T) - annots = [NamedTuple{(:label, :value)}(annot) for annot in io.annotations if pos+1 in annot.region] - AnnotatedChar(char, annots) -end -read(io::AnnotatedIOBuffer, ::Type{AnnotatedChar{AbstractChar}}) = read(io, AnnotatedChar{Char}) -read(io::AnnotatedIOBuffer, ::Type{AnnotatedChar}) = read(io, AnnotatedChar{Char}) - -function truncate(io::AnnotatedIOBuffer, size::Integer) - truncate(io.io, size) - filter!(ann -> first(ann.region) <= size, 
io.annotations) - map!(ann -> setindex(ann, first(ann.region):min(size, last(ann.region)), :region), - io.annotations, io.annotations) - io -end +annotation_events(s::AnnotatedString, subregion::UnitRange{Int}) = + annotation_events(s.string, annotations(s), subregion) diff --git a/base/strings/annotated_io.jl b/base/strings/annotated_io.jl new file mode 100644 index 0000000000000..87db57b8030c9 --- /dev/null +++ b/base/strings/annotated_io.jl @@ -0,0 +1,201 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +## AnnotatedIOBuffer + +struct AnnotatedIOBuffer <: AbstractPipe + io::IOBuffer + annotations::Vector{RegionAnnotation} +end + +AnnotatedIOBuffer(io::IOBuffer) = AnnotatedIOBuffer(io, Vector{RegionAnnotation}()) +AnnotatedIOBuffer() = AnnotatedIOBuffer(IOBuffer()) + +function show(io::IO, aio::AnnotatedIOBuffer) + show(io, AnnotatedIOBuffer) + size = filesize(aio.io) + print(io, '(', size, " byte", ifelse(size == 1, "", "s"), ", ", + length(aio.annotations), " annotation", ifelse(length(aio.annotations) == 1, "", "s"), ")") +end + +pipe_reader(io::AnnotatedIOBuffer) = io.io +pipe_writer(io::AnnotatedIOBuffer) = io.io + +# Useful `IOBuffer` methods that we don't get from `AbstractPipe` +position(io::AnnotatedIOBuffer) = position(io.io) +seek(io::AnnotatedIOBuffer, n::Integer) = (seek(io.io, n); io) +seekend(io::AnnotatedIOBuffer) = (seekend(io.io); io) +skip(io::AnnotatedIOBuffer, n::Integer) = (skip(io.io, n); io) +copy(io::AnnotatedIOBuffer) = AnnotatedIOBuffer(copy(io.io), copy(io.annotations)) + +annotations(io::AnnotatedIOBuffer) = io.annotations + +annotate!(io::AnnotatedIOBuffer, range::UnitRange{Int}, label::Symbol, @nospecialize(val::Any)) = + (_annotate!(io.annotations, range, label, val); io) + +function write(io::AnnotatedIOBuffer, astr::Union{AnnotatedString, SubString{<:AnnotatedString}}) + astr = AnnotatedString(astr) + offset = position(io.io) + eof(io) || _clear_annotations_in_region!(io.annotations, offset+1:offset+ncodeunits(astr)) + _insert_annotations!(io, astr.annotations) + write(io.io, String(astr)) +end + +write(io::AnnotatedIOBuffer, c::AnnotatedChar) = + write(io, AnnotatedString(string(c), [(region=1:ncodeunits(c), a...) for a in c.annotations])) +write(io::AnnotatedIOBuffer, x::AbstractString) = write(io.io, x) +write(io::AnnotatedIOBuffer, s::Union{SubString{String}, String}) = write(io.io, s) +write(io::AnnotatedIOBuffer, b::UInt8) = write(io.io, b) + +function write(dest::AnnotatedIOBuffer, src::AnnotatedIOBuffer) + destpos = position(dest) + isappending = eof(dest) + srcpos = position(src) + nb = write(dest.io, src.io) + isappending || _clear_annotations_in_region!(dest.annotations, destpos:destpos+nb) + srcannots = [setindex(annot, max(1 + srcpos, first(annot.region)):last(annot.region), :region) + for annot in src.annotations if first(annot.region) >= srcpos] + _insert_annotations!(dest, srcannots, destpos - srcpos) + nb +end + +# So that read/writes with `IOContext` (and any similar `AbstractPipe` wrappers) +# work as expected. 
+function write(io::AbstractPipe, s::Union{AnnotatedString, SubString{<:AnnotatedString}}) + if pipe_writer(io) isa AnnotatedIOBuffer + write(pipe_writer(io), s) + else + invoke(write, Tuple{IO, typeof(s)}, io, s) + end::Int +end + +# Can't be part of the `Union` above because it introduces method ambiguities +function write(io::AbstractPipe, c::AnnotatedChar) + if pipe_writer(io) isa AnnotatedIOBuffer + write(pipe_writer(io), c) + else + invoke(write, Tuple{IO, typeof(c)}, io, c) + end::Int +end + +function read(io::AnnotatedIOBuffer, ::Type{AnnotatedString{T}}) where {T <: AbstractString} + if (start = position(io)) == 0 + AnnotatedString(read(io.io, T), copy(io.annotations)) + else + annots = [setindex(annot, UnitRange{Int}(max(1, first(annot.region) - start), last(annot.region)-start), :region) + for annot in io.annotations if last(annot.region) > start] + AnnotatedString(read(io.io, T), annots) + end +end +read(io::AnnotatedIOBuffer, ::Type{AnnotatedString{AbstractString}}) = read(io, AnnotatedString{String}) +read(io::AnnotatedIOBuffer, ::Type{AnnotatedString}) = read(io, AnnotatedString{String}) + +function read(io::AnnotatedIOBuffer, ::Type{AnnotatedChar{T}}) where {T <: AbstractChar} + pos = position(io) + char = read(io.io, T) + annots = [NamedTuple{(:label, :value)}(annot) for annot in io.annotations if pos+1 in annot.region] + AnnotatedChar(char, annots) +end +read(io::AnnotatedIOBuffer, ::Type{AnnotatedChar{AbstractChar}}) = read(io, AnnotatedChar{Char}) +read(io::AnnotatedIOBuffer, ::Type{AnnotatedChar}) = read(io, AnnotatedChar{Char}) + +function truncate(io::AnnotatedIOBuffer, size::Integer) + truncate(io.io, size) + filter!(ann -> first(ann.region) <= size, io.annotations) + map!(ann -> setindex(ann, first(ann.region):min(size, last(ann.region)), :region), + io.annotations, io.annotations) + io +end + +""" + _clear_annotations_in_region!(annotations::Vector{$RegionAnnotation}, span::UnitRange{Int}) + +Erase the presence of `annotations` within a certain `span`. + +This operates by removing all elements of `annotations` that are entirely +contained in `span`, truncating ranges that partially overlap, and splitting +annotations that subsume `span` to just exist either side of `span`. +""" +function _clear_annotations_in_region!(annotations::Vector{RegionAnnotation}, span::UnitRange{Int}) + # Clear out any overlapping pre-existing annotations. + filter!(ann -> first(ann.region) < first(span) || last(ann.region) > last(span), annotations) + extras = Tuple{Int, RegionAnnotation}[] + for i in eachindex(annotations) + annot = annotations[i] + region = annot.region + # Test for partial overlap + if first(region) <= first(span) <= last(region) || first(region) <= last(span) <= last(region) + annotations[i] = + setindex(annot, + if first(region) < first(span) + first(region):first(span)-1 + else + last(span)+1:last(region) + end, + :region) + # If `span` fits exactly within `region`, then we've only copied over + # the beginning overhang, but also need to conserve the end overhang. 
+ if first(region) < first(span) && last(span) < last(region) + push!(extras, (i, setindex(annot, last(span)+1:last(region), :region))) + end + end + end + # Insert any extra entries in the appropriate position + for (offset, (i, entry)) in enumerate(extras) + insert!(annotations, i + offset, entry) + end + annotations +end + +""" + _insert_annotations!(io::AnnotatedIOBuffer, annotations::Vector{$RegionAnnotation}, offset::Int = position(io)) + +Register new `annotations` in `io`, applying an `offset` to their regions. + +The largely consists of simply shifting the regions of `annotations` by `offset` +and pushing them onto `io`'s annotations. However, when it is possible to merge +the new annotations with recent annotations in accordance with the semantics +outlined in [`AnnotatedString`](@ref), we do so. More specifically, when there +is a run of the most recent annotations that are also present as the first +`annotations`, with the same value and adjacent regions, the new annotations are +merged into the existing recent annotations by simply extending their range. + +This is implemented so that one can say write an `AnnotatedString` to an +`AnnotatedIOBuffer` one character at a time without needlessly producing a +new annotation for each character. +""" +function _insert_annotations!(io::AnnotatedIOBuffer, annotations::Vector{RegionAnnotation}, offset::Int = position(io)) + run = 0 + if !isempty(io.annotations) && last(last(io.annotations).region) == offset + for i in reverse(axes(annotations, 1)) + annot = annotations[i] + first(annot.region) == 1 || continue + i <= length(io.annotations) || continue + if annot.label == last(io.annotations).label && annot.value == last(io.annotations).value + valid_run = true + for runlen in 1:i + new = annotations[begin+runlen-1] + old = io.annotations[end-i+runlen] + if last(old.region) != offset || first(new.region) != 1 || old.label != new.label || old.value != new.value + valid_run = false + break + end + end + if valid_run + run = i + break + end + end + end + end + for runindex in 0:run-1 + old_index = lastindex(io.annotations) - run + 1 + runindex + old = io.annotations[old_index] + new = annotations[begin+runindex] + io.annotations[old_index] = setindex(old, first(old.region):last(new.region)+offset, :region) + end + for index in run+1:lastindex(annotations) + annot = annotations[index] + start, stop = first(annot.region), last(annot.region) + push!(io.annotations, setindex(annotations[index], start+offset:stop+offset, :region)) + end +end diff --git a/base/strings/strings.jl b/base/strings/strings.jl index 8dae311f475b4..32975b6ea3fc7 100644 --- a/base/strings/strings.jl +++ b/base/strings/strings.jl @@ -11,3 +11,4 @@ import .Iterators: PartitionIterator include("strings/util.jl") include("strings/io.jl") +include("strings/annotated_io.jl") diff --git a/test/strings/annotated.jl b/test/strings/annotated.jl index 8658c1b52a2ab..7f53740b9eec1 100644 --- a/test/strings/annotated.jl +++ b/test/strings/annotated.jl @@ -258,3 +258,51 @@ end write(aio, Base.AnnotatedString("hello", [(1:5, :tag, 1)])) @test sprint(show, aio) == "Base.AnnotatedIOBuffer(5 bytes, 1 annotation)" end + +@testset "Eachregion" begin + annregions(str::String, annots::Vector{<:Tuple{UnitRange{Int}, Symbol, <:Any}}) = + [(s, Tuple.(a)) for (s, a) in Base.eachregion(Base.AnnotatedString(str, annots))] + # Regions that do/don't extend to the left/right edges + @test annregions(" abc ", [(2:4, :face, :bold)]) == + [(" ", []), + ("abc", [(:face, :bold)]), + (" ", [])] + @test 
annregions(" x ", [(2:2, :face, :bold)]) == + [(" ", []), + ("x", [(:face, :bold)]), + (" ", [])] + @test annregions(" x", [(2:2, :face, :bold)]) == + [(" ", []), + ("x", [(:face, :bold)])] + @test annregions("x ", [(1:1, :face, :bold)]) == + [("x", [(:face, :bold)]), + (" ", [])] + @test annregions("x", [(1:1, :face, :bold)]) == + [("x", [(:face, :bold)])] + # Overlapping/nested regions + @test annregions(" abc ", [(2:4, :face, :bold), (3:3, :face, :italic)]) == + [(" ", []), + ("a", [(:face, :bold)]), + ("b", [(:face, :bold), (:face, :italic)]), + ("c", [(:face, :bold)]), + (" ", [])] + @test annregions("abc-xyz", [(1:7, :face, :bold), (1:3, :face, :green), (4:4, :face, :yellow), (4:7, :face, :italic)]) == + [("abc", [(:face, :bold), (:face, :green)]), + ("-", [(:face, :bold), (:face, :yellow), (:face, :italic)]), + ("xyz", [(:face, :bold), (:face, :italic)])] + # Preserving annotation order + @test annregions("abcd", [(1:3, :face, :red), (2:2, :face, :yellow), (2:3, :face, :green), (2:4, :face, :blue)]) == + [("a", [(:face, :red)]), + ("b", [(:face, :red), (:face, :yellow), (:face, :green), (:face, :blue)]), + ("c", [(:face, :red), (:face, :green), (:face, :blue)]), + ("d", [(:face, :blue)])] + @test annregions("abcd", [(2:4, :face, :blue), (1:3, :face, :red), (2:3, :face, :green), (2:2, :face, :yellow)]) == + [("a", [(:face, :red)]), + ("b", [(:face, :blue), (:face, :red), (:face, :green), (:face, :yellow)]), + ("c", [(:face, :blue), (:face, :red), (:face, :green)]), + ("d", [(:face, :blue)])] + # Region starting after a character spanning multiple codepoints. + @test annregions("𝟏x", [(1:4, :face, :red)]) == + [("𝟏", [(:face, :red)]), + ("x", [])] +end From 57b1cd54adbfdd7c9862901e80f75f9886137df0 Mon Sep 17 00:00:00 2001 From: Neven Sajko <4944410+nsajko@users.noreply.github.com> Date: Sun, 30 Mar 2025 08:55:46 +0200 Subject: [PATCH 10/20] `Base`: shell escaping: inference improvement to prevent invalidation (#57915) (cherry picked from commit 4d2a350870b97059432ac8fa8de92b0b161c335b) --- base/shell.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/base/shell.jl b/base/shell.jl index e07fff128acfe..68925cbd5d5af 100644 --- a/base/shell.jl +++ b/base/shell.jl @@ -344,7 +344,7 @@ function shell_escape_csh(io::IO, args::AbstractString...) end shell_escape_csh(args::AbstractString...) = sprint(shell_escape_csh, args...; - sizehint = sum(sizeof.(args)) + length(args) * 3) + sizehint = sum(sizeof, args) + length(args) * 3) """ shell_escape_wincmd(s::AbstractString) @@ -494,4 +494,4 @@ function escape_microsoft_c_args(io::IO, args::AbstractString...) end escape_microsoft_c_args(args::AbstractString...) = sprint(escape_microsoft_c_args, args...; - sizehint = (sum(sizeof.(args)) + 3*length(args))) + sizehint = (sum(sizeof, args) + 3*length(args))) From 4183b2c2e8cb3a3cc120da3b633d8c46dc155046 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Belmant?= Date: Tue, 18 Feb 2025 00:53:00 +0100 Subject: [PATCH 11/20] Support adding `CodeInstance`s to JIT for interpreters defining a codegen cache (#57272) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements a way to add `CodeInstance`s compiled by external interpreters to JIT, such that they become legal targets for `invoke` calls. Based on a design proposed by @Keno, the `AbstractInterpreter` interface is extended to support providing a codegen cache that is filled during inference for future use with `add_codeinsts_to_jit!`. 
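A minimal sketch of the intended usage, condensed from the `CompilerDevTools` and
`Compiler/test/AbstractInterpreter.jl` changes below (the `JITCacheInterp` and
`JITCacheOwner` names are placeholders, and the `Compiler` stdlib is assumed to be
loadable via `using Compiler`):

    using Compiler
    using Core: CodeInstance, CodeInfo

    struct JITCacheOwner end
    struct JITCacheInterp <: Compiler.AbstractInterpreter
        world::UInt
        inf_params::Compiler.InferenceParams
        opt_params::Compiler.OptimizationParams
        inf_cache::Vector{Compiler.InferenceResult}
        codegen_cache::IdDict{CodeInstance,CodeInfo}
        JITCacheInterp(; world::UInt = Base.get_world_counter()) =
            new(world, Compiler.InferenceParams(), Compiler.OptimizationParams(),
                Compiler.InferenceResult[], IdDict{CodeInstance,CodeInfo}())
    end
    Compiler.InferenceParams(interp::JITCacheInterp) = interp.inf_params
    Compiler.OptimizationParams(interp::JITCacheInterp) = interp.opt_params
    Compiler.get_inference_world(interp::JITCacheInterp) = interp.world
    Compiler.get_inference_cache(interp::JITCacheInterp) = interp.inf_cache
    Compiler.cache_owner(::JITCacheInterp) = JITCacheOwner()
    # The new interface hook: expose a codegen cache so inferred sources can reach the JIT.
    Compiler.codegen_cache(interp::JITCacheInterp) = interp.codegen_cache

    # Infer with SOURCE_MODE_ABI so the resulting CodeInstance (and its callees)
    # are added to the JIT, then use it as an `invoke` target.
    f, args = (+), (1, 1)
    mi = @ccall jl_method_lookup(Any[f, args...]::Ptr{Any}, (1 + length(args))::Csize_t,
                                 Base.tls_world_age()::Csize_t)::Ref{Core.MethodInstance}
    ci = Compiler.typeinf_ext_toplevel(JITCacheInterp(), mi, Compiler.SOURCE_MODE_ABI)
    invoke(f, ci, args...)  # == 2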
This allows `invoke(f, ::CodeInstance, args...)` to work on external interpreters, which is currently failing on `master` (see #57193). --------- Co-authored-by: Cédric Belmant (cherry picked from commit 9d2e9ed8a275f770994488ec3e4bd3f04b9a229e) --- .../CompilerDevTools/src/CompilerDevTools.jl | 6 +- Compiler/src/typeinfer.jl | 86 +++++++++++-------- Compiler/src/types.jl | 17 ++++ Compiler/test/AbstractInterpreter.jl | 14 +++ 4 files changed, 86 insertions(+), 37 deletions(-) diff --git a/Compiler/extras/CompilerDevTools/src/CompilerDevTools.jl b/Compiler/extras/CompilerDevTools/src/CompilerDevTools.jl index 5d0df5ccaa4e4..dd32564d7fa8d 100644 --- a/Compiler/extras/CompilerDevTools/src/CompilerDevTools.jl +++ b/Compiler/extras/CompilerDevTools/src/CompilerDevTools.jl @@ -9,12 +9,13 @@ struct SplitCacheInterp <: Compiler.AbstractInterpreter inf_params::Compiler.InferenceParams opt_params::Compiler.OptimizationParams inf_cache::Vector{Compiler.InferenceResult} + codegen_cache::IdDict{CodeInstance,CodeInfo} function SplitCacheInterp(; world::UInt = Base.get_world_counter(), inf_params::Compiler.InferenceParams = Compiler.InferenceParams(), opt_params::Compiler.OptimizationParams = Compiler.OptimizationParams(), inf_cache::Vector{Compiler.InferenceResult} = Compiler.InferenceResult[]) - new(world, inf_params, opt_params, inf_cache) + new(world, inf_params, opt_params, inf_cache, IdDict{CodeInstance,CodeInfo}()) end end @@ -23,10 +24,11 @@ Compiler.OptimizationParams(interp::SplitCacheInterp) = interp.opt_params Compiler.get_inference_world(interp::SplitCacheInterp) = interp.world Compiler.get_inference_cache(interp::SplitCacheInterp) = interp.inf_cache Compiler.cache_owner(::SplitCacheInterp) = SplitCacheOwner() +Compiler.codegen_cache(interp::SplitCacheInterp) = interp.codegen_cache import Core.OptimizedGenerics.CompilerPlugins: typeinf, typeinf_edge @eval @noinline typeinf(::SplitCacheOwner, mi::MethodInstance, source_mode::UInt8) = - Base.invoke_in_world(which(typeinf, Tuple{SplitCacheOwner, MethodInstance, UInt8}).primary_world, Compiler.typeinf_ext, SplitCacheInterp(; world=Base.tls_world_age()), mi, source_mode) + Base.invoke_in_world(which(typeinf, Tuple{SplitCacheOwner, MethodInstance, UInt8}).primary_world, Compiler.typeinf_ext_toplevel, SplitCacheInterp(; world=Base.tls_world_age()), mi, source_mode) @eval @noinline function typeinf_edge(::SplitCacheOwner, mi::MethodInstance, parent_frame::Compiler.InferenceState, world::UInt, source_mode::UInt8) # TODO: This isn't quite right, we're just sketching things for now diff --git a/Compiler/src/typeinfer.jl b/Compiler/src/typeinfer.jl index e07ff4a842e3c..d3c8e65e214a2 100644 --- a/Compiler/src/typeinfer.jl +++ b/Compiler/src/typeinfer.jl @@ -144,9 +144,10 @@ function finish!(interp::AbstractInterpreter, caller::InferenceState, validation ci, inferred_result, const_flag, first(result.valid_worlds), last(result.valid_worlds), encode_effects(result.ipo_effects), result.analysis_results, time_total, caller.time_caches, time_self_ns * 1e-9, di, edges) engine_reject(interp, ci) - if !discard_src && isdefined(interp, :codegen) && uncompressed isa CodeInfo + codegen = codegen_cache(interp) + if !discard_src && codegen !== nothing && uncompressed isa CodeInfo # record that the caller could use this result to generate code when required, if desired, to avoid repeating n^2 work - interp.codegen[ci] = uncompressed + codegen[ci] = uncompressed if bootstrapping_compiler && inferred_result == nothing # This is necessary to get decent bootstrapping 
performance # when compiling the compiler to inject everything eagerly @@ -186,8 +187,9 @@ function finish!(interp::AbstractInterpreter, mi::MethodInstance, ci::CodeInstan ccall(:jl_update_codeinst, Cvoid, (Any, Any, Int32, UInt, UInt, UInt32, Any, Float64, Float64, Float64, Any, Any), ci, nothing, const_flag, min_world, max_world, ipo_effects, nothing, 0.0, 0.0, 0.0, di, edges) code_cache(interp)[mi] = ci - if isdefined(interp, :codegen) - interp.codegen[ci] = src + codegen = codegen_cache(interp) + if codegen !== nothing + codegen[ci] = src end engine_reject(interp, ci) return nothing @@ -1195,7 +1197,10 @@ function typeinf_ext(interp::AbstractInterpreter, mi::MethodInstance, source_mod ci = result.ci # reload from result in case it changed @assert frame.cache_mode != CACHE_MODE_NULL - @assert is_result_constabi_eligible(result) || (!isdefined(interp, :codegen) || haskey(interp.codegen, ci)) + @assert is_result_constabi_eligible(result) || begin + codegen = codegen_cache(interp) + codegen === nothing || haskey(codegen, ci) + end @assert is_result_constabi_eligible(result) == use_const_api(ci) @assert isdefined(ci, :inferred) "interpreter did not fulfill our expectations" if !is_cached(frame) && source_mode == SOURCE_MODE_ABI @@ -1261,44 +1266,55 @@ function collectinvokes!(wq::Vector{CodeInstance}, ci::CodeInfo) end end -# This is a bridge for the C code calling `jl_typeinf_func()` on a single Method match -function typeinf_ext_toplevel(mi::MethodInstance, world::UInt, source_mode::UInt8) - interp = NativeInterpreter(world) - ci = typeinf_ext(interp, mi, source_mode) - if source_mode == SOURCE_MODE_ABI && ci isa CodeInstance && !ci_has_invoke(ci) - inspected = IdSet{CodeInstance}() - tocompile = Vector{CodeInstance}() - push!(tocompile, ci) - while !isempty(tocompile) - # ci_has_real_invoke(ci) && return ci # optimization: cease looping if ci happens to get compiled (not just jl_fptr_wait_for_compiled, but fully jl_is_compiled_codeinst) - callee = pop!(tocompile) - ci_has_invoke(callee) && continue - callee in inspected && continue - src = get(interp.codegen, callee, nothing) +function add_codeinsts_to_jit!(interp::AbstractInterpreter, ci, source_mode::UInt8) + source_mode == SOURCE_MODE_ABI || return ci + ci isa CodeInstance && !ci_has_invoke(ci) || return ci + codegen = codegen_cache(interp) + codegen === nothing && return ci + inspected = IdSet{CodeInstance}() + tocompile = Vector{CodeInstance}() + push!(tocompile, ci) + while !isempty(tocompile) + # ci_has_real_invoke(ci) && return ci # optimization: cease looping if ci happens to get compiled (not just jl_fptr_wait_for_compiled, but fully jl_is_compiled_codeinst) + callee = pop!(tocompile) + ci_has_invoke(callee) && continue + callee in inspected && continue + src = get(codegen, callee, nothing) + if !isa(src, CodeInfo) + src = @atomic :monotonic callee.inferred + if isa(src, String) + src = _uncompressed_ir(callee, src) + end if !isa(src, CodeInfo) - src = @atomic :monotonic callee.inferred - if isa(src, String) - src = _uncompressed_ir(callee, src) - end - if !isa(src, CodeInfo) - newcallee = typeinf_ext(interp, callee.def, source_mode) - if newcallee isa CodeInstance - callee === ci && (ci = newcallee) # ci stopped meeting the requirements after typeinf_ext last checked, try again with newcallee - push!(tocompile, newcallee) - #else - # println("warning: could not get source code for ", callee.def) - end - continue + newcallee = typeinf_ext(interp, callee.def, source_mode) + if newcallee isa CodeInstance + callee === ci && (ci = 
newcallee) # ci stopped meeting the requirements after typeinf_ext last checked, try again with newcallee + push!(tocompile, newcallee) + #else + # println("warning: could not get source code for ", callee.def) end + continue end - push!(inspected, callee) - collectinvokes!(tocompile, src) - ccall(:jl_add_codeinst_to_jit, Cvoid, (Any, Any), callee, src) end + push!(inspected, callee) + collectinvokes!(tocompile, src) + ccall(:jl_add_codeinst_to_jit, Cvoid, (Any, Any), callee, src) end return ci end +function typeinf_ext_toplevel(interp::AbstractInterpreter, mi::MethodInstance, source_mode::UInt8) + ci = typeinf_ext(interp, mi, source_mode) + ci = add_codeinsts_to_jit!(interp, ci, source_mode) + return ci +end + +# This is a bridge for the C code calling `jl_typeinf_func()` on a single Method match +function typeinf_ext_toplevel(mi::MethodInstance, world::UInt, source_mode::UInt8) + interp = NativeInterpreter(world) + return typeinf_ext_toplevel(interp, mi, source_mode) +end + # This is a bridge for the C code calling `jl_typeinf_func()` on set of Method matches # The trim_mode can be any of: const TRIM_NO = 0 diff --git a/Compiler/src/types.jl b/Compiler/src/types.jl index eb05ba2b8daa6..a04c9e70174fe 100644 --- a/Compiler/src/types.jl +++ b/Compiler/src/types.jl @@ -23,6 +23,10 @@ the following methods to satisfy the `AbstractInterpreter` API requirement: - `get_inference_world(interp::NewInterpreter)` - return the world age for this interpreter - `get_inference_cache(interp::NewInterpreter)` - return the local inference cache - `cache_owner(interp::NewInterpreter)` - return the owner of any new cache entries + +If `CodeInstance`s compiled using `interp::NewInterpreter` are meant to be executed with `invoke`, +a method `codegen_cache(interp::NewInterpreter) -> IdDict{CodeInstance, CodeInfo}` must be defined, +and inference must be triggered via `typeinf_ext_toplevel` with source mode `SOURCE_MODE_ABI`. """ abstract type AbstractInterpreter end @@ -446,6 +450,19 @@ to incorporate customized dispatches for the overridden methods. method_table(interp::AbstractInterpreter) = InternalMethodTable(get_inference_world(interp)) method_table(interp::NativeInterpreter) = interp.method_table +""" + codegen_cache(interp::AbstractInterpreter) -> Union{Nothing, IdDict{CodeInstance, CodeInfo}} + +Optionally return a cache associating a `CodeInfo` to a `CodeInstance` that should be added to the JIT +for future execution via `invoke(f, ::CodeInstance, args...)`. This cache is used during `typeinf_ext_toplevel`, +and may be safely discarded between calls to this function. + +By default, a value of `nothing` is returned indicating that `CodeInstance`s should not be added to the JIT. +Attempting to execute them via `invoke` will result in an error. 
+""" +codegen_cache(interp::AbstractInterpreter) = nothing +codegen_cache(interp::NativeInterpreter) = interp.codegen + """ By default `AbstractInterpreter` implements the following inference bail out logic: - `bail_out_toplevel_call(::AbstractInterpreter, sig, ::InferenceState)`: bail out from diff --git a/Compiler/test/AbstractInterpreter.jl b/Compiler/test/AbstractInterpreter.jl index 533eaf93937a3..83218d73cad69 100644 --- a/Compiler/test/AbstractInterpreter.jl +++ b/Compiler/test/AbstractInterpreter.jl @@ -534,3 +534,17 @@ let interp = DebugInterp() end @test found end + +@newinterp InvokeInterp +struct InvokeOwner end +codegen = IdDict{CodeInstance, CodeInfo}() +Compiler.cache_owner(::InvokeInterp) = InvokeOwner() +Compiler.codegen_cache(::InvokeInterp) = codegen +let interp = InvokeInterp() + source_mode = Compiler.SOURCE_MODE_ABI + f = (+) + args = (1, 1) + mi = @ccall jl_method_lookup(Any[f, args...]::Ptr{Any}, (1+length(args))::Csize_t, Base.tls_world_age()::Csize_t)::Ref{Core.MethodInstance} + ci = Compiler.typeinf_ext_toplevel(interp, mi, source_mode) + @test invoke(f, ci, args...) == 2 +end From bea6fd124f2035a2583ce41802f89928d4d7a5ad Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Fri, 28 Mar 2025 10:34:48 -0400 Subject: [PATCH 12/20] inference: add internal SOURCE_MODE_GET_SOURCE mode (#57878) Helps avoids some code duplication and divergence of inference behaviors in some edge cases, also slightly more correct caching in some edge cases. (cherry picked from commit e7ff95d643fbbe5445beda7b1d859d327d34f729) --- Compiler/src/bootstrap.jl | 9 +- Compiler/src/typeinfer.jl | 167 ++++++++++++++++++++---------------- Compiler/src/verifytrim.jl | 17 +++- Compiler/test/verifytrim.jl | 2 +- src/aotcompile.cpp | 14 +++ src/gf.c | 23 +++-- src/jitlayers.cpp | 4 +- src/julia_internal.h | 3 +- 8 files changed, 141 insertions(+), 98 deletions(-) diff --git a/Compiler/src/bootstrap.jl b/Compiler/src/bootstrap.jl index 2671ea114e818..a847d1fb835c7 100644 --- a/Compiler/src/bootstrap.jl +++ b/Compiler/src/bootstrap.jl @@ -67,17 +67,10 @@ function bootstrap!() end mi = specialize_method(m.method, Tuple{params...}, m.sparams) #isa_compileable_sig(mi) || println(stderr, "WARNING: inferring `", mi, "` which isn't expected to be called.") - push!(methods, mi) + typeinf_ext_toplevel(mi, world, isa_compileable_sig(mi) ? 
SOURCE_MODE_ABI : SOURCE_MODE_NOT_REQUIRED) end end end - codeinfos = typeinf_ext_toplevel(methods, [world], TRIM_NO) - for i = 1:2:length(codeinfos) - ci = codeinfos[i]::CodeInstance - src = codeinfos[i + 1]::CodeInfo - isa_compileable_sig(ci.def) || continue # println(stderr, "WARNING: compiling `", ci.def, "` which isn't expected to be called.") - ccall(:jl_add_codeinst_to_jit, Cvoid, (Any, Any), ci, src) - end endtime = time() println("Base.Compiler ──── ", sub_float(endtime,starttime), " seconds") end diff --git a/Compiler/src/typeinfer.jl b/Compiler/src/typeinfer.jl index d3c8e65e214a2..59e0fbd8262e5 100644 --- a/Compiler/src/typeinfer.jl +++ b/Compiler/src/typeinfer.jl @@ -153,7 +153,7 @@ function finish!(interp::AbstractInterpreter, caller::InferenceState, validation # when compiling the compiler to inject everything eagerly # where codegen can start finding and using it right away mi = result.linfo - if mi.def isa Method && isa_compileable_sig(mi) + if mi.def isa Method && isa_compileable_sig(mi) && is_cached(caller) ccall(:jl_add_codeinst_to_jit, Cvoid, (Any, Any), ci, uncompressed) end end @@ -1099,10 +1099,10 @@ end """ SOURCE_MODE_NOT_REQUIRED -Indicates to inference that the source is not required and the only fields -of the resulting `CodeInstance` that the caller is interested in are types -and effects. Inference is still free to create a CodeInstance with source, -but is not required to do so. +Indicates to inference that the source is not required and the only fields of +the resulting `CodeInstance` that the caller is interested in are return or +exception types and IPO effects. Inference is still free to create source for +it or add it to the JIT even, but is not required or expected to do so. """ const SOURCE_MODE_NOT_REQUIRED = 0x0 @@ -1110,28 +1110,51 @@ const SOURCE_MODE_NOT_REQUIRED = 0x0 SOURCE_MODE_ABI Indicates to inference that it should return a CodeInstance that can -either be `->invoke`'d (because it has already been compiled or because -it has constabi) or one that can be made so by compiling its `->inferred` -field. - -N.B.: The `->inferred` field is volatile and the compiler may delete it. +be `->invoke`'d (because it has already been compiled). """ const SOURCE_MODE_ABI = 0x1 """ - ci_has_abi(code::CodeInstance) + SOURCE_MODE_GET_SOURCE + +Indicates to inference that it should return a CodeInstance after it has +prepared interp to be able to provide source code for it. +""" +const SOURCE_MODE_GET_SOURCE = 0xf -Determine whether this CodeInstance is something that could be invoked if we gave it -to the runtime system (either because it already has an ->invoke ptr, or -because it has source that could be compiled). Note that this information may -be stale by the time the user see it, so the user will need to perform their -own checks if they actually need the abi from it. """ -function ci_has_abi(code::CodeInstance) + ci_has_abi(interp::AbstractInterpreter, code::CodeInstance) + +Determine whether this CodeInstance is something that could be invoked if +interp gave it to the runtime system (either because it already has an ->invoke +ptr, or because interp has source that could be compiled). +""" +function ci_has_abi(interp::AbstractInterpreter, code::CodeInstance) (@atomic :acquire code.invoke) !== C_NULL && return true + return ci_has_source(interp, code) +end + +""" + ci_has_source(interp::AbstractInterpreter, code::CodeInstance) + +Determine whether this CodeInstance is something that could be compiled from +source that interp has. 
+""" +function ci_has_source(interp::AbstractInterpreter, code::CodeInstance) + codegen = codegen_cache(interp) + codegen === nothing && return false + use_const_api(code) && return true + haskey(codegen, code) && return true inf = @atomic :monotonic code.inferred - if code.owner === nothing ? (isa(inf, CodeInfo) || isa(inf, String)) : inf !== nothing - # interp.codegen[code] = maybe_uncompress(code, inf) # TODO: the correct way to ensure this information doesn't become stale would be to push it into the stable codegen cache + if isa(inf, String) + inf = _uncompressed_ir(code, inf) + end + if code.owner === nothing + if isa(inf, CodeInfo) + codegen[code] = inf + return true + end + elseif inf !== nothing return true end return false @@ -1141,9 +1164,10 @@ function ci_has_invoke(code::CodeInstance) return (@atomic :monotonic code.invoke) !== C_NULL end -function ci_meets_requirement(code::CodeInstance, source_mode::UInt8) +function ci_meets_requirement(interp::AbstractInterpreter, code::CodeInstance, source_mode::UInt8) source_mode == SOURCE_MODE_NOT_REQUIRED && return true - source_mode == SOURCE_MODE_ABI && return ci_has_abi(code) + source_mode == SOURCE_MODE_ABI && return ci_has_abi(interp, code) + source_mode == SOURCE_MODE_GET_SOURCE && return ci_has_source(interp, code) return false end @@ -1153,7 +1177,7 @@ function typeinf_ext(interp::AbstractInterpreter, mi::MethodInstance, source_mod let code = get(code_cache(interp), mi, nothing) if code isa CodeInstance # see if this code already exists in the cache - if ci_meets_requirement(code, source_mode) + if ci_meets_requirement(interp, code, source_mode) ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time) return code end @@ -1165,7 +1189,7 @@ function typeinf_ext(interp::AbstractInterpreter, mi::MethodInstance, source_mod let code = get(code_cache(interp), mi, nothing) if code isa CodeInstance # see if this code already exists in the cache - if ci_meets_requirement(code, source_mode) + if ci_meets_requirement(interp, code, source_mode) engine_reject(interp, ci) ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time) return code @@ -1196,18 +1220,11 @@ function typeinf_ext(interp::AbstractInterpreter, mi::MethodInstance, source_mod ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time) ci = result.ci # reload from result in case it changed + codegen = codegen_cache(interp) @assert frame.cache_mode != CACHE_MODE_NULL - @assert is_result_constabi_eligible(result) || begin - codegen = codegen_cache(interp) - codegen === nothing || haskey(codegen, ci) - end + @assert is_result_constabi_eligible(result) || codegen === nothing || haskey(codegen, ci) @assert is_result_constabi_eligible(result) == use_const_api(ci) @assert isdefined(ci, :inferred) "interpreter did not fulfill our expectations" - if !is_cached(frame) && source_mode == SOURCE_MODE_ABI - # XXX: jl_type_infer somewhat ambiguously assumes this must be cached - # XXX: this should be using the CI from the cache, if possible instead: haskey(cache, mi) && (ci = cache[mi]) - code_cache(interp)[mi] = ci - end return ci end @@ -1221,35 +1238,9 @@ end typeinf_type(interp::AbstractInterpreter, match::MethodMatch) = typeinf_type(interp, specialize_method(match)) function typeinf_type(interp::AbstractInterpreter, mi::MethodInstance) - # n.b.: this could be replaced with @something(typeinf_ext(interp, mi, SOURCE_MODE_NOT_REQUIRED), return nothing).rettype - start_time = ccall(:jl_typeinf_timing_begin, UInt64, ()) - let code = get(code_cache(interp), mi, nothing) - if code isa 
CodeInstance - # see if this rettype already exists in the cache - ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time) - return code.rettype - end - end - ci = engine_reserve(interp, mi) - let code = get(code_cache(interp), mi, nothing) - if code isa CodeInstance - engine_reject(interp, ci) - # see if this rettype already exists in the cache - ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time) - return code.rettype - end - end - result = InferenceResult(mi, typeinf_lattice(interp)) - result.ci = ci - frame = InferenceState(result, #=cache_mode=#:global, interp) - if frame === nothing - engine_reject(interp, ci) - return nothing - end - typeinf(interp, frame) - ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time) - is_inferred(result) || return nothing - return widenconst(ignorelimited(result.result)) + ci = typeinf_ext(interp, mi, SOURCE_MODE_NOT_REQUIRED) + ci isa CodeInstance || return nothing + return ci.rettype end # collect a list of all code that is needed along with CodeInstance to codegen it fully @@ -1286,18 +1277,31 @@ function add_codeinsts_to_jit!(interp::AbstractInterpreter, ci, source_mode::UIn src = _uncompressed_ir(callee, src) end if !isa(src, CodeInfo) - newcallee = typeinf_ext(interp, callee.def, source_mode) + newcallee = typeinf_ext(interp, callee.def, source_mode) # always SOURCE_MODE_ABI if newcallee isa CodeInstance callee === ci && (ci = newcallee) # ci stopped meeting the requirements after typeinf_ext last checked, try again with newcallee push!(tocompile, newcallee) - #else - # println("warning: could not get source code for ", callee.def) + end + if newcallee !== callee + push!(inspected, callee) end continue end end push!(inspected, callee) collectinvokes!(tocompile, src) + mi = get_ci_mi(callee) + if iszero(ccall(:jl_mi_cache_has_ci, Cint, (Any, Any), mi, callee)) + cached = ccall(:jl_get_ci_equiv, Any, (Any, UInt), callee, get_inference_world(interp))::CodeInstance + if cached === callee + # make sure callee is gc-rooted and cached, as required by jl_add_codeinst_to_jit + code_cache(interp)[mi] = callee + else + # use an existing CI from the cache, if there is available one that is compatible + callee === ci && (ci = cached) + callee = cached + end + end ccall(:jl_add_codeinst_to_jit, Cvoid, (Any, Any), callee, src) end return ci @@ -1341,7 +1345,7 @@ function typeinf_ext_toplevel(methods::Vector{Any}, worlds::Vector{UInt}, trim_m # and this is either the primary world, or not applicable in the primary world # then we want to compile and emit this if item.def.primary_world <= this_world <= item.def.deleted_world - ci = typeinf_ext(interp, item, SOURCE_MODE_NOT_REQUIRED) + ci = typeinf_ext(interp, item, SOURCE_MODE_GET_SOURCE) ci isa CodeInstance && push!(tocompile, ci) end elseif item isa SimpleVector && latest @@ -1352,7 +1356,7 @@ function typeinf_ext_toplevel(methods::Vector{Any}, worlds::Vector{UInt}, trim_m sig, this_world, #= mt_cache =# 0) if ptr !== C_NULL mi = unsafe_pointer_to_objref(ptr)::MethodInstance - ci = typeinf_ext(interp, mi, SOURCE_MODE_NOT_REQUIRED) + ci = typeinf_ext(interp, mi, SOURCE_MODE_GET_SOURCE) ci isa CodeInstance && push!(tocompile, ci) end # additionally enqueue the ccallable entrypoint / adapter, which implicitly @@ -1364,26 +1368,37 @@ function typeinf_ext_toplevel(methods::Vector{Any}, worlds::Vector{UInt}, trim_m while !isempty(tocompile) callee = pop!(tocompile) callee in inspected && continue - push!(inspected, callee) # now make sure everything has source code, if desired mi = get_ci_mi(callee) 
def = mi.def if use_const_api(callee) src = codeinfo_for_const(interp, mi, callee.rettype_const) - elseif haskey(interp.codegen, callee) - src = interp.codegen[callee] - elseif isa(def, Method) && !InferenceParams(interp).force_enable_inference && ccall(:jl_get_module_infer, Cint, (Any,), def.module) == 0 - src = retrieve_code_info(mi, get_inference_world(interp)) else - # TODO: typeinf_code could return something with different edges/ages/owner/abi (needing an update to callee), which we don't handle here - src = typeinf_code(interp, mi, true) + src = get(interp.codegen, callee, nothing) + if src === nothing + newcallee = typeinf_ext(interp, mi, SOURCE_MODE_GET_SOURCE) + if newcallee isa CodeInstance + @assert use_const_api(newcallee) || haskey(interp.codegen, newcallee) + push!(tocompile, newcallee) + end + if newcallee !== callee + push!(inspected, callee) + end + continue + end end + push!(inspected, callee) if src isa CodeInfo collectinvokes!(tocompile, src) - # It is somewhat ambiguous if typeinf_ext might have callee in the caches, - # but for the purpose of native compile, we always want them put there. + # try to reuse an existing CodeInstance from before to avoid making duplicates in the cache if iszero(ccall(:jl_mi_cache_has_ci, Cint, (Any, Any), mi, callee)) - code_cache(interp)[mi] = callee + cached = ccall(:jl_get_ci_equiv, Any, (Any, UInt), callee, this_world)::CodeInstance + if cached === callee + code_cache(interp)[mi] = callee + else + # Use an existing CI from the cache, if there is available one that is compatible + callee = cached + end end push!(codeinfos, callee) push!(codeinfos, src) diff --git a/Compiler/src/verifytrim.jl b/Compiler/src/verifytrim.jl index 5a80082c63330..2365d885efd79 100644 --- a/Compiler/src/verifytrim.jl +++ b/Compiler/src/verifytrim.jl @@ -110,7 +110,7 @@ end function verify_print_error(io::IOContext{IO}, desc::CallMissing, parents::ParentMap) (; codeinst, codeinfo, sptypes, stmtidx, desc) = desc frames = verify_create_stackframes(codeinst, stmtidx, parents) - print(io, desc, " from ") + print(io, desc, " from statement ") verify_print_stmt(io, codeinfo, sptypes, stmtidx) Base.show_backtrace(io, frames) print(io, "\n\n") @@ -181,6 +181,11 @@ function verify_codeinstance!(codeinst::CodeInstance, codeinfo::CodeInfo, inspec if edge isa CodeInstance haskey(parents, edge) || (parents[edge] = (codeinst, i)) edge in inspected && continue + edge_mi = get_ci_mi(edge) + if edge_mi === edge.def + ci = get(caches, edge_mi, nothing) + ci isa CodeInstance && continue # assume that only this_world matters for trim + end end # TODO: check for calls to Base.atexit? elseif isexpr(stmt, :call) @@ -287,7 +292,7 @@ function get_verify_typeinf_trim(codeinfos::Vector{Any}) # TODO: should we find a way to indicate to the user that this gets called via ccallable? # parent[ci] = something asrt = ci.rettype - ci in inspected + true else false end @@ -326,6 +331,14 @@ function verify_typeinf_trim(io::IO, codeinfos::Vector{Any}, onlywarn::Bool) verify_print_error(io, desc, parents) end + ## TODO: compute and display the minimum and/or full call graph instead of merely the first parent stacktrace? 
+ #for i = 1:length(codeinfos) + # item = codeinfos[i] + # if item isa CodeInstance + # println(item, "::", item.rettype) + # end + #end + let severity = 0 if counts[1] > 0 || counts[2] > 0 print("Trim verify finished with ") diff --git a/Compiler/test/verifytrim.jl b/Compiler/test/verifytrim.jl index a03804a94cb62..0e9d040ef0c9b 100644 --- a/Compiler/test/verifytrim.jl +++ b/Compiler/test/verifytrim.jl @@ -33,7 +33,7 @@ let infos = typeinf_ext_toplevel(Any[Core.svec(Base.SecretBuffer, Tuple{Type{Bas @test occursin("finalizer", desc.desc) repr = sprint(verify_print_error, desc, parents) @test occursin( - r"""^unresolved finalizer registered from \(Core.finalizer\)\(Base.final_shred!, %new\(\)::Base.SecretBuffer\)::Nothing + r"""^unresolved finalizer registered from statement \(Core.finalizer\)\(Base.final_shred!, %new\(\)::Base.SecretBuffer\)::Nothing Stacktrace: \[1\] finalizer\(f::typeof\(Base.final_shred!\), o::Base.SecretBuffer\) @ Base gcutils.jl:(\d+) \[inlined\] diff --git a/src/aotcompile.cpp b/src/aotcompile.cpp index 688aeca1b242c..b1d925d89c7ce 100644 --- a/src/aotcompile.cpp +++ b/src/aotcompile.cpp @@ -675,6 +675,20 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm fargs[0] = (jl_value_t*)codeinfos; void *data = jl_emit_native(codeinfos, llvmmod, &cgparams, external_linkage); + // examine everything just emitted and save it to the caches + if (!external_linkage) { + for (size_t i = 0, l = jl_array_nrows(codeinfos); i < l; i++) { + jl_value_t *item = jl_array_ptr_ref(codeinfos, i); + if (jl_is_code_instance(item)) { + // now add it to our compilation results + jl_code_instance_t *codeinst = (jl_code_instance_t*)item; + jl_code_info_t *src = (jl_code_info_t*)jl_array_ptr_ref(codeinfos, ++i); + assert(jl_is_code_info(src)); + jl_add_codeinst_to_cache(codeinst, src); + } + } + } + // move everything inside, now that we've merged everything // (before adding the exported headers) ((jl_native_code_desc_t*)data)->M.withModuleDo([&](Module &M) { diff --git a/src/gf.c b/src/gf.c index f8d88c4e44e38..860e5e5aa1247 100644 --- a/src/gf.c +++ b/src/gf.c @@ -585,8 +585,8 @@ JL_DLLEXPORT int jl_mi_cache_has_ci(jl_method_instance_t *mi, return 0; } -// look for something with an egal ABI and properties that is already in the JIT (compiled=true) or simply in the cache (compiled=false) -JL_DLLEXPORT jl_code_instance_t *jl_get_ci_equiv(jl_code_instance_t *ci JL_PROPAGATES_ROOT, int compiled) JL_NOTSAFEPOINT +// look for something with an egal ABI and properties that is already in the JIT for a whole edge (target_world=0) or can be added to the JIT with new source just for target_world. +JL_DLLEXPORT jl_code_instance_t *jl_get_ci_equiv(jl_code_instance_t *ci JL_PROPAGATES_ROOT, size_t target_world) JL_NOTSAFEPOINT { jl_value_t *def = ci->def; jl_method_instance_t *mi = jl_get_ci_mi(ci); @@ -598,9 +598,9 @@ JL_DLLEXPORT jl_code_instance_t *jl_get_ci_equiv(jl_code_instance_t *ci JL_PROPA while (codeinst) { if (codeinst != ci && jl_atomic_load_relaxed(&codeinst->inferred) != NULL && - (!compiled || jl_atomic_load_relaxed(&codeinst->invoke) != NULL) && - jl_atomic_load_relaxed(&codeinst->min_world) <= min_world && - jl_atomic_load_relaxed(&codeinst->max_world) >= max_world && + (target_world ? 1 : jl_atomic_load_relaxed(&codeinst->invoke) != NULL) && + jl_atomic_load_relaxed(&codeinst->min_world) <= (target_world ? target_world : min_world) && + jl_atomic_load_relaxed(&codeinst->max_world) >= (target_world ? 
target_world : max_world) && jl_egal(codeinst->def, def) && jl_egal(codeinst->owner, owner) && jl_egal(codeinst->rettype, rettype)) { @@ -608,7 +608,7 @@ JL_DLLEXPORT jl_code_instance_t *jl_get_ci_equiv(jl_code_instance_t *ci JL_PROPA } codeinst = jl_atomic_load_relaxed(&codeinst->next); } - return (jl_code_instance_t*)jl_nothing; + return ci; } @@ -2795,10 +2795,9 @@ void jl_read_codeinst_invoke(jl_code_instance_t *ci, uint8_t *specsigflags, jl_c jl_method_instance_t *jl_normalize_to_compilable_mi(jl_method_instance_t *mi JL_PROPAGATES_ROOT); -JL_DLLEXPORT void jl_add_codeinst_to_jit(jl_code_instance_t *codeinst, jl_code_info_t *src) +JL_DLLEXPORT void jl_add_codeinst_to_cache(jl_code_instance_t *codeinst, jl_code_info_t *src) { assert(jl_is_code_info(src)); - jl_emit_codeinst_to_jit(codeinst, src); jl_method_instance_t *mi = jl_get_ci_mi(codeinst); if (jl_generating_output() && jl_is_method(mi->def.method) && jl_atomic_load_relaxed(&codeinst->inferred) == jl_nothing) { jl_value_t *compressed = jl_compress_ir(mi->def.method, src); @@ -2814,6 +2813,14 @@ JL_DLLEXPORT void jl_add_codeinst_to_jit(jl_code_instance_t *codeinst, jl_code_i } } + +JL_DLLEXPORT void jl_add_codeinst_to_jit(jl_code_instance_t *codeinst, jl_code_info_t *src) +{ + assert(jl_is_code_info(src)); + jl_emit_codeinst_to_jit(codeinst, src); + jl_add_codeinst_to_cache(codeinst, src); +} + jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t world) { // quick check if we already have a compiled result diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp index 695953b602653..b8781d2bfe898 100644 --- a/src/jitlayers.cpp +++ b/src/jitlayers.cpp @@ -383,8 +383,8 @@ static int jl_analyze_workqueue(jl_code_instance_t *callee, jl_codegen_params_t } if (preal_decl.empty()) { // there may be an equivalent method already compiled (or at least registered with the JIT to compile), in which case we should be using that instead - jl_code_instance_t *compiled_ci = jl_get_ci_equiv(codeinst, 1); - if ((jl_value_t*)compiled_ci != jl_nothing) { + jl_code_instance_t *compiled_ci = jl_get_ci_equiv(codeinst, 0); + if (compiled_ci != codeinst) { codeinst = compiled_ci; uint8_t specsigflags; void *fptr; diff --git a/src/julia_internal.h b/src/julia_internal.h index 479ccbf961e71..bdcc816cbdd1b 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -685,6 +685,7 @@ JL_DLLEXPORT jl_method_instance_t *jl_get_unspecialized(jl_method_t *def JL_PROP JL_DLLEXPORT void jl_read_codeinst_invoke(jl_code_instance_t *ci, uint8_t *specsigflags, jl_callptr_t *invoke, void **specptr, int waitcompile); JL_DLLEXPORT jl_method_instance_t *jl_method_match_to_mi(jl_method_match_t *match, size_t world, size_t min_valid, size_t max_valid, int mt_cache); JL_DLLEXPORT void jl_add_codeinst_to_jit(jl_code_instance_t *codeinst, jl_code_info_t *src); +JL_DLLEXPORT void jl_add_codeinst_to_cache(jl_code_instance_t *codeinst, jl_code_info_t *src); JL_DLLEXPORT jl_code_instance_t *jl_new_codeinst_uninit(jl_method_instance_t *mi, jl_value_t *owner); JL_DLLEXPORT jl_code_instance_t *jl_new_codeinst( @@ -694,7 +695,7 @@ JL_DLLEXPORT jl_code_instance_t *jl_new_codeinst( int32_t const_flags, size_t min_world, size_t max_world, uint32_t effects, jl_value_t *analysis_results, jl_debuginfo_t *di, jl_svec_t *edges /* , int absolute_max*/); -JL_DLLEXPORT jl_code_instance_t *jl_get_ci_equiv(jl_code_instance_t *ci JL_PROPAGATES_ROOT, int compiled) JL_NOTSAFEPOINT; +JL_DLLEXPORT jl_code_instance_t *jl_get_ci_equiv(jl_code_instance_t *ci JL_PROPAGATES_ROOT, 
size_t target_world) JL_NOTSAFEPOINT; STATIC_INLINE jl_method_instance_t *jl_get_ci_mi(jl_code_instance_t *ci JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT { From f70dfcf60f34c277b29dfd8f8fa6ac7db97b3718 Mon Sep 17 00:00:00 2001 From: Neven Sajko <4944410+nsajko@users.noreply.github.com> Date: Mon, 31 Mar 2025 10:00:17 +0200 Subject: [PATCH 13/20] `_precompilepkgs`: interactive progress display: fix unintended capture (#57932) The variable `str` also exists in one of the enclosing closures. Use a new variable, as was surely intended, instead of capturing and mutating the `str`. Improves the sysimage's resistance to invalidation. (cherry picked from commit fcf492d4b39c2becde6cd5ca3bb63fcbc7a308d3) --- base/precompilation.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/base/precompilation.jl b/base/precompilation.jl index 5392e119d25a2..3ab9fcad5aee6 100644 --- a/base/precompilation.jl +++ b/base/precompilation.jl @@ -835,8 +835,8 @@ function _precompilepkgs(pkgs::Vector{String}, # window between print cycles termwidth = displaysize(io)[2] - 4 if !final_loop - str = sprint(io -> show_progress(io, bar; termwidth, carriagereturn=false); context=io) - print(iostr, Base._truncate_at_width_or_chars(true, str, termwidth), "\n") + s = sprint(io -> show_progress(io, bar; termwidth, carriagereturn=false); context=io) + print(iostr, Base._truncate_at_width_or_chars(true, s, termwidth), "\n") end for pkg_config in pkg_queue_show dep, config = pkg_config From 594b17d99ddd02b8416b1949c44428d014c05efc Mon Sep 17 00:00:00 2001 From: Cody Tapscott <84105208+topolarity@users.noreply.github.com> Date: Mon, 31 Mar 2025 04:03:19 -0400 Subject: [PATCH 14/20] staticdata: Memoize `type_in_worklist` query (#57917) When pre-compiling `stdlib/` this cache has a 91% hit rate, so this seems fairly profitable. It also dramatically improves some pathological cases, a few of which have been hit in the wild (arguably due to inference bugs) Without this PR, this package takes exponentially long to pre-compile: ```julia function BigType(N) (N == 0) && return Nothing T = BigType(N-1) return Pair{T,T} end foo(::Type{T}) where T = T precompile(foo, (Type{BigType(40)},)) ``` For an in-the-wild test case hit by a customer, this reduces pre-compilation time from over an hour to just ~two and a half minutes. Resolves #53331. (cherry picked from commit 89271dc5a8a3236a148413409abb019bf973f6de) --- src/staticdata.c | 76 +++++++++++++++++++++++++++--------------- src/staticdata_utils.c | 70 +++++++++++++++++++++++--------------- 2 files changed, 94 insertions(+), 52 deletions(-) diff --git a/src/staticdata.c b/src/staticdata.c index 62f3feeaa2159..cab6e6be510b0 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -92,6 +92,22 @@ static const size_t WORLD_AGE_REVALIDATION_SENTINEL = 0x1; JL_DLLEXPORT size_t jl_require_world = ~(size_t)0; JL_DLLEXPORT _Atomic(size_t) jl_first_image_replacement_world = ~(size_t)0; +// This structure is used to store hash tables for the memoization +// of queries in staticdata.c (currently only `type_in_worklist`). 
+typedef struct { + htable_t type_in_worklist; +} jl_query_cache; + +static void init_query_cache(jl_query_cache *cache) +{ + htable_new(&cache->type_in_worklist, 0); +} + +static void destroy_query_cache(jl_query_cache *cache) +{ + htable_free(&cache->type_in_worklist); +} + #include "staticdata_utils.c" #include "precompile_utils.c" @@ -555,6 +571,7 @@ typedef struct { jl_array_t *method_roots_list; htable_t method_roots_index; uint64_t worklist_key; + jl_query_cache *query_cache; jl_ptls_t ptls; jl_image_t *image; int8_t incremental; @@ -702,14 +719,13 @@ static int jl_needs_serialization(jl_serializer_state *s, jl_value_t *v) JL_NOTS return 1; } - -static int caching_tag(jl_value_t *v) JL_NOTSAFEPOINT +static int caching_tag(jl_value_t *v, jl_query_cache *query_cache) JL_NOTSAFEPOINT { if (jl_is_method_instance(v)) { jl_method_instance_t *mi = (jl_method_instance_t*)v; jl_value_t *m = mi->def.value; if (jl_is_method(m) && jl_object_in_image(m)) - return 1 + type_in_worklist(mi->specTypes); + return 1 + type_in_worklist(mi->specTypes, query_cache); } if (jl_is_binding(v)) { jl_globalref_t *gr = ((jl_binding_t*)v)->globalref; @@ -724,24 +740,24 @@ static int caching_tag(jl_value_t *v) JL_NOTSAFEPOINT if (jl_is_tuple_type(dt) ? !dt->isconcretetype : dt->hasfreetypevars) return 0; // aka !is_cacheable from jltypes.c if (jl_object_in_image((jl_value_t*)dt->name)) - return 1 + type_in_worklist(v); + return 1 + type_in_worklist(v, query_cache); } jl_value_t *dtv = jl_typeof(v); if (jl_is_datatype_singleton((jl_datatype_t*)dtv)) { - return 1 - type_in_worklist(dtv); // these are already recached in the datatype in the image + return 1 - type_in_worklist(dtv, query_cache); // these are already recached in the datatype in the image } return 0; } -static int needs_recaching(jl_value_t *v) JL_NOTSAFEPOINT +static int needs_recaching(jl_value_t *v, jl_query_cache *query_cache) JL_NOTSAFEPOINT { - return caching_tag(v) == 2; + return caching_tag(v, query_cache) == 2; } -static int needs_uniquing(jl_value_t *v) JL_NOTSAFEPOINT +static int needs_uniquing(jl_value_t *v, jl_query_cache *query_cache) JL_NOTSAFEPOINT { assert(!jl_object_in_image(v)); - return caching_tag(v) == 1; + return caching_tag(v, query_cache) == 1; } static void record_field_change(jl_value_t **addr, jl_value_t *newval) JL_NOTSAFEPOINT @@ -861,7 +877,7 @@ static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_ jl_datatype_t *dt = (jl_datatype_t*)v; // ensure all type parameters are recached jl_queue_for_serialization_(s, (jl_value_t*)dt->parameters, 1, 1); - if (jl_is_datatype_singleton(dt) && needs_uniquing(dt->instance)) { + if (jl_is_datatype_singleton(dt) && needs_uniquing(dt->instance, s->query_cache)) { assert(jl_needs_serialization(s, dt->instance)); // should be true, since we visited dt // do not visit dt->instance for our template object as it leads to unwanted cycles here // (it may get serialized from elsewhere though) @@ -872,7 +888,7 @@ static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_ if (s->incremental && jl_is_method_instance(v)) { jl_method_instance_t *mi = (jl_method_instance_t*)v; jl_value_t *def = mi->def.value; - if (needs_uniquing(v)) { + if (needs_uniquing(v, s->query_cache)) { // we only need 3 specific fields of this (the rest are not used) jl_queue_for_serialization(s, mi->def.value); jl_queue_for_serialization(s, mi->specTypes); @@ -887,7 +903,7 @@ static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_ 
record_field_change((jl_value_t**)&mi->cache, NULL); } else { - assert(!needs_recaching(v)); + assert(!needs_recaching(v, s->query_cache)); } // n.b. opaque closures cannot be inspected and relied upon like a // normal method since they can get improperly introduced by generated @@ -897,7 +913,7 @@ static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_ // error now. } if (s->incremental && jl_is_binding(v)) { - if (needs_uniquing(v)) { + if (needs_uniquing(v, s->query_cache)) { jl_binding_t *b = (jl_binding_t*)v; jl_queue_for_serialization(s, b->globalref->mod); jl_queue_for_serialization(s, b->globalref->name); @@ -1121,9 +1137,9 @@ static void jl_queue_for_serialization_(jl_serializer_state *s, jl_value_t *v, i // Items that require postorder traversal must visit their children prior to insertion into // the worklist/serialization_order (and also before their first use) if (s->incremental && !immediate) { - if (jl_is_datatype(t) && needs_uniquing(v)) + if (jl_is_datatype(t) && needs_uniquing(v, s->query_cache)) immediate = 1; - if (jl_is_datatype_singleton((jl_datatype_t*)t) && needs_uniquing(v)) + if (jl_is_datatype_singleton((jl_datatype_t*)t) && needs_uniquing(v, s->query_cache)) immediate = 1; } @@ -1286,7 +1302,7 @@ static uintptr_t _backref_id(jl_serializer_state *s, jl_value_t *v, jl_array_t * static void record_uniquing(jl_serializer_state *s, jl_value_t *fld, uintptr_t offset) JL_NOTSAFEPOINT { - if (s->incremental && jl_needs_serialization(s, fld) && needs_uniquing(fld)) { + if (s->incremental && jl_needs_serialization(s, fld) && needs_uniquing(fld, s->query_cache)) { if (jl_is_datatype(fld) || jl_is_datatype_singleton((jl_datatype_t*)jl_typeof(fld))) arraylist_push(&s->uniquing_types, (void*)(uintptr_t)offset); else if (jl_is_method_instance(fld) || jl_is_binding(fld)) @@ -1510,7 +1526,7 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED // write header if (object_id_expected) write_uint(f, jl_object_id(v)); - if (s->incremental && jl_needs_serialization(s, (jl_value_t*)t) && needs_uniquing((jl_value_t*)t)) + if (s->incremental && jl_needs_serialization(s, (jl_value_t*)t) && needs_uniquing((jl_value_t*)t, s->query_cache)) arraylist_push(&s->uniquing_types, (void*)(uintptr_t)(ios_pos(f)|1)); if (f == s->const_data) write_uint(s->const_data, ((uintptr_t)t->smalltag << 4) | GC_OLD_MARKED | GC_IN_IMAGE); @@ -1521,7 +1537,7 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED layout_table.items[item] = (void*)(reloc_offset | (f == s->const_data)); // store the inverse mapping of `serialization_order` (`id` => object-as-streampos) if (s->incremental) { - if (needs_uniquing(v)) { + if (needs_uniquing(v, s->query_cache)) { if (jl_typetagis(v, jl_binding_type)) { jl_binding_t *b = (jl_binding_t*)v; if (b->globalref == NULL) @@ -1550,7 +1566,7 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED assert(jl_is_datatype_singleton(t) && "unreachable"); } } - else if (needs_recaching(v)) { + else if (needs_recaching(v, s->query_cache)) { arraylist_push(jl_is_datatype(v) ? 
&s->fixup_types : &s->fixup_objs, (void*)reloc_offset); } } @@ -1985,7 +2001,7 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED } } void *superidx = ptrhash_get(&serialization_order, dt->super); - if (s->incremental && superidx != HT_NOTFOUND && from_seroder_entry(superidx) > item && needs_uniquing((jl_value_t*)dt->super)) + if (s->incremental && superidx != HT_NOTFOUND && from_seroder_entry(superidx) > item && needs_uniquing((jl_value_t*)dt->super, s->query_cache)) arraylist_push(&s->uniquing_super, dt->super); } else if (jl_is_typename(v)) { @@ -2919,13 +2935,14 @@ JL_DLLEXPORT jl_value_t *jl_as_global_root(jl_value_t *val, int insert) static void jl_prepare_serialization_data(jl_array_t *mod_array, jl_array_t *newly_inferred, /* outputs */ jl_array_t **extext_methods JL_REQUIRE_ROOTED_SLOT, jl_array_t **new_ext_cis JL_REQUIRE_ROOTED_SLOT, - jl_array_t **edges JL_REQUIRE_ROOTED_SLOT) + jl_array_t **edges JL_REQUIRE_ROOTED_SLOT, + jl_query_cache *query_cache) { // extext_methods: [method1, ...], worklist-owned "extending external" methods added to functions owned by modules outside the worklist // edges: [caller1, ext_targets, ...] for worklist-owned methods calling external methods // Save the inferred code from newly inferred, external methods - *new_ext_cis = queue_external_cis(newly_inferred); + *new_ext_cis = queue_external_cis(newly_inferred, query_cache); // Collect method extensions and edges data *extext_methods = jl_alloc_vec_any(0); @@ -2955,7 +2972,8 @@ static void jl_prepare_serialization_data(jl_array_t *mod_array, jl_array_t *new // In addition to the system image (where `worklist = NULL`), this can also save incremental images with external linkage static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array, jl_array_t *worklist, jl_array_t *extext_methods, - jl_array_t *new_ext_cis, jl_array_t *edges) + jl_array_t *new_ext_cis, jl_array_t *edges, + jl_query_cache *query_cache) { htable_new(&field_replace, 0); htable_new(&bits_replace, 0); @@ -3062,6 +3080,7 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array, ios_mem(&gvar_record, 0); ios_mem(&fptr_record, 0); jl_serializer_state s = {0}; + s.query_cache = query_cache; s.incremental = !(worklist == NULL); s.s = &sysimg; s.const_data = &const_data; @@ -3422,11 +3441,14 @@ JL_DLLEXPORT void jl_create_system_image(void **_native_data, jl_array_t *workli int64_t datastartpos = 0; JL_GC_PUSH4(&mod_array, &extext_methods, &new_ext_cis, &edges); + jl_query_cache query_cache; + init_query_cache(&query_cache); + if (worklist) { mod_array = jl_get_loaded_modules(); // __toplevel__ modules loaded in this session (from Base.loaded_modules_array) // Generate _native_data` if (_native_data != NULL) { - jl_prepare_serialization_data(mod_array, newly_inferred, &extext_methods, &new_ext_cis, NULL); + jl_prepare_serialization_data(mod_array, newly_inferred, &extext_methods, &new_ext_cis, NULL, &query_cache); jl_precompile_toplevel_module = (jl_module_t*)jl_array_ptr_ref(worklist, jl_array_len(worklist)-1); *_native_data = jl_precompile_worklist(worklist, extext_methods, new_ext_cis); jl_precompile_toplevel_module = NULL; @@ -3457,7 +3479,7 @@ JL_DLLEXPORT void jl_create_system_image(void **_native_data, jl_array_t *workli assert((ct->reentrant_timing & 0b1110) == 0); ct->reentrant_timing |= 0b1000; if (worklist) { - jl_prepare_serialization_data(mod_array, newly_inferred, &extext_methods, &new_ext_cis, &edges); + jl_prepare_serialization_data(mod_array, newly_inferred, 
&extext_methods, &new_ext_cis, &edges, &query_cache); if (!emit_split) { write_int32(f, 0); // No clone_targets write_padding(f, LLT_ALIGN(ios_pos(f), JL_CACHE_BYTE_ALIGNMENT) - ios_pos(f)); @@ -3469,7 +3491,7 @@ JL_DLLEXPORT void jl_create_system_image(void **_native_data, jl_array_t *workli } if (_native_data != NULL) native_functions = *_native_data; - jl_save_system_image_to_stream(ff, mod_array, worklist, extext_methods, new_ext_cis, edges); + jl_save_system_image_to_stream(ff, mod_array, worklist, extext_methods, new_ext_cis, edges, &query_cache); if (_native_data != NULL) native_functions = NULL; // make sure we don't run any Julia code concurrently before this point @@ -3498,6 +3520,8 @@ JL_DLLEXPORT void jl_create_system_image(void **_native_data, jl_array_t *workli } } + destroy_query_cache(&query_cache); + JL_GC_POP(); *s = f; if (emit_split) diff --git a/src/staticdata_utils.c b/src/staticdata_utils.c index e9f464b64470e..9bfd4c355efe6 100644 --- a/src/staticdata_utils.c +++ b/src/staticdata_utils.c @@ -131,63 +131,81 @@ JL_DLLEXPORT void jl_push_newly_inferred(jl_value_t* ci) JL_UNLOCK(&newly_inferred_mutex); } - // compute whether a type references something internal to worklist // and thus could not have existed before deserialize // and thus does not need delayed unique-ing -static int type_in_worklist(jl_value_t *v) JL_NOTSAFEPOINT +static int type_in_worklist(jl_value_t *v, jl_query_cache *cache) JL_NOTSAFEPOINT { if (jl_object_in_image(v)) return 0; // fast-path for rejection + + void *cached = HT_NOTFOUND; + if (cache != NULL) + cached = ptrhash_get(&cache->type_in_worklist, v); + + // fast-path for memoized results + if (cached != HT_NOTFOUND) + return cached == v; + + int result = 0; if (jl_is_uniontype(v)) { jl_uniontype_t *u = (jl_uniontype_t*)v; - return type_in_worklist(u->a) || - type_in_worklist(u->b); + result = type_in_worklist(u->a, cache) || + type_in_worklist(u->b, cache); } else if (jl_is_unionall(v)) { jl_unionall_t *ua = (jl_unionall_t*)v; - return type_in_worklist((jl_value_t*)ua->var) || - type_in_worklist(ua->body); + result = type_in_worklist((jl_value_t*)ua->var, cache) || + type_in_worklist(ua->body, cache); } else if (jl_is_typevar(v)) { jl_tvar_t *tv = (jl_tvar_t*)v; - return type_in_worklist(tv->lb) || - type_in_worklist(tv->ub); + result = type_in_worklist(tv->lb, cache) || + type_in_worklist(tv->ub, cache); } else if (jl_is_vararg(v)) { jl_vararg_t *tv = (jl_vararg_t*)v; - if (tv->T && type_in_worklist(tv->T)) - return 1; - if (tv->N && type_in_worklist(tv->N)) - return 1; + result = ((tv->T && type_in_worklist(tv->T, cache)) || + (tv->N && type_in_worklist(tv->N, cache))); } else if (jl_is_datatype(v)) { jl_datatype_t *dt = (jl_datatype_t*)v; - if (!jl_object_in_image((jl_value_t*)dt->name)) - return 1; - jl_svec_t *tt = dt->parameters; - size_t i, l = jl_svec_len(tt); - for (i = 0; i < l; i++) - if (type_in_worklist(jl_tparam(dt, i))) - return 1; + if (!jl_object_in_image((jl_value_t*)dt->name)) { + result = 1; + } + else { + jl_svec_t *tt = dt->parameters; + size_t i, l = jl_svec_len(tt); + for (i = 0; i < l; i++) { + if (type_in_worklist(jl_tparam(dt, i), cache)) { + result = 1; + break; + } + } + } } else { - return type_in_worklist(jl_typeof(v)); + return type_in_worklist(jl_typeof(v), cache); } - return 0; + + // Memoize result + if (cache != NULL) + ptrhash_put(&cache->type_in_worklist, (void*)v, result ? (void*)v : NULL); + + return result; } // When we infer external method instances, ensure they link back to the // package. 
Otherwise they might be, e.g., for external macros. // Implements Tarjan's SCC (strongly connected components) algorithm, simplified to remove the count variable -static int has_backedge_to_worklist(jl_method_instance_t *mi, htable_t *visited, arraylist_t *stack) +static int has_backedge_to_worklist(jl_method_instance_t *mi, htable_t *visited, arraylist_t *stack, jl_query_cache *query_cache) { jl_module_t *mod = mi->def.module; if (jl_is_method(mod)) mod = ((jl_method_t*)mod)->module; assert(jl_is_module(mod)); uint8_t is_precompiled = jl_atomic_load_relaxed(&mi->flags) & JL_MI_FLAGS_MASK_PRECOMPILED; - if (is_precompiled || !jl_object_in_image((jl_value_t*)mod) || type_in_worklist(mi->specTypes)) { + if (is_precompiled || !jl_object_in_image((jl_value_t*)mod) || type_in_worklist(mi->specTypes, query_cache)) { return 1; } if (!mi->backedges) { @@ -211,7 +229,7 @@ static int has_backedge_to_worklist(jl_method_instance_t *mi, htable_t *visited, jl_code_instance_t *be; i = get_next_edge(mi->backedges, i, NULL, &be); JL_GC_PROMISE_ROOTED(be); // get_next_edge propagates the edge for us here - int child_found = has_backedge_to_worklist(jl_get_ci_mi(be), visited, stack); + int child_found = has_backedge_to_worklist(jl_get_ci_mi(be), visited, stack, query_cache); if (child_found == 1 || child_found == 2) { // found what we were looking for, so terminate early found = 1; @@ -243,7 +261,7 @@ static int has_backedge_to_worklist(jl_method_instance_t *mi, htable_t *visited, // from the worklist or explicitly added by a `precompile` statement, and // (4) are the most recently computed result for that method. // These will be preserved in the image. -static jl_array_t *queue_external_cis(jl_array_t *list) +static jl_array_t *queue_external_cis(jl_array_t *list, jl_query_cache *query_cache) { if (list == NULL) return NULL; @@ -262,7 +280,7 @@ static jl_array_t *queue_external_cis(jl_array_t *list) jl_method_instance_t *mi = jl_get_ci_mi(ci); jl_method_t *m = mi->def.method; if (ci->owner == jl_nothing && jl_atomic_load_relaxed(&ci->inferred) && jl_is_method(m) && jl_object_in_image((jl_value_t*)m->module)) { - int found = has_backedge_to_worklist(mi, &visited, &stack); + int found = has_backedge_to_worklist(mi, &visited, &stack, query_cache); assert(found == 0 || found == 1 || found == 2); assert(stack.len == 0); if (found == 1 && jl_atomic_load_relaxed(&ci->max_world) == ~(size_t)0) { From 318028a91464d1b167d0817d2aec80424a4f5479 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Mon, 31 Mar 2025 04:04:21 -0400 Subject: [PATCH 15/20] fix opaque_closure sparam capture (#57928) Fix #54236 Fix #54357 (cherry picked from commit 3a3926eba563e147d7d6a48c44fb7f47088d24c5) --- src/julia-syntax.scm | 1 + test/opaque_closure.jl | 17 +++++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/src/julia-syntax.scm b/src/julia-syntax.scm index 739fa45e088ca..b5d90a24ea13f 100644 --- a/src/julia-syntax.scm +++ b/src/julia-syntax.scm @@ -4105,6 +4105,7 @@ f(x) = yt(x) (capt-var-access v fname opaq) v))) cvs))) + (set-car! 
(cdddr (lam:vinfo lam2)) '()) ;; must capture static_parameters as values inside opaque_closure `(new_opaque_closure ,(cadr e) ,(or (caddr e) '(call (core apply_type) (core Union))) ,(or (cadddr e) '(core Any)) ,allow-partial (opaque_closure_method (null) ,nargs ,isva ,functionloc ,(convert-lambda lam2 (car (lam:args lam2)) #f '() (symbol-to-idx-map cvs) parsed-method-stack)) diff --git a/test/opaque_closure.jl b/test/opaque_closure.jl index 6c988b068a668..7b02578a86621 100644 --- a/test/opaque_closure.jl +++ b/test/opaque_closure.jl @@ -390,3 +390,20 @@ let ir = first(only(Base.code_ircode(sin, (Int,)))) oc = Core.OpaqueClosure(ir; do_compile=false) @test oc(1) == sin(1) end + +function typed_add54236(::Type{T}) where T + return @opaque (x::Int)->T(x) + T(1) +end +let f = typed_add54236(Float64) + @test f isa Core.OpaqueClosure + @test f(32) === 33.0 +end + +f54357(g, ::Type{AT}) where {AT} = Base.Experimental.@opaque AT->_ (args...) -> g((args::AT)...) +let f = f54357(+, Tuple{Int,Int}) + @test f isa Core.OpaqueClosure + @test f(32, 34) === 66 + g = f54357(+, Tuple{Float64,Float64}) + @test g isa Core.OpaqueClosure + @test g(32.0, 34.0) === 66.0 +end From 7a9c684135b3fd6e8b1a93d5cd86edc547e5fff2 Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Thu, 27 Mar 2025 18:57:51 +0900 Subject: [PATCH 16/20] inference: fix exct modeling of `setglobal!` (#57896) (cherry picked from commit 626d541de6129179ff3934d6a4f95ef05c4c232c) --- Compiler/src/abstractinterpretation.jl | 7 +++---- Compiler/test/inference.jl | 5 +++++ 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/Compiler/src/abstractinterpretation.jl b/Compiler/src/abstractinterpretation.jl index 1c12bbf0c5e64..2a535cb20b4d0 100644 --- a/Compiler/src/abstractinterpretation.jl +++ b/Compiler/src/abstractinterpretation.jl @@ -2561,7 +2561,6 @@ function abstract_eval_setglobalonce!(interp::AbstractInterpreter, sv::AbsIntSta end end - function abstract_eval_replaceglobal!(interp::AbstractInterpreter, sv::AbsIntState, saw_latestworld::Bool, argtypes::Vector{Any}) if length(argtypes) in (5, 6, 7) (M, s, x, v) = argtypes[2], argtypes[3], argtypes[4], argtypes[5] @@ -3624,7 +3623,7 @@ end function global_assignment_rt_exct(interp::AbstractInterpreter, sv::AbsIntState, saw_latestworld::Bool, g::GlobalRef, @nospecialize(newty)) if saw_latestworld - return Pair{Any,Any}(newty, Union{ErrorException, TypeError}) + return Pair{Any,Any}(newty, ErrorException) end (valid_worlds, ret) = scan_partitions((interp, _, partition)->global_assignment_binding_rt_exct(interp, partition, newty), interp, g, sv.world) update_valid_age!(sv, valid_worlds) @@ -3641,10 +3640,10 @@ function global_assignment_binding_rt_exct(interp::AbstractInterpreter, partitio ty = kind == PARTITION_KIND_DECLARED ? Any : partition_restriction(partition) wnewty = widenconst(newty) if !hasintersect(wnewty, ty) - return Pair{Any,Any}(Bottom, TypeError) + return Pair{Any,Any}(Bottom, ErrorException) elseif !(wnewty <: ty) retty = tmeet(typeinf_lattice(interp), newty, ty) - return Pair{Any,Any}(retty, TypeError) + return Pair{Any,Any}(retty, ErrorException) end return Pair{Any,Any}(newty, Bottom) end diff --git a/Compiler/test/inference.jl b/Compiler/test/inference.jl index b77c99513a8b6..ec569a0ba04b5 100644 --- a/Compiler/test/inference.jl +++ b/Compiler/test/inference.jl @@ -6194,3 +6194,8 @@ f57292(xs::Union{Tuple{String}, Int}...) = getfield(xs...) g57292(xs::String...) = getfield(("abc",), 1, :not_atomic, xs...) 
@test Base.infer_return_type(f57292) == String @test Base.infer_return_type(g57292) == String + +global invalid_setglobal!_exct_modeling::Int +@test Base.infer_exception_type((Float64,)) do x + setglobal!(@__MODULE__, :invalid_setglobal!_exct_modeling, x) +end == ErrorException From fceace7cb912af3c11d2cadf6e8f3b1924e34bbc Mon Sep 17 00:00:00 2001 From: Jeff Bezanson Date: Mon, 31 Mar 2025 04:01:04 -0400 Subject: [PATCH 17/20] fix trimming size regression due to handling binding backedges in the wrong place (#57927) (cherry picked from commit fe613d4129f38efccb7821f0d37523abc649cb91) --- src/staticdata.c | 4 +++- test/trimming/trimming.jl | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/staticdata.c b/src/staticdata.c index cab6e6be510b0..2aa2180fc6923 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -841,7 +841,6 @@ static void jl_queue_module_for_serialization(jl_serializer_state *s, jl_module_ // ... or point to Base functions accessed by the runtime (m == jl_base_module && (!strcmp(jl_symbol_name(b->globalref->name), "wait") || !strcmp(jl_symbol_name(b->globalref->name), "task_done_hook"))))) { - record_field_change((jl_value_t**)&b->backedges, NULL); jl_queue_for_serialization(s, b); } } @@ -1081,6 +1080,9 @@ static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_ record_field_change((jl_value_t **)&tn->mt, NULL); } } + else if (jl_is_binding(v)) { + record_field_change((jl_value_t**)&((jl_binding_t*)v)->backedges, NULL); + } } char *data = (char*)jl_data_ptr(v); size_t i, np = layout->npointers; diff --git a/test/trimming/trimming.jl b/test/trimming/trimming.jl index 0c5226cba01fe..69d9adf9b003f 100644 --- a/test/trimming/trimming.jl +++ b/test/trimming/trimming.jl @@ -4,7 +4,7 @@ let exe_suffix = splitext(Base.julia_exename())[2] hello_exe = joinpath(@__DIR__, "hello" * exe_suffix) @test readchomp(`$hello_exe`) == "Hello, world!" - @test filesize(hello_exe) < filesize(unsafe_string(Base.JLOptions().image_file))/10 + @test filesize(hello_exe) < 2_000_000 basic_jll_exe = joinpath(@__DIR__, "basic_jll" * exe_suffix) lines = split(readchomp(`$basic_jll_exe`), "\n") From 189359e83ac3c5eb78834e15a6c35329fd2632b7 Mon Sep 17 00:00:00 2001 From: Tommy Hofmann Date: Fri, 21 Feb 2025 19:41:27 +0100 Subject: [PATCH 18/20] Clarify disabling of tab-completion hinting in the documentation (#57493) (cherry picked from commit e2cc68ced04dbfa4ea2005a4a44e05fe1e6d6336) --- stdlib/REPL/docs/src/index.md | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/stdlib/REPL/docs/src/index.md b/stdlib/REPL/docs/src/index.md index eabd7e729280e..ddd0a0953fcfc 100644 --- a/stdlib/REPL/docs/src/index.md +++ b/stdlib/REPL/docs/src/index.md @@ -343,7 +343,15 @@ mapfoldl mapfoldr When a single complete tab-complete result is available at the end of an input line and 2 or more characters have been typed, a hint of the completion will show in a lighter color. -This can be disabled via `Base.active_repl.options.hint_tab_completes = false`. +This can be disabled via `Base.active_repl.options.hint_tab_completes = false` or by adding +``` +atreplinit() do repl + if VERSION >= v"1.11.0-0" + repl.options.hint_tab_completes = false + end +end +``` +to your `~/.julia/config/startup.jl`. !!! 
compat "Julia 1.11" Tab-complete hinting was added in Julia 1.11 From 4bf5a26baa07e93d311d12afcd35bdb72ecf8be9 Mon Sep 17 00:00:00 2001 From: Kristoffer Date: Mon, 31 Mar 2025 19:51:25 +0200 Subject: [PATCH 19/20] change file size test for trimming --- test/trimming/trimming.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/trimming/trimming.jl b/test/trimming/trimming.jl index 69d9adf9b003f..4a0bb14ceebe2 100644 --- a/test/trimming/trimming.jl +++ b/test/trimming/trimming.jl @@ -4,7 +4,7 @@ let exe_suffix = splitext(Base.julia_exename())[2] hello_exe = joinpath(@__DIR__, "hello" * exe_suffix) @test readchomp(`$hello_exe`) == "Hello, world!" - @test filesize(hello_exe) < 2_000_000 + @test filesize(hello_exe) < 20_000_000 basic_jll_exe = joinpath(@__DIR__, "basic_jll" * exe_suffix) lines = split(readchomp(`$basic_jll_exe`), "\n") From a3b193c48aaf4e6000049b9df375ae0c6fdcf815 Mon Sep 17 00:00:00 2001 From: Jeff Bezanson Date: Wed, 26 Mar 2025 18:50:45 -0400 Subject: [PATCH 20/20] add trimming of new usings_backedges and scanned_methods fields (#57879) (cherry picked from commit 8fd3fb118222f3cbdb78c53c685e5ec994fca125) --- src/staticdata.c | 16 +++++++++++----- test/trimming/trimming.jl | 2 +- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/src/staticdata.c b/src/staticdata.c index 2aa2180fc6923..b51013e7e0563 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -851,8 +851,14 @@ static void jl_queue_module_for_serialization(jl_serializer_state *s, jl_module_ jl_queue_for_serialization(s, module_usings_getmod(m, i)); } - jl_queue_for_serialization(s, m->usings_backedges); - jl_queue_for_serialization(s, m->scanned_methods); + if (jl_options.trim || jl_options.strip_ir) { + record_field_change((jl_value_t**)&m->usings_backedges, jl_nothing); + record_field_change((jl_value_t**)&m->scanned_methods, jl_nothing); + } + else { + jl_queue_for_serialization(s, m->usings_backedges); + jl_queue_for_serialization(s, m->scanned_methods); + } } // Anything that requires uniquing or fixing during deserialization needs to be "toplevel" @@ -1367,10 +1373,10 @@ static void jl_write_module(jl_serializer_state *s, uintptr_t item, jl_module_t newm->line = 0; newm->usings_backedges = NULL; arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_module_t, usings_backedges))); - arraylist_push(&s->relocs_list, (void*)backref_id(s, m->usings_backedges, s->link_ids_relocs)); + arraylist_push(&s->relocs_list, (void*)backref_id(s, get_replaceable_field(&m->usings_backedges, 1), s->link_ids_relocs)); newm->scanned_methods = NULL; arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_module_t, scanned_methods))); - arraylist_push(&s->relocs_list, (void*)backref_id(s, m->scanned_methods, s->link_ids_relocs)); + arraylist_push(&s->relocs_list, (void*)backref_id(s, get_replaceable_field(&m->scanned_methods, 1), s->link_ids_relocs)); // After reload, everything that has happened in this process happened semantically at // (for .incremental) or before jl_require_world, so reset this flag. 
@@ -3635,7 +3641,7 @@ static int jl_validate_binding_partition(jl_binding_t *b, jl_binding_partition_t jl_sym_t *name = b->globalref->name; JL_LOCK(&mod->lock); jl_atomic_store_release(&mod->export_set_changed_since_require_world, 1); - if (mod->usings_backedges) { + if (mod->usings_backedges != jl_nothing) { for (size_t i = 0; i < jl_array_len(mod->usings_backedges); i++) { jl_module_t *edge = (jl_module_t*)jl_array_ptr_ref(mod->usings_backedges, i); jl_binding_t *importee = jl_get_module_binding(edge, name, 0); diff --git a/test/trimming/trimming.jl b/test/trimming/trimming.jl index 4a0bb14ceebe2..a752c69460ad4 100644 --- a/test/trimming/trimming.jl +++ b/test/trimming/trimming.jl @@ -4,7 +4,7 @@ let exe_suffix = splitext(Base.julia_exename())[2] hello_exe = joinpath(@__DIR__, "hello" * exe_suffix) @test readchomp(`$hello_exe`) == "Hello, world!" - @test filesize(hello_exe) < 20_000_000 + @test filesize(hello_exe) < 2000000 basic_jll_exe = joinpath(@__DIR__, "basic_jll" * exe_suffix) lines = split(readchomp(`$basic_jll_exe`), "\n")
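
A short illustration of the unintended-capture pattern addressed by the `_precompilepkgs` progress-display fix earlier in this series: when a nested closure assigns to a name that already exists in an enclosing scope, Julia rebinds the enclosing variable and has to box it, which degrades inference and makes the surrounding code more prone to invalidation. The sketch below is not the precompilation.jl code itself; the function names `capture_demo` and `no_capture_demo` are made up purely for illustration.

```julia
# Assigning to `str` inside the closure rebinds the enclosing `str`,
# so `str` becomes a Core.Box and its concrete String type is lost to inference.
function capture_demo()
    str = "outer"
    render = () -> (str = "inner"; str)
    return render(), str
end

# Using a fresh local (`s`) inside the closure avoids the capture entirely,
# mirroring the fix of introducing a new variable instead of reusing `str`.
function no_capture_demo()
    str = "outer"
    render = () -> (s = "inner"; s)
    return render(), str
end

# Compare the inferred code, e.g. with:
#   code_warntype(capture_demo, ())     # shows Core.Box for `str`
#   code_warntype(no_capture_demo, ())  # everything stays a concrete String
```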