From 86cb6c7ac7ed2b39e375e1c3a6013a86ec0f0d9c Mon Sep 17 00:00:00 2001
From: Andrew Adams <andrew.b.adams@gmail.com>
Date: Tue, 26 Oct 2021 16:12:52 -0700
Subject: [PATCH] Deprecate JIT runtime override methods that take void *
 (#6344)

* Deprecate JIT runtime override methods that take void *

* Clean up comments
---
 src/Func.cpp                                  | 21 ++--
 src/Func.h                                    | 93 ++----------------
 src/JITModule.cpp                             |  4 +-
 src/JITModule.h                               | 74 +++++++++++++-
 src/Pipeline.h                                | 96 ++-----------------
 src/runtime/HalideRuntime.h                   |  6 +-
 test/correctness/align_bounds.cpp             |  8 +-
 .../assertion_failure_in_parallel_for.cpp     |  4 +-
 test/correctness/compute_with.cpp             | 38 ++++----
 test/correctness/constraints.cpp              | 14 +--
 test/correctness/custom_allocator.cpp         |  9 +-
 test/correctness/extern_error.cpp             |  6 +-
 test/correctness/force_onto_stack.cpp         | 19 ++--
 test/correctness/gpu_assertion_in_kernel.cpp  |  8 +-
 test/correctness/heap_cleanup.cpp             | 13 +--
 test/correctness/input_image_bounds_check.cpp | 10 +-
 .../input_larger_than_two_gigs.cpp            |  8 +-
 test/correctness/iterate_over_circle.cpp      |  4 +-
 test/correctness/load_library.cpp             |  4 +-
 .../loop_invariant_extern_calls.cpp           |  6 +-
 test/correctness/memoize.cpp                  | 13 +--
 .../multi_output_pipeline_with_bad_sizes.cpp  |  4 +-
 test/correctness/nested_tail_strategies.cpp   |  9 +-
 test/correctness/out_of_memory.cpp            | 13 +--
 .../output_larger_than_two_gigs.cpp           |  8 +-
 test/correctness/parameter_constraints.cpp    |  6 +-
 test/correctness/print.cpp                    | 18 ++--
 test/correctness/pseudostack_shares_slots.cpp | 12 ++-
 .../realize_larger_than_two_gigs.cpp          |  8 +-
 .../correctness/reduction_non_rectangular.cpp | 18 ++--
 test/correctness/reorder_storage.cpp          |  9 +-
 test/correctness/require.cpp                  | 10 +-
 test/correctness/reschedule.cpp               |  4 +-
 test/correctness/rfactor.cpp                  |  4 +-
 test/correctness/set_custom_trace.cpp         |  4 +-
 test/correctness/skip_stages_memoize.cpp      | 12 +--
 test/correctness/sliding_window.cpp           |  9 +-
 test/correctness/specialize.cpp               | 25 ++---
 test/correctness/stack_allocations.cpp        |  9 +-
 .../stencil_chain_in_update_definitions.cpp   |  4 +-
 test/correctness/storage_folding.cpp          | 38 +++-----
 test/correctness/store_in.cpp                 |  9 +-
 test/correctness/tracing.cpp                  |  4 +-
 test/correctness/tracing_broadcast.cpp        |  4 +-
 test/correctness/tracing_stack.cpp            |  4 +-
 test/correctness/uninitialized_read.cpp       |  4 +-
 test/correctness/vectorize_guard_with_if.cpp  |  4 +-
 ...ealize_constantly_larger_than_two_gigs.cpp | 10 +-
 test/error/undefined_rdom_dimension.cpp       |  8 +-
 test/performance/memory_profiler.cpp          | 22 ++---
 test/performance/profiler.cpp                 |  6 +-
 test/warning/require_const_false.cpp          |  4 +-
 52 files changed, 341 insertions(+), 420 deletions(-)
diff --git a/src/Func.cpp b/src/Func.cpp
index 03fba3629da6..35e89f57cc71 100644
--- a/src/Func.cpp
+++ b/src/Func.cpp
@@ -3275,29 +3275,38 @@ void Func::compile_to_assembly(const string &filename, const vector<Argument> &a
 
 // JIT-related code
 
+namespace {
+template<typename A, typename B>
+void set_handler(A &a, B b) {
+    a = (A)b;
+}
+}  // namespace
+
+// Deprecated setters for JIT handlers
 void Func::set_error_handler(void (*handler)(void *, const char *)) {
-    pipeline().set_error_handler(handler);
+    set_handler(jit_handlers().custom_error, handler);
 }
 
 void Func::set_custom_allocator(void *(*cust_malloc)(void *, size_t),
                                 void (*cust_free)(void *, void *)) {
-    pipeline().set_custom_allocator(cust_malloc, cust_free);
+    set_handler(jit_handlers().custom_malloc, cust_malloc);
+    set_handler(jit_handlers().custom_free, cust_free);
 }
 
 void Func::set_custom_do_par_for(int (*cust_do_par_for)(void *, int (*)(void *, int, uint8_t *), int, int, uint8_t *)) {
-    pipeline().set_custom_do_par_for(cust_do_par_for);
+    set_handler(jit_handlers().custom_do_par_for, cust_do_par_for);
 }
 
 void Func::set_custom_do_task(int (*cust_do_task)(void *, int (*)(void *, int, uint8_t *), int, uint8_t *)) {
-    pipeline().set_custom_do_task(cust_do_task);
+    set_handler(jit_handlers().custom_do_task, cust_do_task);
 }
 
 void Func::set_custom_trace(int (*trace_fn)(void *, const halide_trace_event_t *)) {
-    pipeline().set_custom_trace(trace_fn);
+    set_handler(jit_handlers().custom_trace, trace_fn);
 }
 
 void Func::set_custom_print(void (*cust_print)(void *, const char *)) {
-    pipeline().set_custom_print(cust_print);
+    set_handler(jit_handlers().custom_print, cust_print);
 }
 
 void Func::add_custom_lowering_pass(IRMutator *pass, std::function<void()> deleter) {
diff --git a/src/Func.h b/src/Func.h
index c99835a987c2..bbfffeab92d5 100644
--- a/src/Func.h
+++ b/src/Func.h
@@ -1048,101 +1048,28 @@ class Func {
      */
     void compile_jit(const Target &target = get_jit_target_from_environment());
 
-    /** Set the error handler function that be called in the case of
-     * runtime errors during halide pipelines. If you are compiling
-     * statically, you can also just define your own function with
-     * signature
-     \code
-     extern "C" void halide_error(void *user_context, const char *);
-     \endcode
-     * This will clobber Halide's version.
-     */
+    /** Deprecated setters for JIT runtime handlers that take a void
+     * pointer instead of a JITUserContext pointer. Use jit_handlers(). */
+    // @{
+    HALIDE_ATTRIBUTE_DEPRECATED("Custom handlers should be set by modifying the struct returned by jit_handlers()")
     void set_error_handler(void (*handler)(void *, const char *));
-
-    /** Set a custom malloc and free for halide to use. Malloc should
-     * return 32-byte aligned chunks of memory, and it should be safe
-     * for Halide to read slightly out of bounds (up to 8 bytes before
-     * the start or beyond the end). If compiling statically, routines
-     * with appropriate signatures can be provided directly
-    \code
-     extern "C" void *halide_malloc(void *, size_t)
-     extern "C" void halide_free(void *, void *)
-     \endcode
-     * These will clobber Halide's versions. See HalideRuntime.h
-     * for declarations.
-     */
+    HALIDE_ATTRIBUTE_DEPRECATED("Custom handlers should be set by modifying the struct returned by jit_handlers()")
     void set_custom_allocator(void *(*malloc)(void *, size_t),
                               void (*free)(void *, void *));
-
-    /** Set a custom task handler to be called by the parallel for
-     * loop. It is useful to set this if you want to do some
-     * additional bookkeeping at the granularity of parallel
-     * tasks. The default implementation does this:
-     \code
-     extern "C" int halide_do_task(void *user_context,
-                                   int (*f)(void *, int, uint8_t *),
-                                   int idx, uint8_t *state) {
-         return f(user_context, idx, state);
-     }
-     \endcode
-     * If you are statically compiling, you can also just define your
-     * own version of the above function, and it will clobber Halide's
-     * version.
-     *
-     * If you're trying to use a custom parallel runtime, you probably
-     * don't want to call this. See instead \ref Func::set_custom_do_par_for .
-    */
+    HALIDE_ATTRIBUTE_DEPRECATED("Custom handlers should be set by modifying the struct returned by jit_handlers()")
     void set_custom_do_task(
         int (*custom_do_task)(void *, int (*)(void *, int, uint8_t *),
                               int, uint8_t *));
-
-    /** Set a custom parallel for loop launcher. Useful if your app
-     * already manages a thread pool. The default implementation is
-     * equivalent to this:
-     \code
-     extern "C" int halide_do_par_for(void *user_context,
-                                      int (*f)(void *, int, uint8_t *),
-                                      int min, int extent, uint8_t *state) {
-         int exit_status = 0;
-         parallel for (int idx = min; idx < min+extent; idx++) {
-             int job_status = halide_do_task(user_context, f, idx, state);
-             if (job_status) exit_status = job_status;
-         }
-         return exit_status;
-     }
-     \endcode
-     *
-     * However, notwithstanding the above example code, if one task
-     * fails, we may skip over other tasks, and if two tasks return
-     * different error codes, we may select one arbitrarily to return.
-     *
-     * If you are statically compiling, you can also just define your
-     * own version of the above function, and it will clobber Halide's
-     * version.
-     */
+    HALIDE_ATTRIBUTE_DEPRECATED("Custom handlers should be set by modifying the struct returned by jit_handlers()")
     void set_custom_do_par_for(
         int (*custom_do_par_for)(void *, int (*)(void *, int, uint8_t *), int,
                                  int, uint8_t *));
-
-    /** Set custom routines to call when tracing is enabled. Call this
-     * on the output Func of your pipeline. This then sets custom
-     * routines for the entire pipeline, not just calls to this
-     * Func.
-     *
-     * If you are statically compiling, you can also just define your
-     * own versions of the tracing functions (see HalideRuntime.h),
-     * and they will clobber Halide's versions. */
+    HALIDE_ATTRIBUTE_DEPRECATED("Custom handlers should be set by modifying the struct returned by jit_handlers()")
     void set_custom_trace(int (*trace_fn)(void *, const halide_trace_event_t *));
 
-    /** Set the function called to print messages from the runtime.
-     * If you are compiling statically, you can also just define your
-     * own function with signature
-     \code
-     extern "C" void halide_print(void *user_context, const char *);
-     \endcode
-     * This will clobber Halide's version.
-     */
+    HALIDE_ATTRIBUTE_DEPRECATED("Custom handlers should be set by modifying the struct returned by jit_handlers()")
     void set_custom_print(void (*handler)(void *, const char *));
+    // @}
 
     /** Get a struct containing the currently set custom functions
      * used by JIT. This can be mutated. Changes will take effect the
diff --git a/src/JITModule.cpp b/src/JITModule.cpp
index 372870bf4b59..0033cb212c16 100644
--- a/src/JITModule.cpp
+++ b/src/JITModule.cpp
@@ -532,7 +532,7 @@ void free_handler(JITUserContext *context, void *ptr) {
     }
 }
 
-int do_task_handler(JITUserContext *context, halide_task_t f, int idx,
+int do_task_handler(JITUserContext *context, int (*f)(JITUserContext *, int, uint8_t *), int idx,
                     uint8_t *closure) {
     if (context) {
         return (*context->handlers.custom_do_task)(context, f, idx, closure);
@@ -541,7 +541,7 @@ int do_task_handler(JITUserContext *context, halide_task_t f, int idx,
     }
 }
 
-int do_par_for_handler(JITUserContext *context, halide_task_t f,
+int do_par_for_handler(JITUserContext *context, int (*f)(JITUserContext *, int, uint8_t *),
                        int min, int size, uint8_t *closure) {
     if (context) {
         return (*context->handlers.custom_do_par_for)(context, f, min, size, closure);
diff --git a/src/JITModule.h b/src/JITModule.h
index ca23e67e6f3d..5208d31698ac 100644
--- a/src/JITModule.h
+++ b/src/JITModule.h
@@ -26,17 +26,85 @@ class Module;
 
 struct JITUserContext;
 
-/** A set of custom overrides of runtime functions */
+/** A set of custom overrides of runtime functions. These only apply
+ * when JIT-compiling code. If you are doing AOT compilation, see
+ * HalideRuntime.h for instructions on how to replace runtime
+ * functions. */
 struct JITHandlers {
+    /** The function called to print messages from the runtime. */
     void (*custom_print)(JITUserContext *, const char *){nullptr};
+
+    /** A custom malloc and free for halide to use. Malloc should
+     * return 32-byte aligned chunks of memory, and it should be safe
+     * for Halide to read slightly out of bounds (up to 8 bytes before
+     * the start or beyond the end). */
+    // @{
     void *(*custom_malloc)(JITUserContext *, size_t){nullptr};
     void (*custom_free)(JITUserContext *, void *){nullptr};
-    int (*custom_do_task)(JITUserContext *, halide_task_t, int, uint8_t *){nullptr};
-    int (*custom_do_par_for)(JITUserContext *, halide_task_t, int, int, uint8_t *){nullptr};
+    // @}
+
+    /** A custom task handler to be called by the parallel for
+     * loop. It is useful to set this if you want to do some
+     * additional bookkeeping at the granularity of parallel
+     * tasks. The default implementation does this:
+     \code
+     extern "C" int halide_do_task(JITUserContext *user_context,
+                                   int (*f)(JITUserContext *, int, uint8_t *),
+                                   int idx, uint8_t *state) {
+         return f(user_context, idx, state);
+     }
+     \endcode
+     *
+     * If you're trying to use a custom parallel runtime, you probably
+     * don't want to set this. See custom_do_par_for instead.
+    */
+    int (*custom_do_task)(JITUserContext *, int (*)(JITUserContext *, int, uint8_t *), int, uint8_t *){nullptr};
+
+    /** A custom parallel for loop launcher. Useful if your app
+     * already manages a thread pool. The default implementation is
+     * equivalent to this:
+     \code
+     extern "C" int halide_do_par_for(JITUserContext *user_context,
+                                      int (*f)(JITUserContext *, int, uint8_t *),
+                                      int min, int extent, uint8_t *state) {
+         int exit_status = 0;
+         parallel for (int idx = min; idx < min+extent; idx++) {
+             int job_status = halide_do_task(user_context, f, idx, state);
+             if (job_status) exit_status = job_status;
+         }
+         return exit_status;
+     }
+     \endcode
+     *
+     * However, notwithstanding the above example code, if one task
+     * fails, we may skip over other tasks, and if two tasks return
+     * different error codes, we may select one arbitrarily to return.
+     */
+    int (*custom_do_par_for)(JITUserContext *, int (*)(JITUserContext *, int, uint8_t *), int, int, uint8_t *){nullptr};
+
+    /** The error handler function that will be called in the case of
+     * runtime errors during halide pipelines. */
     void (*custom_error)(JITUserContext *, const char *){nullptr};
+
+    /** A custom routine to call when tracing is enabled. Set this
+     * via jit_handlers() on the output Func of your pipeline. It
+     * then applies to the entire pipeline, not just to calls to
+     * that Func. */
     int32_t (*custom_trace)(JITUserContext *, const halide_trace_event_t *){nullptr};
+
+    /** A method to use for Halide to resolve symbol names dynamically
+     * in the calling process or library from within the Halide
+     * runtime. Equivalent to dlsym with a null first argument. */
     void *(*custom_get_symbol)(const char *name){nullptr};
+
+    /** A method to use for Halide to dynamically load libraries from
+     * within the runtime. Equivalent to dlopen. Returns a handle to
+     * the opened library. */
     void *(*custom_load_library)(const char *name){nullptr};
+
+    /** A method to use for Halide to dynamically find a symbol within
+     * an opened library. Equivalent to dlsym. Takes a handle
+     * returned by custom_load_library as the first argument. */
     void *(*custom_get_library_symbol)(void *lib, const char *name){nullptr};
 };
 
diff --git a/src/Pipeline.h b/src/Pipeline.h
index 3e9c11710d06..35644be3ba3e 100644
--- a/src/Pipeline.h
+++ b/src/Pipeline.h
@@ -349,103 +349,27 @@ class Pipeline {
      */
     void compile_jit(const Target &target = get_jit_target_from_environment());
 
-    // TODO: deprecate all of these and replace with versions that take a JITUserContext
-
-    /** Set the error handler function that be called in the case of
-     * runtime errors during halide pipelines. If you are compiling
-     * statically, you can also just define your own function with
-     * signature
-     \code
-     extern "C" void halide_error(void *user_context, const char *);
-     \endcode
-     * This will clobber Halide's version.
-     */
+    /** Deprecated setters for JIT runtime handlers that take a void
+     * pointer instead of a JITUserContext pointer. Use jit_handlers(). */
+    // @{
+    HALIDE_ATTRIBUTE_DEPRECATED("Custom handlers should be set by modifying the struct returned by jit_handlers()")
     void set_error_handler(void (*handler)(void *, const char *));
-
-    /** Set a custom malloc and free for halide to use. Malloc should
-     * return 32-byte aligned chunks of memory, and it should be safe
-     * for Halide to read slightly out of bounds (up to 8 bytes before
-     * the start or beyond the end). If compiling statically, routines
-     * with appropriate signatures can be provided directly
-    \code
-     extern "C" void *halide_malloc(void *, size_t)
-     extern "C" void halide_free(void *, void *)
-     \endcode
-     * These will clobber Halide's versions. See HalideRuntime.h
-     * for declarations.
-     */
+    HALIDE_ATTRIBUTE_DEPRECATED("Custom handlers should be set by modifying the struct returned by jit_handlers()")
     void set_custom_allocator(void *(*malloc)(void *, size_t),
                               void (*free)(void *, void *));
-
-    /** Set a custom task handler to be called by the parallel for
-     * loop. It is useful to set this if you want to do some
-     * additional bookkeeping at the granularity of parallel
-     * tasks. The default implementation does this:
-     \code
-     extern "C" int halide_do_task(void *user_context,
-                                   int (*f)(void *, int, uint8_t *),
-                                   int idx, uint8_t *state) {
-         return f(user_context, idx, state);
-     }
-     \endcode
-     * If you are statically compiling, you can also just define your
-     * own version of the above function, and it will clobber Halide's
-     * version.
-     *
-     * If you're trying to use a custom parallel runtime, you probably
-     * don't want to call this. See instead \ref Func::set_custom_do_par_for .
-    */
+    HALIDE_ATTRIBUTE_DEPRECATED("Custom handlers should be set by modifying the struct returned by jit_handlers()")
     void set_custom_do_task(
         int (*custom_do_task)(void *, int (*)(void *, int, uint8_t *),
                               int, uint8_t *));
-
-    /** Set a custom parallel for loop launcher. Useful if your app
-     * already manages a thread pool. The default implementation is
-     * equivalent to this:
-     \code
-     extern "C" int halide_do_par_for(void *user_context,
-                                      int (*f)(void *, int, uint8_t *),
-                                      int min, int extent, uint8_t *state) {
-         int exit_status = 0;
-         parallel for (int idx = min; idx < min+extent; idx++) {
-             int job_status = halide_do_task(user_context, f, idx, state);
-             if (job_status) exit_status = job_status;
-         }
-         return exit_status;
-     }
-     \endcode
-     *
-     * However, notwithstanding the above example code, if one task
-     * fails, we may skip over other tasks, and if two tasks return
-     * different error codes, we may select one arbitrarily to return.
-     *
-     * If you are statically compiling, you can also just define your
-     * own version of the above function, and it will clobber Halide's
-     * version.
-     */
+    HALIDE_ATTRIBUTE_DEPRECATED("Custom handlers should be set by modifying the struct returned by jit_handlers()")
     void set_custom_do_par_for(
         int (*custom_do_par_for)(void *, int (*)(void *, int, uint8_t *), int,
                                  int, uint8_t *));
-
-    /** Set custom routines to call when tracing is enabled. Call this
-     * on the output Func of your pipeline. This then sets custom
-     * routines for the entire pipeline, not just calls to this
-     * Func.
-     *
-     * If you are statically compiling, you can also just define your
-     * own versions of the tracing functions (see HalideRuntime.h),
-     * and they will clobber Halide's versions. */
+    HALIDE_ATTRIBUTE_DEPRECATED("Custom handlers should be set by modifying the struct returned by jit_handlers()")
     void set_custom_trace(int (*trace_fn)(void *, const halide_trace_event_t *));
-
-    /** Set the function called to print messages from the runtime.
-     * If you are compiling statically, you can also just define your
-     * own function with signature
-     \code
-     extern "C" void halide_print(void *user_context, const char *);
-     \endcode
-     * This will clobber Halide's version.
-     */
+    HALIDE_ATTRIBUTE_DEPRECATED("Custom handlers should be set by modifying the struct returned by jit_handlers()")
     void set_custom_print(void (*handler)(void *, const char *));
+    // @}
 
     /** Install a set of external C functions or Funcs to satisfy
      * dependencies introduced by HalideExtern and define_extern
diff --git a/src/runtime/HalideRuntime.h b/src/runtime/HalideRuntime.h
index 07800f2e3ceb..394a45c17cfa 100644
--- a/src/runtime/HalideRuntime.h
+++ b/src/runtime/HalideRuntime.h
@@ -62,9 +62,9 @@ extern "C" {
  * replaced with user-defined versions by defining an extern "C"
  * function with the same name and signature.
  *
- * When doing Just In Time (JIT) compilation methods on the Func being
- * compiled must be called instead. The corresponding methods are
- * documented below.
+ * When doing Just In Time (JIT) compilation, members of
+ * some_pipeline_or_func.jit_handlers() must be replaced instead. The
+ * corresponding methods are documented below.
  *
  * All of these functions take a "void *user_context" parameter as their
  * first argument; if the Halide kernel that calls back to any of these
diff --git a/test/correctness/align_bounds.cpp b/test/correctness/align_bounds.cpp
index 79abc8a5a991..e8b553a07fed 100644
--- a/test/correctness/align_bounds.cpp
+++ b/test/correctness/align_bounds.cpp
@@ -14,7 +14,7 @@ class CheckForSelects : public IRVisitor {
 };
 
 int trace_min, trace_extent;
-int my_trace(void *user_context, const halide_trace_event_t *e) {
+int my_trace(JITUserContext *user_context, const halide_trace_event_t *e) {
     if (e->event == 2) {
         trace_min = e->coordinates[0];
         trace_extent = e->coordinates[1];
@@ -48,7 +48,7 @@ int main(int argc, char **argv) {
         }
 
         p.set(3);
-        h.set_custom_trace(my_trace);
+        h.jit_handlers().custom_trace = my_trace;
         Buffer<int> result = h.realize({10});
 
         for (int i = 0; i < 10; i++) {
@@ -111,7 +111,7 @@ int main(int argc, char **argv) {
         }
 
         p.set(3);
-        h.set_custom_trace(my_trace);
+        h.jit_handlers().custom_trace = my_trace;
         Buffer<int> result = h.realize({10});
 
         for (int i = 0; i < 10; i++) {
@@ -162,7 +162,7 @@ int main(int argc, char **argv) {
         g.compute_root().align_extent(x, 32).trace_realizations();
 
         p.set(3);
-        h.set_custom_trace(my_trace);
+        h.jit_handlers().custom_trace = my_trace;
         Buffer<int> result = h.realize({10});
 
         for (int i = 0; i < 10; i++) {
diff --git a/test/correctness/assertion_failure_in_parallel_for.cpp b/test/correctness/assertion_failure_in_parallel_for.cpp
index 9fbd72b05804..8042fff93bf8 100644
--- a/test/correctness/assertion_failure_in_parallel_for.cpp
+++ b/test/correctness/assertion_failure_in_parallel_for.cpp
@@ -6,7 +6,7 @@ using namespace Halide;
 
 std::atomic<bool> error_occurred{false};
 
-void halide_error(void *ctx, const char *msg) {
+void halide_error(JITUserContext *ctx, const char *msg) {
     printf("Expected: %s\n", msg);
     error_occurred = true;
 }
@@ -35,7 +35,7 @@ int main(int argc, char **argv) {
 
     split.set(11);
 
-    g.set_error_handler(&halide_error);
+    g.jit_handlers().custom_error = halide_error;
     g.realize({40, 40});
 
     if (!error_occurred) {
diff --git a/test/correctness/compute_with.cpp b/test/correctness/compute_with.cpp
index 2feb93e61264..5a5e0d25f0a8 100644
--- a/test/correctness/compute_with.cpp
+++ b/test/correctness/compute_with.cpp
@@ -62,7 +62,7 @@ bool check_coordinates(const Bound &b, const int32_t *coordinates, int32_t dims,
 }
 
 // A trace that check the region accessed by stores/loads of a buffer
-int my_trace(void *user_context, const halide_trace_event_t *e) {
+int my_trace(JITUserContext *user_context, const halide_trace_event_t *e) {
     string fname = std::string(e->func);
     if (e->event == halide_trace_store) {
         std::lock_guard<std::mutex> lock(stores_mutex);
@@ -129,7 +129,7 @@ int split_test() {
             {g.name(), Bound(2, 201, -2, 197)},
             {h.name(), Bound()},  // There shouldn't be any load from h
         };
-        h.set_custom_trace(&my_trace);
+        h.jit_handlers().custom_trace = &my_trace;
 
         im = h.realize({200, 200});
     }
@@ -184,7 +184,7 @@ int fuse_test() {
             {g.name(), Bound(-5, size - 6, -6, size - 7, 2, size + 1)},
             {h.name(), Bound()},  // There shouldn't be any load from h
         };
-        h.set_custom_trace(&my_trace);
+        h.jit_handlers().custom_trace = &my_trace;
 
         im = h.realize({size, size, size});
     }
@@ -265,7 +265,7 @@ int multiple_fuse_group_test() {
             {p.name(), Bound(0, 199, 0, 199)},
             {q.name(), Bound()},  // There shouldn't be any load from q
         };
-        q.set_custom_trace(&my_trace);
+        q.jit_handlers().custom_trace = &my_trace;
 
         im = q.realize({200, 200});
     }
@@ -322,7 +322,7 @@ int multiple_outputs_test() {
         };
 
         Pipeline p({f, g});
-        p.set_custom_trace(&my_trace);
+        p.jit_handlers().custom_trace = &my_trace;
         p.realize({f_im, g_im});
     }
 
@@ -403,7 +403,7 @@ int fuse_compute_at_test() {
             {q.name(), Bound(-1, 165, 0, 166)},
             {r.name(), Bound()},  // There shouldn't be any load from r
         };
-        r.set_custom_trace(&my_trace);
+        r.jit_handlers().custom_trace = &my_trace;
 
         im = r.realize({167, 167});
     }
@@ -460,7 +460,7 @@ int double_split_fuse_test() {
             {g.name(), Bound(0, 199, 0, 199)},
             {h.name(), Bound()},  // There shouldn't be any load from h
         };
-        h.set_custom_trace(&my_trace);
+        h.jit_handlers().custom_trace = &my_trace;
 
         im = h.realize({200, 200});
     }
@@ -510,7 +510,7 @@ int rgb_yuv420_test() {
         loads_total = 0;
         stores_total = 0;
         Pipeline p({y_part, u_part, v_part});
-        p.set_custom_trace(&my_trace);
+        p.jit_handlers().custom_trace = &my_trace;
         p.realize({y_im_ref, u_im_ref, v_im_ref}, get_jit_target_from_environment().with_feature(Target::TraceLoads).with_feature(Target::TraceStores));
         load_count_ref = loads_total;
         store_count_ref = stores_total;
@@ -573,7 +573,7 @@ int rgb_yuv420_test() {
         loads_total = 0;
         stores_total = 0;
         Pipeline p({y_part, u_part, v_part});
-        p.set_custom_trace(&my_trace);
+        p.jit_handlers().custom_trace = &my_trace;
         p.realize({y_im, u_im, v_im}, get_jit_target_from_environment().with_feature(Target::TraceLoads).with_feature(Target::TraceStores));
 
         bool too_many_memops = false;
@@ -683,7 +683,7 @@ int vectorize_test() {
             {g.name(), Bound(2, width + 1, -2, height - 3)},
             {h.name(), Bound()},  // There shouldn't be any load from h
         };
-        h.set_custom_trace(&my_trace);
+        h.jit_handlers().custom_trace = &my_trace;
 
         im = h.realize({width, height});
     }
@@ -745,7 +745,7 @@ int some_are_skipped_test() {
             {p.name(), Bound(0, 199, 0, 199)},
             {h.name(), Bound(0, 199, 0, 199)},
         };
-        h.set_custom_trace(&my_trace);
+        h.jit_handlers().custom_trace = &my_trace;
 
         im = h.realize({200, 200});
     }
@@ -886,7 +886,7 @@ int mixed_tile_factor_test() {
         };
 
         Pipeline p({f, g, h});
-        p.set_custom_trace(&my_trace);
+        p.jit_handlers().custom_trace = &my_trace;
         p.realize({f_im, g_im, h_im});
     }
 
@@ -984,7 +984,7 @@ int multi_tile_mixed_tile_factor_test() {
         };
 
         Pipeline p({f, g, h});
-        p.set_custom_trace(&my_trace);
+        p.jit_handlers().custom_trace = &my_trace;
         p.realize({f_im, g_im, h_im});
     }
 
@@ -1075,7 +1075,7 @@ int only_some_are_tiled_test() {
         };
 
         Pipeline p({f, g, h});
-        p.set_custom_trace(&my_trace);
+        p.jit_handlers().custom_trace = &my_trace;
         p.realize({f_im, g_im, h_im});
     }
 
@@ -1145,7 +1145,7 @@ int with_specialization_test() {
             {g.name(), Bound(2, 201, -2, 197)},
             {h.name(), Bound()},  // There shouldn't be any load from h
         };
-        h.set_custom_trace(&my_trace);
+        h.jit_handlers().custom_trace = &my_trace;
 
         tile.set(true);
         im = h.realize({200, 200});
@@ -1212,7 +1212,7 @@ int nested_compute_with_test() {
         };
 
         Pipeline p({g1, g2});
-        p.set_custom_trace(&my_trace);
+        p.jit_handlers().custom_trace = &my_trace;
         p.realize({g1_im, g2_im});
     }
 
@@ -1763,7 +1763,7 @@ int vectorize_inlined_test() {
         loads_total = 0;
         stores_total = 0;
         Pipeline p({h, g});
-        p.set_custom_trace(&my_trace);
+        p.jit_handlers().custom_trace = &my_trace;
         p.realize({h_im_ref, g_im_ref}, get_jit_target_from_environment().with_feature(Target::TraceLoads).with_feature(Target::TraceStores));
         load_count_ref = loads_total;
         store_count_ref = stores_total;
@@ -1799,7 +1799,7 @@ int vectorize_inlined_test() {
         loads_total = 0;
         stores_total = 0;
         Pipeline p({h, g});
-        p.set_custom_trace(&my_trace);
+        p.jit_handlers().custom_trace = &my_trace;
         p.realize({h_im, g_im}, get_jit_target_from_environment().with_feature(Target::TraceLoads).with_feature(Target::TraceStores));
 
         bool too_many_memops = false;
@@ -1945,7 +1945,7 @@ int different_arg_num_compute_at_test() {
         loads_total = 0;
         stores_total = 0;
         Pipeline p({output_a, output_b});
-        p.set_custom_trace(&my_trace);
+        p.jit_handlers().custom_trace = &my_trace;
         p.realize({buffer_a, buffer_b}, get_jit_target_from_environment().with_feature(Target::TraceLoads).with_feature(Target::TraceStores));
 
         bool too_many_memops = false;
diff --git a/test/correctness/constraints.cpp b/test/correctness/constraints.cpp
index b60fed774eea..a08ba3a6b34b 100644
--- a/test/correctness/constraints.cpp
+++ b/test/correctness/constraints.cpp
@@ -7,7 +7,7 @@
 using namespace Halide;
 
 bool error_occurred = false;
-void my_error_handler(void *user_context, const char *msg) {
+void my_error_handler(JITUserContext *user_context, const char *msg) {
     //printf("%s\n", msg);
     error_occurred = true;
 }
@@ -23,7 +23,7 @@ int basic_constraints() {
 
     param.dim(0).set_bounds(0, 128);
 
-    f.set_error_handler(my_error_handler);
+    f.jit_handlers().custom_error = my_error_handler;
 
     // This should be fine
     param.set(image1);
@@ -46,7 +46,7 @@ int basic_constraints() {
 
     // Now try constraining the output buffer of a function
     g(x, y) = x * y;
-    g.set_error_handler(my_error_handler);
+    g.jit_handlers().custom_error = my_error_handler;
     g.output_buffer().dim(0).set_stride(2);
     error_occurred = false;
     g.realize(image1);
@@ -57,7 +57,7 @@ int basic_constraints() {
 
     Func h;
     h(x, y) = x * y;
-    h.set_error_handler(my_error_handler);
+    h.jit_handlers().custom_error = my_error_handler;
     h.output_buffer()
         .dim(0)
         .set_stride(1)
@@ -155,7 +155,7 @@ int unstructured_constraints() {
     pf.add_requirement(param.dim(0).min() == required_min && param.dim(0).extent() == required_extent,
                        "Custom message:", param.dim(0).min(), param.dim(0).max());
 
-    pf.set_error_handler(my_error_handler);
+    pf.jit_handlers().custom_error = my_error_handler;
 
     // This should be fine
     param.set(image1);
@@ -184,7 +184,7 @@ int unstructured_constraints() {
     Param<int> required_stride;
     required_stride.set(2);
     pg.add_requirement(g.output_buffer().dim(0).stride() == required_stride);
-    pg.set_error_handler(my_error_handler);
+    pg.jit_handlers().custom_error = my_error_handler;
 
     error_occurred = false;
     pg.realize(image1);
@@ -197,7 +197,7 @@ int unstructured_constraints() {
     h(x, y) = x * y;
 
     Pipeline ph(h);
-    ph.set_error_handler(my_error_handler);
+    ph.jit_handlers().custom_error = my_error_handler;
     ph.add_requirement(h.output_buffer().dim(0).stride() == 1);
     ph.add_requirement(h.output_buffer().dim(0).min() == 0);
     ph.add_requirement(h.output_buffer().dim(0).extent() % 8 == 0);
diff --git a/test/correctness/custom_allocator.cpp b/test/correctness/custom_allocator.cpp
index 52a57b5bfa12..66978ae51f5e 100644
--- a/test/correctness/custom_allocator.cpp
+++ b/test/correctness/custom_allocator.cpp
@@ -8,7 +8,7 @@ using namespace Halide;
 bool custom_malloc_called = false;
 bool custom_free_called = false;
 
-void *my_malloc(void *user_context, size_t x) {
+void *my_malloc(JITUserContext *user_context, size_t x) {
     custom_malloc_called = true;
     void *orig = malloc(x + 32);
     void *ptr = (void *)((((size_t)orig + 32) >> 5) << 5);
@@ -16,14 +16,14 @@ void *my_malloc(void *user_context, size_t x) {
     return ptr;
 }
 
-void my_free(void *user_context, void *ptr) {
+void my_free(JITUserContext *user_context, void *ptr) {
     custom_free_called = true;
     free(((void **)ptr)[-1]);
 }
 
 int main(int argc, char **argv) {
     if (get_jit_target_from_environment().arch == Target::WebAssembly) {
-        printf("[SKIP] WebAssembly JIT does not support set_custom_allocator().\n");
+        printf("[SKIP] WebAssembly JIT does not support custom allocators.\n");
         return 0;
     }
 
@@ -34,7 +34,8 @@ int main(int argc, char **argv) {
     g(x) = f(x);
     f.compute_root();
 
-    g.set_custom_allocator(my_malloc, my_free);
+    g.jit_handlers().custom_malloc = my_malloc;
+    g.jit_handlers().custom_free = my_free;
 
     Buffer<int> im = g.realize({100000});
 
diff --git a/test/correctness/extern_error.cpp b/test/correctness/extern_error.cpp
index 492914b532b9..a64444a25526 100644
--- a/test/correctness/extern_error.cpp
+++ b/test/correctness/extern_error.cpp
@@ -10,13 +10,13 @@ using namespace Halide;
 #endif
 
 bool extern_error_called = false;
-extern "C" DLLEXPORT int extern_error(void *user_context, halide_buffer_t *out) {
+extern "C" DLLEXPORT int extern_error(JITUserContext *user_context, halide_buffer_t *out) {
     extern_error_called = true;
     return -1;
 }
 
 bool error_occurred = false;
-extern "C" DLLEXPORT void my_halide_error(void *user_context, const char *msg) {
+extern "C" DLLEXPORT void my_halide_error(JITUserContext *user_context, const char *msg) {
     printf("Expected: %s\n", msg);
     error_occurred = true;
 }
@@ -27,7 +27,7 @@ int main(int argc, char **argv) {
 
     Func f;
     f.define_extern("extern_error", args, Float(32), 1);
-    f.set_error_handler(&my_halide_error);
+    f.jit_handlers().custom_error = my_halide_error;
     f.realize({100});
 
     if (!error_occurred || !extern_error_called) {
diff --git a/test/correctness/force_onto_stack.cpp b/test/correctness/force_onto_stack.cpp
index e7060bd6c62a..0498390fc854 100644
--- a/test/correctness/force_onto_stack.cpp
+++ b/test/correctness/force_onto_stack.cpp
@@ -1,17 +1,17 @@
 #include "Halide.h"
 using namespace Halide;
 
-void *my_malloc(void *user_context, size_t x) {
+void *my_malloc(JITUserContext *user_context, size_t x) {
     printf("There was not supposed to be a heap allocation\n");
     exit(-1);
     return nullptr;
 }
 
-void my_free(void *user_context, void *ptr) {
+void my_free(JITUserContext *user_context, void *ptr) {
 }
 
 bool errored = false;
-void my_error(void *user_context, const char *msg) {
+void my_error(JITUserContext *user_context, const char *msg) {
     errored = true;
     char expected[] = "Bounds given for f in x (from 0 to 7) do not cover required region (from 0 to 9)";
     if (strncmp(expected, msg, sizeof(expected) - 1)) {
@@ -22,7 +22,7 @@ void my_error(void *user_context, const char *msg) {
 
 int main(int argc, char **argv) {
     if (get_jit_target_from_environment().arch == Target::WebAssembly) {
-        printf("[SKIP] WebAssembly JIT does not support set_custom_allocator().\n");
+        printf("[SKIP] WebAssembly JIT does not support custom allocators.\n");
         return 0;
     }
 
@@ -42,13 +42,15 @@ int main(int argc, char **argv) {
         f.compute_at(g, xo).bound_extent(x, 8).vectorize(x);
 
         // Check there's no malloc when the bound is good
-        g.set_custom_allocator(&my_malloc, &my_free);
+        g.jit_handlers().custom_malloc = my_malloc;
+        g.jit_handlers().custom_free = my_free;
         p.set(5);
         g.realize({20});
-        g.set_custom_allocator(nullptr, nullptr);
+        g.jit_handlers().custom_malloc = nullptr;
+        g.jit_handlers().custom_free = nullptr;
 
         // Check there was an assertion failure of the appropriate type when the bound is violated
-        g.set_error_handler(&my_error);
+        g.jit_handlers().custom_error = my_error;
         p.set(10);
         g.realize({20});
 
@@ -77,7 +79,8 @@ int main(int argc, char **argv) {
         // nasty thing), so we'll add a bound.
         f.bound_extent(x, 8);
 
-        g.set_custom_allocator(&my_malloc, &my_free);
+        g.jit_handlers().custom_malloc = my_malloc;
+        g.jit_handlers().custom_free = my_free;
         g.realize({20});
     }
 
diff --git a/test/correctness/gpu_assertion_in_kernel.cpp b/test/correctness/gpu_assertion_in_kernel.cpp
index 18ab241dc549..d12356ba3336 100644
--- a/test/correctness/gpu_assertion_in_kernel.cpp
+++ b/test/correctness/gpu_assertion_in_kernel.cpp
@@ -3,7 +3,7 @@
 using namespace Halide;
 
 bool errored = false;
-void my_error(void *, const char *msg) {
+void my_error(JITUserContext *, const char *msg) {
     // Emitting "error.*:" to stdout or stderr will cause CMake to report the
     // test as a failure on Windows, regardless of error code returned,
     // hence the abbreviation to "err".
@@ -11,7 +11,7 @@ void my_error(void *, const char *msg) {
     errored = true;
 }
 
-void my_print(void *, const char *msg) {
+void my_print(JITUserContext *, const char *msg) {
     // Empty to neuter debug message spew
 }
 
@@ -39,8 +39,8 @@ int main(int argc, char **argv) {
     g.gpu_tile(x, xi, 8);
     f.compute_at(g, x).gpu_threads(x);
 
-    g.set_error_handler(&my_error);
-    g.set_custom_print(&my_print);
+    g.jit_handlers().custom_error = my_error;
+    g.jit_handlers().custom_print = my_print;
 
     // Should succeed
     g.realize({3, 100}, t);
diff --git a/test/correctness/heap_cleanup.cpp b/test/correctness/heap_cleanup.cpp
index da7b0e31cb98..ef7ea808bbcc 100644
--- a/test/correctness/heap_cleanup.cpp
+++ b/test/correctness/heap_cleanup.cpp
@@ -9,7 +9,7 @@ using namespace Halide;
 std::atomic<int> malloc_count{0};
 std::atomic<int> free_count{0};
 
-void *my_malloc(void *user_context, size_t x) {
+void *my_malloc(JITUserContext *user_context, size_t x) {
     malloc_count++;
     void *orig = malloc(x + 32);
     void *ptr = (void *)((((size_t)orig + 32) >> 5) << 5);
@@ -17,19 +17,19 @@ void *my_malloc(void *user_context, size_t x) {
     return ptr;
 }
 
-void my_free(void *user_context, void *ptr) {
+void my_free(JITUserContext *user_context, void *ptr) {
     free_count++;
     free(((void **)ptr)[-1]);
 }
 
 bool error_occurred = false;
-void my_error_handler(void *user_context, const char *) {
+void my_error_handler(JITUserContext *user_context, const char *) {
     error_occurred = true;
 }
 
 int main(int argc, char **argv) {
     if (get_jit_target_from_environment().arch == Target::WebAssembly) {
-        printf("[SKIP] WebAssembly JIT does not support set_custom_allocator().\n");
+        printf("[SKIP] WebAssembly JIT does not support custom allocators.\n");
         return 0;
     }
 
@@ -49,8 +49,9 @@ int main(int argc, char **argv) {
     int g_size = 100000;
     g.bound(x, 0, g_size);
 
-    h.set_custom_allocator(my_malloc, my_free);
-    h.set_error_handler(my_error_handler);
+    h.jit_handlers().custom_malloc = my_malloc;
+    h.jit_handlers().custom_free = my_free;
+    h.jit_handlers().custom_error = my_error_handler;
 
     Buffer<int> im = h.realize({g_size + 100});
 
diff --git a/test/correctness/input_image_bounds_check.cpp b/test/correctness/input_image_bounds_check.cpp
index f9f942724606..967de67c3a26 100644
--- a/test/correctness/input_image_bounds_check.cpp
+++ b/test/correctness/input_image_bounds_check.cpp
@@ -8,13 +8,11 @@ using namespace Halide;
 // Custom error handler. If we don't define this, it'll just print out
 // an error message and quit
 bool error_occurred = false;
-void halide_error(void *, const char *msg) {
+void halide_error(JITUserContext *, const char *msg) {
     printf("%s\n", msg);
     error_occurred = true;
 }
 
-extern "C" void set_error_handler(void (*)(void *, const char *));
-
 int main(int argc, char **argv) {
     Buffer<float> input(19);
     for (int i = 0; i < 19; i++) {
@@ -24,7 +22,7 @@ int main(int argc, char **argv) {
     Func f;
     f(x) = input(x) * 2;
 
-    f.set_error_handler(&halide_error);
+    f.jit_handlers().custom_error = halide_error;
 
     // One easy way to read out of bounds
     f.realize({23});
@@ -43,7 +41,7 @@ int main(int argc, char **argv) {
     h(x) = g(x);
     g.compute_root().vectorize(x, 4);
 
-    h.set_error_handler(&halide_error);
+    h.jit_handlers().custom_error = halide_error;
     h.realize({18});
 
     if (error_occurred) {
@@ -57,7 +55,7 @@ int main(int argc, char **argv) {
     Func i;
     i(x) = small_input(x);
     i.vectorize(x, 4);
-    i.set_error_handler(&halide_error);
+    i.jit_handlers().custom_error = halide_error;
     i.realize({4});
     if (!error_occurred) {
         printf("There should have been an out-of-bounds error\n");
diff --git a/test/correctness/input_larger_than_two_gigs.cpp b/test/correctness/input_larger_than_two_gigs.cpp
index b8aede5299c6..29e6fb4fdd5b 100644
--- a/test/correctness/input_larger_than_two_gigs.cpp
+++ b/test/correctness/input_larger_than_two_gigs.cpp
@@ -2,14 +2,14 @@
 #include <memory>
 #include <stdio.h>
 
+using namespace Halide;
+
 int error_occurred = false;
-void halide_error(void *ctx, const char *msg) {
+void halide_error(JITUserContext *ctx, const char *msg) {
     printf("Expected: %s\n", msg);
     error_occurred = true;
 }
 
-using namespace Halide;
-
 int main(int argc, char **argv) {
     uint8_t c[4096];
     memset(c, 42, sizeof(c));
@@ -25,7 +25,7 @@ int main(int argc, char **argv) {
     Var x;
     Func grand_total;
     grand_total() = cast<uint64_t>(input(0, 0, 0) + input(input.dim(0).extent() - 1, input.dim(1).extent() - 1, input.dim(2).extent() - 1));
-    grand_total.set_error_handler(&halide_error);
+    grand_total.jit_handlers().custom_error = halide_error;
 
     Target t = get_jit_target_from_environment();
 
diff --git a/test/correctness/iterate_over_circle.cpp b/test/correctness/iterate_over_circle.cpp
index 13b4fa362636..f397d4a46fba 100644
--- a/test/correctness/iterate_over_circle.cpp
+++ b/test/correctness/iterate_over_circle.cpp
@@ -4,7 +4,7 @@
 using namespace Halide;
 
 int count = 0;
-int my_trace(void *user_context, const halide_trace_event_t *ev) {
+int my_trace(JITUserContext *user_context, const halide_trace_event_t *ev) {
     if (ev->event == halide_trace_load) {
         count++;
     }
@@ -28,7 +28,7 @@ int main(int argc, char **argv) {
     f(x, y) += select(x > -t && x < t, in(x, y), 0);
 
     in.trace_loads();
-    f.set_custom_trace(my_trace);
+    f.jit_handlers().custom_trace = my_trace;
     f.realize({20, 20});
 
     int c = 0;
diff --git a/test/correctness/load_library.cpp b/test/correctness/load_library.cpp
index 9d709ab719ed..fef43bd68528 100644
--- a/test/correctness/load_library.cpp
+++ b/test/correctness/load_library.cpp
@@ -15,7 +15,7 @@ namespace {
 int load_library_calls = 0;
 int get_library_symbol_calls = 0;
 
-void my_error_handler(void *u, const char *msg) {
+void my_error_handler(JITUserContext *u, const char *msg) {
     // Emitting "error.*:" to stdout or stderr will cause CMake to report the
     // test as a failure on Windows, regardless of error code returned,
     // hence the abbreviation to "err".
@@ -84,7 +84,7 @@ int main(int argc, char **argv) {
     Func f;
     f(x, y) = cast<int32_t>(x + y);
     f.gpu_tile(x, y, xi, yi, 8, 8, TailStrategy::Auto, DeviceAPI::OpenCL);
-    f.set_error_handler(my_error_handler);
+    f.jit_handlers().custom_error = my_error_handler;
 
     Buffer<int32_t> out = f.realize({64, 64}, target);
 
diff --git a/test/correctness/loop_invariant_extern_calls.cpp b/test/correctness/loop_invariant_extern_calls.cpp
index 2a345ab47435..37c617e783a6 100644
--- a/test/correctness/loop_invariant_extern_calls.cpp
+++ b/test/correctness/loop_invariant_extern_calls.cpp
@@ -25,7 +25,7 @@ extern "C" DLLEXPORT int my_impure_func(int counter, int x) {
 HalideExtern_2(int, my_impure_func, int, int);
 
 // A parallel for loop runner that isn't actually parallel
-int not_really_parallel_for(void *ctx, int (*f)(void *, int, uint8_t *), int min, int extent, uint8_t *closure) {
+int not_really_parallel_for(JITUserContext *ctx, int (*f)(JITUserContext *, int, uint8_t *), int min, int extent, uint8_t *closure) {
     for (int i = min; i < min + extent; i++) {
         f(ctx, i, closure);
     }
@@ -34,7 +34,7 @@ int not_really_parallel_for(void *ctx, int (*f)(void *, int, uint8_t *), int min
 
 int main(int argc, char **argv) {
     if (get_jit_target_from_environment().arch == Target::WebAssembly) {
-        printf("[SKIP] Skipping test for WebAssembly as the wasm JIT cannot support set_custom_do_par_for().\n");
+        printf("[SKIP] Skipping test for WebAssembly as the wasm JIT cannot support custom parallel runtimes\n");
         return 0;
     }
 
@@ -73,7 +73,7 @@ int main(int argc, char **argv) {
     g(x, y) = my_func(3, Expr(0)) + my_impure_func(4, Expr(0));
     g.parallel(y);
     // Avoid the race condition by not actually being parallel
-    g.set_custom_do_par_for(&not_really_parallel_for);
+    g.jit_handlers().custom_do_par_for = not_really_parallel_for;
     g.realize({32, 32});
 
     if (call_counter[3] != 1 || call_counter[4] != 32 * 32) {
diff --git a/test/correctness/memoize.cpp b/test/correctness/memoize.cpp
index fd8df0309aa5..756a78a6c6a9 100644
--- a/test/correctness/memoize.cpp
+++ b/test/correctness/memoize.cpp
@@ -65,11 +65,11 @@ extern "C" DLLEXPORT int computed_eviction_key(int a) {
 }
 HalideExtern_1(int, computed_eviction_key, int);
 
-void simple_free(void *user_context, void *ptr) {
+void simple_free(JITUserContext *user_context, void *ptr) {
     free(ptr);
 }
 
-void *flakey_malloc(void * /* user_context */, size_t x) {
+void *flakey_malloc(JITUserContext * /* user_context */, size_t x) {
     if ((rand() % 4) == 0) {
         return nullptr;
     } else {
@@ -78,7 +78,7 @@ void *flakey_malloc(void * /* user_context */, size_t x) {
 }
 
 bool error_occured = false;
-void record_error(void *user_context, const char *msg) {
+void record_error(JITUserContext *user_context, const char *msg) {
     error_occured = true;
 }
 
@@ -586,7 +586,7 @@ int main(int argc, char **argv) {
     }
 
     if (get_jit_target_from_environment().arch == Target::WebAssembly) {
-        printf("[SKIP] WebAssembly JIT does not support set_custom_allocator().\n");
+        printf("[SKIP] WebAssembly JIT does not support custom allocators.\n");
         return 0;
     } else {
         // Test out of memory handling.
@@ -605,8 +605,9 @@ int main(int argc, char **argv) {
         g(x, y) = Tuple(f(x, y)[0] + f(x - 1, y)[0] + f(x + 1, y)[0], f(x, y)[1]);
 
         Pipeline pipe(g);
-        pipe.set_error_handler(record_error);
-        pipe.set_custom_allocator(flakey_malloc, simple_free);
+        pipe.jit_handlers().custom_error = record_error;
+        pipe.jit_handlers().custom_malloc = flakey_malloc;
+        pipe.jit_handlers().custom_free = simple_free;
 
         int total_errors = 0;
         int completed = 0;
diff --git a/test/correctness/multi_output_pipeline_with_bad_sizes.cpp b/test/correctness/multi_output_pipeline_with_bad_sizes.cpp
index 0094275d32a4..60d1fbc175cd 100644
--- a/test/correctness/multi_output_pipeline_with_bad_sizes.cpp
+++ b/test/correctness/multi_output_pipeline_with_bad_sizes.cpp
@@ -4,7 +4,7 @@
 using namespace Halide;
 
 bool error_occurred;
-void halide_error(void *user_context, const char *msg) {
+void halide_error(JITUserContext *user_context, const char *msg) {
     printf("%s\n", msg);
     error_occurred = true;
 }
@@ -18,7 +18,7 @@ int main(int argc, char **argv) {
     Buffer<int> x_out(100);
     Buffer<float> sin_x_out(101);
 
-    f.set_error_handler(&halide_error);
+    f.jit_handlers().custom_error = &halide_error;
     error_occurred = false;
 
     Realization r(x_out, sin_x_out);
diff --git a/test/correctness/nested_tail_strategies.cpp b/test/correctness/nested_tail_strategies.cpp
index 7bae4d34a907..2a0ddc7a6bf8 100644
--- a/test/correctness/nested_tail_strategies.cpp
+++ b/test/correctness/nested_tail_strategies.cpp
@@ -4,7 +4,7 @@ using namespace Halide;
 
 size_t largest_allocation = 0;
 
-void *my_malloc(void *user_context, size_t x) {
+void *my_malloc(JITUserContext *user_context, size_t x) {
     largest_allocation = std::max(x, largest_allocation);
     void *orig = malloc(x + 32);
     void *ptr = (void *)((((size_t)orig + 32) >> 5) << 5);
@@ -12,7 +12,7 @@ void *my_malloc(void *user_context, size_t x) {
     return ptr;
 }
 
-void my_free(void *user_context, void *ptr) {
+void my_free(JITUserContext *user_context, void *ptr) {
     free(((void **)ptr)[-1]);
 }
 
@@ -41,7 +41,8 @@ void check(Func out, int line, std::vector<TailStrategy> tails) {
         sizes_to_try.push_back(3);
     }
 
-    out.set_custom_allocator(my_malloc, my_free);
+    out.jit_handlers().custom_malloc = my_malloc;
+    out.jit_handlers().custom_free = my_free;
 
     for (int s : sizes_to_try) {
         largest_allocation = 0;
@@ -63,7 +64,7 @@ void check(Func out, int line, std::vector<TailStrategy> tails) {
 
 int main(int argc, char **argv) {
     if (get_jit_target_from_environment().arch == Target::WebAssembly) {
-        printf("[SKIP] WebAssembly JIT does not support set_custom_allocator().\n");
+        printf("[SKIP] WebAssembly JIT does not support custom allocators.\n");
         return 0;
     }
 
diff --git a/test/correctness/out_of_memory.cpp b/test/correctness/out_of_memory.cpp
index 40a9bd4fa07f..7816da89bb9b 100644
--- a/test/correctness/out_of_memory.cpp
+++ b/test/correctness/out_of_memory.cpp
@@ -12,7 +12,7 @@ size_t total_allocated = 0;
 // and bug behaviors, etc. Cheap enough to be good in testing.
 std::map<void *, size_t> allocation_sizes;
 
-extern "C" void *test_malloc(void *user_context, size_t x) {
+extern "C" void *test_malloc(JITUserContext *user_context, size_t x) {
     if (total_allocated + x > mem_limit)
         return nullptr;
 
@@ -25,21 +25,21 @@ extern "C" void *test_malloc(void *user_context, size_t x) {
     return result;
 }
 
-extern "C" void test_free(void *user_context, void *ptr) {
+extern "C" void test_free(JITUserContext *user_context, void *ptr) {
     total_allocated -= allocation_sizes[ptr];
     allocation_sizes.erase(ptr);
     free(ptr);
 }
 
 bool error_occurred = false;
-extern "C" void handler(void *user_context, const char *msg) {
+extern "C" void handler(JITUserContext *user_context, const char *msg) {
     printf("%s\n", msg);
     error_occurred = true;
 }
 
 int main(int argc, char **argv) {
     if (get_jit_target_from_environment().arch == Target::WebAssembly) {
-        printf("[SKIP] WebAssembly JIT does not support set_custom_allocator().\n");
+        printf("[SKIP] WebAssembly JIT does not support custom allocators.\n");
         return 0;
     }
 
@@ -57,8 +57,9 @@ int main(int argc, char **argv) {
     // Limit ourselves to two stages worth of address space
     mem_limit = big << 1;
 
-    funcs[funcs.size() - 1].set_custom_allocator(&test_malloc, &test_free);
-    funcs[funcs.size() - 1].set_error_handler(&handler);
+    funcs[funcs.size() - 1].jit_handlers().custom_malloc = test_malloc;
+    funcs[funcs.size() - 1].jit_handlers().custom_free = test_free;
+    funcs[funcs.size() - 1].jit_handlers().custom_error = handler;
     funcs[funcs.size() - 1].realize({1});
 
     if (!error_occurred) {
diff --git a/test/correctness/output_larger_than_two_gigs.cpp b/test/correctness/output_larger_than_two_gigs.cpp
index 44e2c55351d5..154fbd97e63e 100644
--- a/test/correctness/output_larger_than_two_gigs.cpp
+++ b/test/correctness/output_larger_than_two_gigs.cpp
@@ -2,14 +2,14 @@
 #include <memory>
 #include <stdio.h>
 
+using namespace Halide;
+
 int error_occurred = false;
-void halide_error(void *ctx, const char *msg) {
+void halide_error(JITUserContext *ctx, const char *msg) {
     printf("Expected: %s\n", msg);
     error_occurred = true;
 }
 
-using namespace Halide;
-
 int main(int argc, char **argv) {
     Var x, y, z;
     Func identity_uint8;
@@ -23,7 +23,7 @@ int main(int argc, char **argv) {
                                   {0, 256, 0}};
     Buffer<uint8_t> output(c, 3, shape);
 
-    identity_uint8.set_error_handler(&halide_error);
+    identity_uint8.jit_handlers().custom_error = halide_error;
 
     Target t = get_jit_target_from_environment();
 
diff --git a/test/correctness/parameter_constraints.cpp b/test/correctness/parameter_constraints.cpp
index eccd2fd7173e..4e6edad28435 100644
--- a/test/correctness/parameter_constraints.cpp
+++ b/test/correctness/parameter_constraints.cpp
@@ -4,7 +4,7 @@
 using namespace Halide;
 
 bool error_occurred;
-void my_error_handler(void *user_callback, const char *msg) {
+void my_error_handler(JITUserContext *user_context, const char *msg) {
     error_occurred = true;
 }
 
@@ -24,7 +24,7 @@ int main(int argc, char **argv) {
         g.compute_root();
         f(x, y) = g(cast<int>(x / p), y);
 
-        f.set_error_handler(my_error_handler);
+        f.jit_handlers().custom_error = my_error_handler;
 
         error_occurred = false;
         p.set(2);
@@ -55,7 +55,7 @@ int main(int argc, char **argv) {
         g.compute_root();
         f(x, y) = g(cast<int>(x / p), y);
 
-        f.set_error_handler(my_error_handler);
+        f.jit_handlers().custom_error = my_error_handler;
 
         error_occurred = false;
         f.realize({100, 100});
diff --git a/test/correctness/print.cpp b/test/correctness/print.cpp
index b146ef313127..d0e7b09050aa 100644
--- a/test/correctness/print.cpp
+++ b/test/correctness/print.cpp
@@ -8,8 +8,8 @@ using namespace Halide;
 
 std::vector<std::string> messages;
 
-extern "C" void halide_print(void *user_context, const char *message) {
-    //printf("%s", message);
+void my_print(JITUserContext *user_context, const char *message) {
+    // printf("%s", message);
     messages.push_back(message);
 }
 
@@ -39,7 +39,7 @@ int main(int argc, char **argv) {
         Func f;
 
         f(x) = print(x * x, "the answer is", 42.0f, "unsigned", cast<uint32_t>(145));
-        f.set_custom_print(halide_print);
+        f.jit_handlers().custom_print = my_print;
         Buffer<int32_t> result = f.realize({10});
 
         for (int32_t i = 0; i < 10; i++) {
@@ -72,7 +72,7 @@ int main(int argc, char **argv) {
 
         // Test a string containing a printf format specifier (It should print it as-is).
         f(x) = print_when(x == 3, x * x, "g", 42.0f, "%s", param);
-        f.set_custom_print(halide_print);
+        f.jit_handlers().custom_print = my_print;
         Buffer<int32_t> result = f.realize({10});
 
         for (int32_t i = 0; i < 10; i++) {
@@ -115,7 +115,7 @@ int main(int argc, char **argv) {
             args.push_back(dn);
         }
         f(x) = print(args);
-        f.set_custom_print(halide_print);
+        f.jit_handlers().custom_print = my_print;
         Buffer<uint64_t> result = f.realize({1});
 
         if (result(0) != 100) {
@@ -157,7 +157,7 @@ int main(int argc, char **argv) {
 
         f(x) = print(e);
 
-        f.set_custom_print(halide_print);
+        f.jit_handlers().custom_print = my_print;
         Buffer<float> imf = f.realize({N});
 
         assert(messages.size() == (size_t)N);
@@ -180,7 +180,7 @@ int main(int argc, char **argv) {
         messages.clear();
 
         g(x) = print(reinterpret(Float(64), (cast<uint64_t>(random_uint()) << 32) | random_uint()));
-        g.set_custom_print(halide_print);
+        g.jit_handlers().custom_print = my_print;
         Buffer<double> img = g.realize({N});
 
         assert(messages.size() == (size_t)N);
@@ -208,7 +208,7 @@ int main(int argc, char **argv) {
 
         // Test a vectorized print.
         f(x) = print(x * 3);
-        f.set_custom_print(halide_print);
+        f.jit_handlers().custom_print = my_print;
         f.vectorize(x, 32);
         if (target.has_feature(Target::HVX)) {
             f.hexagon();
@@ -233,7 +233,7 @@ int main(int argc, char **argv) {
 
         // Test a vectorized print_when.
         f(x) = print_when(x % 2 == 0, x * 3);
-        f.set_custom_print(halide_print);
+        f.jit_handlers().custom_print = my_print;
         f.vectorize(x, 32);
         if (target.has_feature(Target::HVX)) {
             f.hexagon();
diff --git a/test/correctness/pseudostack_shares_slots.cpp b/test/correctness/pseudostack_shares_slots.cpp
index 0b24ec137104..d0989a4342ef 100644
--- a/test/correctness/pseudostack_shares_slots.cpp
+++ b/test/correctness/pseudostack_shares_slots.cpp
@@ -5,7 +5,7 @@ using namespace Halide;
 const int tolerance = 3 * sizeof(int);
 std::vector<int> mallocs;
 
-void *my_malloc(void *user_context, size_t x) {
+void *my_malloc(JITUserContext *user_context, size_t x) {
     mallocs.push_back((int)x);
     void *orig = malloc(x + 32);
     void *ptr = (void *)((((size_t)orig + 32) >> 5) << 5);
@@ -13,13 +13,13 @@ void *my_malloc(void *user_context, size_t x) {
     return ptr;
 }
 
-void my_free(void *user_context, void *ptr) {
+void my_free(JITUserContext *user_context, void *ptr) {
     free(((void **)ptr)[-1]);
 }
 
 int main(int argc, char **argv) {
     if (get_jit_target_from_environment().arch == Target::WebAssembly) {
-        printf("[SKIP] WebAssembly JIT does not support set_custom_allocator().\n");
+        printf("[SKIP] WebAssembly JIT does not support custom allocators.\n");
         return 0;
     }
 
@@ -47,7 +47,8 @@ int main(int argc, char **argv) {
         for (size_t i = 0; i < chain.size() - 1; i++) {
             chain[i].compute_at(chain.back(), xo).store_in(MemoryType::Stack);
         }
-        chain.back().set_custom_allocator(my_malloc, my_free);
+        chain.back().jit_handlers().custom_malloc = my_malloc;
+        chain.back().jit_handlers().custom_free = my_free;
 
         // Use sizes that trigger actual heap allocations
         for (int sz = 20000; sz <= 20016; sz += 8) {
@@ -91,7 +92,8 @@ int main(int argc, char **argv) {
         for (size_t i = 0; i < chain.size() - 1; i++) {
             chain[i].compute_at(chain.back(), xo).store_in(MemoryType::Stack);
         }
-        chain.back().set_custom_allocator(my_malloc, my_free);
+        chain.back().jit_handlers().custom_malloc = my_malloc;
+        chain.back().jit_handlers().custom_free = my_free;
 
         for (int sz = 160000; sz <= 160128; sz += 64) {
             mallocs.clear();
diff --git a/test/correctness/realize_larger_than_two_gigs.cpp b/test/correctness/realize_larger_than_two_gigs.cpp
index b575ccce1f16..2184ef9b8ae8 100644
--- a/test/correctness/realize_larger_than_two_gigs.cpp
+++ b/test/correctness/realize_larger_than_two_gigs.cpp
@@ -2,14 +2,14 @@
 #include <memory>
 #include <stdio.h>
 
+using namespace Halide;
+
 int error_occurred = false;
-void halide_error(void *ctx, const char *msg) {
+void halide_error(JITUserContext *ctx, const char *msg) {
     printf("Expected: %s\n", msg);
     error_occurred = true;
 }
 
-using namespace Halide;
-
 int main(int argc, char **argv) {
     Param<int> extent;
     Var x, y, z, w;
@@ -20,7 +20,7 @@ int main(int argc, char **argv) {
 
     Func grand_total;
     grand_total() = cast<uint8_t>(sum(big(r.x, r.y, r.z, r.w)));
-    grand_total.set_error_handler(&halide_error);
+    grand_total.jit_handlers().custom_error = halide_error;
 
     Target t = get_jit_target_from_environment();
     t.set_feature(Target::LargeBuffers);
diff --git a/test/correctness/reduction_non_rectangular.cpp b/test/correctness/reduction_non_rectangular.cpp
index 50079f2edf7d..ea658dc1c85e 100644
--- a/test/correctness/reduction_non_rectangular.cpp
+++ b/test/correctness/reduction_non_rectangular.cpp
@@ -9,7 +9,7 @@ bool run_tracer = false;
 int niters_expected = 0;
 int niters = 0;
 
-int intermediate_bound_depend_on_output_trace(void *user_context, const halide_trace_event_t *e) {
+int intermediate_bound_depend_on_output_trace(JITUserContext *user_context, const halide_trace_event_t *e) {
     std::string buffer_name = "g_" + std::to_string(buffer_index);
     if (std::string(e->func) == buffer_name) {
         if (e->event == halide_trace_produce) {
@@ -33,7 +33,7 @@ int intermediate_bound_depend_on_output_trace(void *user_context, const halide_t
     return 0;
 }
 
-int func_call_bound_trace(void *user_context, const halide_trace_event_t *e) {
+int func_call_bound_trace(JITUserContext *user_context, const halide_trace_event_t *e) {
     std::string buffer_name = "g_" + std::to_string(buffer_index);
     if (std::string(e->func) == buffer_name) {
         if (e->event == halide_trace_produce) {
@@ -55,7 +55,7 @@ int func_call_bound_trace(void *user_context, const halide_trace_event_t *e) {
     return 0;
 }
 
-int box_bound_trace(void *user_context, const halide_trace_event_t *e) {
+int box_bound_trace(JITUserContext *user_context, const halide_trace_event_t *e) {
     std::string buffer_name = "g_" + std::to_string(buffer_index);
     if (std::string(e->func) == buffer_name) {
         if (e->event == halide_trace_produce) {
@@ -187,7 +187,7 @@ int func_call_inside_bound_test(int index) {
     // Expect g to be computed over x=[10, 109].
     g.compute_root();
 
-    f.set_custom_trace(&func_call_bound_trace);
+    f.jit_handlers().custom_trace = &func_call_bound_trace;
     g.trace_stores();
     g.trace_realizations();
 
@@ -268,7 +268,7 @@ int two_linear_bounds_test(int index) {
     // Expect g to be computed over x=[0,99] and y=[1,99].
     g.compute_root();
 
-    f.set_custom_trace(&box_bound_trace);
+    f.jit_handlers().custom_trace = &box_bound_trace;
     g.trace_stores();
     g.trace_realizations();
 
@@ -322,7 +322,7 @@ int circle_bound_test(int index) {
     // i.e. f loop will still iterate over x=[0,99] and y=[0,99].
     g.compute_at(f, r.y);
 
-    f.set_custom_trace(&box_bound_trace);
+    f.jit_handlers().custom_trace = &box_bound_trace;
     g.trace_stores();
     g.trace_realizations();
 
@@ -364,7 +364,7 @@ int intermediate_computed_if_param_test(int index) {
     // than 3.
     g.compute_root();
 
-    f.set_custom_trace(&box_bound_trace);
+    f.jit_handlers().custom_trace = &box_bound_trace;
     g.trace_stores();
     g.trace_realizations();
 
@@ -438,7 +438,7 @@ int intermediate_bound_depend_on_output_test(int index) {
     // bound of f on r.x, which should have been r.x = [0, r.y) in this case
     g.compute_at(f, r.y);
 
-    f.set_custom_trace(&intermediate_bound_depend_on_output_trace);
+    f.jit_handlers().custom_trace = &intermediate_bound_depend_on_output_trace;
     g.trace_stores();
     g.trace_realizations();
 
@@ -492,7 +492,7 @@ int tile_intermediate_bound_depend_on_output_test(int index) {
     // bound of f on r.x, which should have been r.x = [0, r.y) in this case
     g.compute_at(f, ryi);
 
-    f.set_custom_trace(&intermediate_bound_depend_on_output_trace);
+    f.jit_handlers().custom_trace = &intermediate_bound_depend_on_output_trace;
     g.trace_stores();
     g.trace_realizations();
 
diff --git a/test/correctness/reorder_storage.cpp b/test/correctness/reorder_storage.cpp
index 953cd2b1e821..f3240ba1f392 100644
--- a/test/correctness/reorder_storage.cpp
+++ b/test/correctness/reorder_storage.cpp
@@ -7,7 +7,7 @@ using namespace Halide;
 int tolerance = 3 * sizeof(int);
 int expected_allocation = 0;
 
-void *my_malloc(void *user_context, size_t x) {
+void *my_malloc(JITUserContext *user_context, size_t x) {
     if (std::abs((int)x - expected_allocation) > tolerance) {
         printf("Error! Expected allocation of %d bytes, got %zu bytes (tolerance %d)\n", expected_allocation, x, tolerance);
         exit(-1);
@@ -15,13 +15,13 @@ void *my_malloc(void *user_context, size_t x) {
     return malloc(x);
 }
 
-void my_free(void *user_context, void *ptr) {
+void my_free(JITUserContext *user_context, void *ptr) {
     free(ptr);
 }
 
 int main(int argc, char **argv) {
     if (get_jit_target_from_environment().arch == Target::WebAssembly) {
-        printf("[SKIP] WebAssembly JIT does not support set_custom_allocator().\n");
+        printf("[SKIP] WebAssembly JIT does not support custom allocators.\n");
         return 0;
     }
 
@@ -39,7 +39,8 @@ int main(int argc, char **argv) {
     g(x, y, c) = f(x, y, c);
 
     f.compute_root().reorder_storage(c, x, y);
-    g.set_custom_allocator(my_malloc, my_free);
+    g.jit_handlers().custom_malloc = my_malloc;
+    g.jit_handlers().custom_free = my_free;
 
     // Without any storage alignment, we should expect an allocation
     // that is the product of the extents of the realization.
diff --git a/test/correctness/require.cpp b/test/correctness/require.cpp
index 9585f651e2d9..625383f460df 100644
--- a/test/correctness/require.cpp
+++ b/test/correctness/require.cpp
@@ -2,8 +2,10 @@
 #include <memory>
 #include <stdio.h>
 
+using namespace Halide;
+
 int error_occurred = false;
-void halide_error(void *ctx, const char *msg) {
+void halide_error(JITUserContext *ctx, const char *msg) {
     // Emitting "error.*:" to stdout or stderr will cause CMake to report the
     // test as a failure on Windows, regardless of error code returned,
     // hence the abbreviation to "err".
@@ -11,8 +13,6 @@ void halide_error(void *ctx, const char *msg) {
     error_occurred = true;
 }
 
-using namespace Halide;
-
 static void test(int vector_width) {
     Target target = get_jit_target_from_environment();
 
@@ -36,7 +36,7 @@ static void test(int vector_width) {
     if (target.has_feature(Target::HVX)) {
         f.hexagon();
     }
-    f.set_error_handler(&halide_error);
+    f.jit_handlers().custom_error = halide_error;
 
     // choose values that will fail
     p1.set(1);
@@ -68,7 +68,7 @@ static void test(int vector_width) {
     ImageParam input(Int(32), 2);
     Expr h = require(p1 == p2, p1);
     Func clamped = BoundaryConditions::repeat_edge(input, {{0, 64}, {0, h}});
-    clamped.set_error_handler(&halide_error);
+    clamped.jit_handlers().custom_error = &halide_error;
 
     Buffer<int32_t> input_buf(64, 64);
     input_buf.fill(0);
diff --git a/test/correctness/reschedule.cpp b/test/correctness/reschedule.cpp
index 916505d6cf4f..34854a203db3 100644
--- a/test/correctness/reschedule.cpp
+++ b/test/correctness/reschedule.cpp
@@ -6,7 +6,7 @@ using namespace Halide;
 bool vector_store = false, scalar_store = false;
 
 // A trace that checks for vector and scalar stores
-int my_trace(void *user_context, const halide_trace_event_t *ev) {
+int my_trace(JITUserContext *user_context, const halide_trace_event_t *ev) {
 
     if (ev->event == halide_trace_store) {
         if (ev->type.lanes > 1) {
@@ -23,7 +23,7 @@ int main(int argc, char **argv) {
     Var x;
 
     f(x) = x;
-    f.set_custom_trace(&my_trace);
+    f.jit_handlers().custom_trace = &my_trace;
     f.trace_stores();
 
     Buffer<int> result_1 = f.realize({10});
diff --git a/test/correctness/rfactor.cpp b/test/correctness/rfactor.cpp
index afcc009074d4..8511e7ac8e09 100644
--- a/test/correctness/rfactor.cpp
+++ b/test/correctness/rfactor.cpp
@@ -856,7 +856,7 @@ int argmin_rfactor_test() {
     return 0;
 }
 
-int allocation_bound_test_trace(void *user_context, const halide_trace_event_t *e) {
+int allocation_bound_test_trace(JITUserContext *user_context, const halide_trace_event_t *e) {
     // The schedule implies that f will be stored from 0 to 1
     if (e->event == 2 && std::string(e->func) == "f") {
         if (e->coordinates[1] != 2) {
@@ -884,7 +884,7 @@ int check_allocation_bound_test() {
     g.update(0).rfactor({{rxo, u}}).compute_at(g, rxo);
 
     f.trace_realizations();
-    g.set_custom_trace(allocation_bound_test_trace);
+    g.jit_handlers().custom_trace = allocation_bound_test_trace;
     g.realize({23});
 
     return 0;
diff --git a/test/correctness/set_custom_trace.cpp b/test/correctness/set_custom_trace.cpp
index beed79da9449..03036f1be6e5 100644
--- a/test/correctness/set_custom_trace.cpp
+++ b/test/correctness/set_custom_trace.cpp
@@ -39,7 +39,7 @@ class CheckCompute : public IRVisitor {
     }
 };
 
-int allocation_bound_test_trace(void *user_context, const halide_trace_event_t *e) {
+int allocation_bound_test_trace(JITUserContext *user_context, const halide_trace_event_t *e) {
     return 0;
 }
 
@@ -53,7 +53,7 @@ int main(int argc, char **argv) {
     g(x) += f(x);
 
     f.compute_at(g, x);
-    f.set_custom_trace(allocation_bound_test_trace);
+    f.jit_handlers().custom_trace = allocation_bound_test_trace;
 
     Module m = g.compile_to_module({g.infer_arguments()});
     CheckCompute checker;
diff --git a/test/correctness/skip_stages_memoize.cpp b/test/correctness/skip_stages_memoize.cpp
index cece66a81542..04b260305361 100644
--- a/test/correctness/skip_stages_memoize.cpp
+++ b/test/correctness/skip_stages_memoize.cpp
@@ -7,7 +7,7 @@ int buffer_index = 0;
 bool set_toggle1 = false;
 bool set_toggle2 = false;
 
-int single_toggle_trace(void *user_context, const halide_trace_event_t *e) {
+int single_toggle_trace(JITUserContext *user_context, const halide_trace_event_t *e) {
     if (!set_toggle1) {
         std::string buffer_name = "f1_" + std::to_string(buffer_index);
         if ((e->event == halide_trace_store) && (std::string(e->func) == buffer_name)) {
@@ -19,7 +19,7 @@ int single_toggle_trace(void *user_context, const halide_trace_event_t *e) {
     return 0;
 }
 
-int double_toggle_trace(void *user_context, const halide_trace_event_t *e) {
+int double_toggle_trace(JITUserContext *user_context, const halide_trace_event_t *e) {
     if (!set_toggle1) {
         std::string buffer_name = "f1_" + std::to_string(buffer_index);
         if ((e->event == halide_trace_store) && (std::string(e->func) == buffer_name)) {
@@ -86,7 +86,7 @@ int single_memoize_test(int index) {
 
     f1.compute_root().memoize();
 
-    f2.set_custom_trace(&single_toggle_trace);
+    f2.jit_handlers().custom_trace = &single_toggle_trace;
     f1.trace_stores();
 
     f2.compile_jit();
@@ -115,7 +115,7 @@ int tuple_memoize_test(int index) {
 
     f1.compute_root().memoize();
 
-    f2.set_custom_trace(&single_toggle_trace);
+    f2.jit_handlers().custom_trace = &single_toggle_trace;
     f1.trace_stores();
 
     f2.compile_jit();
@@ -153,7 +153,7 @@ int non_trivial_allocate_predicate_test(int index) {
     f1.compute_root().memoize();
     f2.compute_root().memoize();
 
-    f3.set_custom_trace(&double_toggle_trace);
+    f3.jit_handlers().custom_trace = &double_toggle_trace;
     f1.trace_stores();
     f2.trace_stores();
 
@@ -186,7 +186,7 @@ int double_memoize_test(int index) {
     f1.compute_root().memoize();
     f2.compute_root().memoize();
 
-    f3.set_custom_trace(&double_toggle_trace);
+    f3.jit_handlers().custom_trace = &double_toggle_trace;
     f1.trace_stores();
     f2.trace_stores();
 
diff --git a/test/correctness/sliding_window.cpp b/test/correctness/sliding_window.cpp
index 70fe8b7a1baa..04fd70635b55 100644
--- a/test/correctness/sliding_window.cpp
+++ b/test/correctness/sliding_window.cpp
@@ -16,19 +16,16 @@ extern "C" DLLEXPORT int call_counter(int x, int y) {
 }
 HalideExtern_2(int, call_counter, int, int);
 
-extern "C" void *my_malloc(void *, size_t x) {
+extern "C" void *my_malloc(JITUserContext *, size_t x) {
     printf("Malloc wasn't supposed to be called!\n");
     exit(-1);
 }
 
-extern "C" void my_free(void *, void *) {
-}
-
 int main(int argc, char **argv) {
     Var x, y;
 
     if (get_jit_target_from_environment().arch == Target::WebAssembly) {
-        printf("[SKIP] WebAssembly JIT does not support set_custom_allocator().\n");
+        printf("[SKIP] WebAssembly JIT does not support custom allocators.\n");
         return 0;
     }
 
@@ -201,7 +198,7 @@ int main(int argc, char **argv) {
         f(x, y) = x * y;
         g(x, y) = f(x, y) + f(x + 1, y) + f(x, y + 1) + f(x + 1, y + 1);
         f.store_at(g, y).compute_at(g, x);
-        g.set_custom_allocator(&my_malloc, &my_free);
+        g.jit_handlers().custom_malloc = my_malloc;
         Buffer<int> im = g.realize({10, 10});
     }
 
diff --git a/test/correctness/specialize.cpp b/test/correctness/specialize.cpp
index 203bfa40e54b..f98b20d321d8 100644
--- a/test/correctness/specialize.cpp
+++ b/test/correctness/specialize.cpp
@@ -13,7 +13,7 @@ void reset_trace() {
 }
 
 // A trace that checks for vector and scalar stores
-int my_trace(void *user_context, const halide_trace_event_t *ev) {
+int my_trace(JITUserContext *user_context, const halide_trace_event_t *ev) {
 
     if (ev->event == halide_trace_store) {
         if (ev->type.lanes > 1) {
@@ -33,7 +33,7 @@ void reset_alloc_counts() {
     empty_allocs = nonempty_allocs = frees = 0;
 }
 
-void *my_malloc(void *ctx, size_t sz) {
+void *my_malloc(JITUserContext *ctx, size_t sz) {
     // Don't worry about alignment because we'll just test this with scalar code
     if (sz == 0) {
         empty_allocs++;
@@ -43,7 +43,7 @@ void *my_malloc(void *ctx, size_t sz) {
     return malloc(sz);
 }
 
-void my_free(void *ctx, void *ptr) {
+void my_free(JITUserContext *ctx, void *ptr) {
     frees++;
     free(ptr);
 }
@@ -77,7 +77,7 @@ class CountIfThenElse : public Internal::IRMutator {
 
 int main(int argc, char **argv) {
     if (get_jit_target_from_environment().arch == Target::WebAssembly) {
-        printf("[SKIP] WebAssembly JIT does not support set_custom_allocator().\n");
+        printf("[SKIP] WebAssembly JIT does not support custom allocators.\n");
         return 0;
     }
 
@@ -102,7 +102,7 @@ int main(int argc, char **argv) {
         // Now specialize the narrow case on param as well
         f.specialize(param);
 
-        f.set_custom_trace(&my_trace);
+        f.jit_handlers().custom_trace = my_trace;
         f.trace_stores();
 
         Buffer<int> out(100);
@@ -190,7 +190,8 @@ int main(int argc, char **argv) {
         out.specialize(param);
 
         // Count allocations.
-        out.set_custom_allocator(&my_malloc, &my_free);
+        out.jit_handlers().custom_malloc = my_malloc;
+        out.jit_handlers().custom_free = my_free;
 
         reset_alloc_counts();
         param.set(true);
@@ -229,7 +230,7 @@ int main(int argc, char **argv) {
         f.specialize(im.dim(0).stride() == 1 && im.width() >= 8).vectorize(x, 8);
 
         f.trace_stores();
-        f.set_custom_trace(&my_trace);
+        f.jit_handlers().custom_trace = &my_trace;
 
         // Check bounds inference is still cool with widths < 8
         f.infer_input_bounds({5});
@@ -272,7 +273,7 @@ int main(int argc, char **argv) {
         f.specialize(im.dim(0).stride() == 1).vectorize(x, 8);
 
         f.trace_stores();
-        f.set_custom_trace(&my_trace);
+        f.jit_handlers().custom_trace = &my_trace;
 
         Buffer<int> strided_image(4, 100);
         strided_image.slice(0, 0);
@@ -589,7 +590,7 @@ int main(int argc, char **argv) {
         // should be (something) == 0
         _halide_user_assert(s[0].condition.as<Internal::EQ>() && is_const_zero(s[0].condition.as<Internal::EQ>()->b));
 
-        f.set_custom_trace(&my_trace);
+        f.jit_handlers().custom_trace = &my_trace;
         f.trace_stores();
 
         vector_store_lanes = 0;
@@ -637,7 +638,7 @@ int main(int argc, char **argv) {
         // should be (something) == 0
         _halide_user_assert(s[0].condition.as<Internal::EQ>() && is_const_zero(s[0].condition.as<Internal::EQ>()->b));
 
-        f.set_custom_trace(&my_trace);
+        f.jit_handlers().custom_trace = &my_trace;
         f.trace_stores();
 
         vector_store_lanes = 0;
@@ -664,7 +665,7 @@ int main(int argc, char **argv) {
         f.specialize(p == 0).vectorize(x, 32);  // will *not* be pruned
         f.specialize(const_true).vectorize(x, 16);
 
-        f.set_custom_trace(&my_trace);
+        f.jit_handlers().custom_trace = &my_trace;
         f.trace_stores();
 
         vector_store_lanes = 0;
@@ -694,7 +695,7 @@ int main(int argc, char **argv) {
         // Also not ok to have duplicate specialize_fail() calls.
         // f.specialize_fail("This is bad.");  -- would fail
 
-        f.set_custom_trace(&my_trace);
+        f.jit_handlers().custom_trace = &my_trace;
         f.trace_stores();
 
         vector_store_lanes = 0;
diff --git a/test/correctness/stack_allocations.cpp b/test/correctness/stack_allocations.cpp
index 645c96a1323f..e80d72fe8653 100644
--- a/test/correctness/stack_allocations.cpp
+++ b/test/correctness/stack_allocations.cpp
@@ -4,13 +4,13 @@
 using namespace Halide;
 
 extern "C" {
-void *my_malloc(void *ctx, size_t sz) {
+void *my_malloc(JITUserContext *ctx, size_t sz) {
     printf("There weren't supposed to be heap allocations!\n");
     exit(-1);
     return nullptr;
 }
 
-void my_free(void *ctx, void *ptr) {
+void my_free(JITUserContext *ctx, void *ptr) {
     printf("There weren't supposed to be heap allocations!\n");
     exit(-1);
 }
@@ -18,7 +18,7 @@ void my_free(void *ctx, void *ptr) {
 
 int main(int argc, char **argv) {
     if (get_jit_target_from_environment().arch == Target::WebAssembly) {
-        printf("[SKIP] WebAssembly JIT does not support set_custom_allocator().\n");
+        printf("[SKIP] WebAssembly JIT does not support custom allocators.\n");
         return 0;
     }
 
@@ -35,7 +35,8 @@ int main(int argc, char **argv) {
     h.tile(x, y, xi, yi, 4, 3).vectorize(xi);
 
     // f and g should both do stack allocations
-    h.set_custom_allocator(&my_malloc, &my_free);
+    h.jit_handlers().custom_malloc = my_malloc;
+    h.jit_handlers().custom_free = my_free;
 
     h.realize({10, 10});
 
diff --git a/test/correctness/stencil_chain_in_update_definitions.cpp b/test/correctness/stencil_chain_in_update_definitions.cpp
index a8cc9ec2f615..596adb020a00 100644
--- a/test/correctness/stencil_chain_in_update_definitions.cpp
+++ b/test/correctness/stencil_chain_in_update_definitions.cpp
@@ -4,7 +4,7 @@ using namespace Halide;
 
 int num_stores = 0;
 
-int my_trace(void *user_context, const halide_trace_event_t *e) {
+int my_trace(JITUserContext *user_context, const halide_trace_event_t *e) {
     if (e->event == halide_trace_store) {
         num_stores++;
     }
@@ -69,7 +69,7 @@ int main(int argc, char **argv) {
                     iters * (last_iteration_extent + first_iteration_extent - 2) / 2);  // third update
 
     g.trace_stores();
-    h.set_custom_trace(&my_trace);
+    h.jit_handlers().custom_trace = &my_trace;
     h.realize({output_extent});
 
     if (num_stores != expected) {
diff --git a/test/correctness/storage_folding.cpp b/test/correctness/storage_folding.cpp
index bf412e52d7ab..20a937c10315 100644
--- a/test/correctness/storage_folding.cpp
+++ b/test/correctness/storage_folding.cpp
@@ -9,7 +9,7 @@ using namespace Halide;
 const int tolerance = 3 * sizeof(int);
 std::set<size_t> custom_malloc_sizes;
 
-void *my_malloc(void *user_context, size_t x) {
+void *my_malloc(JITUserContext *user_context, size_t x) {
     custom_malloc_sizes.insert(x);
     void *orig = malloc(x + 32);
     void *ptr = (void *)((((size_t)orig + 32) >> 5) << 5);
@@ -17,7 +17,7 @@ void *my_malloc(void *user_context, size_t x) {
     return ptr;
 }
 
-void my_free(void *user_context, void *ptr) {
+void my_free(JITUserContext *user_context, void *ptr) {
     free(((void **)ptr)[-1]);
 }
 
@@ -96,7 +96,7 @@ extern "C" DLLEXPORT int zigzag_buffer_copy(halide_buffer_t *in, halide_buffer_t
 }
 
 bool error_occurred;
-void expected_error(void *, const char *msg) {
+void expected_error(JITUserContext *, const char *msg) {
     // Emitting "error.*:" to stdout or stderr will cause CMake to report the
     // test as a failure on Windows, regardless of error code returned,
     // hence the abbreviation to "err".
@@ -106,7 +106,7 @@ void expected_error(void *, const char *msg) {
 
 void realize_and_expect_error(Func f, int w, int h) {
     error_occurred = false;
-    f.set_error_handler(expected_error);
+    f.jit_handlers().custom_error = expected_error;
     f.realize({w, h});
     if (!error_occurred) {
         printf("Expected an error!\n");
@@ -116,12 +116,18 @@ void realize_and_expect_error(Func f, int w, int h) {
 
 int main(int argc, char **argv) {
     if (get_jit_target_from_environment().arch == Target::WebAssembly) {
-        printf("[SKIP] WebAssembly JIT does not support set_custom_allocator().\n");
+        printf("[SKIP] WebAssembly JIT does not support custom allocators.\n");
         return 0;
     }
 
     Var x, y, c;
 
+    // Every allocation in this test wants to go through the custom allocator above.
+    JITHandlers handlers;
+    handlers.custom_malloc = my_malloc;
+    handlers.custom_free = my_free;
+    Internal::JITSharedRuntime::set_default_handlers(handlers);
+
     {
         Func f, g;
 
@@ -131,8 +137,6 @@ int main(int argc, char **argv) {
 
         // Should be able to fold storage in y and c
 
-        g.set_custom_allocator(my_malloc, my_free);
-
         Buffer<int> im = g.realize({100, 1000, 3});
 
         size_t expected_size = 101 * 4 * sizeof(int);
@@ -152,8 +156,6 @@ int main(int argc, char **argv) {
         // Make sure that storage folding doesn't happen if there are
         // multiple producers of the folded buffer.
 
-        g.set_custom_allocator(my_malloc, my_free);
-
         Buffer<int> im = g.realize({100, 1000, 3});
 
         size_t expected_size = 101 * 1002 * 3 * sizeof(int);
@@ -175,8 +177,6 @@ int main(int argc, char **argv) {
         // automatic storage folding refused to fold this (the case
         // above).
 
-        g.set_custom_allocator(my_malloc, my_free);
-
         Buffer<int> im = g.realize({100, 1000});
 
         size_t expected_size = 101 * 3 * sizeof(int);
@@ -197,8 +197,6 @@ int main(int argc, char **argv) {
         // allocation.
         g.compute_at(f, x).store_root();
 
-        f.set_custom_allocator(my_malloc, my_free);
-
         Buffer<int> im = f.realize({1000, 1000});
 
         if (!custom_malloc_sizes.empty()) {
@@ -230,8 +228,6 @@ int main(int argc, char **argv) {
 
         g.compute_at(f, x).store_root();
 
-        f.set_custom_allocator(my_malloc, my_free);
-
         Buffer<int> im = f.realize({1000, 1000});
 
         if (!custom_malloc_sizes.empty()) {
@@ -264,8 +260,6 @@ int main(int argc, char **argv) {
 
         g.compute_at(f, x).store_root();
 
-        f.set_custom_allocator(my_malloc, my_free);
-
         Buffer<int> im = f.realize({1000, 1000});
 
         size_t expected_size = 2 * 1000 * 4 * sizeof(int);
@@ -300,8 +294,6 @@ int main(int argc, char **argv) {
         // correct to fold if we know the output height is a multiple
         // of the split factor.
 
-        f.set_custom_allocator(my_malloc, my_free);
-
         Buffer<int> im = f.realize({1000, 1000});
 
         size_t expected_size = 1000 * 8 * sizeof(int);
@@ -335,8 +327,6 @@ int main(int argc, char **argv) {
         // (e.g. if memory usage is a concern.)
         g.compute_at(f, x).store_root().fold_storage(y, 3);
 
-        f.set_custom_allocator(my_malloc, my_free);
-
         Buffer<int> im = f.realize({1000, 1000});
 
         size_t expected_size = 2 * 1000 * 3 * sizeof(int);
@@ -365,8 +355,6 @@ int main(int argc, char **argv) {
 
         g.compute_at(f, x).store_root();
 
-        f.set_custom_allocator(my_malloc, my_free);
-
         Buffer<int> im = f.realize({1000, 1000});
 
         size_t expected_size = 1000 * 2 * sizeof(int);
@@ -397,8 +385,6 @@ int main(int argc, char **argv) {
         h.compute_at(f, y).store_root().fold_storage(y, 4);
         g.compute_at(f, y).store_root().fold_storage(y, 2);
 
-        f.set_custom_allocator(my_malloc, my_free);
-
         Buffer<int> im = f.realize({1000, 1000});
 
         // Halide allocates one extra scalar, so we account for that.
@@ -436,8 +422,6 @@ int main(int argc, char **argv) {
         // Make sure we can explicitly fold something with an outer
         // loop.
 
-        g.set_custom_allocator(my_malloc, my_free);
-
         Buffer<int> im = g.realize({100, 1000, 3});
 
         size_t expected_size;
diff --git a/test/correctness/store_in.cpp b/test/correctness/store_in.cpp
index d9cb45f8c6d2..ade8534c2b18 100644
--- a/test/correctness/store_in.cpp
+++ b/test/correctness/store_in.cpp
@@ -4,12 +4,12 @@ using namespace Halide;
 
 int mallocs = 0;
 
-void *my_malloc(void *, size_t sz) {
+void *my_malloc(JITUserContext *, size_t sz) {
     mallocs++;
     return (uint8_t *)malloc(sz);
 }
 
-void my_free(void *, void *ptr) {
+void my_free(JITUserContext *, void *ptr) {
     free(ptr);
 }
 
@@ -39,7 +39,8 @@ void check(MemoryType t1, MemoryType t2, MemoryType t3) {
                             (t3 == MemoryType::Heap ? 1 : 0));
 
     mallocs = 0;
-    f.set_custom_allocator(my_malloc, my_free);
+    f.jit_handlers().custom_malloc = my_malloc;
+    f.jit_handlers().custom_free = my_free;
     f.realize({1024});
     if (mallocs != expected_mallocs) {
         std::cerr << "Wrong number of mallocs for " << t1 << ", " << t2 << ", " << t3 << "\n"
@@ -50,7 +51,7 @@ void check(MemoryType t1, MemoryType t2, MemoryType t3) {
 
 int main(int argc, char **argv) {
     if (get_jit_target_from_environment().arch == Target::WebAssembly) {
-        printf("[SKIP] WebAssembly JIT does not support set_custom_allocator().\n");
+        printf("[SKIP] WebAssembly JIT does not support custom allocators.\n");
         return 0;
     }
 
diff --git a/test/correctness/tracing.cpp b/test/correctness/tracing.cpp
index 1eeab85513c0..385de54ff452 100644
--- a/test/correctness/tracing.cpp
+++ b/test/correctness/tracing.cpp
@@ -86,7 +86,7 @@ bool events_match(const event &a, const event &b) {
             !strcmp(a.trace_tag, b.trace_tag));
 }
 
-int my_trace(void *user_context, const halide_trace_event_t *ev) {
+int my_trace(JITUserContext *user_context, const halide_trace_event_t *ev) {
     assert(ev->dimensions <= 4 && ev->type.lanes <= 4);
 
     // Record this event in the trace array
@@ -194,7 +194,7 @@ int main(int argc, char **argv) {
     g.store_root().compute_at(f, x);
     g.vectorize(x, 4);
 
-    f.set_custom_trace(&my_trace);
+    f.jit_handlers().custom_trace = &my_trace;
 
     // Check that Target::TracePipeline works.
     f.realize({10}, get_jit_target_from_environment().with_feature(Target::TracePipeline));
diff --git a/test/correctness/tracing_broadcast.cpp b/test/correctness/tracing_broadcast.cpp
index 95b6e1820794..d572e9b32346 100644
--- a/test/correctness/tracing_broadcast.cpp
+++ b/test/correctness/tracing_broadcast.cpp
@@ -3,7 +3,7 @@
 
 using namespace Halide;
 
-int my_trace(void *user_context, const halide_trace_event_t *e) {
+int my_trace(JITUserContext *user_context, const halide_trace_event_t *e) {
     if (e->event == halide_trace_store) {
         for (int i = 0; i < e->type.lanes; ++i) {
             int val = ((const int *)(e->value))[i];
@@ -25,7 +25,7 @@ int main(int argc, char **argv) {
     f.vectorize(x, 8);
 
     f.trace_stores();
-    f.set_custom_trace(&my_trace);
+    f.jit_handlers().custom_trace = &my_trace;
     f.realize({8, 8});
 
     printf("Success!\n");
diff --git a/test/correctness/tracing_stack.cpp b/test/correctness/tracing_stack.cpp
index abb96b7fb720..d3fe04d548b5 100644
--- a/test/correctness/tracing_stack.cpp
+++ b/test/correctness/tracing_stack.cpp
@@ -21,7 +21,7 @@ using std::string;
 
 stack<string> stack_trace;
 
-int my_trace(void *user_context, const halide_trace_event_t *e) {
+int my_trace(JITUserContext *user_context, const halide_trace_event_t *e) {
     const string event_types[] = {"Load ",
                                   "Store ",
                                   "Begin realization ",
@@ -85,7 +85,7 @@ int main(int argc, char **argv) {
     h(x, y) = g(x, y) + input(x, y);
     h.trace_realizations();
 
-    h.set_custom_trace(&my_trace);
+    h.jit_handlers().custom_trace = &my_trace;
     h.realize({100, 100});
 
     printf("The code should not have reached this print statement.\n");
diff --git a/test/correctness/uninitialized_read.cpp b/test/correctness/uninitialized_read.cpp
index 9b328ecf6a73..9d946fc8b8f3 100644
--- a/test/correctness/uninitialized_read.cpp
+++ b/test/correctness/uninitialized_read.cpp
@@ -3,7 +3,7 @@
 
 using namespace Halide;
 
-int my_trace(void *user_context, const halide_trace_event_t *e) {
+int my_trace(JITUserContext *user_context, const halide_trace_event_t *e) {
 
     if (e->event == 2) {  // begin realization
         if (e->coordinates[1] != 4) {
@@ -33,7 +33,7 @@ int main(int argc, char **argv) {
     h.output_buffer().dim(0).set_bounds(0, 1);
 
     f.trace_realizations();
-    h.set_custom_trace(&my_trace);
+    h.jit_handlers().custom_trace = &my_trace;
     h.realize({1});
 
     printf("Success!\n");
diff --git a/test/correctness/vectorize_guard_with_if.cpp b/test/correctness/vectorize_guard_with_if.cpp
index 39f96ccfa947..0dbe6fecb0f1 100644
--- a/test/correctness/vectorize_guard_with_if.cpp
+++ b/test/correctness/vectorize_guard_with_if.cpp
@@ -4,7 +4,7 @@ using namespace Halide;
 
 int num_vector_stores = 0;
 int num_scalar_stores = 0;
-int my_trace(void *user_context, const halide_trace_event_t *e) {
+int my_trace(JITUserContext *user_context, const halide_trace_event_t *e) {
     if (e->event == halide_trace_store) {
         if (e->type.lanes > 1) {
             num_vector_stores++;
@@ -27,7 +27,7 @@ int main(int argc, char **argv) {
         const int expected_vector_stores = w / v;
         const int expected_scalar_stores = w % v;
 
-        f.set_custom_trace(&my_trace);
+        f.jit_handlers().custom_trace = &my_trace;
         f.trace_stores();
 
         num_vector_stores = 0;
diff --git a/test/error/realize_constantly_larger_than_two_gigs.cpp b/test/error/realize_constantly_larger_than_two_gigs.cpp
index 123afd9a1d28..a8a9b0f9c1ce 100644
--- a/test/error/realize_constantly_larger_than_two_gigs.cpp
+++ b/test/error/realize_constantly_larger_than_two_gigs.cpp
@@ -2,25 +2,25 @@
 #include <memory>
 #include <stdio.h>
 
+using namespace Halide;
+
 int error_occurred = false;
-void halide_error(void *ctx, const char *msg) {
+void my_error(JITUserContext *ctx, const char *msg) {
     printf("Expected: %s\n", msg);
     error_occurred = true;
 }
 
-using namespace Halide;
-
 int main(int argc, char **argv) {
     Var x, y, z;
     RDom r(0, 4096, 0, 4096, 0, 256);
     Func big;
     big(x, y, z) = cast<uint8_t>(42);
-    big.set_error_handler(&halide_error);
+    big.jit_handlers().custom_error = my_error;
     big.compute_root();
 
     Func grand_total;
     grand_total() = cast<uint8_t>(sum(big(r.x, r.y, r.z)));
-    grand_total.set_error_handler(&halide_error);
+    grand_total.jit_handlers().custom_error = my_error;
 
     Buffer<uint8_t> result = grand_total.realize();
 
diff --git a/test/error/undefined_rdom_dimension.cpp b/test/error/undefined_rdom_dimension.cpp
index 3ef72c6aaa1b..da03a43c4c5a 100644
--- a/test/error/undefined_rdom_dimension.cpp
+++ b/test/error/undefined_rdom_dimension.cpp
@@ -2,14 +2,14 @@
 #include <assert.h>
 #include <stdio.h>
 
+using namespace Halide;
+
 int error_occurred = false;
-void halide_error(void *ctx, const char *msg) {
+void my_error(JITUserContext *ctx, const char *msg) {
     printf("Expected: %s\n", msg);
     error_occurred = true;
 }
 
-using namespace Halide;
-
 int main(int argc, char **argv) {
     Func f("f"), g("g"), h("h");
     Var x("x"), y("y"), c("c");
@@ -20,7 +20,7 @@ int main(int argc, char **argv) {
     f(x, y, c) = g(x, y, c);
     f(r.x, r.y, c) = f(r.x - 1, r.y, c) + h(r.x, r.y, c);
 
-    f.set_error_handler(&halide_error);
+    f.jit_handlers().custom_error = my_error;
     Buffer<int32_t> result = f.realize({100, 5, 3});
 
     assert(error_occurred);
diff --git a/test/performance/memory_profiler.cpp b/test/performance/memory_profiler.cpp
index 471301016d23..7c5827f5a775 100644
--- a/test/performance/memory_profiler.cpp
+++ b/test/performance/memory_profiler.cpp
@@ -15,13 +15,13 @@ void reset_stats() {
     stack_peak = 0;
 }
 
-void my_print(void *, const char *msg) {
+void my_print(JITUserContext *, const char *msg) {
     float this_ms, this_threads;
     int idx, this_percentage, this_heap_peak;
     int this_num_mallocs, this_malloc_avg, this_stack_peak;
     int val;
 
-    //printf("%s", msg);
+    // printf("%s", msg);
     val = sscanf(msg, " g_%d: %fms (%d%%) threads: %f peak: %d num: %d avg: %d",
                  &idx, &this_ms, &this_percentage, &this_threads, &this_heap_peak,
                  &this_num_mallocs, &this_malloc_avg);
@@ -123,7 +123,7 @@ int main(int argc, char **argv) {
         f1(x, y) = g1(x % size_x, y % size_y);
         g1.compute_root();
 
-        f1.set_custom_print(&my_print);
+        f1.jit_handlers().custom_print = my_print;
 
         reset_stats();
         f1.realize({size_x, size_y}, t);
@@ -144,7 +144,7 @@ int main(int argc, char **argv) {
         f2(x, y) = g2(x - 1, y) + g2(x, y - 1);
         g2.compute_root();
 
-        f2.set_custom_print(&my_print);
+        f2.jit_handlers().custom_print = my_print;
 
         reset_stats();
         f2.realize({size_x, size_y}, t);
@@ -162,7 +162,7 @@ int main(int argc, char **argv) {
         f3(x, y) = select(1 == 2, g3(x - 1, y), 0);
         g3.compute_root();
 
-        f3.set_custom_print(&my_print);
+        f3.jit_handlers().custom_print = my_print;
 
         reset_stats();
         f3.realize({1000, 1000}, t);
@@ -179,7 +179,7 @@ int main(int argc, char **argv) {
         f3(x, y) = select(1 == 2, g3((x - 1) % 10, y % 10), 0);
         g3.compute_root();
 
-        f3.set_custom_print(&my_print);
+        f3.jit_handlers().custom_print = my_print;
 
         reset_stats();
         f3.realize({1000, 1000}, t);
@@ -206,7 +206,7 @@ int main(int argc, char **argv) {
         f4.compute_root();
         f5.compute_root();
 
-        f6.set_custom_print(&my_print);
+        f6.jit_handlers().custom_print = my_print;
 
         int total = 0;
 
@@ -259,7 +259,7 @@ int main(int argc, char **argv) {
         g5.store_at(f8, y).compute_at(f8, y);
         f7.compute_at(f8, y);
 
-        f8.set_custom_print(&my_print);
+        f8.jit_handlers().custom_print = my_print;
 
         reset_stats();
         f8.realize({size_x, size_y}, t);
@@ -285,7 +285,7 @@ int main(int argc, char **argv) {
 
         f10.parallel(y);
 
-        f10.set_custom_print(&my_print);
+        f10.jit_handlers().custom_print = my_print;
 
         reset_stats();
         f10.realize({size_x, size_y}, t);
@@ -306,7 +306,7 @@ int main(int argc, char **argv) {
         f11(x, y) = g7(x % size_x, y % size_y);
         g7.compute_root();
 
-        f11.set_custom_print(&my_print);
+        f11.jit_handlers().custom_print = my_print;
 
         reset_stats();
         f11.realize({size_x, size_y}, t);
@@ -327,7 +327,7 @@ int main(int argc, char **argv) {
 
         f12.parallel(y);
 
-        f12.set_custom_print(&my_print);
+        f12.jit_handlers().custom_print = my_print;
 
         reset_stats();
         f12.realize({size_x, size_y}, t);
diff --git a/test/performance/profiler.cpp b/test/performance/profiler.cpp
index 3bb907f695f8..77741de3daa2 100644
--- a/test/performance/profiler.cpp
+++ b/test/performance/profiler.cpp
@@ -5,7 +5,7 @@ using namespace Halide;
 
 int percentage = 0;
 float ms = 0;
-void my_print(void *, const char *msg) {
+void my_print(JITUserContext *, const char *msg) {
     float this_ms;
     int this_percentage;
     int val = sscanf(msg, " fn13: %fms (%d", &this_ms, &this_percentage);
@@ -46,7 +46,7 @@ int main(int argc, char **argv) {
     RDom r(0, iters);
     out(c, x) += r * f[29](c, x);
 
-    out.set_custom_print(&my_print);
+    out.jit_handlers().custom_print = my_print;
     out.compute_root();
     out.update().reorder(c, x, r);
     for (int i = 0; i < 30; i++) {
@@ -56,7 +56,7 @@ int main(int argc, char **argv) {
     Target t = get_jit_target_from_environment().with_feature(Target::Profile);
     Buffer<float> im = out.realize({10, 1000}, t);
 
-    //out.compile_to_assembly("/dev/stdout", {}, t.with_feature(Target::JIT));
+    // out.compile_to_assembly("/dev/stdout", {}, t.with_feature(Target::JIT));
 
     printf("Time spent in fn13: %fms\n", ms);
 
diff --git a/test/warning/require_const_false.cpp b/test/warning/require_const_false.cpp
index 3acfaa06bff6..564b095b6f87 100644
--- a/test/warning/require_const_false.cpp
+++ b/test/warning/require_const_false.cpp
@@ -4,7 +4,7 @@
 
 using namespace Halide;
 
-void halide_error(void *ctx, const char *msg) {
+void my_error(JITUserContext *ctx, const char *msg) {
     // Emitting "error.*:" to stdout or stderr will cause CMake to report the
     // test as a failure on Windows, regardless of error code returned,
     // hence the abbreviation to "err".
@@ -24,7 +24,7 @@ int main(int argc, char **argv) {
     f(x) = require((p1 + p2) == kPrime1,
                    (p1 + p2) * kPrime2,
                    "The parameters should add to exactly", kPrime1, "but were", p1, p2);
-    f.set_error_handler(&halide_error);
+    f.jit_handlers().custom_error = my_error;
     result = f.realize({1});
 
     return 0;