From 05cd73fca179c8f4bdc4e96b54ca28715b640af9 Mon Sep 17 00:00:00 2001 From: Andrew Adams Date: Mon, 24 Jul 2023 12:30:18 -0700 Subject: [PATCH 01/11] Fix leaks caused by self-referential parameter constraints --- src/Parameter.cpp | 77 ++++++++++++++++++++++++++++++++++++++--------- src/Parameter.h | 10 +++--- 2 files changed, 67 insertions(+), 20 deletions(-) diff --git a/src/Parameter.cpp b/src/Parameter.cpp index e65d763faaf8..bd64cd221b55 100644 --- a/src/Parameter.cpp +++ b/src/Parameter.cpp @@ -3,6 +3,7 @@ #include "Argument.h" #include "Float16.h" #include "IR.h" +#include "IRMutator.h" #include "IROperator.h" namespace Halide { @@ -194,34 +195,80 @@ bool Parameter::defined() const { return contents.defined(); } -void Parameter::set_min_constraint(int dim, Expr e) { +Expr remove_self_references(const Parameter &p, const Expr &e) { + class RemoveSelfReferences : public IRMutator { + using IRMutator::visit; + + Expr visit(const Variable *var) { + if (var->param.same_as(p)) { + internal_assert(starts_with(var->name, p.name() + ".")); + return Variable::make(var->type, var->name); + } else { + internal_assert(!starts_with(var->name, p.name() + ".")); + } + return var; + } + + public: + const Parameter &p; + RemoveSelfReferences(const Parameter &p) + : p(p) { + } + } mutator{p}; + return mutator.mutate(e); +} + +Expr restore_self_references(const Parameter &p, const Expr &e) { + class RestoreSelfReferences : public IRMutator { + using IRMutator::visit; + + Expr visit(const Variable *var) { + if (!var->image.defined() && + !var->param.defined() && + !var->reduction_domain.defined() && + starts_with(var->name, p.name() + ".")) { + return Variable::make(var->type, var->name, p); + } + return var; + } + + public: + const Parameter &p; + RestoreSelfReferences(const Parameter &p) + : p(p) { + } + } mutator{p}; + return mutator.mutate(e); +} + +void Parameter::set_min_constraint(int dim, const Expr &e) { check_is_buffer(); check_dim_ok(dim); - contents->buffer_constraints[dim].min = std::move(e); + contents->buffer_constraints[dim].min = remove_self_references(*this, e); } -void Parameter::set_extent_constraint(int dim, Expr e) { +void Parameter::set_extent_constraint(int dim, const Expr &e) { check_is_buffer(); check_dim_ok(dim); - contents->buffer_constraints[dim].extent = std::move(e); + contents->buffer_constraints[dim].extent = remove_self_references(*this, e); } -void Parameter::set_stride_constraint(int dim, Expr e) { +void Parameter::set_stride_constraint(int dim, const Expr &e) { check_is_buffer(); check_dim_ok(dim); - contents->buffer_constraints[dim].stride = std::move(e); + contents->buffer_constraints[dim].stride = remove_self_references(*this, e); } -void Parameter::set_min_constraint_estimate(int dim, Expr min) { +void Parameter::set_min_constraint_estimate(int dim, const Expr &min) { check_is_buffer(); check_dim_ok(dim); - contents->buffer_constraints[dim].min_estimate = std::move(min); + contents->buffer_constraints[dim].min_estimate = remove_self_references(*this, min); } -void Parameter::set_extent_constraint_estimate(int dim, Expr extent) { +void Parameter::set_extent_constraint_estimate(int dim, const Expr &extent) { check_is_buffer(); check_dim_ok(dim); - contents->buffer_constraints[dim].extent_estimate = std::move(extent); + contents->buffer_constraints[dim].extent_estimate = remove_self_references(*this, extent); } void Parameter::set_host_alignment(int bytes) { @@ -232,31 +279,31 @@ void Parameter::set_host_alignment(int bytes) { Expr Parameter::min_constraint(int dim) const { check_is_buffer(); check_dim_ok(dim); - return contents->buffer_constraints[dim].min; + return restore_self_references(*this, contents->buffer_constraints[dim].min); } Expr Parameter::extent_constraint(int dim) const { check_is_buffer(); check_dim_ok(dim); - return contents->buffer_constraints[dim].extent; + return restore_self_references(*this, contents->buffer_constraints[dim].extent); } Expr Parameter::stride_constraint(int dim) const { check_is_buffer(); check_dim_ok(dim); - return contents->buffer_constraints[dim].stride; + return restore_self_references(*this, contents->buffer_constraints[dim].stride); } Expr Parameter::min_constraint_estimate(int dim) const { check_is_buffer(); check_dim_ok(dim); - return contents->buffer_constraints[dim].min_estimate; + return restore_self_references(*this, contents->buffer_constraints[dim].min_estimate); } Expr Parameter::extent_constraint_estimate(int dim) const { check_is_buffer(); check_dim_ok(dim); - return contents->buffer_constraints[dim].extent_estimate; + return restore_self_references(*this, contents->buffer_constraints[dim].extent_estimate); } int Parameter::host_alignment() const { diff --git a/src/Parameter.h b/src/Parameter.h index 8498952366ac..6b187af8807b 100644 --- a/src/Parameter.h +++ b/src/Parameter.h @@ -124,11 +124,11 @@ class Parameter { /** Get and set constraints for the min, extent, stride, and estimates on * the min/extent. */ //@{ - void set_min_constraint(int dim, Expr e); - void set_extent_constraint(int dim, Expr e); - void set_stride_constraint(int dim, Expr e); - void set_min_constraint_estimate(int dim, Expr min); - void set_extent_constraint_estimate(int dim, Expr extent); + void set_min_constraint(int dim, const Expr &e); + void set_extent_constraint(int dim, const Expr &e); + void set_stride_constraint(int dim, const Expr &e); + void set_min_constraint_estimate(int dim, const Expr &min); + void set_extent_constraint_estimate(int dim, const Expr &extent); void set_host_alignment(int bytes); Expr min_constraint(int dim) const; Expr extent_constraint(int dim) const; From acd4fd28138c335c415695d2b5b90d783d8daae5 Mon Sep 17 00:00:00 2001 From: Andrew Adams Date: Mon, 24 Jul 2023 12:37:43 -0700 Subject: [PATCH 02/11] Add comment --- src/Parameter.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Parameter.cpp b/src/Parameter.cpp index bd64cd221b55..5e1c77c82625 100644 --- a/src/Parameter.cpp +++ b/src/Parameter.cpp @@ -195,6 +195,10 @@ bool Parameter::defined() const { return contents.defined(); } +// Helper function to remove any references in a parameter constraint to the +// parameter itself, to avoid creating a reference count cycle and causing a +// leak. Note that it's still possible to create a cycle by having two different +// Parameters each have constraints that reference the other. Expr remove_self_references(const Parameter &p, const Expr &e) { class RemoveSelfReferences : public IRMutator { using IRMutator::visit; From f4d72c12dc2069db14733ef302fa4431342b1b75 Mon Sep 17 00:00:00 2001 From: Andrew Adams Date: Mon, 24 Jul 2023 16:22:42 -0700 Subject: [PATCH 03/11] Add missing overrides --- src/Parameter.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Parameter.cpp b/src/Parameter.cpp index 5e1c77c82625..ea9e41d51f61 100644 --- a/src/Parameter.cpp +++ b/src/Parameter.cpp @@ -203,7 +203,7 @@ Expr remove_self_references(const Parameter &p, const Expr &e) { class RemoveSelfReferences : public IRMutator { using IRMutator::visit; - Expr visit(const Variable *var) { + Expr visit(const Variable *var) override { if (var->param.same_as(p)) { internal_assert(starts_with(var->name, p.name() + ".")); return Variable::make(var->type, var->name); @@ -226,7 +226,7 @@ Expr restore_self_references(const Parameter &p, const Expr &e) { class RestoreSelfReferences : public IRMutator { using IRMutator::visit; - Expr visit(const Variable *var) { + Expr visit(const Variable *var) override { if (!var->image.defined() && !var->param.defined() && !var->reduction_domain.defined() && From abc3ab0dd8a3081ccbf422cd2eca8f1e9afa7ec5 Mon Sep 17 00:00:00 2001 From: Andrew Adams Date: Mon, 24 Jul 2023 16:26:47 -0700 Subject: [PATCH 04/11] Fix reported leaks in memoize test by explicitly releasing the shared runtime at the end of the test --- test/correctness/memoize.cpp | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/test/correctness/memoize.cpp b/test/correctness/memoize.cpp index 9acec33647aa..bb3bc3697ecb 100644 --- a/test/correctness/memoize.cpp +++ b/test/correctness/memoize.cpp @@ -59,15 +59,27 @@ extern "C" HALIDE_EXPORT_SYMBOL int computed_eviction_key(int a) { } HalideExtern_1(int, computed_eviction_key, int); -void simple_free(JITUserContext *user_context, void *ptr) { - free(ptr); -} - -void *flakey_malloc(JITUserContext * /* user_context */, size_t x) { +// A flaky allocator. Note that it has to be compatible with halide_free +// though, because halide_free is going to be called by +// memoization_cache_cleanup with a null user_context when we release the jit +// shared runtimes at the endof this test. So it has to be aligned, and it has +// to store the pointer to free just before the returned pointer. +void *flaky_malloc(JITUserContext * /* user_context */, size_t x) { if ((rand() % 4) == 0) { return nullptr; } else { - return malloc(x); + x = (x + 63) & (~31); + void *ptr = aligned_alloc(32, x); + void **ret = (void **)ptr; + ret += 4; + ret[-1] = ptr; + return ret; + } +} + +void simple_free(JITUserContext *user_context, void *ptr) { + if (ptr != nullptr) { + free(((void **)ptr)[-1]); } } @@ -600,7 +612,7 @@ int main(int argc, char **argv) { Pipeline pipe(g); pipe.jit_handlers().custom_error = record_error; - pipe.jit_handlers().custom_malloc = flakey_malloc; + pipe.jit_handlers().custom_malloc = flaky_malloc; pipe.jit_handlers().custom_free = simple_free; int total_errors = 0; @@ -644,7 +656,8 @@ int main(int argc, char **argv) { } } - printf("In 100 attempts with flakey malloc, %d errors and %d full completions occured.\n", total_errors, completed); + printf("In 100 attempts with flaky malloc, %d errors and %d full completions occured.\n", + total_errors, completed); } { @@ -733,6 +746,7 @@ int main(int argc, char **argv) { assert(call_count == 8); } + Internal::JITSharedRuntime::release_all(); printf("Success!\n"); return 0; From 6a9d947dbf9ced34450b21fdd40e365ca96a5fd7 Mon Sep 17 00:00:00 2001 From: Andrew Adams Date: Mon, 24 Jul 2023 16:29:45 -0700 Subject: [PATCH 05/11] Use const refs for non-mutated args --- src/Dimension.cpp | 22 +++++++++++----------- src/Dimension.h | 10 +++++----- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/Dimension.cpp b/src/Dimension.cpp index 78b69d6fd804..4cb80a836a99 100644 --- a/src/Dimension.cpp +++ b/src/Dimension.cpp @@ -50,42 +50,42 @@ Expr Dimension::stride() const { return Variable::make(Int(32), s.str(), param); } -Dimension Dimension::set_extent(Expr extent) { +Dimension Dimension::set_extent(const Expr &extent) { // Propagate constant bounds into estimates as well. if (is_const(extent)) { param.set_extent_constraint_estimate(d, extent); } - param.set_extent_constraint(d, std::move(extent)); + param.set_extent_constraint(d, extent); return *this; } -Dimension Dimension::set_min(Expr min) { +Dimension Dimension::set_min(const Expr &min) { // Propagate constant bounds into estimates as well. if (is_const(min)) { param.set_min_constraint_estimate(d, min); } - param.set_min_constraint(d, std::move(min)); + param.set_min_constraint(d, min); return *this; } -Dimension Dimension::set_stride(Expr stride) { - param.set_stride_constraint(d, std::move(stride)); +Dimension Dimension::set_stride(const Expr &stride) { + param.set_stride_constraint(d, stride); return *this; } -Dimension Dimension::set_bounds(Expr min, Expr extent) { - return set_min(std::move(min)).set_extent(std::move(extent)); +Dimension Dimension::set_bounds(const Expr &min, const Expr &extent) { + return set_min(min).set_extent(extent); } -Dimension Dimension::set_estimate(Expr min, Expr extent) { +Dimension Dimension::set_estimate(const Expr &min, const Expr &extent) { // Update the estimates on the linked Func as well. // (This matters mainly for OutputImageParams.) // Note that while it's possible/legal for a Dimension to have an undefined // Func, you shouldn't ever call set_estimate on such an instance. internal_assert(f.defined()); f.set_estimate(f.args()[d], min, extent); - param.set_min_constraint_estimate(d, std::move(min)); - param.set_extent_constraint_estimate(d, std::move(extent)); + param.set_min_constraint_estimate(d, min); + param.set_extent_constraint_estimate(d, extent); return *this; } diff --git a/src/Dimension.h b/src/Dimension.h index 53585c862100..629511f1341c 100644 --- a/src/Dimension.h +++ b/src/Dimension.h @@ -34,7 +34,7 @@ class Dimension { /** Set the min in a given dimension to equal the given * expression. Setting the mins to zero may simplify some * addressing math. */ - Dimension set_min(Expr min); + Dimension set_min(const Expr &min); /** Set the extent in a given dimension to equal the given * expression. Images passed in that fail this check will generate @@ -57,20 +57,20 @@ class Dimension { im.dim(0).set_extent((im.dim(0).extent()/32)*32); \endcode * tells the compiler that the extent is a multiple of 32. */ - Dimension set_extent(Expr extent); + Dimension set_extent(const Expr &extent); /** Set the stride in a given dimension to equal the given * value. This is particularly helpful to set when * vectorizing. Known strides for the vectorized dimension * generate better code. */ - Dimension set_stride(Expr stride); + Dimension set_stride(const Expr &stride); /** Set the min and extent in one call. */ - Dimension set_bounds(Expr min, Expr extent); + Dimension set_bounds(const Expr &min, const Expr &extent); /** Set the min and extent estimates in one call. These values are only * used by the auto-scheduler and/or the RunGen tool/ */ - Dimension set_estimate(Expr min, Expr extent); + Dimension set_estimate(const Expr &min, const Expr &extent); Expr min_estimate() const; Expr extent_estimate() const; From c03b055ce51691c521b9ae6ef7687cc52623ab75 Mon Sep 17 00:00:00 2001 From: Andrew Adams Date: Tue, 25 Jul 2023 15:46:57 -0700 Subject: [PATCH 06/11] Hopefully fix for windows --- test/correctness/memoize.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/correctness/memoize.cpp b/test/correctness/memoize.cpp index bb3bc3697ecb..534b36bd4288 100644 --- a/test/correctness/memoize.cpp +++ b/test/correctness/memoize.cpp @@ -69,7 +69,11 @@ void *flaky_malloc(JITUserContext * /* user_context */, size_t x) { return nullptr; } else { x = (x + 63) & (~31); +#ifdef _MSC_VER + void *ptr = _aligned_malloc(x, 32); +#else void *ptr = aligned_alloc(32, x); +#endif void **ret = (void **)ptr; ret += 4; ret[-1] = ptr; From 13eb6f154d5f41c30d0e4a599cf2d78952af7455 Mon Sep 17 00:00:00 2001 From: Andrew Adams Date: Thu, 27 Jul 2023 09:02:05 -0700 Subject: [PATCH 07/11] Fix for 32-bit pointers --- test/correctness/memoize.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/correctness/memoize.cpp b/test/correctness/memoize.cpp index 534b36bd4288..d155940d311b 100644 --- a/test/correctness/memoize.cpp +++ b/test/correctness/memoize.cpp @@ -75,7 +75,7 @@ void *flaky_malloc(JITUserContext * /* user_context */, size_t x) { void *ptr = aligned_alloc(32, x); #endif void **ret = (void **)ptr; - ret += 4; + ret += 32 / sizeof(void *); ret[-1] = ptr; return ret; } From c7d30680a06a751a42c645b8753c4341c7f4e4a2 Mon Sep 17 00:00:00 2001 From: Andrew Adams Date: Tue, 1 Aug 2023 12:06:11 -0700 Subject: [PATCH 08/11] Don't use _aligned_malloc It requires _aligned_free, which the runtime aint gonna do --- test/correctness/memoize.cpp | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/test/correctness/memoize.cpp b/test/correctness/memoize.cpp index d155940d311b..31221c6bf28f 100644 --- a/test/correctness/memoize.cpp +++ b/test/correctness/memoize.cpp @@ -59,24 +59,21 @@ extern "C" HALIDE_EXPORT_SYMBOL int computed_eviction_key(int a) { } HalideExtern_1(int, computed_eviction_key, int); -// A flaky allocator. Note that it has to be compatible with halide_free -// though, because halide_free is going to be called by -// memoization_cache_cleanup with a null user_context when we release the jit -// shared runtimes at the endof this test. So it has to be aligned, and it has -// to store the pointer to free just before the returned pointer. +// A flaky allocator. Note that it has to be compatible with halide_free, +// because halide_free is going to be called by memoization_cache_cleanup with a +// null user_context when we release the jit shared runtimes at the end of this +// test. So it has to be aligned, and it has to store the pointer to free just +// before the returned pointer. void *flaky_malloc(JITUserContext * /* user_context */, size_t x) { if ((rand() % 4) == 0) { return nullptr; } else { - x = (x + 63) & (~31); -#ifdef _MSC_VER - void *ptr = _aligned_malloc(x, 32); -#else - void *ptr = aligned_alloc(32, x); -#endif - void **ret = (void **)ptr; - ret += 32 / sizeof(void *); - ret[-1] = ptr; + uint8_t *ptr = (uint8_t *)malloc(x + 40); + uint8_t *ret = ptr + 8; + while ((uintptr_t)ret & 31) { + ret++; + } + ((uint8_t **)ret)[-1] = ptr; return ret; } } From 25472d9461ebeb6c5664ef8ef3e6b4d5c4348c5f Mon Sep 17 00:00:00 2001 From: Andrew Adams Date: Tue, 1 Aug 2023 12:23:06 -0700 Subject: [PATCH 09/11] Fix other memoize test --- test/correctness/memoize_cloned.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/correctness/memoize_cloned.cpp b/test/correctness/memoize_cloned.cpp index 89e8161ff673..53450eb38a65 100644 --- a/test/correctness/memoize_cloned.cpp +++ b/test/correctness/memoize_cloned.cpp @@ -40,6 +40,8 @@ int main(int argc, char **argv) { return 1; } + Internal::JITSharedRuntime::release_all(); + printf("Success!\n"); return 0; } From 60dabb7dd49f72201646f27dd92f742ab0fc778e Mon Sep 17 00:00:00 2001 From: Andrew Adams Date: Fri, 4 Aug 2023 16:46:12 -0700 Subject: [PATCH 10/11] Use runtime built-in malloc/free On windows mixing and matching mallocs and frees doesn't work well. --- test/correctness/memoize.cpp | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/test/correctness/memoize.cpp b/test/correctness/memoize.cpp index 31221c6bf28f..554063ba72cf 100644 --- a/test/correctness/memoize.cpp +++ b/test/correctness/memoize.cpp @@ -59,29 +59,24 @@ extern "C" HALIDE_EXPORT_SYMBOL int computed_eviction_key(int a) { } HalideExtern_1(int, computed_eviction_key, int); +void *(*default_malloc)(JITUserContext *, size_t); +void (*default_free)(JITUserContext *, void *); + // A flaky allocator. Note that it has to be compatible with halide_free, // because halide_free is going to be called by memoization_cache_cleanup with a // null user_context when we release the jit shared runtimes at the end of this // test. So it has to be aligned, and it has to store the pointer to free just // before the returned pointer. -void *flaky_malloc(JITUserContext * /* user_context */, size_t x) { +void *flaky_malloc(JITUserContext *user_context, size_t x) { if ((rand() % 4) == 0) { return nullptr; } else { - uint8_t *ptr = (uint8_t *)malloc(x + 40); - uint8_t *ret = ptr + 8; - while ((uintptr_t)ret & 31) { - ret++; - } - ((uint8_t **)ret)[-1] = ptr; - return ret; + return default_malloc(user_context, x); } } void simple_free(JITUserContext *user_context, void *ptr) { - if (ptr != nullptr) { - free(((void **)ptr)[-1]); - } + return default_free(user_context, ptr); } bool error_occured = false; @@ -597,6 +592,15 @@ int main(int argc, char **argv) { return 0; } else { // Test out of memory handling. + + // Get the runtime's malloc and free. We need to use the ones + // in the runtime to ensure the matching free is called when + // we release all the runtimes at the end. + JITUserContext ctx; + Internal::JITSharedRuntime::populate_jit_handlers(&ctx, JITHandlers{}); + default_malloc = ctx.handlers.custom_malloc; + default_free = ctx.handlers.custom_free; + Param val; Func count_calls; From 0e85be4f069008d57703112484597728f879873c Mon Sep 17 00:00:00 2001 From: Andrew Adams Date: Fri, 4 Aug 2023 16:47:58 -0700 Subject: [PATCH 11/11] Fix comment --- test/correctness/memoize.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/test/correctness/memoize.cpp b/test/correctness/memoize.cpp index 554063ba72cf..29a180852936 100644 --- a/test/correctness/memoize.cpp +++ b/test/correctness/memoize.cpp @@ -62,11 +62,7 @@ HalideExtern_1(int, computed_eviction_key, int); void *(*default_malloc)(JITUserContext *, size_t); void (*default_free)(JITUserContext *, void *); -// A flaky allocator. Note that it has to be compatible with halide_free, -// because halide_free is going to be called by memoization_cache_cleanup with a -// null user_context when we release the jit shared runtimes at the end of this -// test. So it has to be aligned, and it has to store the pointer to free just -// before the returned pointer. +// A flaky allocator that wraps the built-in runtime one. void *flaky_malloc(JITUserContext *user_context, size_t x) { if ((rand() % 4) == 0) { return nullptr;