From ae76f8123942b9b9d56a317fe0edb462bc30db0d Mon Sep 17 00:00:00 2001 From: Prasoon Mishra Date: Wed, 28 Feb 2024 16:04:45 +0530 Subject: [PATCH 1/2] Add sobel in hexagon_benchmarks app for CMake builds Resolved compilation errors caused by the eliminate interleave pass, which changed the instruction from halide.hexagon.pack_satub.vuh to halide.hexagon.trunc_satub.vuh. The latter is only available in v65 or later. This commit ensures compatibility with v65 and later versions. --- apps/hexagon_benchmarks/CMakeLists.txt | 9 ++-- apps/hexagon_benchmarks/process.cpp | 5 +- src/HexagonOptimize.cpp | 65 ++++++++++++++++---------- 3 files changed, 48 insertions(+), 31 deletions(-) diff --git a/apps/hexagon_benchmarks/CMakeLists.txt b/apps/hexagon_benchmarks/CMakeLists.txt index 9cbcc541b76a..c01ad22035bd 100644 --- a/apps/hexagon_benchmarks/CMakeLists.txt +++ b/apps/hexagon_benchmarks/CMakeLists.txt @@ -22,23 +22,24 @@ endmacro() add_generator_and_library(dilate3x3) add_generator_and_library(gaussian5x5) add_generator_and_library(median3x3) +add_generator_and_library(sobel) # Main executable add_executable(process process.cpp) target_compile_options(process PRIVATE $<$:-O2>) if (Halide_TARGET MATCHES "hvx") - target_compile_definitions(process PRIVATE DILATE3X3 GAUSSIAN5X5 MEDIAN3X3 TARGET_HAS_HVX) + target_compile_definitions(process PRIVATE DILATE3X3 GAUSSIAN5X5 MEDIAN3X3 SOBEL TARGET_HAS_HVX) else() - target_compile_definitions(process PRIVATE DILATE3X3 GAUSSIAN5X5 MEDIAN3X3) + target_compile_definitions(process PRIVATE DILATE3X3 GAUSSIAN5X5 MEDIAN3X3 SOBEL) endif() target_link_libraries(process PRIVATE Halide::Tools - dilate3x3 gaussian5x5 median3x3) + dilate3x3 gaussian5x5 median3x3 sobel) # Test that the app actually works! add_test(NAME hexagon_benchmarks COMMAND process -n 1) set_tests_properties(hexagon_benchmarks PROPERTIES LABELS hexagon_benchmarks PASS_REGULAR_EXPRESSION "Success!" - SKIP_REGULAR_EXPRESSION "\\[SKIP\\]") + SKIP_REGULAR_EXPRESSION "\\[SKIP\\]") \ No newline at end of file diff --git a/apps/hexagon_benchmarks/process.cpp b/apps/hexagon_benchmarks/process.cpp index 87a492c577d1..def519963ad0 100644 --- a/apps/hexagon_benchmarks/process.cpp +++ b/apps/hexagon_benchmarks/process.cpp @@ -43,10 +43,11 @@ int main(int argc, char **argv) { Dilate3x3Descriptor dilate3x3_pipeine(W, H); Median3x3Descriptor median3x3_pipeline(W, H); Gaussian5x5Descriptor gaussian5x5_pipeline(W, H); + SobelDescriptor sobel_pipeline(W, H); Conv3x3a32Descriptor conv3x3a32_pipeline(W, H); std::vector pipelines = {&conv3x3a16_pipeline, &dilate3x3_pipeine, &median3x3_pipeline, - &gaussian5x5_pipeline, &conv3x3a32_pipeline}; + &gaussian5x5_pipeline, &sobel_pipeline, &conv3x3a32_pipeline}; for (PipelineDescriptorBase *p : pipelines) { if (!p->defined()) { @@ -85,4 +86,4 @@ int main(int argc, char **argv) { printf("Success!\n"); return 0; -} +} \ No newline at end of file diff --git a/src/HexagonOptimize.cpp b/src/HexagonOptimize.cpp index deabd95d1d1b..15500dfda027 100644 --- a/src/HexagonOptimize.cpp +++ b/src/HexagonOptimize.cpp @@ -1685,6 +1685,14 @@ class EliminateInterleaves : public IRMutator { return true; } + // Indicates the minimum Hexagon Vector Extension (HVX) target version required for using these instructions. + enum class HvxTarget { + v62orLater, // Use for Hexagon v62 target or later + v65orLater, // Use for Hexagon v65 target or later + v66orLater, // Use for Hexagon v66 target or later + }; + HvxTarget hvx_target; + Expr visit(const Call *op) override { vector args(op->args); @@ -1702,27 +1710,27 @@ class EliminateInterleaves : public IRMutator { // does not deinterleave, and then opportunistically select // the interleaving alternative when we can cancel out to the // interleave. - static std::map deinterleaving_alts = { - {"halide.hexagon.pack.vh", "halide.hexagon.trunc.vh"}, - {"halide.hexagon.pack.vw", "halide.hexagon.trunc.vw"}, - {"halide.hexagon.packhi.vh", "halide.hexagon.trunclo.vh"}, - {"halide.hexagon.packhi.vw", "halide.hexagon.trunclo.vw"}, - {"halide.hexagon.pack_satub.vh", "halide.hexagon.trunc_satub.vh"}, - {"halide.hexagon.pack_satub.vuh", "halide.hexagon.trunc_satub.vuh"}, - {"halide.hexagon.pack_sath.vw", "halide.hexagon.trunc_sath.vw"}, - {"halide.hexagon.pack_satuh.vw", "halide.hexagon.trunc_satuh.vw"}, - {"halide.hexagon.pack_satuh.vuw", "halide.hexagon.trunc_satuh.vuw"}, + static std::map> deinterleaving_alts = { + {"halide.hexagon.pack.vh", {HvxTarget::v62orLater, "halide.hexagon.trunc.vh"}}, + {"halide.hexagon.pack.vw", {HvxTarget::v62orLater, "halide.hexagon.trunc.vw"}}, + {"halide.hexagon.packhi.vh", {HvxTarget::v62orLater, "halide.hexagon.trunclo.vh"}}, + {"halide.hexagon.packhi.vw", {HvxTarget::v62orLater, "halide.hexagon.trunclo.vw"}}, + {"halide.hexagon.pack_satub.vh", {HvxTarget::v62orLater, "halide.hexagon.trunc_satub.vh"}}, + {"halide.hexagon.pack_satub.vuh", {HvxTarget::v65orLater, "halide.hexagon.trunc_satub.vuh"}}, + {"halide.hexagon.pack_sath.vw", {HvxTarget::v62orLater, "halide.hexagon.trunc_sath.vw"}}, + {"halide.hexagon.pack_satuh.vw", {HvxTarget::v62orLater, "halide.hexagon.trunc_satuh.vw"}}, + {"halide.hexagon.pack_satuh.vuw", {HvxTarget::v62orLater, "halide.hexagon.trunc_satuh.vuw"}}, }; // The reverse mapping of the above. - static std::map interleaving_alts = { - {"halide.hexagon.trunc.vh", "halide.hexagon.pack.vh"}, - {"halide.hexagon.trunc.vw", "halide.hexagon.pack.vw"}, - {"halide.hexagon.trunclo.vh", "halide.hexagon.packhi.vh"}, - {"halide.hexagon.trunclo.vw", "halide.hexagon.packhi.vw"}, - {"halide.hexagon.trunc_satub.vh", "halide.hexagon.pack_satub.vh"}, - {"halide.hexagon.trunc_sath.vw", "halide.hexagon.pack_sath.vw"}, - {"halide.hexagon.trunc_satuh.vw", "halide.hexagon.pack_satuh.vw"}, + static std::map> interleaving_alts = { + {"halide.hexagon.trunc.vh", {HvxTarget::v62orLater, "halide.hexagon.pack.vh"}}, + {"halide.hexagon.trunc.vw", {HvxTarget::v62orLater, "halide.hexagon.pack.vw"}}, + {"halide.hexagon.trunclo.vh", {HvxTarget::v62orLater, "halide.hexagon.packhi.vh"}}, + {"halide.hexagon.trunclo.vw", {HvxTarget::v62orLater, "halide.hexagon.packhi.vw"}}, + {"halide.hexagon.trunc_satub.vh", {HvxTarget::v62orLater, "halide.hexagon.pack_satub.vh"}}, + {"halide.hexagon.trunc_sath.vw", {HvxTarget::v62orLater, "halide.hexagon.pack_sath.vw"}}, + {"halide.hexagon.trunc_satuh.vw", {HvxTarget::v62orLater, "halide.hexagon.pack_satuh.vw"}}, }; if (is_native_deinterleave(op) && yields_interleave(args[0])) { @@ -1738,7 +1746,7 @@ class EliminateInterleaves : public IRMutator { op->func, op->value_index, op->image, op->param); // Add the interleave back to the result of the call. return native_interleave(expr); - } else if (deinterleaving_alts.find(op->name) != deinterleaving_alts.end() && + } else if (deinterleaving_alts.find(op->name) != deinterleaving_alts.end() && hvx_target == deinterleaving_alts[op->name].first && yields_removable_interleave(args)) { // This call has a deinterleaving alternative, and the // arguments are interleaved, so we should use the @@ -1746,14 +1754,14 @@ class EliminateInterleaves : public IRMutator { for (Expr &i : args) { i = remove_interleave(i); } - return Call::make(op->type, deinterleaving_alts[op->name], args, op->call_type); - } else if (interleaving_alts.count(op->name) && is_native_deinterleave(args[0])) { + return Call::make(op->type, deinterleaving_alts[op->name].second, args, op->call_type); + } else if (interleaving_alts.count(op->name) && hvx_target == interleaving_alts[op->name].first && is_native_deinterleave(args[0])) { // This is an interleaving alternative with a // deinterleave, which can be generated when we // deinterleave storage. Revert back to the interleaving // op so we can remove the deinterleave. Expr arg = args[0].as()->args[0]; - return Call::make(op->type, interleaving_alts[op->name], {arg}, op->call_type, + return Call::make(op->type, interleaving_alts[op->name].second, {arg}, op->call_type, op->func, op->value_index, op->image, op->param); } else if (changed) { return Call::make(op->type, op->name, args, op->call_type, @@ -1896,8 +1904,15 @@ class EliminateInterleaves : public IRMutator { using IRMutator::visit; public: - EliminateInterleaves(int native_vector_bytes) + EliminateInterleaves(const Target &t, int native_vector_bytes) : native_vector_bits(native_vector_bytes * 8), alignment_analyzer(native_vector_bytes) { + if (t.features_any_of({Target::HVX_v65})) { + hvx_target = HvxTarget::v65orLater; + } else if (t.features_any_of({Target::HVX_v66})) { + hvx_target = HvxTarget::v66orLater; + } else { + hvx_target = HvxTarget::v62orLater; + } } }; @@ -2233,7 +2248,7 @@ Stmt optimize_hexagon_instructions(Stmt s, const Target &t) { << s << "\n"; // Try to eliminate any redundant interleave/deinterleave pairs. - s = EliminateInterleaves(t.natural_vector_size(Int(8))).mutate(s); + s = EliminateInterleaves(t, t.natural_vector_size(Int(8))).mutate(s); debug(4) << "Hexagon: Lowering after EliminateInterleaves\n" << s << "\n"; @@ -2246,4 +2261,4 @@ Stmt optimize_hexagon_instructions(Stmt s, const Target &t) { } } // namespace Internal -} // namespace Halide +} // namespace Halide \ No newline at end of file From 587a61556c7db7c7f4271479947edf95977b07f8 Mon Sep 17 00:00:00 2001 From: Prasoon Mishra Date: Wed, 6 Mar 2024 17:57:51 +0530 Subject: [PATCH 2/2] Minor fix to address the issue. --- src/HexagonOptimize.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/HexagonOptimize.cpp b/src/HexagonOptimize.cpp index 15500dfda027..f11fa3348399 100644 --- a/src/HexagonOptimize.cpp +++ b/src/HexagonOptimize.cpp @@ -1746,7 +1746,8 @@ class EliminateInterleaves : public IRMutator { op->func, op->value_index, op->image, op->param); // Add the interleave back to the result of the call. return native_interleave(expr); - } else if (deinterleaving_alts.find(op->name) != deinterleaving_alts.end() && hvx_target == deinterleaving_alts[op->name].first && + } else if (deinterleaving_alts.find(op->name) != deinterleaving_alts.end() && hvx_target >= deinterleaving_alts[op->name].first && + yields_removable_interleave(args)) { // This call has a deinterleaving alternative, and the // arguments are interleaved, so we should use the @@ -1755,7 +1756,7 @@ class EliminateInterleaves : public IRMutator { i = remove_interleave(i); } return Call::make(op->type, deinterleaving_alts[op->name].second, args, op->call_type); - } else if (interleaving_alts.count(op->name) && hvx_target == interleaving_alts[op->name].first && is_native_deinterleave(args[0])) { + } else if (interleaving_alts.count(op->name) && hvx_target >= interleaving_alts[op->name].first && is_native_deinterleave(args[0])) { // This is an interleaving alternative with a // deinterleave, which can be generated when we // deinterleave storage. Revert back to the interleaving