From 8c0c07b3fbf02358a1f1a551a409c624c6b6e471 Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Sat, 26 Mar 2022 17:50:54 +0300 Subject: [PATCH] Move processor tune into its own enum, out of features --- python_bindings/src/PyEnums.cpp | 27 +++++----- python_bindings/src/PyTarget.cpp | 7 +-- src/CPlusPlusMangle.cpp | 16 +++--- src/CodeGen_X86.cpp | 27 +++++----- src/HexagonOffload.cpp | 2 +- src/Module.cpp | 4 +- src/Target.cpp | 68 +++++++++++++++++++------- src/Target.h | 37 ++++++++------ src/runtime/HalideRuntime.h | 12 ----- test/correctness/simd_op_check_hvx.cpp | 2 +- test/correctness/target.cpp | 16 +++--- 11 files changed, 126 insertions(+), 92 deletions(-) diff --git a/python_bindings/src/PyEnums.cpp b/python_bindings/src/PyEnums.cpp index eef6c2cbd7c9..7e96b60f02a2 100644 --- a/python_bindings/src/PyEnums.cpp +++ b/python_bindings/src/PyEnums.cpp @@ -82,6 +82,21 @@ void define_enums(py::module &m) { .value("RISCV", Target::Arch::RISCV) .value("WebAssembly", Target::Arch::WebAssembly); + py::enum_(m, "TargetProcessor") + .value("TuneGeneric", Target::Processor::ProcessorGeneric) + .value("TuneK8", Target::Processor::K8) + .value("TuneK8_SSE3", Target::Processor::K8_SSE3) + .value("TuneAMDFam10", Target::Processor::AMDFam10) + .value("TuneBtVer1", Target::Processor::BtVer1) + .value("TuneBdVer1", Target::Processor::BdVer1) + .value("TuneBdVer2", Target::Processor::BdVer2) + .value("TuneBdVer3", Target::Processor::BdVer3) + .value("TuneBdVer4", Target::Processor::BdVer4) + .value("TuneBtVer2", Target::Processor::BtVer2) + .value("TuneZnVer1", Target::Processor::ZnVer1) + .value("TuneZnVer2", Target::Processor::ZnVer2) + .value("TuneZnVer3", Target::Processor::ZnVer3); + py::enum_(m, "TargetFeature") .value("JIT", Target::Feature::JIT) .value("Debug", Target::Feature::Debug) @@ -156,18 +171,6 @@ void define_enums(py::module &m) { .value("ARMv81a", Target::Feature::ARMv81a) .value("SanitizerCoverage", Target::Feature::SanitizerCoverage) 
.value("ProfileByTimer", Target::Feature::ProfileByTimer) - .value("TuneK8", Target::Feature::TuneK8) - .value("TuneK8_SSE3", Target::Feature::TuneK8_SSE3) - .value("TuneAMDFam10", Target::Feature::TuneAMDFam10) - .value("TuneBtVer1", Target::Feature::TuneBtVer1) - .value("TuneBdVer1", Target::Feature::TuneBdVer1) - .value("TuneBdVer2", Target::Feature::TuneBdVer2) - .value("TuneBdVer3", Target::Feature::TuneBdVer3) - .value("TuneBdVer4", Target::Feature::TuneBdVer4) - .value("TuneBtVer2", Target::Feature::TuneBtVer2) - .value("TuneZnVer1", Target::Feature::TuneZnVer1) - .value("TuneZnVer2", Target::Feature::TuneZnVer2) - .value("TuneZnVer3", Target::Feature::TuneZnVer3) .value("FeatureEnd", Target::Feature::FeatureEnd); py::enum_(m, "TypeCode") diff --git a/python_bindings/src/PyTarget.cpp b/python_bindings/src/PyTarget.cpp index 718936332ea9..21f87038bd15 100644 --- a/python_bindings/src/PyTarget.cpp +++ b/python_bindings/src/PyTarget.cpp @@ -23,8 +23,8 @@ void define_target(py::module &m) { py::class_(m, "Target") .def(py::init<>()) .def(py::init()) - .def(py::init()) - .def(py::init>()) + .def(py::init()) + .def(py::init>()) .def("__eq__", [](const Target &value, Target *value2) { return value2 && value == *value2; }) .def("__ne__", [](const Target &value, Target *value2) { return !value2 || value != *value2; }) @@ -32,12 +32,13 @@ void define_target(py::module &m) { .def_readwrite("os", &Target::os) .def_readwrite("arch", &Target::arch) .def_readwrite("bits", &Target::bits) + .def_readwrite("processor", &Target::processor) .def("__repr__", &target_repr) .def("__str__", &Target::to_string) .def("to_string", &Target::to_string) - .def("has_feature", (bool (Target::*)(Target::Feature) const) & Target::has_feature) + .def("has_feature", (bool(Target::*)(Target::Feature) const) & Target::has_feature) .def("features_any_of", &Target::features_any_of, py::arg("features")) .def("features_all_of", &Target::features_all_of, py::arg("features")) diff --git 
a/src/CPlusPlusMangle.cpp b/src/CPlusPlusMangle.cpp index 05c9d552e68f..37efa6f904b8 100644 --- a/src/CPlusPlusMangle.cpp +++ b/src/CPlusPlusMangle.cpp @@ -942,14 +942,14 @@ void main_tests(const MangleResult *expecteds, const Target &target) { void cplusplus_mangle_test() { Target targets[kTestTargetCount]{ - Target(Target::Linux, Target::X86, 32), - Target(Target::Linux, Target::X86, 64), - Target(Target::OSX, Target::X86, 32), - Target(Target::OSX, Target::X86, 64), - Target(Target::IOS, Target::ARM, 32), - Target(Target::IOS, Target::ARM, 64), - Target(Target::Windows, Target::X86, 32), - Target(Target::Windows, Target::X86, 64)}; + Target(Target::Linux, Target::X86, 32, Target::Processor::ProcessorGeneric), + Target(Target::Linux, Target::X86, 64, Target::Processor::ProcessorGeneric), + Target(Target::OSX, Target::X86, 32, Target::Processor::ProcessorGeneric), + Target(Target::OSX, Target::X86, 64, Target::Processor::ProcessorGeneric), + Target(Target::IOS, Target::ARM, 32, Target::Processor::ProcessorGeneric), + Target(Target::IOS, Target::ARM, 64, Target::Processor::ProcessorGeneric), + Target(Target::Windows, Target::X86, 32, Target::Processor::ProcessorGeneric), + Target(Target::Windows, Target::X86, 64, Target::Processor::ProcessorGeneric)}; MangleResult *expecteds[kTestTargetCount]{ ItaniumABIMangling_main, ItaniumABIMangling_main, ItaniumABIMangling_main, ItaniumABIMangling_main, diff --git a/src/CodeGen_X86.cpp b/src/CodeGen_X86.cpp index 390b061e9507..f91f12045d72 100644 --- a/src/CodeGen_X86.cpp +++ b/src/CodeGen_X86.cpp @@ -687,30 +687,33 @@ void CodeGen_X86::visit(const Store *op) { string CodeGen_X86::mcpu() const { // First, check if any explicit request for tuning exists. 
- if (target.has_feature(Target::TuneK8)) { + switch (target.processor) { + case Target::Processor::K8: return "k8"; - } else if (target.has_feature(Target::TuneK8_SSE3)) { + case Target::Processor::K8_SSE3: return "k8-sse3"; - } else if (target.has_feature(Target::TuneAMDFam10)) { + case Target::Processor::AMDFam10: return "amdfam10"; - } else if (target.has_feature(Target::TuneBtVer1)) { + case Target::Processor::BtVer1: return "btver1"; - } else if (target.has_feature(Target::TuneBdVer1)) { + case Target::Processor::BdVer1: return "bdver1"; - } else if (target.has_feature(Target::TuneBdVer2)) { + case Target::Processor::BdVer2: return "bdver2"; - } else if (target.has_feature(Target::TuneBdVer3)) { + case Target::Processor::BdVer3: return "bdver3"; - } else if (target.has_feature(Target::TuneBdVer4)) { + case Target::Processor::BdVer4: return "bdver4"; - } else if (target.has_feature(Target::TuneBtVer2)) { + case Target::Processor::BtVer2: return "btver2"; - } else if (target.has_feature(Target::TuneZnVer1)) { + case Target::Processor::ZnVer1: return "znver1"; - } else if (target.has_feature(Target::TuneZnVer2)) { + case Target::Processor::ZnVer2: return "znver2"; - } else if (target.has_feature(Target::TuneZnVer3)) { + case Target::Processor::ZnVer3: return "znver3"; + case Target::Processor::ProcessorGeneric: + break; // Detect "best" CPU from the enabled ISA's. } // And only after that, perform an ad-hoc guess for the tune given features. diff --git a/src/HexagonOffload.cpp b/src/HexagonOffload.cpp index 8ffd1d0c2e4d..05b16a953ec6 100644 --- a/src/HexagonOffload.cpp +++ b/src/HexagonOffload.cpp @@ -967,7 +967,7 @@ class InjectHexagonRpc : public IRMutator { Stmt inject_hexagon_rpc(Stmt s, const Target &host_target, Module &containing_module) { // Make a new target for the device module. 
- Target target(Target::NoOS, Target::Hexagon, 32); + Target target(Target::NoOS, Target::Hexagon, 32, Target::ProcessorGeneric); // There are two ways of offloading, on device and on host. // In the former we have true QuRT available, while on the // latter we simulate the Hexagon side code with a barebones diff --git a/src/Module.cpp b/src/Module.cpp index ec05676a0ecd..50e7a9787c22 100644 --- a/src/Module.cpp +++ b/src/Module.cpp @@ -615,7 +615,7 @@ void Module::compile(const std::map &output_files) } } debug(1) << "Module.compile(): static_library " << output_files.at(OutputFileType::static_library) << "\n"; - Target base_target(target().os, target().arch, target().bits); + Target base_target(target().os, target().arch, target().bits, target().processor); create_static_library(temp_dir.files(), base_target, output_files.at(OutputFileType::static_library)); } if (contains(output_files, OutputFileType::assembly)) { @@ -923,7 +923,7 @@ void compile_multitarget(const std::string &fn_name, // and add that to the result. if (!base_target.has_feature(Target::NoRuntime)) { // Start with a bare Target, set only the features we know are common to all. - Target runtime_target(base_target.os, base_target.arch, base_target.bits); + Target runtime_target(base_target.os, base_target.arch, base_target.bits, base_target.processor); for (int i = 0; i < Target::FeatureEnd; ++i) { // We never want NoRuntime set here. if (i == Target::NoRuntime) { diff --git a/src/Target.cpp b/src/Target.cpp index fc5f468b3c04..9f2619231bff 100644 --- a/src/Target.cpp +++ b/src/Target.cpp @@ -76,6 +76,7 @@ Target calculate_host_target() { bool use_64_bits = (sizeof(size_t) == 8); int bits = use_64_bits ? 
64 : 32; + Target::Processor processor = Target::Processor::ProcessorGeneric; std::vector initial_features; #if __riscv @@ -189,7 +190,7 @@ Target calculate_host_target() { #endif #endif - return {os, arch, bits, initial_features}; + return {os, arch, bits, processor, initial_features}; } bool is_using_hexagon(const Target &t) { @@ -307,6 +308,31 @@ bool lookup_arch(const std::string &tok, Target::Arch &result) { return false; } +const std::map processor_name_map = { + {"tune_generic", Target::Processor::ProcessorGeneric}, + {"tune_k8", Target::Processor::K8}, + {"tune_k8_sse3", Target::Processor::K8_SSE3}, + {"tune_amdfam10", Target::Processor::AMDFam10}, + {"tune_btver1", Target::Processor::BtVer1}, + {"tune_bdver1", Target::Processor::BdVer1}, + {"tune_bdver2", Target::Processor::BdVer2}, + {"tune_bdver3", Target::Processor::BdVer3}, + {"tune_bdver4", Target::Processor::BdVer4}, + {"tune_btver2", Target::Processor::BtVer2}, + {"tune_znver1", Target::Processor::ZnVer1}, + {"tune_znver2", Target::Processor::ZnVer2}, + {"tune_znver3", Target::Processor::ZnVer3}, +}; + +bool lookup_processor(const std::string &tok, Target::Processor &result) { + auto processor_iter = processor_name_map.find(tok); + if (processor_iter != processor_name_map.end()) { + result = processor_iter->second; + return true; + } + return false; +} + const std::map feature_name_map = { {"jit", Target::JIT}, {"debug", Target::Debug}, @@ -386,18 +412,6 @@ const std::map feature_name_map = { {"armv81a", Target::ARMv81a}, {"sanitizer_coverage", Target::SanitizerCoverage}, {"profile_by_timer", Target::ProfileByTimer}, - {"tune_k8", Target::TuneK8}, - {"tune_k8_sse3", Target::TuneK8_SSE3}, - {"tune_amdfam10", Target::TuneAMDFam10}, - {"tune_btver1", Target::TuneBtVer1}, - {"tune_bdver1", Target::TuneBdVer1}, - {"tune_bdver2", Target::TuneBdVer2}, - {"tune_bdver3", Target::TuneBdVer3}, - {"tune_bdver4", Target::TuneBdVer4}, - {"tune_btver2", Target::TuneBtVer2}, - {"tune_znver1", Target::TuneZnVer1}, - 
{"tune_znver2", Target::TuneZnVer2}, - {"tune_znver3", Target::TuneZnVer3}, // NOTE: When adding features to this map, be sure to update PyEnums.cpp as well. }; @@ -466,7 +480,7 @@ bool merge_string(Target &t, const std::string &target) { } tokens.push_back(rest); - bool os_specified = false, arch_specified = false, bits_specified = false, tune_specified = false, features_specified = false; + bool os_specified = false, arch_specified = false, bits_specified = false, processor_specified = false, features_specified = false; bool is_host = false; for (size_t i = 0; i < tokens.size(); i++) { @@ -496,13 +510,18 @@ bool merge_string(Target &t, const std::string &target) { return false; } os_specified = true; + } else if (lookup_processor(tok, t.processor)) { + if (processor_specified) { + return false; + } + processor_specified = true; } else if (lookup_feature(tok, feature)) { if (tok.substr(0, std::strlen("tune_")) == "tune_") { - if (tune_specified) { + if (processor_specified) { // Only a single tune makes sense. return false; } - tune_specified = true; + processor_specified = true; } t.set_feature(feature); features_specified = true; @@ -560,6 +579,12 @@ void bad_target_string(const std::string &target) { separator = ", "; } separator = ""; + std::string processors; + for (const auto &processor_entry : processor_name_map) { + processors += separator + processor_entry.first; + separator = ", "; + } + separator = ""; // Format the features to go one feature over 70 characters per line, // assume the first line starts with "Features are ". 
int line_char_start = -(int)sizeof("Features are"); @@ -574,10 +599,11 @@ void bad_target_string(const std::string &target) { } } user_error << "Did not understand Halide target " << target << "\n" - << "Expected format is arch-bits-os-feature1-feature2-...\n" + << "Expected format is arch-bits-os-processor-feature1-feature2-...\n" << "Where arch is: " << architectures << ".\n" << "bits is either 32 or 64.\n" << "os is: " << oses << ".\n" + << "processor is: " << processors << ".\n" << "\n" << "If arch, bits, or os are omitted, they default to the host.\n" << "\n" @@ -647,6 +673,15 @@ std::string Target::to_string() const { break; } } + // Generic is the default processor; omit it so strings of untuned targets are unchanged. + if (processor != Processor::ProcessorGeneric) { + for (const auto &processor_entry : processor_name_map) { + if (processor_entry.second == processor) { + result += "-" + processor_entry.first; + break; + } + } + } for (const auto &feature_entry : feature_name_map) { if (has_feature(feature_entry.second)) { result += "-" + feature_entry.first; @@ -1066,7 +1101,7 @@ bool Target::get_runtime_compatible_target(const Target &other, Target &result) // Union of features is computed through bitwise-or, and masked away by the features we care about // Intersection of features is computed through bitwise-and and masked away, too. // We merge the bits via bitwise or. - Target output = Target{os, arch, bits}; + Target output = Target{os, arch, bits, processor}; output.features = ((features | other.features) & union_mask) | ((features | other.features) & matching_mask) | ((features & other.features) & intersection_mask); // Pick tight lower bound for CUDA capability. Use fall-through to clear redundant features diff --git a/src/Target.h b/src/Target.h index 6c22c3651d5e..c0d9043e8081 100644 --- a/src/Target.h +++ b/src/Target.h @@ -50,6 +50,24 @@ struct Target { /** The bit-width of the target machine. Must be 0 for unknown, or 32 or 64. */ int bits = 0; + /** The specific processor to be targeted, tuned for. + * Corresponds to processor_name_map in Target.cpp. 
*/ + enum Processor { + ProcessorGeneric = 0, + K8, + K8_SSE3, + AMDFam10, + BtVer1, + BdVer1, + BdVer2, + BdVer3, + BdVer4, + BtVer2, + ZnVer1, + ZnVer2, + ZnVer3, + } processor = ProcessorGeneric; + /** Optional features a target can have. * Corresponds to feature_name_map in Target.cpp. * See definitions in HalideRuntime.h for full information. @@ -133,23 +151,11 @@ struct Target { ARMv81a = halide_target_feature_armv81a, SanitizerCoverage = halide_target_feature_sanitizer_coverage, ProfileByTimer = halide_target_feature_profile_by_timer, - TuneK8 = halide_target_feature_tune_k8, - TuneK8_SSE3 = halide_target_feature_tune_k8_sse3, - TuneAMDFam10 = halide_target_feature_tune_amdfam10, - TuneBtVer1 = halide_target_feature_tune_btver1, - TuneBdVer1 = halide_target_feature_tune_bdver1, - TuneBdVer2 = halide_target_feature_tune_bdver2, - TuneBdVer3 = halide_target_feature_tune_bdver3, - TuneBdVer4 = halide_target_feature_tune_bdver4, - TuneBtVer2 = halide_target_feature_tune_btver2, - TuneZnVer1 = halide_target_feature_tune_znver1, - TuneZnVer2 = halide_target_feature_tune_znver2, - TuneZnVer3 = halide_target_feature_tune_znver3, FeatureEnd = halide_target_feature_end }; Target() = default; - Target(OS o, Arch a, int b, const std::vector &initial_features = std::vector()) - : os(o), arch(a), bits(b) { + Target(OS o, Arch a, int b, Processor p, const std::vector &initial_features = std::vector()) + : os(o), arch(a), bits(b), processor(p) { for (const auto &f : initial_features) { set_feature(f); } @@ -238,6 +244,7 @@ struct Target { return os == other.os && arch == other.arch && bits == other.bits && + processor == other.processor && features == other.features; } @@ -259,7 +266,7 @@ struct Target { /** Convert the Target into a string form that can be reconstituted * by merge_string(), which will always be of the form * - * arch-bits-os-feature1-feature2...featureN. + * arch-bits-os-processor-feature1-feature2...featureN. 
* * Note that is guaranteed that Target(t1.to_string()) == t1, * but not that Target(s).to_string() == s (since there can be diff --git a/src/runtime/HalideRuntime.h b/src/runtime/HalideRuntime.h index 8df5c326ae59..6089110420a7 100644 --- a/src/runtime/HalideRuntime.h +++ b/src/runtime/HalideRuntime.h @@ -1348,18 +1348,6 @@ typedef enum halide_target_feature_t { halide_target_feature_armv81a, ///< Enable ARMv8.1-a instructions halide_target_feature_sanitizer_coverage, ///< Enable hooks for SanitizerCoverage support. halide_target_feature_profile_by_timer, ///< Alternative to halide_target_feature_profile using timer interrupt for systems without threads or applicartions that need to avoid them. - halide_target_feature_tune_k8, ///< Tune specifically for the AMD K8 CPU. - halide_target_feature_tune_k8_sse3, ///< Tune specifically for the AMD K8 w/SSE3 CPU. - halide_target_feature_tune_amdfam10, ///< Tune specifically for the AMD FAM10 CPU. - halide_target_feature_tune_btver1, ///< Tune specifically for the AMD BtVer1 CPU. - halide_target_feature_tune_bdver1, ///< Tune specifically for the AMD BdVer1 CPU. - halide_target_feature_tune_bdver2, ///< Tune specifically for the AMD BdVer2 CPU. - halide_target_feature_tune_bdver3, ///< Tune specifically for the AMD BdVer3 CPU. - halide_target_feature_tune_bdver4, ///< Tune specifically for the AMD BdVer4 CPU. - halide_target_feature_tune_btver2, ///< Tune specifically for the AMD BtVer2 CPU. - halide_target_feature_tune_znver1, ///< Tune specifically for the AMD ZnVer1 CPU. - halide_target_feature_tune_znver2, ///< Tune specifically for the AMD ZnVer2 CPU. - halide_target_feature_tune_znver3, ///< Tune specifically for the AMD ZnVer3 CPU. halide_target_feature_end ///< A sentinel. Every target is considered to have this feature, and setting this feature does nothing. 
} halide_target_feature_t; diff --git a/test/correctness/simd_op_check_hvx.cpp b/test/correctness/simd_op_check_hvx.cpp index 8b1b42e94eda..1e9940ce9b3f 100644 --- a/test/correctness/simd_op_check_hvx.cpp +++ b/test/correctness/simd_op_check_hvx.cpp @@ -708,7 +708,7 @@ int main(int argc, char **argv) { printf("host is: %s\n", host.to_string().c_str()); printf("HL_TARGET is: %s\n", hl_target.to_string().c_str()); - Target t(Target::NoOS, Target::Hexagon, 32); + Target t(Target::NoOS, Target::Hexagon, 32, Target::ProcessorGeneric); for (const auto &f : {Target::HVX, Target::HVX_v62, Target::HVX_v65, diff --git a/test/correctness/target.cpp b/test/correctness/target.cpp index 7c575c5233ee..2da8f9a0038f 100644 --- a/test/correctness/target.cpp +++ b/test/correctness/target.cpp @@ -38,7 +38,7 @@ int main(int argc, char **argv) { // } // Full specification round-trip: - t1 = Target(Target::Linux, Target::X86, 32, {Target::SSE41}); + t1 = Target(Target::Linux, Target::X86, 32, Target::ProcessorGeneric, {Target::SSE41}); ts = t1.to_string(); if (ts != "x86-32-linux-sse41") { printf("to_string failure: %s\n", ts.c_str()); @@ -50,7 +50,7 @@ int main(int argc, char **argv) { } // Full specification round-trip, crazy features - t1 = Target(Target::Android, Target::ARM, 32, + t1 = Target(Target::Android, Target::ARM, 32, Target::ProcessorGeneric, {Target::JIT, Target::SSE41, Target::AVX, Target::AVX2, Target::CUDA, Target::OpenCL, Target::OpenGLCompute, Target::Debug}); @@ -99,7 +99,7 @@ int main(int argc, char **argv) { } // with_feature - t1 = Target(Target::Linux, Target::X86, 32, {Target::SSE41}); + t1 = Target(Target::Linux, Target::X86, 32, Target::ProcessorGeneric, {Target::SSE41}); t2 = t1.with_feature(Target::NoAsserts).with_feature(Target::NoBoundsQuery); ts = t2.to_string(); if (ts != "x86-32-linux-no_asserts-no_bounds_query-sse41") { @@ -108,7 +108,7 @@ int main(int argc, char **argv) { } // without_feature - t1 = Target(Target::Linux, Target::X86, 32, 
{Target::SSE41, Target::NoAsserts}); + t1 = Target(Target::Linux, Target::X86, 32, Target::ProcessorGeneric, {Target::SSE41, Target::NoAsserts}); // Note that NoBoundsQuery wasn't set here, so 'without' is a no-op t2 = t1.without_feature(Target::NoAsserts).without_feature(Target::NoBoundsQuery); ts = t2.to_string(); @@ -119,7 +119,7 @@ int main(int argc, char **argv) { // natural_vector_size // SSE4.1 is 16 bytes wide - t1 = Target(Target::Linux, Target::X86, 32, {Target::SSE41}); + t1 = Target(Target::Linux, Target::X86, 32, Target::ProcessorGeneric, {Target::SSE41}); if (t1.natural_vector_size() != 16) { printf("natural_vector_size failure\n"); return -1; @@ -139,7 +139,7 @@ int main(int argc, char **argv) { // AVX is 32 bytes wide for float, but we treat as only 16 for integral types, // due to suboptimal integer instructions - t1 = Target(Target::Linux, Target::X86, 32, {Target::SSE41, Target::AVX}); + t1 = Target(Target::Linux, Target::X86, 32, Target::ProcessorGeneric, {Target::SSE41, Target::AVX}); if (t1.natural_vector_size() != 16) { printf("natural_vector_size failure\n"); return -1; @@ -158,7 +158,7 @@ int main(int argc, char **argv) { } // AVX2 is 32 bytes wide - t1 = Target(Target::Linux, Target::X86, 32, {Target::SSE41, Target::AVX, Target::AVX2}); + t1 = Target(Target::Linux, Target::X86, 32, Target::ProcessorGeneric, {Target::SSE41, Target::AVX, Target::AVX2}); if (t1.natural_vector_size() != 32) { printf("natural_vector_size failure\n"); return -1; @@ -177,7 +177,7 @@ int main(int argc, char **argv) { } // NEON is 16 bytes wide - t1 = Target(Target::Linux, Target::ARM, 32); + t1 = Target(Target::Linux, Target::ARM, 32, Target::ProcessorGeneric); if (t1.natural_vector_size() != 16) { printf("natural_vector_size failure\n"); return -1;