/** Parse a target-string token of the form "vector_bits_<N>".
 *
 * \param tok A single token taken from a target string.
 * \return N when tok is exactly the prefix "vector_bits_" followed by one or
 *         more decimal digits whose value fits in an int; -1 for every other
 *         token (missing prefix, empty or non-numeric suffix, trailing
 *         characters, or a value too large for an int).
 *
 * The digits are parsed by hand instead of with std::stoi. std::stoi throws
 * std::invalid_argument for tokens such as "vector_bits_" or
 * "vector_bits_abc" and std::out_of_range for oversized values, so a
 * malformed token would raise an exception rather than being reported as
 * unrecognized through the -1 return value. Unlike std::stoi, leading
 * whitespace and an explicit sign are not accepted. */
int parse_vector_bits(const std::string &tok) {
    static const char prefix[] = "vector_bits_";
    const size_t prefix_len = sizeof(prefix) - 1;

    // Require the prefix at position 0 and at least one character after it.
    // compare() only inspects the leading characters, unlike find(), which
    // searches the whole token when the prefix is absent.
    if (tok.size() <= prefix_len || tok.compare(0, prefix_len, prefix) != 0) {
        return -1;
    }

    // Largest value representable in an int, computed without extra headers
    // (assumes int and unsigned int have the same number of value bits + sign).
    const unsigned int max_value = ~0u >> 1;

    unsigned int value = 0;
    for (size_t i = prefix_len; i < tok.size(); i++) {
        const char c = tok[i];
        if (c < '0' || c > '9') {
            // Any non-digit makes the whole token invalid.
            return -1;
        }
        const unsigned int digit = static_cast<unsigned int>(c - '0');
        // Reject before multiplying so that value * 10 + digit never exceeds
        // max_value.
        if (value > (max_value - digit) / 10) {
            return -1;
        }
        value = value * 10 + digit;
    }
    return static_cast<int>(value);
}
+ std::to_string(vector_bits); + } + return result; } @@ -1061,7 +1081,15 @@ int Target::natural_vector_size(const Halide::Type &t) const { const bool is_integer = t.is_int() || t.is_uint(); const int data_size = t.bytes(); - if (arch == Target::Hexagon) { + if (arch == Target::ARM) { + if (vector_bits != 0 && + (has_feature(Halide::Target::SVE2) || + (t.is_float() && has_feature(Halide::Target::SVE)))) { + return vector_bits / (data_size * 8); + } else { + return 16 / data_size; + } + } else if (arch == Target::Hexagon) { if (is_integer) { if (has_feature(Halide::Target::HVX)) { return 128 / data_size; @@ -1103,6 +1131,13 @@ int Target::natural_vector_size(const Halide::Type &t) const { // No vectors, sorry. return 1; } + } else if (arch == Target::RISCV) { + if (vector_bits != 0 && + has_feature(Halide::Target::RVV)) { + return vector_bits / (data_size * 8); + } else { + return 1; + } } else { // Assume 128-bit vectors on other targets. return 16 / data_size; @@ -1310,6 +1345,12 @@ void target_test() { } } + internal_assert(Target().vector_bits == 0) << "Default Target vector_bits not 0.\n"; + internal_assert(Target("arm-64-linux-sve2-vector_bits_512").vector_bits == 512) << "Vector bits not parsed correctly.\n"; + Target with_vector_bits(Target::Linux, Target::ARM, 64, Target::ProcessorGeneric, {Target::SVE}, 512); + internal_assert(with_vector_bits.vector_bits == 512) << "Vector bits not populated in constructor.\n"; + internal_assert(Target(with_vector_bits.to_string()).vector_bits == 512) << "Vector bits not round tripped properly.\n"; + std::cout << "Target test passed" << std::endl; } diff --git a/src/Target.h b/src/Target.h index 06526ccc6dfa..b1c02b68c29e 100644 --- a/src/Target.h +++ b/src/Target.h @@ -50,6 +50,11 @@ struct Target { /** The bit-width of the target machine. Must be 0 for unknown, or 32 or 64. */ int bits = 0; + /** The bit-width of a vector register for targets where this is configurable and + * targeting a fixed size is desired. 
The default of 0 indicates no assumption of + * fixed size is allowed. */ + int vector_bits = 0; + /** The specific processor to be targeted, tuned for. * Corresponds to processor_name_map in Target.cpp. * @@ -159,8 +164,9 @@ struct Target { FeatureEnd = halide_target_feature_end }; Target() = default; - Target(OS o, Arch a, int b, Processor pt, const std::vector &initial_features = std::vector()) - : os(o), arch(a), bits(b), processor_tune(pt) { + Target(OS o, Arch a, int b, Processor pt, const std::vector &initial_features = std::vector(), + int vb = 0) + : os(o), arch(a), bits(b), vector_bits(vb), processor_tune(pt) { for (const auto &f : initial_features) { set_feature(f); }