Skip to content

Commit

Permalink
Move processor tune into its own enum, out of features
Browse files Browse the repository at this point in the history
  • Loading branch information
LebedevRI committed Mar 26, 2022
1 parent a22f836 commit 8c0c07b
Show file tree
Hide file tree
Showing 11 changed files with 126 additions and 92 deletions.
27 changes: 15 additions & 12 deletions python_bindings/src/PyEnums.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,21 @@ void define_enums(py::module &m) {
.value("RISCV", Target::Arch::RISCV)
.value("WebAssembly", Target::Arch::WebAssembly);

py::enum_<Target::Processor>(m, "TargetProcessor")
.value("TuneGeneric", Target::Processor::ProcessorGeneric)
.value("TuneK8", Target::Processor::K8)
.value("TuneK8_SSE3", Target::Processor::K8_SSE3)
.value("TuneAMDFam10", Target::Processor::AMDFam10)
.value("TuneBtVer1", Target::Processor::BtVer1)
.value("TuneBdVer1", Target::Processor::BdVer1)
.value("TuneBdVer2", Target::Processor::BdVer2)
.value("TuneBdVer3", Target::Processor::BdVer3)
.value("TuneBdVer4", Target::Processor::BdVer4)
.value("TuneBtVer2", Target::Processor::BtVer2)
.value("TuneZnVer1", Target::Processor::ZnVer1)
.value("TuneZnVer2", Target::Processor::ZnVer2)
.value("TuneZnVer3", Target::Processor::ZnVer3);

py::enum_<Target::Feature>(m, "TargetFeature")
.value("JIT", Target::Feature::JIT)
.value("Debug", Target::Feature::Debug)
Expand Down Expand Up @@ -156,18 +171,6 @@ void define_enums(py::module &m) {
.value("ARMv81a", Target::Feature::ARMv81a)
.value("SanitizerCoverage", Target::Feature::SanitizerCoverage)
.value("ProfileByTimer", Target::Feature::ProfileByTimer)
.value("TuneK8", Target::Feature::TuneK8)
.value("TuneK8_SSE3", Target::Feature::TuneK8_SSE3)
.value("TuneAMDFam10", Target::Feature::TuneAMDFam10)
.value("TuneBtVer1", Target::Feature::TuneBtVer1)
.value("TuneBdVer1", Target::Feature::TuneBdVer1)
.value("TuneBdVer2", Target::Feature::TuneBdVer2)
.value("TuneBdVer3", Target::Feature::TuneBdVer3)
.value("TuneBdVer4", Target::Feature::TuneBdVer4)
.value("TuneBtVer2", Target::Feature::TuneBtVer2)
.value("TuneZnVer1", Target::Feature::TuneZnVer1)
.value("TuneZnVer2", Target::Feature::TuneZnVer2)
.value("TuneZnVer3", Target::Feature::TuneZnVer3)
.value("FeatureEnd", Target::Feature::FeatureEnd);

py::enum_<halide_type_code_t>(m, "TypeCode")
Expand Down
7 changes: 4 additions & 3 deletions python_bindings/src/PyTarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,21 +23,22 @@ void define_target(py::module &m) {
py::class_<Target>(m, "Target")
.def(py::init<>())
.def(py::init<const std::string &>())
.def(py::init<Target::OS, Target::Arch, int>())
.def(py::init<Target::OS, Target::Arch, int, std::vector<Target::Feature>>())
.def(py::init<Target::OS, Target::Arch, int, Target::Processor>())
.def(py::init<Target::OS, Target::Arch, int, Target::Processor, std::vector<Target::Feature>>())

.def("__eq__", [](const Target &value, Target *value2) { return value2 && value == *value2; })
.def("__ne__", [](const Target &value, Target *value2) { return !value2 || value != *value2; })

.def_readwrite("os", &Target::os)
.def_readwrite("arch", &Target::arch)
.def_readwrite("bits", &Target::bits)
.def_readwrite("processor", &Target::processor)

.def("__repr__", &target_repr)
.def("__str__", &Target::to_string)
.def("to_string", &Target::to_string)

.def("has_feature", (bool (Target::*)(Target::Feature) const) & Target::has_feature)
.def("has_feature", (bool(Target::*)(Target::Feature) const) & Target::has_feature)
.def("features_any_of", &Target::features_any_of, py::arg("features"))
.def("features_all_of", &Target::features_all_of, py::arg("features"))

Expand Down
16 changes: 8 additions & 8 deletions src/CPlusPlusMangle.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -942,14 +942,14 @@ void main_tests(const MangleResult *expecteds, const Target &target) {

void cplusplus_mangle_test() {
Target targets[kTestTargetCount]{
Target(Target::Linux, Target::X86, 32),
Target(Target::Linux, Target::X86, 64),
Target(Target::OSX, Target::X86, 32),
Target(Target::OSX, Target::X86, 64),
Target(Target::IOS, Target::ARM, 32),
Target(Target::IOS, Target::ARM, 64),
Target(Target::Windows, Target::X86, 32),
Target(Target::Windows, Target::X86, 64)};
Target(Target::Linux, Target::X86, 32, Target::Processor::ProcessorGeneric),
Target(Target::Linux, Target::X86, 64, Target::Processor::ProcessorGeneric),
Target(Target::OSX, Target::X86, 32, Target::Processor::ProcessorGeneric),
Target(Target::OSX, Target::X86, 64, Target::Processor::ProcessorGeneric),
Target(Target::IOS, Target::ARM, 32, Target::Processor::ProcessorGeneric),
Target(Target::IOS, Target::ARM, 64, Target::Processor::ProcessorGeneric),
Target(Target::Windows, Target::X86, 32, Target::Processor::ProcessorGeneric),
Target(Target::Windows, Target::X86, 64, Target::Processor::ProcessorGeneric)};
MangleResult *expecteds[kTestTargetCount]{
ItaniumABIMangling_main, ItaniumABIMangling_main,
ItaniumABIMangling_main, ItaniumABIMangling_main,
Expand Down
27 changes: 15 additions & 12 deletions src/CodeGen_X86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -687,30 +687,33 @@ void CodeGen_X86::visit(const Store *op) {

string CodeGen_X86::mcpu() const {
// First, check if any explicit request for tuning exists.
if (target.has_feature(Target::TuneK8)) {
switch (target.processor) {
case Target::Processor::K8:
return "k8";
} else if (target.has_feature(Target::TuneK8_SSE3)) {
case Target::Processor::K8_SSE3:
return "k8-sse3";
} else if (target.has_feature(Target::TuneAMDFam10)) {
case Target::Processor::AMDFam10:
return "amdfam10";
} else if (target.has_feature(Target::TuneBtVer1)) {
case Target::Processor::BtVer1:
return "btver1";
} else if (target.has_feature(Target::TuneBdVer1)) {
case Target::Processor::BdVer1:
return "bdver1";
} else if (target.has_feature(Target::TuneBdVer2)) {
case Target::Processor::BdVer2:
return "bdver2";
} else if (target.has_feature(Target::TuneBdVer3)) {
case Target::Processor::BdVer3:
return "bdver3";
} else if (target.has_feature(Target::TuneBdVer4)) {
case Target::Processor::BdVer4:
return "bdver4";
} else if (target.has_feature(Target::TuneBtVer2)) {
case Target::Processor::BtVer2:
return "btver2";
} else if (target.has_feature(Target::TuneZnVer1)) {
case Target::Processor::ZnVer1:
return "znver1";
} else if (target.has_feature(Target::TuneZnVer2)) {
case Target::Processor::ZnVer2:
return "znver2";
} else if (target.has_feature(Target::TuneZnVer3)) {
case Target::Processor::ZnVer3:
return "znver3";
case Target::Processor::ProcessorGeneric:
break; // Detect "best" CPU from the enabled ISA's.
}

// And only after that, perform an ad-hoc guess for the tune given features.
Expand Down
2 changes: 1 addition & 1 deletion src/HexagonOffload.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -967,7 +967,7 @@ class InjectHexagonRpc : public IRMutator {
Stmt inject_hexagon_rpc(Stmt s, const Target &host_target,
Module &containing_module) {
// Make a new target for the device module.
Target target(Target::NoOS, Target::Hexagon, 32);
Target target(Target::NoOS, Target::Hexagon, 32, Target::ProcessorGeneric);
// There are two ways of offloading, on device and on host.
// In the former we have true QuRT available, while on the
// latter we simulate the Hexagon side code with a barebones
Expand Down
4 changes: 2 additions & 2 deletions src/Module.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -615,7 +615,7 @@ void Module::compile(const std::map<OutputFileType, std::string> &output_files)
}
}
debug(1) << "Module.compile(): static_library " << output_files.at(OutputFileType::static_library) << "\n";
Target base_target(target().os, target().arch, target().bits);
Target base_target(target().os, target().arch, target().bits, target().processor);
create_static_library(temp_dir.files(), base_target, output_files.at(OutputFileType::static_library));
}
if (contains(output_files, OutputFileType::assembly)) {
Expand Down Expand Up @@ -923,7 +923,7 @@ void compile_multitarget(const std::string &fn_name,
// and add that to the result.
if (!base_target.has_feature(Target::NoRuntime)) {
// Start with a bare Target, set only the features we know are common to all.
Target runtime_target(base_target.os, base_target.arch, base_target.bits);
Target runtime_target(base_target.os, base_target.arch, base_target.bits, base_target.processor);
for (int i = 0; i < Target::FeatureEnd; ++i) {
// We never want NoRuntime set here.
if (i == Target::NoRuntime) {
Expand Down
68 changes: 50 additions & 18 deletions src/Target.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ Target calculate_host_target() {

bool use_64_bits = (sizeof(size_t) == 8);
int bits = use_64_bits ? 64 : 32;
Target::Processor processor = Target::Processor::ProcessorGeneric;
std::vector<Target::Feature> initial_features;

#if __riscv
Expand Down Expand Up @@ -189,7 +190,7 @@ Target calculate_host_target() {
#endif
#endif

return {os, arch, bits, initial_features};
return {os, arch, bits, processor, initial_features};
}

bool is_using_hexagon(const Target &t) {
Expand Down Expand Up @@ -307,6 +308,31 @@ bool lookup_arch(const std::string &tok, Target::Arch &result) {
return false;
}

// Maps the processor token accepted in a Halide target string (for example
// "tune_znver2") to the corresponding Target::Processor value. The table is
// searched by key in lookup_processor() and scanned by value in
// Target::to_string() and bad_target_string() to recover the token text.
// NOTE: When adding processors to this map, be sure to update the
// Target::Processor enum in Target.h and the bindings in PyEnums.cpp as well.
const std::map<std::string, Target::Processor> processor_name_map = {
{"tune_generic", Target::Processor::ProcessorGeneric},
{"tune_k8", Target::Processor::K8},
{"tune_k8_sse3", Target::Processor::K8_SSE3},
{"tune_amdfam10", Target::Processor::AMDFam10},
{"tune_btver1", Target::Processor::BtVer1},
{"tune_bdver1", Target::Processor::BdVer1},
{"tune_bdver2", Target::Processor::BdVer2},
{"tune_bdver3", Target::Processor::BdVer3},
{"tune_bdver4", Target::Processor::BdVer4},
{"tune_btver2", Target::Processor::BtVer2},
{"tune_znver1", Target::Processor::ZnVer1},
{"tune_znver2", Target::Processor::ZnVer2},
{"tune_znver3", Target::Processor::ZnVer3},
};

/** Translate a single target-string token into a Target::Processor.
 *
 * \param tok     The token to look up in processor_name_map.
 * \param result  Receives the matching processor; left unmodified when
 *                the token is not a known processor name.
 * \return true if tok names a processor, false otherwise. */
bool lookup_processor(const std::string &tok, Target::Processor &result) {
    const auto match = processor_name_map.find(tok);
    if (match == processor_name_map.end()) {
        // Not a processor token; the caller goes on to try other kinds.
        return false;
    }
    result = match->second;
    return true;
}

const std::map<std::string, Target::Feature> feature_name_map = {
{"jit", Target::JIT},
{"debug", Target::Debug},
Expand Down Expand Up @@ -386,18 +412,6 @@ const std::map<std::string, Target::Feature> feature_name_map = {
{"armv81a", Target::ARMv81a},
{"sanitizer_coverage", Target::SanitizerCoverage},
{"profile_by_timer", Target::ProfileByTimer},
{"tune_k8", Target::TuneK8},
{"tune_k8_sse3", Target::TuneK8_SSE3},
{"tune_amdfam10", Target::TuneAMDFam10},
{"tune_btver1", Target::TuneBtVer1},
{"tune_bdver1", Target::TuneBdVer1},
{"tune_bdver2", Target::TuneBdVer2},
{"tune_bdver3", Target::TuneBdVer3},
{"tune_bdver4", Target::TuneBdVer4},
{"tune_btver2", Target::TuneBtVer2},
{"tune_znver1", Target::TuneZnVer1},
{"tune_znver2", Target::TuneZnVer2},
{"tune_znver3", Target::TuneZnVer3},
// NOTE: When adding features to this map, be sure to update PyEnums.cpp as well.
};

Expand Down Expand Up @@ -466,7 +480,7 @@ bool merge_string(Target &t, const std::string &target) {
}
tokens.push_back(rest);

bool os_specified = false, arch_specified = false, bits_specified = false, tune_specified = false, features_specified = false;
bool os_specified = false, arch_specified = false, bits_specified = false, processor_specified = false, features_specified = false;
bool is_host = false;

for (size_t i = 0; i < tokens.size(); i++) {
Expand Down Expand Up @@ -496,13 +510,18 @@ bool merge_string(Target &t, const std::string &target) {
return false;
}
os_specified = true;
} else if (lookup_processor(tok, t.processor)) {
if (processor_specified) {
return false;
}
processor_specified = true;
} else if (lookup_feature(tok, feature)) {
if (tok.substr(0, std::strlen("tune_")) == "tune_") {
if (tune_specified) {
if (processor_specified) {
// Only a single tune makes sense.
return false;
}
tune_specified = true;
processor_specified = true;
}
t.set_feature(feature);
features_specified = true;
Expand Down Expand Up @@ -560,6 +579,12 @@ void bad_target_string(const std::string &target) {
separator = ", ";
}
separator = "";
std::string processors;
for (const auto &processor_entry : processor_name_map) {
processors += separator + processor_entry.first;
separator = ", ";
}
separator = "";
// Format the features to go one feature over 70 characters per line,
// assume the first line starts with "Features are ".
int line_char_start = -(int)sizeof("Features are");
Expand All @@ -574,10 +599,11 @@ void bad_target_string(const std::string &target) {
}
}
user_error << "Did not understand Halide target " << target << "\n"
<< "Expected format is arch-bits-os-feature1-feature2-...\n"
<< "Expected format is arch-bits-os-processor-feature1-feature2-...\n"
<< "Where arch is: " << architectures << ".\n"
<< "bits is either 32 or 64.\n"
<< "os is: " << oses << ".\n"
<< "processor is: " << processors << ".\n"
<< "\n"
<< "If arch, bits, or os are omitted, they default to the host.\n"
<< "\n"
Expand Down Expand Up @@ -647,6 +673,12 @@ std::string Target::to_string() const {
break;
}
}
for (const auto &processor_entry : processor_name_map) {
if (processor_entry.second == processor) {
result += "-" + processor_entry.first;
break;
}
}
for (const auto &feature_entry : feature_name_map) {
if (has_feature(feature_entry.second)) {
result += "-" + feature_entry.first;
Expand Down Expand Up @@ -1066,7 +1098,7 @@ bool Target::get_runtime_compatible_target(const Target &other, Target &result)
// Union of features is computed through bitwise-or, and masked away by the features we care about
// Intersection of features is computed through bitwise-and and masked away, too.
// We merge the bits via bitwise or.
Target output = Target{os, arch, bits};
Target output = Target{os, arch, bits, processor};
output.features = ((features | other.features) & union_mask) | ((features | other.features) & matching_mask) | ((features & other.features) & intersection_mask);

// Pick tight lower bound for CUDA capability. Use fall-through to clear redundant features
Expand Down
37 changes: 22 additions & 15 deletions src/Target.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,24 @@ struct Target {
/** The bit-width of the target machine. Must be 0 for unknown, or 32 or 64. */
int bits = 0;

/** The specific processor to be targeted, tuned for.
 * Exactly one value is held per Target (in the `processor` member below),
 * which defaults to ProcessorGeneric.
 * Corresponds to processor_name_map in Target.cpp. */
enum Processor {
ProcessorGeneric = 0,  ///< No specific processor requested.
K8,                    ///< Tune for the AMD K8 CPU.
K8_SSE3,               ///< Tune for the AMD K8 w/SSE3 CPU.
AMDFam10,              ///< Tune for the AMD FAM10 CPU.
BtVer1,                ///< Tune for the AMD BtVer1 CPU.
BdVer1,                ///< Tune for the AMD BdVer1 CPU.
BdVer2,                ///< Tune for the AMD BdVer2 CPU.
BdVer3,                ///< Tune for the AMD BdVer3 CPU.
BdVer4,                ///< Tune for the AMD BdVer4 CPU.
BtVer2,                ///< Tune for the AMD BtVer2 CPU.
ZnVer1,                ///< Tune for the AMD ZnVer1 CPU.
ZnVer2,                ///< Tune for the AMD ZnVer2 CPU.
ZnVer3,                ///< Tune for the AMD ZnVer3 CPU.
} processor = ProcessorGeneric;

/** Optional features a target can have.
* Corresponds to feature_name_map in Target.cpp.
* See definitions in HalideRuntime.h for full information.
Expand Down Expand Up @@ -133,23 +151,11 @@ struct Target {
ARMv81a = halide_target_feature_armv81a,
SanitizerCoverage = halide_target_feature_sanitizer_coverage,
ProfileByTimer = halide_target_feature_profile_by_timer,
TuneK8 = halide_target_feature_tune_k8,
TuneK8_SSE3 = halide_target_feature_tune_k8_sse3,
TuneAMDFam10 = halide_target_feature_tune_amdfam10,
TuneBtVer1 = halide_target_feature_tune_btver1,
TuneBdVer1 = halide_target_feature_tune_bdver1,
TuneBdVer2 = halide_target_feature_tune_bdver2,
TuneBdVer3 = halide_target_feature_tune_bdver3,
TuneBdVer4 = halide_target_feature_tune_bdver4,
TuneBtVer2 = halide_target_feature_tune_btver2,
TuneZnVer1 = halide_target_feature_tune_znver1,
TuneZnVer2 = halide_target_feature_tune_znver2,
TuneZnVer3 = halide_target_feature_tune_znver3,
FeatureEnd = halide_target_feature_end
};
Target() = default;
Target(OS o, Arch a, int b, const std::vector<Feature> &initial_features = std::vector<Feature>())
: os(o), arch(a), bits(b) {
Target(OS o, Arch a, int b, Processor p, const std::vector<Feature> &initial_features = std::vector<Feature>())
: os(o), arch(a), bits(b), processor(p) {
for (const auto &f : initial_features) {
set_feature(f);
}
Expand Down Expand Up @@ -238,6 +244,7 @@ struct Target {
return os == other.os &&
arch == other.arch &&
bits == other.bits &&
processor == other.processor &&
features == other.features;
}

Expand All @@ -259,7 +266,7 @@ struct Target {
/** Convert the Target into a string form that can be reconstituted
* by merge_string(), which will always be of the form
*
* arch-bits-os-feature1-feature2...featureN.
* arch-bits-os-processor-feature1-feature2...featureN.
*
* Note that it is guaranteed that Target(t1.to_string()) == t1,
* but not that Target(s).to_string() == s (since there can be
Expand Down
12 changes: 0 additions & 12 deletions src/runtime/HalideRuntime.h
Original file line number Diff line number Diff line change
Expand Up @@ -1348,18 +1348,6 @@ typedef enum halide_target_feature_t {
halide_target_feature_armv81a, ///< Enable ARMv8.1-a instructions
halide_target_feature_sanitizer_coverage, ///< Enable hooks for SanitizerCoverage support.
halide_target_feature_profile_by_timer, ///< Alternative to halide_target_feature_profile using timer interrupt for systems without threads or applications that need to avoid them.
halide_target_feature_tune_k8, ///< Tune specifically for the AMD K8 CPU.
halide_target_feature_tune_k8_sse3, ///< Tune specifically for the AMD K8 w/SSE3 CPU.
halide_target_feature_tune_amdfam10, ///< Tune specifically for the AMD FAM10 CPU.
halide_target_feature_tune_btver1, ///< Tune specifically for the AMD BtVer1 CPU.
halide_target_feature_tune_bdver1, ///< Tune specifically for the AMD BdVer1 CPU.
halide_target_feature_tune_bdver2, ///< Tune specifically for the AMD BdVer2 CPU.
halide_target_feature_tune_bdver3, ///< Tune specifically for the AMD BdVer3 CPU.
halide_target_feature_tune_bdver4, ///< Tune specifically for the AMD BdVer4 CPU.
halide_target_feature_tune_btver2, ///< Tune specifically for the AMD BtVer2 CPU.
halide_target_feature_tune_znver1, ///< Tune specifically for the AMD ZnVer1 CPU.
halide_target_feature_tune_znver2, ///< Tune specifically for the AMD ZnVer2 CPU.
halide_target_feature_tune_znver3, ///< Tune specifically for the AMD ZnVer3 CPU.
halide_target_feature_end ///< A sentinel. Every target is considered to have this feature, and setting this feature does nothing.
} halide_target_feature_t;

Expand Down
Loading

0 comments on commit 8c0c07b

Please sign in to comment.