Skip to content

Commit

Permalink
A few processor detection/features tweaks (#36831)
Browse files Browse the repository at this point in the history
* Missing feature from Apple A13
* Enable Cortex-A78 and Cortex-X1 on LLVM 11

    llvm/llvm-project@954db63
    https://reviews.llvm.org/D83206

* More relaxed Zen detection: treat all family 23 as Zen* and treat all model >= 0x30 as Zen2.

    GCC uses a similar fallback structure albeit based on feature.
    This should still generate **correct** code since that is always controlled by
    available features. It should be as good a scheduling model estimate as anything else.

    Fix #36826
  • Loading branch information
yuyichao authored Jul 30, 2020
1 parent 710cf28 commit cd3fb4d
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 11 deletions.
14 changes: 7 additions & 7 deletions src/processor_arm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -301,8 +301,8 @@ constexpr auto arm_cortex_a73 = armv8a_crc;
constexpr auto arm_cortex_a75 = armv8_2a | get_feature_masks(dotprod, rcpc, fullfp16);
constexpr auto arm_cortex_a76 = armv8_2a | get_feature_masks(dotprod, rcpc, fullfp16, ssbs);
constexpr auto arm_cortex_a77 = armv8_2a | get_feature_masks(dotprod, rcpc, fullfp16, ssbs);
constexpr auto arm_cortex_a78 = armv8_2a | get_feature_masks(dotprod, rcpc, fullfp16, ssbs);
constexpr auto arm_cortex_x1 = armv8_2a | get_feature_masks(dotprod, rcpc, fullfp16, ssbs);
constexpr auto arm_cortex_a78 = armv8_2a | get_feature_masks(dotprod, rcpc, fullfp16, ssbs); // spe
constexpr auto arm_cortex_x1 = armv8_2a | get_feature_masks(dotprod, rcpc, fullfp16, ssbs); // spe
constexpr auto arm_neoverse_e1 = armv8_2a | get_feature_masks(rcpc, fullfp16, ssbs);
constexpr auto arm_neoverse_n1 = armv8_2a | get_feature_masks(dotprod, rcpc, fullfp16, ssbs);
constexpr auto arm_zeus = armv8_4a | get_feature_masks(sve, i8mm, bf16, fullfp16, ssbs, rand);
Expand Down Expand Up @@ -336,7 +336,7 @@ constexpr auto apple_a7 = armv8a_crc_crypto;
constexpr auto apple_a10 = armv8a_crc_crypto | get_feature_masks(rdm);
constexpr auto apple_a11 = armv8_2a_crypto | get_feature_masks(fullfp16);
constexpr auto apple_a12 = armv8_3a_crypto | get_feature_masks(fullfp16);
constexpr auto apple_a13 = armv8_4a_crypto | get_feature_masks(fullfp16, sha3);
constexpr auto apple_a13 = armv8_4a_crypto | get_feature_masks(fp16fml, fullfp16, sha3);
constexpr auto apple_s4 = apple_a12;
constexpr auto apple_s5 = apple_a12;

Expand All @@ -363,8 +363,8 @@ static constexpr CPUSpec<CPU, feature_sz> cpus[] = {
{"cortex-a76", CPU::arm_cortex_a76, CPU::arm_cortex_a75, 90000, Feature::arm_cortex_a76},
{"cortex-a76ae", CPU::arm_cortex_a76ae, CPU::arm_cortex_a75, 90000, Feature::arm_cortex_a76},
{"cortex-a77", CPU::arm_cortex_a77, CPU::arm_cortex_a76, 110000, Feature::arm_cortex_a77},
{"cortex-a78", CPU::arm_cortex_a78, CPU::arm_cortex_a77, UINT32_MAX, Feature::arm_cortex_a78},
{"cortex-x1", CPU::arm_cortex_x1, CPU::arm_cortex_a78, UINT32_MAX, Feature::arm_cortex_x1},
{"cortex-a78", CPU::arm_cortex_a78, CPU::arm_cortex_a77, 110000, Feature::arm_cortex_a78},
{"cortex-x1", CPU::arm_cortex_x1, CPU::arm_cortex_a78, 110000, Feature::arm_cortex_x1},
{"neoverse-e1", CPU::arm_neoverse_e1, CPU::arm_cortex_a76, 100000, Feature::arm_neoverse_e1},
{"neoverse-n1", CPU::arm_neoverse_n1, CPU::arm_cortex_a76, 100000, Feature::arm_neoverse_n1},
{"zeus", CPU::arm_zeus, CPU::arm_neoverse_n1, UINT32_MAX, Feature::arm_zeus},
Expand Down Expand Up @@ -637,8 +637,8 @@ static constexpr CPUSpec<CPU, feature_sz> cpus[] = {
{"cortex-a76", CPU::arm_cortex_a76, CPU::arm_cortex_a75, 90000, Feature::arm_cortex_a76},
{"cortex-a76ae", CPU::arm_cortex_a76ae, CPU::arm_cortex_a75, 90000, Feature::arm_cortex_a76},
{"cortex-a77", CPU::arm_cortex_a77, CPU::arm_cortex_a76, 110000, Feature::arm_cortex_a77},
{"cortex-a78", CPU::arm_cortex_a78, CPU::arm_cortex_a77, UINT32_MAX, Feature::arm_cortex_a78},
{"cortex-x1", CPU::arm_cortex_x1, CPU::arm_cortex_a78, UINT32_MAX, Feature::arm_cortex_x1},
{"cortex-a78", CPU::arm_cortex_a78, CPU::arm_cortex_a77, 110000, Feature::arm_cortex_a78},
{"cortex-x1", CPU::arm_cortex_x1, CPU::arm_cortex_a78, 110000, Feature::arm_cortex_x1},
{"neoverse-n1", CPU::arm_neoverse_n1, CPU::arm_cortex_a76, 100000, Feature::arm_neoverse_n1},
{"denver1", CPU::nvidia_denver1, CPU::arm_cortex_a53, UINT32_MAX, Feature::nvidia_denver1},
{"denver2", CPU::nvidia_denver2, CPU::arm_cortex_a57, UINT32_MAX, Feature::nvidia_denver2},
Expand Down
10 changes: 6 additions & 4 deletions src/processor_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -539,11 +539,13 @@ static CPU get_amd_processor_name(uint32_t family, uint32_t model, const uint32_
case 22:
return CPU::amd_btver2;
case 23:
if ((model >= 0x30 && model <= 0x3f) || model == 0x71)
// Known models:
// Zen: 1, 17
// Zen+: 8, 24
// Zen2: 96, 113
if (model >= 0x30)
return CPU::amd_znver2;
if (model <= 0x0f)
return CPU::amd_znver1;
return CPU::amd_btver1;
return CPU::amd_znver1;
}
}

Expand Down

0 comments on commit cd3fb4d

Please sign in to comment.