Skip to content

Commit ef7c845

Browse files
A-Sattiankitm3k
authored and committed
Update Intel Thread Counts (microsoft#22894)
### Description The default thread count methodology in onnxruntime did not account for new and upcoming Intel microarchitectures, leading to a suboptimal thread count. Optimizing the thread count for these new Intel microarchitectures yields gains on the majority of models across datatypes, with speedups of up to ~1.5x. ### Motivation and Context Applications should run on Intel with the most performant thread configuration for the majority of models. With new microarchitectures, adjusting the thread count methodology is required to take advantage of their differences. <!-- - Why is this change required? What problem does it solve? - If it fixes an open issue, please link to the issue here. -->
1 parent 345f3d0 commit ef7c845

File tree

3 files changed

+52
-12
lines changed

3 files changed

+52
-12
lines changed

onnxruntime/core/platform/windows/hardware_core_enumerator.cc

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
1-
// Copyright (c) Microsoft Corporation. All rights reserved.
1+
// Copyright (c) Microsoft Corporation. All rights reserved.
22
// Licensed under the MIT License.
33

44
#include "hardware_core_enumerator.h"
5+
#include "core/platform/windows/env.h"
56
#include <memory>
67
#include <Windows.h>
78
#include <assert.h>
@@ -83,6 +84,38 @@ uint32_t HardwareCoreEnumerator::DefaultIntraOpNumThreads() {
8384
// # of physical cores = # of P cores + # of E Cores + # of Soc Cores.
8485
// # of logical cores = # of P cores x 2 (if hyper threading is enabled) + # of E cores + # of Soc Cores.
8586
auto cores = GetCoreInfo();
87+
#if !defined(_M_ARM64EC) && !defined(_M_ARM64) && !defined(__aarch64__)
88+
const int kVendorID_Intel[3] = {0x756e6547, 0x6c65746e, 0x49656e69}; // "GenuntelineI"
89+
bool isIntelSpecifiedPlatform = false;
90+
const int kVendorID_IntelSpecifiedPlatformIDs[3] = {
91+
// ExtendedModel, ExtendedFamily, Family Code, and Model Number
92+
0xa06a, // MTL
93+
0xc065, // ARL-H
94+
0xb065 // ARL-U
95+
};
96+
97+
int regs_leaf0[4];
98+
int regs_leaf1[4];
99+
__cpuid(regs_leaf0, 0);
100+
__cpuid(regs_leaf1, 0x1);
101+
102+
auto isIntel = (kVendorID_Intel[0] == regs_leaf0[1]) && (kVendorID_Intel[1] == regs_leaf0[2]) && (kVendorID_Intel[2] == regs_leaf0[3]);
103+
104+
for (int intelSpecifiedPlatform : kVendorID_IntelSpecifiedPlatformIDs) {
105+
if ((regs_leaf1[0] >> 4) == intelSpecifiedPlatform) {
106+
isIntelSpecifiedPlatform = true;
107+
}
108+
}
109+
110+
if (isIntel) {
111+
if (isIntelSpecifiedPlatform) {
112+
// We want to exclude cores without an LLC
113+
return cores.LLCCores;
114+
} else {
115+
return cores.PhysicalCores;
116+
}
117+
}
118+
#endif
86119

87120
return cores.LLCCores;
88121
}

tools/ci_build/build.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1559,10 +1559,6 @@ def generate_build_tree(
15591559
# The "/profile" flag implies "/DEBUG:FULL /DEBUGTYPE:cv,fixup /OPT:REF /OPT:NOICF /INCREMENTAL:NO /FIXED:NO". We set it for satisfying a Microsoft internal compliance requirement. External users
15601560
# do not need to have it.
15611561
ldflags = ["/profile", "/DYNAMICBASE"]
1562-
# Address Sanitizer libs do not have a Qspectre version. So they two cannot be both enabled.
1563-
if not args.enable_address_sanitizer:
1564-
# Also enable a special perf patch that was made for Intel Meteor Lake mobile CPUs
1565-
cflags += ["/Qspectre", "/DONNXRUNTIME_ENABLE_INTEL_METEOR_LAKE_MOBILE_PLATFORM_PERF_PATCH"]
15661562
if config == "Release":
15671563
cflags += ["/O2", "/Ob2", "/DNDEBUG"]
15681564
elif config == "RelWithDebInfo":

winml/lib/Api/HardwareCoreEnumerator.cpp

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
// Copyright (c) Microsoft Corporation. All rights reserved.
1+
2+
// Copyright (c) Microsoft Corporation. All rights reserved.
23
// Licensed under the MIT License.
34

45
#include "lib/Api/pch/pch.h"
5-
66
#include "HardwareCoreEnumerator.h"
77

88
namespace WINMLP {
@@ -88,22 +88,33 @@ uint32_t HardwareCoreEnumerator::DefaultIntraOpNumThreads() {
8888

8989
#if !defined(_M_ARM64EC) && !defined(_M_ARM64) && !defined(__aarch64__)
9090
const int kVendorID_Intel[3] = {0x756e6547, 0x6c65746e, 0x49656e69}; // "GenuntelineI"
91+
bool isIntelSpecifiedPlatform = false;
92+
const int kVendorID_IntelSpecifiedPlatformIDs[3] = {
93+
// ExtendedModel,ExtendedFamily,Family Code, and Model Number
94+
0xa06a, // MTL
95+
0xc065, // ARL-H
96+
0xb065 // ARL-U
97+
};
98+
9199
int regs_leaf0[4];
92-
int regs_leaf7[4];
100+
int regs_leaf1[4];
93101
__cpuid(regs_leaf0, 0);
94-
__cpuid(regs_leaf7, 0x7);
102+
__cpuid(regs_leaf1, 0x1);
95103

96104
auto isIntel = (kVendorID_Intel[0] == regs_leaf0[1]) && (kVendorID_Intel[1] == regs_leaf0[2]) &&
97105
(kVendorID_Intel[2] == regs_leaf0[3]);
98106

99-
auto isHybrid = (regs_leaf7[3] & (1 << 15));
107+
for (int intelSpecifiedPlatform : kVendorID_IntelSpecifiedPlatformIDs) {
108+
if ((regs_leaf1[0] >> 4) == intelSpecifiedPlatform) {
109+
isIntelSpecifiedPlatform = true;
110+
}
111+
}
100112

101-
if (isIntel && isHybrid) {
113+
if (isIntel && isIntelSpecifiedPlatform) {
102114
// We want to use the number of physical cores, but exclude cores without an LLC
103115
return cores.LLCCores;
104116
}
105117
#endif
106-
107118
return cores.PhysicalCores;
108119
}
109120

0 commit comments

Comments
 (0)