-
Notifications
You must be signed in to change notification settings - Fork 502
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix #73 - Correctly calculate the TFLOPS for MacBook Pro M1 Max #74
base: main
Are you sure you want to change the base?
Changes from 1 commit
1590ebf
7445f4d
d678d95
39712a8
196c763
bca4575
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,5 @@ | ||
from exo import DEBUG | ||
from dataclasses import dataclass, asdict | ||
import subprocess | ||
import psutil | ||
|
||
TFLOPS = 1.00 | ||
|
@@ -47,17 +46,17 @@ def to_dict(self): | |
# Note: currently no distinction between variants of M3 Max and M3 Pro, we pick the lower one to be conservative | ||
### M chips | ||
"Apple M1": DeviceFlops(fp32=2.29*TFLOPS, fp16=4.58*TFLOPS, int8=9.16*TFLOPS), | ||
"Apple M1 Pro": DeviceFlops(fp32=5.30*TFLOPS, fp16=10.60*TFLOPS, int8=21.20*TFLOPS), | ||
"Apple M1 Max": DeviceFlops(fp32=10.60*TFLOPS, fp16=21.20*TFLOPS, int8=42.40*TFLOPS), | ||
"Apple M1 Pro": DeviceFlops(fp32=4.5 * TFLOPS, fp16=9.0 * TFLOPS, int8=18.0 * TFLOPS), | ||
"Apple M1 Max": DeviceFlops(fp32=10.4 * TFLOPS, fp16=20.8 * TFLOPS, int8=41.6 * TFLOPS), | ||
"Apple M1 Ultra": DeviceFlops(fp32=21.20*TFLOPS, fp16=42.40*TFLOPS, int8=84.80*TFLOPS), | ||
"Apple M2": DeviceFlops(fp32=3.55*TFLOPS, fp16=7.10*TFLOPS, int8=14.20*TFLOPS), | ||
"Apple M2 Pro": DeviceFlops(fp32=5.68*TFLOPS, fp16=11.36*TFLOPS, int8=22.72*TFLOPS), | ||
"Apple M2 Max": DeviceFlops(fp32=13.49*TFLOPS, fp16=26.98*TFLOPS, int8=53.96*TFLOPS), | ||
"Apple M2 Ultra": DeviceFlops(fp32=26.98*TFLOPS, fp16=53.96*TFLOPS, int8=107.92*TFLOPS), | ||
"Apple M3": DeviceFlops(fp32=3.55*TFLOPS, fp16=7.10*TFLOPS, int8=14.20*TFLOPS), | ||
"Apple M3 Max": DeviceFlops(fp32=14.20*TFLOPS, fp16=28.40*TFLOPS, int8=56.80*TFLOPS), | ||
"Apple M3 Pro": DeviceFlops(fp32=4.97*TFLOPS, fp16=9.94*TFLOPS, int8=19.88*TFLOPS), | ||
"Apple M4": DeviceFlops(fp32=3.55*TFLOPS, fp16=7.10*TFLOPS, int8=14.20*TFLOPS), | ||
"Apple M3 Max": DeviceFlops(fp32=14.20*TFLOPS, fp16=28.40*TFLOPS, int8=56.80*TFLOPS), | ||
|
||
### A chips | ||
"Apple A13 Bionic": DeviceFlops(fp32=0.69*TFLOPS, fp16=1.38*TFLOPS, int8=2.76*TFLOPS), | ||
"Apple A14 Bionic": DeviceFlops(fp32=0.75*TFLOPS, fp16=1.50*TFLOPS, int8=3.00*TFLOPS), | ||
|
@@ -77,24 +76,60 @@ def device_capabilities() -> DeviceCapabilities: | |
else: | ||
return DeviceCapabilities(model=f"Unknown Device", chip=f"Unknown Chip", memory=psutil.virtual_memory().total // 2**20, flops=DeviceFlops(fp32=0, fp16=0, int8=0)) | ||
|
||
|
||
import subprocess | ||
import re | ||
|
||
|
||
def mac_device_capabilities() -> DeviceCapabilities: | ||
# Fetch the model of the Mac using system_profiler | ||
model = subprocess.check_output(['system_profiler', 'SPHardwareDataType']).decode('utf-8') | ||
model_line = next((line for line in model.split('\n') if "Model Name" in line), None) | ||
model_id = model_line.split(': ')[1] if model_line else "Unknown Model" | ||
chip_line = next((line for line in model.split('\n') if "Chip" in line), None) | ||
chip_id = chip_line.split(': ')[1] if chip_line else "Unknown Chip" | ||
memory_line = next((line for line in model.split('\n') if "Memory" in line), None) | ||
memory_str = memory_line.split(': ')[1] if memory_line else "Unknown Memory" | ||
memory_units = memory_str.split() | ||
memory_value = int(memory_units[0]) | ||
if memory_units[1] == "GB": | ||
memory = memory_value * 1024 | ||
else: | ||
memory = memory_value | ||
|
||
# Assuming static values for other attributes for demonstration | ||
return DeviceCapabilities(model=model_id, chip=chip_id, memory=memory, flops=CHIP_FLOPS.get(chip_id, DeviceFlops(fp32=0, fp16=0, int8=0))) | ||
hw_info = subprocess.check_output(['system_profiler', 'SPHardwareDataType']).decode('utf-8') | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I just took another look and there's a [inline code lost in extraction; judging by the reply below, presumably the `-json` output option of `system_profiler`]
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Indeed, removed "re" and used JSON processing instead:
system_profiler SPHardwareDataType -json
{
"SPHardwareDataType" : [
{
"_name" : "hardware_overview",
"activation_lock_status" : "activation_lock_disabled",
"boot_rom_version" : "10151.101.3",
"chip_type" : "Unknown",
"machine_model" : "MacBookPro18,2",
"machine_name" : "MacBook Pro",
"model_number" : "XXXXXXX",
"number_processors" : "proc 10:8:2",
"os_loader_version" : "10151.101.3",
"physical_memory" : "64 GB",
"platform_UUID" : "AXXXXXXX",
"provisioning_UDID" : "AXXXXXXX",
"serial_number" : "XXXXXXX"
}
]
} |
||
|
||
# Extract relevant information | ||
model_id = re.search(r"Model Name: (.*)", hw_info).group(1).strip() | ||
chip_id = "Unknown Chip" | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Shouldn't we start by trying to parse this from the [inline code lost in extraction; presumably the `system_profiler` output]
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done, see latest commit.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Really? It still says [inline code lost in extraction; presumably `chip_id = "Unknown Chip"`] |
||
cores = int(re.search(r"Total Number of Cores: (\d+)", hw_info).group(1)) | ||
memory = int(re.search(r"Memory: (\d+) GB", hw_info).group(1)) * 1024 | ||
|
||
# Try to identify the chip using sysctl | ||
try: | ||
sysctl_output = subprocess.check_output(['sysctl', '-n', 'machdep.cpu.brand_string']).decode('utf-8').strip() | ||
if "Apple M1" in sysctl_output: | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we generalise to "Apple M{N}", where N can be 1, 2, 3, ...? |
||
if "Max" in sysctl_output: | ||
chip_id = "Apple M1 Max" | ||
elif "Pro" in sysctl_output: | ||
chip_id = "Apple M1 Pro" | ||
else: | ||
chip_id = "Apple M1" | ||
except subprocess.CalledProcessError: | ||
pass | ||
|
||
# If sysctl didn't work, infer based on cores and memory | ||
if chip_id == "Unknown Chip": | ||
if "MacBook Pro" in model_id: | ||
if cores == 10 and memory >= 64 * 1024: | ||
chip_id = "Apple M1 Max" | ||
elif cores == 10: | ||
chip_id = "Apple M1 Pro" | ||
elif cores > 10: | ||
chip_id = "Apple M1 Max" | ||
|
||
flops = CHIP_FLOPS.get(chip_id, DeviceFlops(fp32=10.4 * TFLOPS, fp16=20.8 * TFLOPS, int8=41.6 * TFLOPS)) | ||
|
||
if DEBUG >= 1: | ||
print(f"\nDetailed Mac Device Capabilities:") | ||
print(f"Model: {model_id}") | ||
print(f"Chip: {chip_id}") | ||
print(f"Total Cores: {cores}") | ||
print(f"Memory: {memory} MB") | ||
print(f"TFLOPS Calculations:") | ||
print(f" FP32: {flops.fp32 / TFLOPS:.2f} TFLOPS") | ||
print(f" FP16: {flops.fp16 / TFLOPS:.2f} TFLOPS") | ||
print(f" INT8: {flops.int8 / TFLOPS:.2f} TFLOPS") | ||
if chip_id == "Unknown Chip": | ||
print(f"Note: Chip was not directly identified. TFLOPS values are estimates.") | ||
|
||
return DeviceCapabilities(model=model_id, chip=chip_id, memory=memory, flops=flops) | ||
|
||
def linux_device_capabilities() -> DeviceCapabilities: | ||
import psutil | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
these imports at top
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Replaced regex module "re" with "json"