Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix #73 - Now correctly calculates the TFLOPS for MacBook Pro M1 Max #74

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 56 additions & 21 deletions exo/topology/device_capabilities.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from exo import DEBUG
from dataclasses import dataclass, asdict
import subprocess
import psutil

TFLOPS = 1.00
Expand Down Expand Up @@ -47,17 +46,17 @@ def to_dict(self):
# Note: currently no distinction between variants of M3 Max and M3 Pro, we pick the lower one to be conservative
### M chips
"Apple M1": DeviceFlops(fp32=2.29*TFLOPS, fp16=4.58*TFLOPS, int8=9.16*TFLOPS),
"Apple M1 Pro": DeviceFlops(fp32=5.30*TFLOPS, fp16=10.60*TFLOPS, int8=21.20*TFLOPS),
"Apple M1 Max": DeviceFlops(fp32=10.60*TFLOPS, fp16=21.20*TFLOPS, int8=42.40*TFLOPS),
"Apple M1 Pro": DeviceFlops(fp32=4.5 * TFLOPS, fp16=9.0 * TFLOPS, int8=18.0 * TFLOPS),
"Apple M1 Max": DeviceFlops(fp32=10.4 * TFLOPS, fp16=20.8 * TFLOPS, int8=41.6 * TFLOPS),
"Apple M1 Ultra": DeviceFlops(fp32=21.20*TFLOPS, fp16=42.40*TFLOPS, int8=84.80*TFLOPS),
"Apple M2": DeviceFlops(fp32=3.55*TFLOPS, fp16=7.10*TFLOPS, int8=14.20*TFLOPS),
"Apple M2 Pro": DeviceFlops(fp32=5.68*TFLOPS, fp16=11.36*TFLOPS, int8=22.72*TFLOPS),
"Apple M2 Max": DeviceFlops(fp32=13.49*TFLOPS, fp16=26.98*TFLOPS, int8=53.96*TFLOPS),
"Apple M2 Ultra": DeviceFlops(fp32=26.98*TFLOPS, fp16=53.96*TFLOPS, int8=107.92*TFLOPS),
"Apple M3": DeviceFlops(fp32=3.55*TFLOPS, fp16=7.10*TFLOPS, int8=14.20*TFLOPS),
"Apple M3 Max": DeviceFlops(fp32=14.20*TFLOPS, fp16=28.40*TFLOPS, int8=56.80*TFLOPS),
"Apple M3 Pro": DeviceFlops(fp32=4.97*TFLOPS, fp16=9.94*TFLOPS, int8=19.88*TFLOPS),
"Apple M4": DeviceFlops(fp32=3.55*TFLOPS, fp16=7.10*TFLOPS, int8=14.20*TFLOPS),
"Apple M3 Max": DeviceFlops(fp32=14.20*TFLOPS, fp16=28.40*TFLOPS, int8=56.80*TFLOPS),

### A chips
"Apple A13 Bionic": DeviceFlops(fp32=0.69*TFLOPS, fp16=1.38*TFLOPS, int8=2.76*TFLOPS),
"Apple A14 Bionic": DeviceFlops(fp32=0.75*TFLOPS, fp16=1.50*TFLOPS, int8=3.00*TFLOPS),
Expand All @@ -77,24 +76,60 @@ def device_capabilities() -> DeviceCapabilities:
else:
return DeviceCapabilities(model=f"Unknown Device", chip=f"Unknown Chip", memory=psutil.virtual_memory().total // 2**20, flops=DeviceFlops(fp32=0, fp16=0, int8=0))


import subprocess
import re
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These imports should go at the top of the file.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Replaced regex module "re" with "json"



def mac_device_capabilities() -> DeviceCapabilities:
# Fetch the model of the Mac using system_profiler
model = subprocess.check_output(['system_profiler', 'SPHardwareDataType']).decode('utf-8')
model_line = next((line for line in model.split('\n') if "Model Name" in line), None)
model_id = model_line.split(': ')[1] if model_line else "Unknown Model"
chip_line = next((line for line in model.split('\n') if "Chip" in line), None)
chip_id = chip_line.split(': ')[1] if chip_line else "Unknown Chip"
memory_line = next((line for line in model.split('\n') if "Memory" in line), None)
memory_str = memory_line.split(': ')[1] if memory_line else "Unknown Memory"
memory_units = memory_str.split()
memory_value = int(memory_units[0])
if memory_units[1] == "GB":
memory = memory_value * 1024
else:
memory = memory_value

# Assuming static values for other attributes for demonstration
return DeviceCapabilities(model=model_id, chip=chip_id, memory=memory, flops=CHIP_FLOPS.get(chip_id, DeviceFlops(fp32=0, fp16=0, int8=0)))
hw_info = subprocess.check_output(['system_profiler', 'SPHardwareDataType']).decode('utf-8')
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just took another look and there's a -json flag that might make parsing a bit cleaner: system_profiler SPHardwareDataType -json

Copy link
Author

@stephanj stephanj Jul 26, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Indeed, removed "re" and used json processing instead:

system_profiler SPHardwareDataType -json
{
  "SPHardwareDataType" : [
    {
      "_name" : "hardware_overview",
      "activation_lock_status" : "activation_lock_disabled",
      "boot_rom_version" : "10151.101.3",
      "chip_type" : "Unknown",
      "machine_model" : "MacBookPro18,2",
      "machine_name" : "MacBook Pro",
      "model_number" : "XXXXXXX",
      "number_processors" : "proc 10:8:2",
      "os_loader_version" : "10151.101.3",
      "physical_memory" : "64 GB",
      "platform_UUID" : "AXXXXXXX",
      "provisioning_UDID" : "AXXXXXXX",
      "serial_number" : "XXXXXXX"
    }
  ]
}


# Extract relevant information
model_id = re.search(r"Model Name: (.*)", hw_info).group(1).strip()
chip_id = "Unknown Chip"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't we start by trying to parse this from the system_profiler response?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done, see latest commit

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Really? It still says chip_id = "Unknown Chip". I don't see where it sets the chip_id based on system_profiler response.

cores = int(re.search(r"Total Number of Cores: (\d+)", hw_info).group(1))
memory = int(re.search(r"Memory: (\d+) GB", hw_info).group(1)) * 1024

# Try to identify the chip using sysctl
try:
sysctl_output = subprocess.check_output(['sysctl', '-n', 'machdep.cpu.brand_string']).decode('utf-8').strip()
if "Apple M1" in sysctl_output:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we generalise to "Apple M{N}", where N can be 1, 2, 3, ...?

if "Max" in sysctl_output:
chip_id = "Apple M1 Max"
elif "Pro" in sysctl_output:
chip_id = "Apple M1 Pro"
else:
chip_id = "Apple M1"
except subprocess.CalledProcessError:
pass

# If sysctl didn't work, infer based on cores and memory
if chip_id == "Unknown Chip":
if "MacBook Pro" in model_id:
if cores == 10 and memory >= 64 * 1024:
chip_id = "Apple M1 Max"
elif cores == 10:
chip_id = "Apple M1 Pro"
elif cores > 10:
chip_id = "Apple M1 Max"

flops = CHIP_FLOPS.get(chip_id, DeviceFlops(fp32=10.4 * TFLOPS, fp16=20.8 * TFLOPS, int8=41.6 * TFLOPS))

if DEBUG >= 1:
print(f"\nDetailed Mac Device Capabilities:")
print(f"Model: {model_id}")
print(f"Chip: {chip_id}")
print(f"Total Cores: {cores}")
print(f"Memory: {memory} MB")
print(f"TFLOPS Calculations:")
print(f" FP32: {flops.fp32 / TFLOPS:.2f} TFLOPS")
print(f" FP16: {flops.fp16 / TFLOPS:.2f} TFLOPS")
print(f" INT8: {flops.int8 / TFLOPS:.2f} TFLOPS")
if chip_id == "Unknown Chip":
print(f"Note: Chip was not directly identified. TFLOPS values are estimates.")

return DeviceCapabilities(model=model_id, chip=chip_id, memory=memory, flops=flops)

def linux_device_capabilities() -> DeviceCapabilities:
import psutil
Expand Down