Skip to content

Commit

Permalink
Add TorchAO speedup metric vs eager (#6178)
Browse files Browse the repository at this point in the history
Addresses the first part of
#6176

This PR adds another speedup metric vs eager. Because this is the TorchAO
dashboard, I think it's more appropriate to show TorchAO vs compile and
TorchAO vs eager instead of TorchAO vs compile and compile vs eager,
because the last one (compile vs eager) is a better fit for the PT2 inductor
dashboard. @jerryzh168 What do you think?

I also fix a UX issue: the base commit now defaults to the oldest commit in
the time range instead of the previous commit.

### Testing


https://torchci-git-fork-huydhn-improve-ao-speedup-metric-fbopensource.vercel.app/benchmark/llms?startTime=Thu%2C%2009%20Jan%202025%2010%3A21%3A42%20GMT&stopTime=Thu%2C%2016%20Jan%202025%2010%3A21%3A42%20GMT&granularity=day&lBranch=main&lCommit=2cddc67fe700579043e3e2d395d983764298b82e9746e9b2663c583710b3b08c&rBranch=main&rCommit=399034112cd82562f0d651bda8a8b5ab8840703ee0b40cd136d85181164d2280&repoName=pytorch%2Fao&modelName=All%20Models&backendName=All%20Backends&dtypeName=All%20DType&deviceName=All%20Devices
  • Loading branch information
huydhn authored Jan 16, 2025
1 parent 62ce219 commit cb2e2d9
Show file tree
Hide file tree
Showing 5 changed files with 134 additions and 32 deletions.
6 changes: 6 additions & 0 deletions torchci/clickhouse_queries/oss_ci_benchmark_llms/query.sql
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,11 @@ WITH benchmarks AS (
tupleElement(o.benchmark, 'extra_info') [ 'arch' ],
tupleElement(o.runners [ 1 ], 'type')
) AS arch,
IF(
tupleElement(o.benchmark, 'extra_info') [ 'compile' ] = '',
'true', -- Default to true
tupleElement(o.benchmark, 'extra_info') [ 'compile' ]
) AS use_torch_compile,
DATE_TRUNC(
{granularity: String },
fromUnixTimestamp(o.timestamp)
Expand Down Expand Up @@ -71,6 +76,7 @@ SELECT
dtype,
device,
arch,
toBool(use_torch_compile) AS use_torch_compile,
granularity_bucket
FROM
benchmarks
Expand Down
21 changes: 14 additions & 7 deletions torchci/components/benchmark/llms/ModelGraphPanel.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ import {
import { TIME_FIELD_NAME } from "components/benchmark/common";
import {
DEFAULT_DEVICE_NAME,
DEFAULT_DTYPE_NAME,
DEFAULT_MODEL_NAME,
LLMsBenchmarkData,
METRIC_DISPLAY_HEADERS,
Expand All @@ -18,7 +17,10 @@ import {
TimeSeriesPanelWithData,
} from "components/metrics/panels/TimeSeriesPanel";
import dayjs from "dayjs";
import { computeSpeedup } from "lib/benchmark/aoUtils";
import {
computeSpeedup,
TORCHAO_SPEEDUP_METRIC_NAMES,
} from "lib/benchmark/aoUtils";
import { computeGeomean, useBenchmark } from "lib/benchmark/llmUtils";
import { BranchAndCommit } from "lib/types";

Expand Down Expand Up @@ -64,7 +66,12 @@ export function GraphPanel({
);
}

const dataWithSpeedup = computeSpeedup(repoName, data);
const dataWithSpeedup = computeSpeedup(
repoName,
computeSpeedup(repoName, data, false, true),
true,
false
);

// Clamp to the nearest granularity (e.g. nearest hour) so that the times will
// align with the data we get from the database
Expand All @@ -80,8 +87,10 @@ export function GraphPanel({
const chartData: { [k: string]: any } = {};
const graphSeries: { [k: string]: any } = {};
metricNames.forEach((metric: string) => {
// TODO (huydhn): Only display aggregated speedup metric for now
if (modelName === DEFAULT_MODEL_NAME && metric !== "speedup") {
if (
modelName === DEFAULT_MODEL_NAME &&
!TORCHAO_SPEEDUP_METRIC_NAMES.includes(metric)
) {
chartData[metric] = [];
return;
}
Expand Down Expand Up @@ -115,8 +124,6 @@ export function GraphPanel({
.filter((record: LLMsBenchmarkData) => {
return (
record.model === modelName &&
(record.dtype === dtypeName ||
dtypeName === DEFAULT_DTYPE_NAME) &&
(`${record.device} (${record.arch})` === deviceName ||
deviceName === DEFAULT_DEVICE_NAME) &&
record.metric === metric
Expand Down
5 changes: 4 additions & 1 deletion torchci/components/benchmark/llms/common.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@ export const METRIC_DISPLAY_HEADERS: { [k: string]: string } = {
token_per_sec: "Token per second",
flops_utilization: "FLOPs utilization",
"compilation_time(s)": "Compilation Time (s)",
speedup: "Speedup",
compile_vs_eager_speedup: "Compile vs eager speedup",
autoquant_vs_compile_speedup: "Autoquant vs compile speedup",
eager_speedup: "Eager speedup",
};
// The variable name is a bit dumb, but it tells if a higher metric value
// is good or bad so that we can highlight it on the dashboard accordingly.
Expand Down Expand Up @@ -53,6 +55,7 @@ export interface LLMsBenchmarkData {
device: string;
arch: string;
display?: string;
use_torch_compile?: boolean;
}

export interface BranchAndCommitPerfData extends BranchAndCommit {
Expand Down
104 changes: 89 additions & 15 deletions torchci/lib/benchmark/aoUtils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,17 @@ export const TORCHAO_BASELINE = "noquant";
// here on the dashboard
const SPEEDUP_METRICS = ["tok/s", "time_ms(avg)", "time_s(avg)", "img_s(avg)"];

export const TORCHAO_SPEEDUP_METRIC_NAMES = [
"autoquant_vs_compile_speedup",
"compile_vs_eager_speedup",
"eager_speedup",
];
// Different speedup metrics, the key is quantization-torch.compile
export const TORCHAO_SPEEDUP_METRIC_NAMES_MAPPING: { [key: string]: string } = {
"noquant-false": "compile_vs_eager_speedup",
"-true": "autoquant_vs_compile_speedup",
};

// TODO (huydhn): Use this function to convert the generic benchmark data to the old
// CompilerPerformanceData format. This is needed until the TorchInductor dashboard
// is migrated to the new format
Expand Down Expand Up @@ -54,48 +65,111 @@ export function convertToCompilerPerformanceData(data: BenchmarkData[]) {
return Object.values(convertData);
}

export function computeSpeedup(repoName: string, data: LLMsBenchmarkData[]) {
export function computeSpeedup(
repoName: string,
data: LLMsBenchmarkData[],
useTorchCompile: boolean,
usebaseCommitbaseline: boolean
) {
if (repoName !== TORCHAO_REPO) {
return data;
}

const baselineMetrics: { [key: string]: LLMsBenchmarkData } = {};
// https://github.com/pytorch/test-infra/pull/6178#issuecomment-2596338457, we want
// to show 3 different speedup in AO:
// - Current eager perf vs base commit eager
const baseCommitBaseline: { [key: string]: LLMsBenchmarkData } = {};
// - Current compile perf vs current eager
// - Current autoquant perf vs current compile
const currentCommitBaseline: { [key: string]: LLMsBenchmarkData } = {};

data.forEach((r: LLMsBenchmarkData) => {
if (r.dtype !== TORCHAO_BASELINE) {
if (
r.dtype !== TORCHAO_BASELINE ||
r.use_torch_compile !== useTorchCompile
) {
return;
}

const k = `${r.workflow_id} ${r.job_id} ${r.model} ${r.metric} ${r.device} ${r.arch}`;
baselineMetrics[k] = r;
const baseCommitKey = `${r.model} ${r.metric} ${r.device} ${r.arch}`;
const currentCommitKey = `${r.workflow_id} ${r.job_id} ${baseCommitKey}`;

// To compare against the current commit
currentCommitBaseline[currentCommitKey] = r;

// To compare against the oldest base commit
if (
!usebaseCommitbaseline ||
(baseCommitKey in baseCommitBaseline &&
baseCommitBaseline[baseCommitKey].workflow_id < r.workflow_id)
) {
return;
}
baseCommitBaseline[baseCommitKey] = r;
});

const withSpeedup: LLMsBenchmarkData[] = [];
data.forEach((r: LLMsBenchmarkData) => {
if (r.dtype === TORCHAO_BASELINE) {
return;
withSpeedup.push(r);

// Compute eager speedup vs the base commit baseline
if (r.dtype === TORCHAO_BASELINE && r.use_torch_compile === false) {
if (SPEEDUP_METRICS.includes(r.metric)) {
const k = `${r.model} ${r.metric} ${r.device} ${r.arch}`;
if (
k in baseCommitBaseline &&
baseCommitBaseline[k].actual !== 0 &&
r.actual !== 0 &&
baseCommitBaseline[k].workflow_id <= r.workflow_id
) {
const speedup = r.metric.includes("time")
? baseCommitBaseline[k].actual / r.actual
: r.actual / baseCommitBaseline[k].actual;

withSpeedup.push({
...r,
metric: "eager_speedup",
actual: Number(speedup.toFixed(2)),
target: 0,
});
}
}
}

if (SPEEDUP_METRICS.includes(r.metric)) {
const k = `${r.workflow_id} ${r.job_id} ${r.model} ${r.metric} ${r.device} ${r.arch}`;
if (
k in baselineMetrics &&
baselineMetrics[k].actual !== 0 &&
k in currentCommitBaseline &&
currentCommitBaseline[k].actual !== 0 &&
r.actual !== 0
) {
const speedup = r.metric.includes("time")
? baselineMetrics[k].actual / r.actual
: r.actual / baselineMetrics[k].actual;
? currentCommitBaseline[k].actual / r.actual
: r.actual / currentCommitBaseline[k].actual;

const speedupMetricName =
r.dtype === TORCHAO_BASELINE
? // Compile vs eager
r !== currentCommitBaseline[k]
? TORCHAO_SPEEDUP_METRIC_NAMES_MAPPING[
`${r.dtype}-${useTorchCompile}`
]
: ""
: // Autoquant vs compile or vs eager
TORCHAO_SPEEDUP_METRIC_NAMES_MAPPING[`-${useTorchCompile}`];

if (!speedupMetricName) {
return;
}

withSpeedup.push({
...r,
metric: "speedup",
actual: Number(speedup.toFixed(4)),
metric: speedupMetricName,
actual: Number(speedup.toFixed(2)),
target: 0,
});
}
}

withSpeedup.push(r);
});

return withSpeedup;
Expand Down
30 changes: 21 additions & 9 deletions torchci/pages/benchmark/llms.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,11 @@ import CopyLink from "components/CopyLink";
import GranularityPicker from "components/GranularityPicker";
import { Granularity } from "components/metrics/panels/TimeSeriesPanel";
import dayjs from "dayjs";
import { computeSpeedup, TORCHAO_BASELINE } from "lib/benchmark/aoUtils";
import {
computeSpeedup,
TORCHAO_BASELINE,
TORCHAO_SPEEDUP_METRIC_NAMES,
} from "lib/benchmark/aoUtils";
import { useBenchmark } from "lib/benchmark/llmUtils";
import { fetcher } from "lib/GeneralUtils";
import { BranchAndCommit } from "lib/types";
Expand Down Expand Up @@ -82,11 +86,22 @@ function Report({
);
}

const lDataWithSpeedup = computeSpeedup(repoName, lData);
const rDataWithSpeedup = computeSpeedup(repoName, rData);
const lDataWithSpeedup = computeSpeedup(
repoName,
computeSpeedup(repoName, lData, false, true),
true,
false
);

const rDataWithSpeedup = computeSpeedup(
repoName,
computeSpeedup(repoName, rData, false, true),
true,
false
);

if (repoName === "pytorch/ao") {
metricNames = ["speedup", ...metricNames];
metricNames = [...TORCHAO_SPEEDUP_METRIC_NAMES, ...metricNames];
}

return (
Expand Down Expand Up @@ -288,10 +303,7 @@ export default function Page() {
];
const dtypeNames: string[] = _.compact([
DEFAULT_DTYPE_NAME,
..._.filter(
_.uniq(data.map((r: any) => r.dtype)) as string[],
(r: string) => r !== TORCHAO_BASELINE
),
...(_.uniq(data.map((r: any) => r.dtype)) as string[]),
]);
const metricNames: string[] = _.uniq(data.map((r: any) => r.metric));

Expand Down Expand Up @@ -367,7 +379,7 @@ export default function Page() {
commit={lCommit}
setCommit={setLCommit}
titlePrefix={"Base"}
fallbackIndex={1} // Default to previous commit
fallbackIndex={-1} // Default to oldest commit
timeRange={timeRange}
/>
<Divider orientation="vertical" flexItem>
Expand Down

0 comments on commit cb2e2d9

Please sign in to comment.