From 8dbb8ab4e491a2ad8962b6e34e08f520ec7bb841 Mon Sep 17 00:00:00 2001 From: Lequn Chen Date: Sun, 14 Jan 2024 11:05:45 -0800 Subject: [PATCH] dump deadline slack --- tools/bench_dispatcher/run_rankmt_main.cpp | 12 +++++++----- tools/nexus_scheduler/run_nexus_main.cpp | 11 ++++++----- tools/shepherd/run_shepherd_main.cpp | 11 ++++++----- 3 files changed, 19 insertions(+), 15 deletions(-) diff --git a/tools/bench_dispatcher/run_rankmt_main.cpp b/tools/bench_dispatcher/run_rankmt_main.cpp index 6fa6797..1e2b6bc 100644 --- a/tools/bench_dispatcher/run_rankmt_main.cpp +++ b/tools/bench_dispatcher/run_rankmt_main.cpp @@ -8,6 +8,7 @@ #include #include #include +#include <limits> #include #include #include @@ -465,15 +466,16 @@ class DispatcherRunner { fprintf(f, "BATCHPLAN %d %d %d %d %.9f %.9f\n", plan_id, gpu_idx, model_idx, batch_size, exec_at, finish_at); + long earliest_recv_ns = std::numeric_limits<long>::max(); for (const auto& q : p.queries()) { const auto& c = q.query_without_input().clock(); qd[model_idx].push_back(p.exec_time_ns() - c.frontend_recv_ns()); - slack[model_idx].push_back( - q.query_without_input().clock().frontend_recv_ns() + - options_.models[model_idx].model_session.latency_sla() * - 1000000L - - p.expected_finish_time_ns()); + earliest_recv_ns = std::min(earliest_recv_ns, c.frontend_recv_ns()); } + slack[model_idx].push_back( + earliest_recv_ns + + options_.models[model_idx].model_session.latency_sla() * 1000000L - + p.expected_finish_time_ns()); CHECK(p.match_method() == 1 || p.match_method() == 2); ++match_method[(model_idx << 1) | (p.match_method() - 1)]; } diff --git a/tools/nexus_scheduler/run_nexus_main.cpp b/tools/nexus_scheduler/run_nexus_main.cpp index c733b2a..624e637 100644 --- a/tools/nexus_scheduler/run_nexus_main.cpp +++ b/tools/nexus_scheduler/run_nexus_main.cpp @@ -500,17 +500,18 @@ class NexusRunner { fprintf(f, "BATCHPLAN %d %d %d %d %.9f %.9f\n", plan_id, gpu_idx, model_idx, batch_size, exec_at, finish_at); + long earliest_recv_ns = 
std::numeric_limits<long>::max(); auto* queries = query_collector_.queries(model_idx); for (auto q : p.query_ids) { auto& qctx = queries[q]; qd[model_idx].push_back(p.exec_at.time_since_epoch().count() - qctx.frontend_recv_ns); - slack[model_idx].push_back( - qctx.frontend_recv_ns + - options_.models[model_idx].model_session.latency_sla() * - 1000000L - - p.exec_at.time_since_epoch().count()); + earliest_recv_ns = std::min(earliest_recv_ns, qctx.frontend_recv_ns); } + slack[model_idx].push_back( + earliest_recv_ns + + options_.models[model_idx].model_session.latency_sla() * 1000000L - + p.finish_at.time_since_epoch().count()); } } diff --git a/tools/shepherd/run_shepherd_main.cpp b/tools/shepherd/run_shepherd_main.cpp index d066c0b..19a99f3 100644 --- a/tools/shepherd/run_shepherd_main.cpp +++ b/tools/shepherd/run_shepherd_main.cpp @@ -419,16 +419,17 @@ class ShepherdRunner { fprintf(f, "BATCHPLAN %d %d %d %d %.9f %.9f\n", plan_id, gpu_idx, model_idx, batch_size, exec_at, finish_at); + long earliest_recv_ns = std::numeric_limits<long>::max(); for (auto global_id : p.query_ids) { auto query_id = global_id - l.global_id_offset; auto& qctx = l.frontend->queries()[query_id]; qd[model_idx].push_back(p.exec_time_ns() - qctx.frontend_recv_ns); - slack[model_idx].push_back( - qctx.frontend_recv_ns + - options_.models[model_idx].model_session.latency_sla() * - 1000000L - - p.expected_finish_time_ns()); + earliest_recv_ns = std::min(earliest_recv_ns, qctx.frontend_recv_ns); } + slack[model_idx].push_back( + earliest_recv_ns + + options_.models[model_idx].model_session.latency_sla() * 1000000L - + p.expected_finish_time_ns()); } }