Skip to content

Commit

Permalink
dump deadline slack
Browse files: browse the repository at this point in the history
  • Loading branch information
abcdabcd987 committed Jan 14, 2024
1 parent 634fa87 commit 8dbb8ab
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 15 deletions.
12 changes: 7 additions & 5 deletions tools/bench_dispatcher/run_rankmt_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include <condition_variable>
#include <cstdio>
#include <iostream>
#include <limits>
#include <memory>
#include <mutex>
#include <numeric>
Expand Down Expand Up @@ -465,15 +466,16 @@ class DispatcherRunner {
fprintf(f, "BATCHPLAN %d %d %d %d %.9f %.9f\n", plan_id, gpu_idx,
model_idx, batch_size, exec_at, finish_at);

long earliest_recv_ns = std::numeric_limits<long>::max();
for (const auto& q : p.queries()) {
const auto& c = q.query_without_input().clock();
qd[model_idx].push_back(p.exec_time_ns() - c.frontend_recv_ns());
slack[model_idx].push_back(
q.query_without_input().clock().frontend_recv_ns() +
options_.models[model_idx].model_session.latency_sla() *
1000000L -
p.expected_finish_time_ns());
earliest_recv_ns = std::min(earliest_recv_ns, c.frontend_recv_ns());
}
slack[model_idx].push_back(
earliest_recv_ns +
options_.models[model_idx].model_session.latency_sla() * 1000000L -
p.expected_finish_time_ns());
CHECK(p.match_method() == 1 || p.match_method() == 2);
++match_method[(model_idx << 1) | (p.match_method() - 1)];
}
Expand Down
11 changes: 6 additions & 5 deletions tools/nexus_scheduler/run_nexus_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -500,17 +500,18 @@ class NexusRunner {
fprintf(f, "BATCHPLAN %d %d %d %d %.9f %.9f\n", plan_id, gpu_idx,
model_idx, batch_size, exec_at, finish_at);

long earliest_recv_ns = std::numeric_limits<long>::max();
auto* queries = query_collector_.queries(model_idx);
for (auto q : p.query_ids) {
auto& qctx = queries[q];
qd[model_idx].push_back(p.exec_at.time_since_epoch().count() -
qctx.frontend_recv_ns);
slack[model_idx].push_back(
qctx.frontend_recv_ns +
options_.models[model_idx].model_session.latency_sla() *
1000000L -
p.exec_at.time_since_epoch().count());
earliest_recv_ns = std::min(earliest_recv_ns, qctx.frontend_recv_ns);
}
slack[model_idx].push_back(
earliest_recv_ns +
options_.models[model_idx].model_session.latency_sla() * 1000000L -
p.finish_at.time_since_epoch().count());
}
}

Expand Down
11 changes: 6 additions & 5 deletions tools/shepherd/run_shepherd_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -419,16 +419,17 @@ class ShepherdRunner {
fprintf(f, "BATCHPLAN %d %d %d %d %.9f %.9f\n", plan_id, gpu_idx,
model_idx, batch_size, exec_at, finish_at);

long earliest_recv_ns = std::numeric_limits<long>::max();
for (auto global_id : p.query_ids) {
auto query_id = global_id - l.global_id_offset;
auto& qctx = l.frontend->queries()[query_id];
qd[model_idx].push_back(p.exec_time_ns() - qctx.frontend_recv_ns);
slack[model_idx].push_back(
qctx.frontend_recv_ns +
options_.models[model_idx].model_session.latency_sla() *
1000000L -
p.expected_finish_time_ns());
earliest_recv_ns = std::min(earliest_recv_ns, qctx.frontend_recv_ns);
}
slack[model_idx].push_back(
earliest_recv_ns +
options_.models[model_idx].model_session.latency_sla() * 1000000L -
p.expected_finish_time_ns());
}
}

Expand Down

0 comments on commit 8dbb8ab

Please sign in to comment.