-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
20e008f
commit ab7f0c9
Showing
11 changed files
with
852 additions
and
14 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
#include "shepherd/common.h" | ||
|
||
#include <gflags/gflags.h> | ||
|
||
using nexus::shepherd::ShepherdConfig; | ||
|
||
// Command-line overrides for the Shepherd scheduler configuration. Every flag
// defaults to the matching field of ShepherdConfig::Default(); the two latency
// flags are expressed in whole microseconds.
//
// duration_cast derives the microsecond default from whatever tick period the
// latency fields use, instead of assuming nanoseconds and dividing the raw
// count by 1000.
DEFINE_uint32(shepherd_dctrl,
              std::chrono::duration_cast<std::chrono::microseconds>(
                  ShepherdConfig::Default().ctrl_latency)
                  .count(),
              "Shepherd: control plane latency in microseconds.");
DEFINE_uint32(shepherd_ddata,
              std::chrono::duration_cast<std::chrono::microseconds>(
                  ShepherdConfig::Default().data_latency)
                  .count(),
              "Shepherd: data plane latency in microseconds.");
DEFINE_double(shepherd_preempt_lambda, ShepherdConfig::Default().preempt_lambda,
              "Shepherd: lambda for preemptive scheduling.");
|
||
namespace nexus::shepherd { | ||
|
||
// Builds a ShepherdConfig from the --shepherd_* command-line flags.
// Both latency flags are interpreted as microseconds.
ShepherdConfig ShepherdConfig::FromFlags() {
  using std::chrono::microseconds;

  ShepherdConfig from_flags;
  from_flags.preempt_lambda = FLAGS_shepherd_preempt_lambda;
  from_flags.data_latency = microseconds(FLAGS_shepherd_ddata);
  from_flags.ctrl_latency = microseconds(FLAGS_shepherd_dctrl);
  return from_flags;
}
|
||
} // namespace nexus::shepherd |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
#pragma once | ||
#include <memory> | ||
#include <unordered_map> | ||
|
||
namespace nexus::shepherd {

class FakeShepherdFrontend;

// Registry that maps a model id to the fake frontend registered for it.
class FakeObjectAccessor {
 public:
  // Returns the frontend registered under `model_id`.
  // Throws std::out_of_range when nothing was registered for that id.
  std::shared_ptr<FakeShepherdFrontend> GetFrontend(int model_id) const {
    return frontends_.at(model_id);
  }

  // Registers `frontend` under `model_id`. When the id is already present the
  // existing entry is kept and `frontend` is dropped.
  void AddFrontend(int model_id,
                   std::shared_ptr<FakeShepherdFrontend> frontend) {
    // `frontend` is a by-value sink parameter: move it into the map rather
    // than paying for a second reference-count increment/decrement.
    frontends_.emplace(model_id, std::move(frontend));
  }

 private:
  std::unordered_map<int, std::shared_ptr<FakeShepherdFrontend>> frontends_;
};

}  // namespace nexus::shepherd
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,127 @@ | ||
#include "shepherd/fake_shepherd_backend.h" | ||
|
||
namespace nexus::shepherd {} // namespace nexus::shepherd | ||
#include <glog/logging.h> | ||
|
||
#include "shepherd/common.h" | ||
#include "shepherd/fake_shepherd_frontend.h" | ||
|
||
namespace nexus::shepherd { | ||
|
||
namespace { | ||
|
||
bool HeapOrderBatchPlanByExecTimeASC(const BatchPlan& lhs, | ||
const BatchPlan& rhs) { | ||
return lhs.exec_at > rhs.exec_at; | ||
} | ||
|
||
bool BatchPlanIntersects(const BatchPlan& a, const BatchPlan& b) { | ||
if (a.finish_at <= b.exec_at) return false; | ||
if (b.finish_at <= a.exec_at) return false; | ||
return true; | ||
} | ||
|
||
} // namespace | ||
|
||
// Creates a fake backend identified by `gpu_id`.
//
// `io_context` drives the completion timer and `accessor` is used to look up
// frontends; both must be non-null (enforced with CHECK_NOTNULL) and are held
// by reference. When `save_archive` is true, every finished plan is kept in
// the archive.
FakeShepherdBackend::FakeShepherdBackend(boost::asio::io_context* io_context,
                                         FakeObjectAccessor* accessor,
                                         int gpu_id, bool save_archive)
    // Initializers follow the member declaration order (save_archive_ is
    // declared before timer_), which is the order in which they actually run.
    : io_context_(*CHECK_NOTNULL(io_context)),
      accessor_(*CHECK_NOTNULL(accessor)),
      gpu_id_(gpu_id),
      save_archive_(save_archive),
      timer_(io_context_) {}
|
||
void FakeShepherdBackend::Stop() { | ||
timer_.cancel(); | ||
DrainBatchPlans(); | ||
} | ||
|
||
// Queues `request` for execution on this fake backend and (re)arms the timer.
//
// CHECK-fails when the plan finishes before it starts, when its execution time
// is already in the past, or when it overlaps a plan that is already queued.
// With Preemption::kYes the plan at the heap front (earliest exec time) is
// removed first; it must exist and must not be past its expected finish time.
// The removed plan is neither reported to its frontend nor archived.
void FakeShepherdBackend::RunBatch(BatchPlan request, Preemption preempt) {
  TimePoint now = Clock::now();
  auto now_ns = now.time_since_epoch().count();

  CHECK_LE(request.exec_time_ns(), request.expected_finish_time_ns())
      << "Incorrect finish time.";
  CHECK_LE(now_ns, request.exec_time_ns()) << "BatchPlan too late.";

  std::lock_guard lock(mutex_);
  if (preempt == Preemption::kYes) {
    CHECK(!batchplans_.empty()) << "Cannot preempt. No current plan.";
    // The plan is only inspected, so bind it by reference instead of copying
    // it while the lock is held. The reference is not used once the heap is
    // modified below.
    const auto& old_plan = batchplans_.front();
    CHECK_LE(now_ns, old_plan.expected_finish_time_ns())
        << "Cannot preempt. Current plan is not running.";
    std::pop_heap(batchplans_.begin(), batchplans_.end(),
                  HeapOrderBatchPlanByExecTimeASC);
    batchplans_.pop_back();
  }

  // The new plan must not overlap any plan that remains queued. Times in the
  // failure message are printed relative to the existing plan's exec time.
  for (const auto& plan : batchplans_) {
    CHECK(!BatchPlanIntersects(plan, request))
        << "Batchplan intersects.\n"
        << "existing plan: exec_time=base"
        << " finish_time=base+"
        << (plan.expected_finish_time_ns() - plan.exec_time_ns()) << "\n"
        << "new plan: exec_time=base+"
        << (request.exec_time_ns() - plan.exec_time_ns())
        << " finish_time=base+"
        << (request.expected_finish_time_ns() - plan.exec_time_ns());
  }
  batchplans_.emplace_back(std::move(request));
  std::push_heap(batchplans_.begin(), batchplans_.end(),
                 HeapOrderBatchPlanByExecTimeASC);
  SetupTimer();
}
|
||
void FakeShepherdBackend::SetupTimer() { | ||
if (!batchplans_.empty()) { | ||
auto finish_at = batchplans_[0].finish_at; | ||
if (timer_.expiry() != finish_at) { | ||
timer_.expires_at(finish_at); | ||
timer_.async_wait([this](boost::system::error_code ec) { OnTimer(ec); }); | ||
} | ||
} | ||
} | ||
|
||
void FakeShepherdBackend::DrainBatchPlans() { | ||
for (auto& plan : batchplans_) { | ||
OnBatchFinish(plan); | ||
SaveBatchPlan(std::move(plan)); | ||
} | ||
batchplans_.clear(); | ||
} | ||
|
||
void FakeShepherdBackend::OnBatchFinish(const BatchPlan& plan) { | ||
auto frontend = accessor_.GetFrontend(plan.model_id); | ||
frontend->GotBatchReply(plan); | ||
} | ||
|
||
// Timer completion handler. Ignores waits that completed with an error;
// otherwise pops every plan whose expected finish time has been reached,
// re-arms the timer for the next plan, and then, outside the lock, reports
// and archives the popped plans.
void FakeShepherdBackend::OnTimer(boost::system::error_code ec) {
  if (ec) return;

  TimePoint now = Clock::now();
  auto now_ns = now.time_since_epoch().count();

  std::vector<BatchPlan> done;
  {
    std::lock_guard guard(mutex_);
    while (!batchplans_.empty() &&
           batchplans_.front().expected_finish_time_ns() <= now_ns) {
      // pop_heap moves the earliest plan to the back, where it is taken out.
      std::pop_heap(batchplans_.begin(), batchplans_.end(),
                    HeapOrderBatchPlanByExecTimeASC);
      done.emplace_back(std::move(batchplans_.back()));
      batchplans_.pop_back();
    }
    SetupTimer();
  }

  for (auto& finished : done) {
    OnBatchFinish(finished);
    SaveBatchPlan(std::move(finished));
  }
}
|
||
// Appends `plan` to the archive; a no-op when archiving is disabled.
void FakeShepherdBackend::SaveBatchPlan(BatchPlan plan) {
  if (!save_archive_) return;
  batchplan_archive_.push_back(std::move(plan));
}
|
||
} // namespace nexus::shepherd |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,11 +1,43 @@ | ||
#pragma once | ||
#include <boost/asio/io_context.hpp> | ||
#include <boost/asio/system_timer.hpp> | ||
#include <boost/system/error_code.hpp> | ||
#include <deque> | ||
#include <vector> | ||
|
||
#include "shepherd/common.h" | ||
#include "shepherd/fake_accessor.h" | ||
|
||
namespace nexus::shepherd { | ||
|
||
class FakeShepherdBackend : public BackendStub { | ||
public: | ||
FakeShepherdBackend(boost::asio::io_context* io_context, | ||
FakeObjectAccessor* accessor, int gpu_id, | ||
bool save_archive); | ||
int gpu_id() const { return gpu_id_; } | ||
const std::deque<BatchPlan>& batchplan_archive() const { | ||
return batchplan_archive_; | ||
} | ||
|
||
void RunBatch(BatchPlan plan, Preemption preempt) override; | ||
void DrainBatchPlans(); | ||
void Stop(); | ||
|
||
private: | ||
void OnBatchFinish(const BatchPlan& plan); | ||
void OnTimer(boost::system::error_code ec); | ||
void SetupTimer(); | ||
void SaveBatchPlan(BatchPlan plan); | ||
|
||
boost::asio::io_context& io_context_; | ||
FakeObjectAccessor& accessor_; | ||
int gpu_id_; | ||
bool save_archive_; | ||
boost::asio::system_timer timer_; | ||
std::mutex mutex_; | ||
std::vector<BatchPlan> batchplans_; | ||
std::deque<BatchPlan> batchplan_archive_; | ||
}; | ||
|
||
} // namespace nexus::shepherd |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,42 @@ | ||
#include "shepherd/fake_shepherd_frontend.h" | ||
|
||
namespace nexus::shepherd {} // namespace nexus::shepherd | ||
namespace nexus::shepherd { | ||
|
||
// Creates a fake frontend for `model_id` with a latency SLO of `slo_ms`
// milliseconds and room for `reserved_size` query contexts, indexed by
// query id.
FakeShepherdFrontend::FakeShepherdFrontend(int model_id, int slo_ms,
                                           size_t workload_idx,
                                           size_t reserved_size)
    : model_id_(model_id),
      slo_ms_(slo_ms),
      workload_idx_(workload_idx),
      reserved_size_(reserved_size),
      // make_unique<T[]> value-initializes the array, so slots that never
      // receive a query read as {kPending, 0} instead of holding
      // indeterminate values (queries() exposes the whole array).
      queries_(std::make_unique<QueryContext[]>(reserved_size)) {}
|
||
void FakeShepherdFrontend::MarkQueryDropped(int query_id) { | ||
auto& qctx = queries_[query_id]; | ||
qctx.status = QueryStatus::kDropped; | ||
++cnt_bad_; | ||
} | ||
|
||
void FakeShepherdFrontend::ReceivedQuery(int query_id, | ||
int64_t frontend_recv_ns) { | ||
auto& qctx = queries_[query_id]; | ||
qctx.status = QueryStatus::kPending; | ||
qctx.frontend_recv_ns = frontend_recv_ns; | ||
++cnt_total_; | ||
} | ||
|
||
void FakeShepherdFrontend::GotBatchReply(const BatchPlan& plan) { | ||
for (auto query_id : plan.query_ids) { | ||
auto& qctx = queries_[query_id]; | ||
auto deadline_ns = qctx.frontend_recv_ns + slo_ms_ * 1000 * 1000; | ||
if (plan.finish_at.time_since_epoch().count() < deadline_ns) { | ||
qctx.status = QueryStatus::kSuccess; | ||
} else { | ||
qctx.status = QueryStatus::kTimeout; | ||
++cnt_bad_; | ||
} | ||
} | ||
} | ||
|
||
} // namespace nexus::shepherd |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,11 +1,45 @@ | ||
#pragma once | ||
#include <cstddef> | ||
#include <cstdint> | ||
#include <memory> | ||
|
||
#include "shepherd/common.h" | ||
|
||
namespace nexus::shepherd { | ||
|
||
class FakeShepherdFrontend : public FrontendStub { | ||
public: | ||
enum class QueryStatus { | ||
kPending, | ||
kDropped, | ||
kTimeout, | ||
kSuccess, | ||
}; | ||
|
||
struct QueryContext { | ||
QueryStatus status; | ||
int64_t frontend_recv_ns; | ||
}; | ||
|
||
FakeShepherdFrontend(int model_id, int slo_ms, size_t workload_idx, | ||
size_t reserved_size); | ||
const QueryContext* queries() const { return queries_.get(); } | ||
size_t reserved_size() const { return reserved_size_; } | ||
size_t cnt_bad() const { return cnt_bad_; } | ||
size_t cnt_total() const { return cnt_total_; } | ||
|
||
void MarkQueryDropped(int query_id) override; | ||
void ReceivedQuery(int query_id, int64_t frontend_recv_ns); | ||
void GotBatchReply(const BatchPlan& plan); | ||
|
||
private: | ||
int model_id_; | ||
int slo_ms_; | ||
size_t workload_idx_; | ||
size_t reserved_size_; | ||
std::unique_ptr<QueryContext[]> queries_; | ||
size_t cnt_bad_ = 0; | ||
size_t cnt_total_ = 0; | ||
}; | ||
|
||
} // namespace nexus::shepherd |
Oops, something went wrong.