From 4a0e197fb2fcd739c66afd6ce8c32d38a92b5ac3 Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Mon, 26 Apr 2021 15:48:36 +0300 Subject: [PATCH] Reliable ETA and progress percentage. This has been bugging me for *years*. :) Count of finished edges isn't a great statistic, it isn't really obvious if LLVM will take 8 minues to build, or 10 minutes. But, it's actually pretty straight-forward to get some more useful information. We already know how much time each edge has taken, so we could just do the dumb thing, and assume that every edge in the plan takes the same amount of time. Or, we can do better. `.ninja_log` already contains the historical data on how long each edge took to produce it's outs, so we simply need to ensure that we populate edges with that info, and then we can greatly improve our predictions. The math is pretty simple i think. This is largely a port of a similar change i did to LLVM LIT: https://reviews.llvm.org/D99073 With this, i get something quite lovely: ``` llvm-project/build-Clang12$ NINJA_STATUS="[%f/%t %p %P][%w + %W] " /repositories/ninja/build-Clang-debug/ninja opt [288/2527 11% 4%][00:27 + 08:52] Building CXX object lib/DebugInfo/CodeView/CMakeFiles/LLVMDebugInfoCodeView.dir/AppendingTypeTableBuilder.cpp.o ``` I hope people will find this useful, and it could be merged. --- doc/manual.asciidoc | 6 +- src/build.cc | 22 +++-- src/build_test.cc | 23 ++++- src/graph.h | 4 + src/ninja.cc | 19 ++++ src/status.cc | 206 +++++++++++++++++++++++++++++++++++++++++--- src/status.h | 47 ++++++++-- src/status_test.cc | 5 +- 8 files changed, 294 insertions(+), 38 deletions(-) diff --git a/doc/manual.asciidoc b/doc/manual.asciidoc index 214dca4a57..d33b26a205 100644 --- a/doc/manual.asciidoc +++ b/doc/manual.asciidoc @@ -204,7 +204,11 @@ Several placeholders are available: `%o`:: Overall rate of finished edges per second `%c`:: Current rate of finished edges per second (average over builds specified by `-j` or its default) -`%e`:: Elapsed time in seconds. _(Available since Ninja 1.2.)_ +`%e`:: Elapsed time in seconds. _(Available since Ninja 1.2.)_ +`%E`:: Remaining time (ETA) in seconds. _(Available since Ninja 1.12.0.)_ +`%w`:: Elapsed time in [h:]mm:ss format. _(Available since Ninja 1.12.)_ +`%W`:: Remaining time (ETA) in [h:]mm:ss format. _(Available since Ninja 1.12.0.)_ +`%P`:: The percentage (in ppp% format) of time elapsed out of predicted total runtime. _(Available since Ninja 1.12.0.)_ `%%`:: A plain `%` character. The default progress status is `"[%f/%t] "` (note the trailing space diff --git a/src/build.cc b/src/build.cc index 76ff93af03..dcda6b81fe 100644 --- a/src/build.cc +++ b/src/build.cc @@ -144,8 +144,11 @@ bool Plan::AddSubTarget(const Node* node, const Node* dependent, string* err, void Plan::EdgeWanted(const Edge* edge) { ++wanted_edges_; - if (!edge->is_phony()) + if (!edge->is_phony()) { ++command_edges_; + if (builder_) + builder_->status_->EdgeAddedToPlan(edge); + } } Edge* Plan::FindWork() { @@ -294,8 +297,11 @@ bool Plan::CleanNode(DependencyScan* scan, Node* node, string* err) { want_e->second = kWantNothing; --wanted_edges_; - if (!(*oe)->is_phony()) + if (!(*oe)->is_phony()) { --command_edges_; + if (builder_) + builder_->status_->EdgeRemovedFromPlan(*oe); + } } } } @@ -607,7 +613,6 @@ bool Builder::AlreadyUpToDate() const { bool Builder::Build(string* err) { assert(!AlreadyUpToDate()); - status_->PlanHasTotalEdges(plan_.command_edge_count()); int pending_commands = 0; int failures_allowed = config_.failures_allowed; @@ -780,8 +785,8 @@ bool Builder::FinishCommand(CommandRunner::Result* result, string* err) { end_time_millis = GetTimeMillis() - start_time_millis_; running_edges_.erase(it); - status_->BuildEdgeFinished(edge, end_time_millis, result->success(), - result->output); + status_->BuildEdgeFinished(edge, start_time_millis, end_time_millis, + result->success(), result->output); // The rest of this function only applies to successful commands. if (!result->success()) { @@ -821,10 +826,6 @@ bool Builder::FinishCommand(CommandRunner::Result* result, string* err) { } if (node_cleaned) { record_mtime = edge->command_start_time_; - - // The total number of edges in the plan may have changed as a result - // of a restat. - status_->PlanHasTotalEdges(plan_.command_edge_count()); } } @@ -938,8 +939,5 @@ bool Builder::LoadDyndeps(Node* node, string* err) { if (!plan_.DyndepsLoaded(&scan_, node, ddf, err)) return false; - // New command edges may have been added to the plan. - status_->PlanHasTotalEdges(plan_.command_edge_count()); - return true; } diff --git a/src/build_test.cc b/src/build_test.cc index 3908761057..04bdf4eece 100644 --- a/src/build_test.cc +++ b/src/build_test.cc @@ -2197,11 +2197,28 @@ TEST_F(BuildTest, DepsGccWithEmptyDepfileErrorsOut) { ASSERT_EQ(1u, command_runner_.commands_ran_.size()); } -TEST_F(BuildTest, StatusFormatElapsed) { +TEST_F(BuildTest, StatusFormatElapsed_e) { status_.BuildStarted(); // Before any task is done, the elapsed time must be zero. - EXPECT_EQ("[%/e0.000]", - status_.FormatProgressStatus("[%%/e%e]", 0)); + EXPECT_EQ("[%/e0.000]", status_.FormatProgressStatus("[%%/e%e]", 0)); +} + +TEST_F(BuildTest, StatusFormatElapsed_w) { + status_.BuildStarted(); + // Before any task is done, the elapsed time must be zero. + EXPECT_EQ("[%/e00:00]", status_.FormatProgressStatus("[%%/e%w]", 0)); +} + +TEST_F(BuildTest, StatusFormatETA) { + status_.BuildStarted(); + // Before any task is done, the ETA time must be unknown. + EXPECT_EQ("[%/E?]", status_.FormatProgressStatus("[%%/E%E]", 0)); +} + +TEST_F(BuildTest, StatusFormatTimeProgress) { + status_.BuildStarted(); + // Before any task is done, the percentage of elapsed time must be zero. + EXPECT_EQ("[%/p 0%]", status_.FormatProgressStatus("[%%/p%p]", 0)); } TEST_F(BuildTest, StatusFormatReplacePlaceholder) { diff --git a/src/graph.h b/src/graph.h index d07a9b7639..511438cf0a 100644 --- a/src/graph.h +++ b/src/graph.h @@ -250,6 +250,10 @@ struct Edge { bool is_phony() const; bool use_console() const; bool maybe_phonycycle_diagnostic() const; + + // Historical info: how long did this edge take last time, + // as per .ninja_log, if known? Defaults to -1 if unknown. + int64_t prev_elapsed_time_millis = -1; }; struct EdgeCmp { diff --git a/src/ninja.cc b/src/ninja.cc index 9ae53deb7d..9dd1517c65 100644 --- a/src/ninja.cc +++ b/src/ninja.cc @@ -156,6 +156,10 @@ struct NinjaMain : public BuildLogUser { /// @return true if the manifest was rebuilt. bool RebuildManifest(const char* input_file, string* err, Status* status); + /// For each edge, lookup in build log how long it took last time, + /// and record that in the edge itself. It will be used for ETA predicton. + void ParsePreviousElapsedTimes(); + /// Build the targets listed on the command line. /// @return an exit code. int RunBuild(int argc, char** argv, Status* status); @@ -289,6 +293,19 @@ bool NinjaMain::RebuildManifest(const char* input_file, string* err, return true; } +void NinjaMain::ParsePreviousElapsedTimes() { + for (Edge* edge : state_.edges_) { + for (Node* out : edge->outputs_) { + BuildLog::LogEntry* log_entry = build_log_.LookupByOutput(out->path()); + if (!log_entry) + continue; // Maybe we'll have log entry for next output of this edge? + edge->prev_elapsed_time_millis = + log_entry->end_time - log_entry->start_time; + break; // Onto next edge. + } + } +} + Node* NinjaMain::CollectTarget(const char* cpath, string* err) { string path = cpath; if (path.empty()) { @@ -1588,6 +1605,8 @@ NORETURN void real_main(int argc, char** argv) { exit(1); } + ninja.ParsePreviousElapsedTimes(); + int result = ninja.RunBuild(argc, argv, status); if (g_metrics) ninja.DumpMetrics(); diff --git a/src/status.cc b/src/status.cc index 88b77815b3..d5e71902f7 100644 --- a/src/status.cc +++ b/src/status.cc @@ -14,6 +14,15 @@ #include "status.h" +#ifdef _WIN32 +#include "win32port.h" +#else +#ifndef __STDC_FORMAT_MACROS +#define __STDC_FORMAT_MACROS +#endif +#include +#endif + #include #include @@ -27,11 +36,9 @@ using namespace std; StatusPrinter::StatusPrinter(const BuildConfig& config) - : config_(config), - started_edges_(0), finished_edges_(0), total_edges_(0), running_edges_(0), - time_millis_(0), progress_status_format_(NULL), + : config_(config), started_edges_(0), finished_edges_(0), total_edges_(0), + running_edges_(0), progress_status_format_(NULL), current_rate_(config.parallelism) { - // Don't do anything fancy in verbose mode. if (config_.verbosity != BuildConfig::NORMAL) printer_.set_smart_terminal(false); @@ -41,8 +48,32 @@ StatusPrinter::StatusPrinter(const BuildConfig& config) progress_status_format_ = "[%f/%t] "; } -void StatusPrinter::PlanHasTotalEdges(int total) { - total_edges_ = total; +void StatusPrinter::EdgeAddedToPlan(const Edge* edge) { + ++total_edges_; + + // Do we know how long did this edge take last time? + if (edge->prev_elapsed_time_millis != -1) { + ++eta_predictable_edges_total_; + ++eta_predictable_edges_remaining_; + eta_predictable_cpu_time_total_millis_ += edge->prev_elapsed_time_millis; + eta_predictable_cpu_time_remaining_millis_ += + edge->prev_elapsed_time_millis; + } else + ++eta_unpredictable_edges_remaining_; +} + +void StatusPrinter::EdgeRemovedFromPlan(const Edge* edge) { + --total_edges_; + + // Do we know how long did this edge take last time? + if (edge->prev_elapsed_time_millis != -1) { + --eta_predictable_edges_total_; + --eta_predictable_edges_remaining_; + eta_predictable_cpu_time_total_millis_ -= edge->prev_elapsed_time_millis; + eta_predictable_cpu_time_remaining_millis_ -= + edge->prev_elapsed_time_millis; + } else + --eta_unpredictable_edges_remaining_; } void StatusPrinter::BuildEdgeStarted(const Edge* edge, @@ -58,11 +89,102 @@ void StatusPrinter::BuildEdgeStarted(const Edge* edge, printer_.SetConsoleLocked(true); } -void StatusPrinter::BuildEdgeFinished(Edge* edge, int64_t end_time_millis, - bool success, const string& output) { +void StatusPrinter::RecalculateProgressPrediction() { + time_predicted_percentage_ = 0.0; + + // Sometimes, the previous and actual times may be wildly different. + // For example, the previous build may have been fully recovered from ccache, + // so it was blazing fast, while the new build no longer gets hits from ccache + // for whatever reason, so it actually compiles code, which takes much longer. + // We should detect such cases, and avoid using "wrong" previous times. + + // Note that we will only use the previous times if there are edges with + // previous time knowledge remaining. + bool use_previous_times = eta_predictable_edges_remaining_ && + eta_predictable_cpu_time_remaining_millis_; + + // Iff we have sufficient statistical information for the current run, + // that is, if we have took at least 15 sec AND finished at least 5% of edges, + // we can check whether our performance so far matches the previous one. + if (use_previous_times && total_edges_ && finished_edges_ && + (time_millis_ >= 15 * 1e3) && + (((double)finished_edges_ / total_edges_) >= 0.05)) { + // Over the edges we've just run, how long did they take on average? + double actual_average_cpu_time_millis = + (double)cpu_time_millis_ / finished_edges_; + // What is the previous average, for the edges with such knowledge? + double previous_average_cpu_time_millis = + (double)eta_predictable_cpu_time_total_millis_ / + eta_predictable_edges_total_; + + double ratio = std::max(previous_average_cpu_time_millis, + actual_average_cpu_time_millis) / + std::min(previous_average_cpu_time_millis, + actual_average_cpu_time_millis); + + // Let's say that the average times should differ by less than 10x + use_previous_times = ratio < 10; + } + + int edges_with_known_runtime = finished_edges_; + if (use_previous_times) + edges_with_known_runtime += eta_predictable_edges_remaining_; + if (edges_with_known_runtime == 0) + return; + + int edges_with_unknown_runtime = use_previous_times + ? eta_unpredictable_edges_remaining_ + : (total_edges_ - finished_edges_); + + // Given the time elapsed on the edges we've just run, + // and the runtime of the edges for which we know previous runtime, + // what's the edge's average runtime? + int64_t edges_known_runtime_total_millis = cpu_time_millis_; + if (use_previous_times) + edges_known_runtime_total_millis += + eta_predictable_cpu_time_remaining_millis_; + + double average_cpu_time_millis = + (double)edges_known_runtime_total_millis / edges_with_known_runtime; + + // For the edges for which we do not have the previous runtime, + // let's assume that their average runtime is the same as for the other edges, + // and we therefore can predict their remaining runtime. + double unpredictable_cpu_time_remaining_millis = + average_cpu_time_millis * edges_with_unknown_runtime; + + // And therefore we can predict the remaining and total runtimes. + double total_cpu_time_remaining_millis = + unpredictable_cpu_time_remaining_millis; + if (use_previous_times) + total_cpu_time_remaining_millis += + eta_predictable_cpu_time_remaining_millis_; + double total_cpu_time_millis = + cpu_time_millis_ + total_cpu_time_remaining_millis; + if (total_cpu_time_millis == 0.0) + return; + + // After that we can tell how much work we've completed, in time units. + time_predicted_percentage_ = cpu_time_millis_ / total_cpu_time_millis; +} + +void StatusPrinter::BuildEdgeFinished(Edge* edge, int64_t start_time_millis, + int64_t end_time_millis, bool success, + const string& output) { time_millis_ = end_time_millis; ++finished_edges_; + int64_t elapsed = end_time_millis - start_time_millis; + cpu_time_millis_ += elapsed; + + // Do we know how long did this edge take last time? + if (edge->prev_elapsed_time_millis != -1) { + --eta_predictable_edges_remaining_; + eta_predictable_cpu_time_remaining_millis_ -= + edge->prev_elapsed_time_millis; + } else + --eta_unpredictable_edges_remaining_; + if (edge->use_console()) printer_.SetConsoleLocked(false); @@ -201,16 +323,74 @@ string StatusPrinter::FormatProgressStatus(const char* progress_status_format, out += buf; break; - // Percentage + // Percentage of edges completed case 'p': { - int percent = (100 * finished_edges_) / total_edges_; + int percent = 0; + if (finished_edges_ != 0 && total_edges_ != 0) + percent = (100 * finished_edges_) / total_edges_; snprintf(buf, sizeof(buf), "%3i%%", percent); out += buf; break; } - case 'e': { - snprintf(buf, sizeof(buf), "%.3f", time_millis_ / 1e3); +#define FORMAT_TIME_HMMSS(t) \ + "%" PRId64 ":%02" PRId64 ":%02" PRId64 "", (t) / 3600, ((t) % 3600) / 60, \ + (t) % 60 +#define FORMAT_TIME_MMSS(t) "%02" PRId64 ":%02" PRId64 "", (t) / 60, (t) % 60 + + // Wall time + case 'e': // elapsed, seconds + case 'w': // elapsed, human-readable + case 'E': // ETA, seconds + case 'W': // ETA, human-readable + { + // Acquisition. + double sec = -1; + switch (*s) { + case 'e': // elapsed, seconds + case 'w': // elapsed, human-readable + sec = time_millis_ / 1e3; + break; + case 'E': // ETA, seconds + case 'W': // ETA, human-readable + if (time_predicted_percentage_ == 0.0) + sec = -1; // To be printed as "?". + else { + // So, we know that we've spent time_millis_ wall clock, + // and that is time_predicted_percentage_ percent. + // How much time will we need to complete 100%? + double total_wall_time = time_millis_ / time_predicted_percentage_; + // Naturally, that gives us the time remaining. + sec = (total_wall_time - time_millis_) / 1e3; + } + break; + } + // Display, + if (sec < 0) + snprintf(buf, sizeof(buf), "?"); + else { + switch (*s) { + case 'e': // elapsed, seconds + case 'E': // ETA, seconds + snprintf(buf, sizeof(buf), "%.3f", sec); + break; + case 'w': // elapsed, human-readable + case 'W': // ETA, human-readable + if (sec >= 60 * 60) + snprintf(buf, sizeof(buf), FORMAT_TIME_HMMSS((int64_t)sec)); + else + snprintf(buf, sizeof(buf), FORMAT_TIME_MMSS((int64_t)sec)); + break; + } + } + out += buf; + break; + } + + // Percentage of time spent out of the predicted time total + case 'P': { + snprintf(buf, sizeof(buf), "%3i%%", + (int)(100. * time_predicted_percentage_)); out += buf; break; } @@ -232,6 +412,8 @@ void StatusPrinter::PrintStatus(const Edge* edge, int64_t time_millis) { || config_.verbosity == BuildConfig::NO_STATUS_UPDATE) return; + RecalculateProgressPrediction(); + bool force_full_command = config_.verbosity == BuildConfig::VERBOSE; string to_print = edge->GetBinding("description"); diff --git a/src/status.h b/src/status.h index b2e50ea1aa..a1a8fddeae 100644 --- a/src/status.h +++ b/src/status.h @@ -24,10 +24,13 @@ /// Abstract interface to object that tracks the status of a build: /// completion fraction, printing updates. struct Status { - virtual void PlanHasTotalEdges(int total) = 0; - virtual void BuildEdgeStarted(const Edge* edge, int64_t start_time_millis) = 0; - virtual void BuildEdgeFinished(Edge* edge, int64_t end_time_millis, - bool success, const std::string& output) = 0; + virtual void EdgeAddedToPlan(const Edge* edge) = 0; + virtual void EdgeRemovedFromPlan(const Edge* edge) = 0; + virtual void BuildEdgeStarted(const Edge* edge, + int64_t start_time_millis) = 0; + virtual void BuildEdgeFinished(Edge* edge, int64_t start_time_millis, + int64_t end_time_millis, bool success, + const std::string& output) = 0; virtual void BuildLoadDyndeps() = 0; virtual void BuildStarted() = 0; virtual void BuildFinished() = 0; @@ -43,10 +46,15 @@ struct Status { /// human-readable strings to stdout struct StatusPrinter : Status { explicit StatusPrinter(const BuildConfig& config); - virtual void PlanHasTotalEdges(int total); + + /// Callbacks for the Plan to notify us about adding/removing Edge's. + virtual void EdgeAddedToPlan(const Edge* edge); + virtual void EdgeRemovedFromPlan(const Edge* edge); + virtual void BuildEdgeStarted(const Edge* edge, int64_t start_time_millis); - virtual void BuildEdgeFinished(Edge* edge, int64_t end_time_millis, - bool success, const std::string& output); + virtual void BuildEdgeFinished(Edge* edge, int64_t start_time_millis, + int64_t end_time_millis, bool success, + const std::string& output); virtual void BuildLoadDyndeps(); virtual void BuildStarted(); virtual void BuildFinished(); @@ -71,7 +79,30 @@ struct StatusPrinter : Status { const BuildConfig& config_; int started_edges_, finished_edges_, total_edges_, running_edges_; - int64_t time_millis_; + + /// How much wall clock elapsed so far? + int64_t time_millis_ = 0; + + /// How much cpu clock elapsed so far? + int64_t cpu_time_millis_ = 0; + + /// What percentage of predicted total time have elapsed already? + double time_predicted_percentage_ = 0.0; + + /// Out of all the edges, for how many do we know previous time? + int eta_predictable_edges_total_ = 0; + /// And how much time did they all take? + int64_t eta_predictable_cpu_time_total_millis_ = 0; + + /// Out of all the non-finished edges, for how many do we know previous time? + int eta_predictable_edges_remaining_ = 0; + /// And how much time will they all take? + int64_t eta_predictable_cpu_time_remaining_millis_ = 0; + + /// For how many edges we don't know the previous run time? + int eta_unpredictable_edges_remaining_ = 0; + + void RecalculateProgressPrediction(); /// Prints progress output. LinePrinter printer_; diff --git a/src/status_test.cc b/src/status_test.cc index 6e42490ab1..411d3ed03d 100644 --- a/src/status_test.cc +++ b/src/status_test.cc @@ -22,8 +22,9 @@ TEST(StatusTest, StatusFormatElapsed) { status.BuildStarted(); // Before any task is done, the elapsed time must be zero. - EXPECT_EQ("[%/e0.000]", - status.FormatProgressStatus("[%%/e%e]", 0)); + EXPECT_EQ("[%/e0.000]", status.FormatProgressStatus("[%%/e%e]", 0)); + // Before any task is done, the elapsed time must be zero. + EXPECT_EQ("[%/e00:00]", status.FormatProgressStatus("[%%/e%w]", 0)); } TEST(StatusTest, StatusFormatReplacePlaceholder) {