Skip to content

Commit

Permalink
i#5843 scheduler: Add better-diagnostic test and launcher output (#6273)
Browse files Browse the repository at this point in the history
Adds printing '.' for every record and '-' for waiting to the scheduler
unit tests and udpates all the expected output. This makes it much
easier to understand some of the results as now the lockstep timing all
lines up.

Adds -print_every to the launcher and switches to printing letters for a
better output of what happened on each core (if #inputs<=26). Example:
```
$ clients/bin64/scheduler_launcher -trace_dir drmemtrace.*.dir/trace -num_cores 4 -sched_quantum 60000 -print_every 5000
Core #0: GGGGGGGGG,HH,F,B,G,I,A,CC,G,BB,A,FF,AA,GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
Core #1: D,C,D,B,H,FF,EE,CC,II,AA,C,D,G,HH,D,G,II,G,I,G,I,G,HH,BB,II,BB,C,H,I,AA,C,F,I,H,II,AA,C,H,A,H,F,CC,DD,C,BB,HH,CC,F,BB,C,D,H,BB,D,B,EE,I,E,DD,B,F,H,A,D,C,D,E,B,D,I,D,AA,E,DD,EE,CC,II,C,D,I,AA,DD,B,E,I,D,C,E,FF,E,BB,EE,FF,E,AA,D,E,DD,H,BB,HH,D,H,BB,I,AA,II,H,A,FF,H,I,HH,DD,I,H,F,DD,I,A,HH,AA,CC,BB,CC,BB,D,B,FF,H,F,D,I,DD,FF,C,A,C,AA,F,AA,EE,A,D,E,FF,AA,F,A,E,A,E,DD,EE,F,E,F,A
Core #2: F,E,F,C,F,H,I,B,HH,II,FF,CC,G,H,DD,E,A,G,H,G,DD,G,F,D,A,H,I,FF,H,C,A,CC,II,A,FF,C,I,F,CC,B,FF,C,B,H,CC,B,D,B,DD,B,F,I,F,II,D,A,DD,I,D,H,E,H,I,D,HH,FF,BB,II,AA,EE,B,A,BB,E,II,A,BB,A,HH,E,AA,E,F,A,DD,HH,F,H,A,E,I,FF,I,B,F,II,A,FF,D,H,DD,I,AA,F,D,FF,AA,D,A,HH,A,H,F,A,FF,C,F,B,F,C,F,AA,B,FF,D,F,DD,B,C,H,CC,B,C,E,D,EE,C,E,D,EE,F,DD,E,F,D,A,DD,E,D,EE,D,E,D,AA,D,A,DD,F,D,C,D
Core #3: E,A,F,A,D,I,DD,BB,AA,BB,DD,G,EE,AA,H,G,D,B,G,B,G,II,F,HH,B,AA,I,B,A,HH,CC,HH,F,A,FF,C,HH,BB,F,D,F,C,FF,H,C,FF,DD,AA,I,B,II,AA,I,A,B,A,F,A,C,I,B,H,A,F,C,A,C,EE,F,D,EE,CC,E,BB,E,DD,E,CC,B,EE,C,EE,B,I,E,D,E,II,H,B,EE,I,EE,B,II,F,EE,A,D,AA,DD,HH,F,A,F,HH,D,A,II,H,F,II,FF,CC,B,AA,F,A,C,FF,D,C,D,CC,B,C,DD,H,I,F,CC,A,F,C,FF,E,A,DD,E,D,A,FF,AA,EE,F,DD,FF,E,F,EE,FF,AA,EEEEEEEEEEEEEEEE
```

Issue: #5843
  • Loading branch information
derekbruening authored Aug 21, 2023
1 parent 1b697e5 commit f316ed1
Show file tree
Hide file tree
Showing 2 changed files with 103 additions and 47 deletions.
49 changes: 35 additions & 14 deletions clients/drcachesim/tests/scheduler_launcher.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,10 @@ droption_t<std::string>
"Path with stored as-traced schedule for replay.");
#endif

droption_t<uint64_t> op_print_every(DROPTION_SCOPE_ALL, "print_every", 5000,
"A letter is printed every N instrs",
"A letter is printed every N instrs");

uint64_t
get_current_microseconds()
{
Expand All @@ -134,12 +138,23 @@ get_current_microseconds()
#endif
}

// Processes the stream of records scheduled on the "ordinal"-th virtual core with
// output stream "steram" and scheduler "scheduler".
// Returns in "thread_sequence" a representation of which inputs ran and for
// how long on the core:
// - The letter 'A' plus the input ordinal % 26 represents that input. A letter
// is printed on each context switch and additionally after each -print_every
// instructions.
// - A comma represents a context switch to a new input.
// - A '-' represents a wait where no input was ready or a scheduler-encorced
// dependence is not yet met.
void
simulate_core(int ordinal, scheduler_t::stream_t *stream, const scheduler_t &scheduler,
std::vector<scheduler_t::input_ordinal_t> &thread_sequence)
std::string &thread_sequence)
{
memref_t record;
uint64_t micros = op_sched_time.get_value() ? get_current_microseconds() : 0;
uint64_t cur_segment_instrs = 0;
// Thread ids can be duplicated, so use the input ordinals to distinguish.
scheduler_t::input_ordinal_t prev_input = scheduler_t::INVALID_INPUT_ORDINAL;
for (scheduler_t::stream_status_t status = stream->next_record(record, micros);
Expand All @@ -148,12 +163,13 @@ simulate_core(int ordinal, scheduler_t::stream_t *stream, const scheduler_t &sch
if (op_sched_time.get_value())
micros = get_current_microseconds();
if (status == scheduler_t::STATUS_WAIT) {
thread_sequence += '-';
std::this_thread::yield();
continue;
}
if (status != scheduler_t::STATUS_OK)
FATAL_ERROR("scheduler failed to advance: %d", status);
if (op_verbose.get_value() >= 3) {
if (op_verbose.get_value() >= 4) {
std::ostringstream line;
line << "Core #" << std::setw(2) << ordinal << " @" << std::setw(9)
<< stream->get_record_ordinal() << " refs, " << std::setw(9)
Expand All @@ -177,11 +193,13 @@ simulate_core(int ordinal, scheduler_t::stream_t *stream, const scheduler_t &sch
std::cerr << line.str();
}
scheduler_t::input_ordinal_t input = stream->get_input_stream_ordinal();
if (thread_sequence.empty())
thread_sequence.push_back(input);
else if (stream->get_input_stream_ordinal() != prev_input) {
thread_sequence.push_back(input);
if (op_verbose.get_value() > 0) {
if (input != prev_input) {
// We convert to letters which only works well for <=26 inputs.
if (!thread_sequence.empty())
thread_sequence += ',';
thread_sequence += 'A' + static_cast<char>(input % 26);
cur_segment_instrs = 0;
if (op_verbose.get_value() >= 2) {
std::ostringstream line;
line
<< "Core #" << std::setw(2) << ordinal << " @" << std::setw(9)
Expand All @@ -205,8 +223,15 @@ simulate_core(int ordinal, scheduler_t::stream_t *stream, const scheduler_t &sch
<< " == thread " << record.instr.tid << "\n";
std::cerr << line.str();
}
prev_input = input;
}
if (type_is_instr(record.instr.type)) {
++cur_segment_instrs;
if (cur_segment_instrs == op_print_every.get_value()) {
thread_sequence += 'A' + static_cast<char>(input % 26);
cur_segment_instrs = 0;
}
}
prev_input = input;
}
}

Expand Down Expand Up @@ -268,8 +293,7 @@ _tmain(int argc, const TCHAR *targv[])
}

std::vector<std::thread> threads;
std::vector<std::vector<scheduler_t::input_ordinal_t>> schedules(
op_num_cores.get_value());
std::vector<std::string> schedules(op_num_cores.get_value());
std::cerr << "Creating " << op_num_cores.get_value() << " simulator threads\n";
threads.reserve(op_num_cores.get_value());
for (int i = 0; i < op_num_cores.get_value(); ++i) {
Expand All @@ -280,10 +304,7 @@ _tmain(int argc, const TCHAR *targv[])
thread.join();

for (int i = 0; i < op_num_cores.get_value(); ++i) {
std::cerr << "Core #" << i << ": ";
for (scheduler_t::input_ordinal_t input : schedules[i])
std::cerr << input << " ";
std::cerr << "\n";
std::cerr << "Core #" << i << ": " << schedules[i] << "\n";
}

#ifdef HAS_ZIP
Expand Down
101 changes: 68 additions & 33 deletions clients/drcachesim/tests/scheduler_unit_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -947,15 +947,37 @@ run_lockstep_simulation(scheduler_t &scheduler, int num_outputs, memref_tid_t ti
eof[i] = true;
continue;
}
if (status == scheduler_t::STATUS_WAIT)
if (status == scheduler_t::STATUS_WAIT) {
sched_as_string[i] += '-';
continue;
}
assert(status == scheduler_t::STATUS_OK);
if (type_is_instr(memref.instr.type)) {
sched_as_string[i] +=
'A' + static_cast<char>(memref.instr.tid - tid_base);
} else {
// While this makes the string longer, it is just too confusing
// with the same letter seemingly on 2 cores at once without these
// fillers to line everything up in time.
sched_as_string[i] += '.';
}
}
}
// Ensure we never see the same output on multiple cores in the same timestep.
size_t max_size = 0;
for (int i = 0; i < num_outputs; ++i)
max_size = std::max(max_size, sched_as_string[i].size());
for (int step = 0; step < static_cast<int>(max_size); ++step) {
std::set<char> inputs;
for (int out = 0; out < num_outputs; ++out) {
if (static_cast<int>(sched_as_string[out].size()) <= step)
continue;
if (sched_as_string[out][step] < 'A' || sched_as_string[out][step] > 'Z')
continue;
assert(inputs.find(sched_as_string[out][step]) == inputs.end());
inputs.insert(sched_as_string[out][step]);
}
}
return sched_as_string;
}

Expand All @@ -980,8 +1002,9 @@ test_synthetic()
// Hardcoding here for the 2 outputs and 7 inputs.
// We expect 3 letter sequences (our quantum) alternating every-other as each
// core alternates; with an odd number the 2nd core finishes early.
static const char *const CORE0_SCHED_STRING = "AAACCCEEEGGGBBBDDDFFFAAACCCEEEGGG";
static const char *const CORE1_SCHED_STRING = "BBBDDDFFFAAACCCEEEGGGBBBDDDFFF";
// The dots are thread exits.
static const char *const CORE0_SCHED_STRING = "AAACCCEEEGGGBBBDDDFFFAAA.CCC.EEE.GGG.";
static const char *const CORE1_SCHED_STRING = "BBBDDDFFFAAACCCEEEGGGBBB.DDD.FFF.";
{
// Test instruction quanta.
std::vector<scheduler_t::input_workload_t> sched_inputs;
Expand Down Expand Up @@ -1090,7 +1113,11 @@ test_synthetic_time_quanta()
trace_type_t expect_type = TRACE_TYPE_READ) {
memref_t memref;
scheduler_t::stream_status_t status = stream->next_record(memref, time);
assert(status == expect_status);
if (status != expect_status) {
std::cerr << "Expected status " << expect_status << " != " << status
<< "\n";
assert(false);
}
if (status == scheduler_t::STATUS_OK) {
if (memref.marker.tid != expect_tid) {
std::cerr << "Expected tid " << expect_tid
Expand Down Expand Up @@ -1161,8 +1188,8 @@ test_synthetic_time_quanta()
for (int i = 0; i < NUM_OUTPUTS; i++) {
std::cerr << "cpu #" << i << " schedule: " << sched_as_string[i] << "\n";
}
assert(sched_as_string[0] == "ACCC");
assert(sched_as_string[1] == "BAABB");
assert(sched_as_string[0] == "..A..CCC.");
assert(sched_as_string[1] == "..BAA.BB.");
}
#endif
}
Expand Down Expand Up @@ -1240,8 +1267,10 @@ test_synthetic_with_timestamps()
// workloads we should start with {C,F,I,J} and then move on to {B,E,H} and finish
// with {A,D,G}. We should interleave within each group -- except once we reach J
// we should completely finish it.
assert(sched_as_string[0] == "CCCIIICCCFFFIIIFFFBBBHHHEEEBBBHHHDDDAAAGGGDDD");
assert(sched_as_string[1] == "FFFJJJJJJJJJCCCIIIEEEBBBHHHEEEAAAGGGDDDAAAGGG");
assert(sched_as_string[0] ==
".CC.C.II.IC.CC.F.FF.I.II.FF.F..BB.B.HH.HE.EE.BB.B.HH.H..DD.DA.AA.G.GG.DD.D.");
assert(sched_as_string[1] ==
".FF.F.JJ.JJ.JJ.JJ.J.CC.C.II.I..EE.EB.BB.H.HH.EE.E..AA.A.GG.GD.DD.AA.A.GG.G.");
}

static void
Expand Down Expand Up @@ -1321,8 +1350,10 @@ test_synthetic_with_priorities()
// See the test_synthetic_with_timestamps() test which has our base sequence.
// We've elevated B, E, and H to higher priorities so they go
// first. J remains uninterrupted due to lower timestamps.
assert(sched_as_string[0] == "BBBHHHEEEBBBHHHFFFJJJJJJJJJCCCIIIDDDAAAGGGDDD");
assert(sched_as_string[1] == "EEEBBBHHHEEECCCIIICCCFFFIIIFFFAAAGGGDDDAAAGGG");
assert(sched_as_string[0] ==
".BB.B.HH.HE.EE.BB.B.HH.H..FF.F.JJ.JJ.JJ.JJ.J.CC.C.II.I..DD.DA.AA.G.GG.DD.D.");
assert(sched_as_string[1] ==
".EE.EB.BB.H.HH.EE.E..CC.C.II.IC.CC.F.FF.I.II.FF.F..AA.A.GG.GD.DD.AA.A.GG.G.");
}

static void
Expand Down Expand Up @@ -1386,11 +1417,11 @@ test_synthetic_with_bindings()
std::cerr << "cpu #" << i << " schedule: " << sched_as_string[i] << "\n";
}
// We have {A,B,C} on {2,4}, {D,E,F} on {0,1}, and {G,H,I} on {1,2,3}:
assert(sched_as_string[0] == "DDDFFFDDDFFFDDDFFF");
assert(sched_as_string[1] == "EEEHHHEEEIIIEEE");
assert(sched_as_string[2] == "AAACCCGGGCCCHHHCCC");
assert(sched_as_string[3] == "GGGIIIHHHGGGIII");
assert(sched_as_string[4] == "BBBAAABBBAAABBB");
assert(sched_as_string[0] == ".DD.D.FF.FD.DD.F.FF.DD.D.FF.F.");
assert(sched_as_string[1] == ".EE.E.HH.HE.EE.I.II.EE.E.");
assert(sched_as_string[2] == ".AA.A.CC.CG.GG.C.CC.HH.H.CC.C.");
assert(sched_as_string[3] == ".GG.G.II.IH.HH.GG.G.II.I.");
assert(sched_as_string[4] == ".BB.BA.AA.B.BB.AA.A.BB.B.");
}

static void
Expand Down Expand Up @@ -1457,11 +1488,11 @@ test_synthetic_with_bindings_weighted()
std::cerr << "cpu #" << i << " schedule: " << sched_as_string[i] << "\n";
}
// We have {A,B,C} on {2,4}, {D,E,F} on {0,1}, and {G,H,I} on {1,2,3}:
assert(sched_as_string[0] == "FFFFFFFFFEEEEEEEEE");
assert(sched_as_string[1] == "IIIIIIIIIDDDDDDDDD");
assert(sched_as_string[2] == "CCCCCCCCCAAAAAAAAA");
assert(sched_as_string[3] == "HHHHHHHHHGGGGGGGGG");
assert(sched_as_string[4] == "BBBBBBBBB");
assert(sched_as_string[0] == ".FF.FF.FF.FF.F..EE.EE.EE.EE.E.");
assert(sched_as_string[1] == ".II.II.II.II.I..DD.DD.DD.DD.D.");
assert(sched_as_string[2] == ".CC.CC.CC.CC.C..AA.AA.AA.AA.A.");
assert(sched_as_string[3] == ".HH.HH.HH.HH.H..GG.GG.GG.GG.G.");
assert(sched_as_string[4] == ".BB.BB.BB.BB.B.");
}

static void
Expand Down Expand Up @@ -1548,10 +1579,12 @@ test_synthetic_with_syscalls_multiple()
// quantum.) Furthermore, B isn't finished by the time E and H are done and we see
// the lower-priority C and F getting scheduled while B is in a wait state for its
// blocking syscall.
// Note that the 3rd B is not really on the two cores at the same time as there are
// markers and other records that cause them to not actually line up.
assert(sched_as_string[0] == "BHHHBHHHBHHHBEBIIIJJJJJJJJJCCCIIIDDDAAAGGGDDD");
assert(sched_as_string[1] == "EEBEEBEEBEECCCFFFBCCCFFFIIIFFFAAAGGGDDDAAAGGG");
assert(sched_as_string[0] ==
".B..HH.H.B.H.HH..B.HH.H..B.E.B...II.I.JJ.JJ.JJ.JJ.J.CC.C.II.I..DD.DA.AA.G.GG."
"DD.D.");
assert(sched_as_string[1] ==
".EE..B..EE..B..EE..B..EE...CC.C.FF.FB..C.CC.F.FF.I.II.FF.F..AA.A.GG.GD.DD.AA."
"A.GG.G.");
}

static void
Expand Down Expand Up @@ -1613,7 +1646,7 @@ test_synthetic_with_syscalls_single()
for (int i = 0; i < NUM_OUTPUTS; i++) {
std::cerr << "cpu #" << i << " schedule: " << sched_as_string[i] << "\n";
}
assert(sched_as_string[0] == "AAAAAAAAA");
assert(sched_as_string[0] == ".AA..AA.A.A.AA..A.");
assert(sched_as_string[1] == "");
}

Expand Down Expand Up @@ -1695,6 +1728,8 @@ test_synthetic_with_syscalls_precise()
std::vector<memref_t> refs;
for (scheduler_t::stream_status_t status = stream->next_record(memref);
status != scheduler_t::STATUS_EOF; status = stream->next_record(memref)) {
if (status == scheduler_t::STATUS_WAIT)
continue;
assert(status == scheduler_t::STATUS_OK);
refs.push_back(memref);
}
Expand Down Expand Up @@ -1955,8 +1990,8 @@ test_replay()
// For our 2 outputs and 7 inputs:
// We expect 3 letter sequences (our quantum) alternating every-other as each
// core alternates; with an odd number the 2nd core finishes early.
static const char *const CORE0_SCHED_STRING = "AAACCCEEEGGGBBBDDDFFFAAACCCEEEGGG";
static const char *const CORE1_SCHED_STRING = "BBBDDDFFFAAACCCEEEGGGBBBDDDFFF";
static const char *const CORE0_SCHED_STRING = "AAACCCEEEGGGBBBDDDFFFAAA.CCC.EEE.GGG.";
static const char *const CORE1_SCHED_STRING = "BBBDDDFFFAAACCCEEEGGGBBB.DDD.FFF.";

static constexpr memref_tid_t TID_BASE = 100;
std::vector<trace_entry_t> inputs[NUM_INPUTS];
Expand Down Expand Up @@ -2245,8 +2280,8 @@ test_replay_timestamps()

// Create a record file with timestamps requiring waiting.
// We cooperate with the test_scheduler_t class which constructs this schedule:
static const char *const CORE0_SCHED_STRING = "AAACCC";
static const char *const CORE1_SCHED_STRING = "BBBCCCCCCDDDAAABBBDDDAAABBBDDD";
static const char *const CORE0_SCHED_STRING = ".AAA-------------------------CCC.";
static const char *const CORE1_SCHED_STRING = ".BBB.CCCCCC.DDDAAABBBDDDAAA.BBB.DDD.";
std::string record_fname = "tmp_test_replay_timestamp.zip";
test_scheduler_t test_scheduler;
test_scheduler.write_test_schedule(record_fname);
Expand Down Expand Up @@ -2456,8 +2491,8 @@ test_replay_as_traced()

// Synthesize a cpu-schedule file.
std::string cpu_fname = "tmp_test_cpu_as_traced.zip";
static const char *const CORE0_SCHED_STRING = "EEEAAACCCAAACCCBBBDDD";
static const char *const CORE1_SCHED_STRING = "EEEBBBDDDBBBDDDAAACCC";
static const char *const CORE0_SCHED_STRING = "EEE-AAA-CCCAAACCCBBB.DDD.";
static const char *const CORE1_SCHED_STRING = "---EEE.BBBDDDBBBDDDAAA.CCC.";
{
std::vector<schedule_entry_t> sched0;
sched0.emplace_back(TID_BASE + 4, 10, CPU0, 0);
Expand Down Expand Up @@ -2797,8 +2832,8 @@ test_inactive()
for (int i = 0; i < NUM_OUTPUTS; i++) {
std::cerr << "cpu #" << i << " schedule: " << sched_as_string[i] << "\n";
}
assert(sched_as_string[0] == "AABBA");
assert(sched_as_string[1] == "BB");
assert(sched_as_string[0] == "..AABBA.");
assert(sched_as_string[1] == "..B---B.");
}
#endif // HAS_ZIP
}
Expand Down

0 comments on commit f316ed1

Please sign in to comment.