Skip to content

Commit

Permalink
Merge branch 'master' into i6662-public-record-filter
Browse files Browse the repository at this point in the history
  • Loading branch information
edeiana authored May 6, 2024
2 parents bc7e4a0 + 3e1ec2f commit 82a113b
Show file tree
Hide file tree
Showing 9 changed files with 752 additions and 81 deletions.
334 changes: 322 additions & 12 deletions api/docs/scatter_gather_emulation.dox

Large diffs are not rendered by default.

41 changes: 29 additions & 12 deletions clients/drcachesim/analyzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -571,26 +571,34 @@ analyzer_tmpl_t<RecordType, ReaderType>::process_serial(analyzer_worker_data_t &
template <typename RecordType, typename ReaderType>
bool
analyzer_tmpl_t<RecordType, ReaderType>::process_shard_exit(
analyzer_worker_data_t *worker, int shard_index)
analyzer_worker_data_t *worker, int shard_index, bool do_process_final_interval)
{
VPRINT(this, 1, "Worker %d finished trace shard %s\n", worker->index,
worker->stream->get_stream_name().c_str());
worker->shard_data[shard_index].exited = true;
if ((interval_microseconds_ != 0 || interval_instr_count_ != 0) &&
(!process_interval(worker->shard_data[shard_index].cur_interval_index,
worker->shard_data[shard_index].cur_interval_init_instr_count,
worker,
/*parallel=*/true, /*at_instr_record=*/false, shard_index) ||
!finalize_interval_snapshots(worker, /*parallel=*/true, shard_index)))
return false;
if (interval_microseconds_ != 0 || interval_instr_count_ != 0) {
if (!do_process_final_interval) {
ERRMSG("i#6793: Skipping process_interval for final interval of shard index "
"%d\n",
shard_index);
} else if (!process_interval(
worker->shard_data[shard_index].cur_interval_index,
worker->shard_data[shard_index].cur_interval_init_instr_count,
worker,
/*parallel=*/true, /*at_instr_record=*/false, shard_index)) {
return false;
}
if (!finalize_interval_snapshots(worker, /*parallel=*/true, shard_index)) {
return false;
}
}
for (int i = 0; i < num_tools_; ++i) {
if (!tools_[i]->parallel_shard_exit(
worker->shard_data[shard_index].tool_data[i].shard_data)) {
worker->error = tools_[i]->parallel_shard_error(
worker->shard_data[shard_index].tool_data[i].shard_data);
VPRINT(this, 1, "Worker %d hit shard exit error %s on trace shard %s\n",
worker->index, worker->error.c_str(),
worker->stream->get_stream_name().c_str());
VPRINT(this, 1, "Worker %d hit shard exit error %s on trace shard index %d\n",
worker->index, worker->error.c_str(), shard_index);
return false;
}
}
Expand Down Expand Up @@ -693,9 +701,18 @@ analyzer_tmpl_t<RecordType, ReaderType>::process_tasks_internal(
}
}
}
// i#6444: Fallback for cases where there is a missing thread final record in
// non-core-sharded traces, in which case we have not yet invoked
// process_shard_exit.
for (const auto &keyval : worker->shard_data) {
if (!keyval.second.exited) {
if (!process_shard_exit(worker, keyval.second.shard_index)) {
// i#6793: We skip processing the final interval for shards exited here
// if the stream has already moved on and cannot provide the state for
// the shard anymore.
bool do_process_final_interval =
keyval.second.shard_index == worker->stream->get_shard_index();
if (!process_shard_exit(worker, keyval.second.shard_index,
do_process_final_interval)) {
return false;
}
}
Expand Down
3 changes: 2 additions & 1 deletion clients/drcachesim/analyzer.h
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,8 @@ template <typename RecordType, typename ReaderType> class analyzer_tmpl_t {
// Helper for process_tasks() which calls parallel_shard_exit() in each tool.
// Returns false if there was an error and the caller should return early.
bool
process_shard_exit(analyzer_worker_data_t *worker, int shard_index);
process_shard_exit(analyzer_worker_data_t *worker, int shard_index,
bool do_process_final_interval = true);

bool
record_has_tid(RecordType record, memref_tid_t &tid);
Expand Down
3 changes: 2 additions & 1 deletion clients/drcachesim/tests/core_serial.templatex
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Schedule stats tool results:
Total counts:
4 cores
8 threads
8 threads: 1257.*
638938 instructions
6 total context switches
0.0093906 CSPKI \(context switches per 1000 instructions\)
Expand All @@ -17,6 +17,7 @@ Total counts:
345686 idles
64.89% cpu busy by record count
[0-9 ]* cpu microseconds
[0-9 ]* wait microseconds
[0-9 ]* idle microseconds
[0-9 ]* idle microseconds at last instr
[0-9\. ]*% cpu busy by time
Expand Down
15 changes: 10 additions & 5 deletions clients/drcachesim/tests/schedule_stats_nopreempt.templatex
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Schedule stats tool results:
Total counts:
4 cores
8 threads
8 threads: 1257.*
638938 instructions
6 total context switches
0\.0093906 CSPKI \(context switches per 1000 instructions\)
Expand All @@ -17,12 +17,13 @@ Total counts:
*[0-9]* idles
*[0-9\.]*% cpu busy by record count
*[0-9]* cpu microseconds
*[0-9]* wait microseconds
*[0-9]* idle microseconds
*[0-9]* idle microseconds at last instr
*[0-9\.]*% cpu busy by time
*[0-9\.]*% cpu busy by time, ignoring idle past last instr
Core #0 counts:
. threads
. threads: 1257.*
*[0-9]* instructions
. total context switches
0\.0[0-9\.]* CSPKI \(context switches per 1000 instructions\)
Expand All @@ -38,12 +39,13 @@ Core #0 counts:
*[0-9]* idles
*[0-9\.]*% cpu busy by record count
*[0-9]* cpu microseconds
*[0-9]* wait microseconds
*[0-9]* idle microseconds
*[0-9]* idle microseconds at last instr
*[0-9\.]*% cpu busy by time
*[0-9\.]*% cpu busy by time, ignoring idle past last instr
Core #1 counts:
. threads
. threads: 1257.*
*[0-9]* instructions
. total context switches
0\.0[0-9\.]* CSPKI \(context switches per 1000 instructions\)
Expand All @@ -59,12 +61,13 @@ Core #1 counts:
*[0-9]* idles
*[0-9\.]*% cpu busy by record count
*[0-9]* cpu microseconds
*[0-9]* wait microseconds
*[0-9]* idle microseconds
*[0-9]* idle microseconds at last instr
*[0-9\.]*% cpu busy by time
*[0-9\.]*% cpu busy by time, ignoring idle past last instr
Core #2 counts:
. threads
. threads: 1257.*
*[0-9]* instructions
. total context switches
0\.0[0-9\.]* CSPKI \(context switches per 1000 instructions\)
Expand All @@ -80,12 +83,13 @@ Core #2 counts:
*[0-9]* idles
*[0-9\.]*% cpu busy by record count
*[0-9]* cpu microseconds
*[0-9]* wait microseconds
*[0-9]* idle microseconds
*[0-9]* idle microseconds at last instr
*[0-9\.]*% cpu busy by time
*[0-9\.]*% cpu busy by time, ignoring idle past last instr
Core #3 counts:
. threads
. threads: 1257.*
*[0-9]* instructions
. total context switches
0\.0[0-9\.]* CSPKI \(context switches per 1000 instructions\)
Expand All @@ -101,6 +105,7 @@ Core #3 counts:
*[0-9]* idles
*[0-9\.]*% cpu busy by record count
*[0-9]* cpu microseconds
*[0-9]* wait microseconds
*[0-9]* idle microseconds
*[0-9]* idle microseconds at last instr
*[0-9\.]*% cpu busy by time
Expand Down
69 changes: 52 additions & 17 deletions clients/drcachesim/tests/schedule_stats_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@

namespace dynamorio {
namespace drmemtrace {
namespace {

using ::dynamorio::drmemtrace::default_memtrace_stream_t;
using ::dynamorio::drmemtrace::memref_t;
Expand All @@ -60,14 +61,45 @@ using ::dynamorio::drmemtrace::TRACE_MARKER_TYPE_DIRECT_THREAD_SWITCH;
using ::dynamorio::drmemtrace::TRACE_MARKER_TYPE_MAYBE_BLOCKING_SYSCALL;
using ::dynamorio::drmemtrace::TRACE_MARKER_TYPE_SYSCALL;

// Create a class for testing that has reliable timing.
// It assumes it is only used with one thread and parallel operation
// is emulated via lockstep serial walking, so there is no need for locks.
class mock_schedule_stats_t : public schedule_stats_t {
public:
mock_schedule_stats_t(uint64_t print_every, unsigned int verbose = 0)
: schedule_stats_t(print_every, verbose)
{
}

uint64_t
get_current_microseconds() override
{
return global_time_;
}

bool
parallel_shard_memref(void *shard_data, const memref_t &memref) override
{
// This global time with our lockstep iteration in run_schedule_stats()
// over-counts as it advances while threads are waiting their
// serial turn, but that's fine: so long as it's deterministic.
++global_time_;
return schedule_stats_t::parallel_shard_memref(shard_data, memref);
}

private:
// Start at 1 to avoid asserts about a time of 0.
uint64_t global_time_ = 1;
};

// Bypasses the analyzer and scheduler for a controlled test sequence.
// Alternates the per-core memref vectors in lockstep.
static schedule_stats_t::counters_t
run_schedule_stats(const std::vector<std::vector<memref_t>> &memrefs)
{
// At verbosity 2+ we'd need to subclass default_memtrace_stream_t
// and provide a non-null get_input_interface() (point at "this").
schedule_stats_t tool(/*print_every=*/1, /*verbosity=*/1);
mock_schedule_stats_t tool(/*print_every=*/1, /*verbosity=*/1);
struct per_core_t {
void *worker_data;
void *shard_data;
Expand Down Expand Up @@ -143,6 +175,9 @@ test_basic_stats()
gen_marker(TID_C, TRACE_MARKER_TYPE_TIMESTAMP, 3300),
// Direct switch requested but failed.
gen_instr(TID_C),
gen_exit(TID_C),
// An exit is a voluntary switch.
gen_exit(TID_A),
},
{
gen_instr(TID_B),
Expand All @@ -160,22 +195,21 @@ test_basic_stats()
gen_instr(TID_B),
gen_instr(TID_B),
gen_instr(TID_B),
gen_exit(TID_B),
},
};
auto result = run_schedule_stats(memrefs);
assert(result.instrs == 16);
assert(result.total_switches == 6);
assert(result.voluntary_switches == 2);
assert(result.total_switches == 7);
assert(result.voluntary_switches == 3);
assert(result.direct_switches == 1);
assert(result.syscalls == 4);
assert(result.maybe_blocking_syscalls == 3);
assert(result.direct_switch_requests == 2);
assert(result.waits == 3);
assert(result.idle_microseconds == 0);
// XXX: For Windows test VMs we see coarse time updates resulting in 0's.
#ifndef WIN32
assert(result.cpu_microseconds > 0);
#endif
assert(result.cpu_microseconds > 20);
assert(result.wait_microseconds >= 3);
return true;
}

Expand All @@ -196,6 +230,7 @@ test_idle()
gen_instr(TID_B),
gen_instr(TID_B),
gen_instr(TID_B),
gen_exit(TID_B),
},
{
gen_instr(TID_C),
Expand All @@ -217,31 +252,31 @@ test_idle()
gen_instr(TID_A),
gen_instr(TID_A),
gen_instr(TID_A),
gen_exit(TID_A),
// An exit is a voluntary switch.
gen_exit(TID_C),
},
};
auto result = run_schedule_stats(memrefs);
assert(result.instrs == 13);
assert(result.total_switches == 5);
assert(result.voluntary_switches == 0);
assert(result.total_switches == 6);
assert(result.voluntary_switches == 1);
assert(result.direct_switches == 0);
assert(result.syscalls == 0);
assert(result.maybe_blocking_syscalls == 0);
assert(result.direct_switch_requests == 0);
assert(result.waits == 3);
assert(result.idles == 6);
// It is hard to test wall-clock time precise values so we have sanity checks.
std::cerr << "got idle " << result.idle_microseconds << "us, cpu "
<< result.cpu_microseconds << "us\n"; // NOCHECK
// XXX: For Windows test VMs we see coarse time updates resulting in 0's.
#ifndef WIN32
assert(result.idle_microseconds > 0);
assert(result.idle_microseconds >= 6);
assert(result.idle_micros_at_last_instr > 0 &&
result.idle_micros_at_last_instr <= result.idle_microseconds);
assert(result.cpu_microseconds > 0);
#endif
assert(result.cpu_microseconds > 10);
assert(result.wait_microseconds >= 3);
return true;
}

} // namespace

int
test_main(int argc, const char *argv[])
{
Expand Down
Loading

0 comments on commit 82a113b

Please sign in to comment.