From c4106e004c3246c27392c23161ce5ae3a008fe2c Mon Sep 17 00:00:00 2001 From: edeiana Date: Wed, 21 Feb 2024 01:57:32 +0000 Subject: [PATCH 01/58] i#6662 public traces, part 1: encoding_filter Simple record_filter_t::record_filter_func_t filter that modifies a field of every trace_entry_t record in a trace and leverages record_filter to output such modified records onto a "filtered" output trace. Issue: #6662 --- clients/drcachesim/CMakeLists.txt | 1 + .../drcachesim/tools/filter/encoding_filter.h | 77 +++++++++++++++++++ .../drcachesim/tools/filter/record_filter.cpp | 6 ++ 3 files changed, 84 insertions(+) create mode 100644 clients/drcachesim/tools/filter/encoding_filter.h diff --git a/clients/drcachesim/CMakeLists.txt b/clients/drcachesim/CMakeLists.txt index 75b366ed495..12fbcf07014 100644 --- a/clients/drcachesim/CMakeLists.txt +++ b/clients/drcachesim/CMakeLists.txt @@ -196,6 +196,7 @@ add_exported_library(drmemtrace_record_filter STATIC tools/filter/cache_filter.h tools/filter/cache_filter.cpp tools/filter/type_filter.h + tools/filter/encoding_filter.h tools/filter/null_filter.h) target_link_libraries(drmemtrace_record_filter drmemtrace_simulator) diff --git a/clients/drcachesim/tools/filter/encoding_filter.h b/clients/drcachesim/tools/filter/encoding_filter.h new file mode 100644 index 00000000000..a4ad415caa9 --- /dev/null +++ b/clients/drcachesim/tools/filter/encoding_filter.h @@ -0,0 +1,77 @@ +/* ********************************************************** + * Copyright (c) 2022-2024 Google, Inc. All rights reserved. + * **********************************************************/ + +/* + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * * Neither the name of Google, Inc. nor the names of its contributors may be + * used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + */ + +#ifndef _ENCODING_FILTER_H_ +#define _ENCODING_FILTER_H_ 1 + +#include "record_filter.h" +#include "trace_entry.h" + +namespace dynamorio { +namespace drmemtrace { + +class encoding_filter_t : public record_filter_t::record_filter_func_t { +public: + encoding_filter_t() + { + } + + void * + parallel_shard_init(memtrace_stream_t *shard_stream, + bool partial_trace_filter) override + { + per_shard_t *per_shard = new per_shard_t; + return per_shard; + } + + bool + parallel_shard_filter(trace_entry_t &entry, void *shard_data) override + { + entry.length[0] = 255; + return true; + } + + bool + parallel_shard_exit(void *shard_data) override + { + per_shard_t *per_shard = reinterpret_cast(shard_data); + delete per_shard; + return true; + } + +private: + struct per_shard_t { }; +}; + +} // namespace drmemtrace +} // namespace dynamorio +#endif /* _ENCODING_FILTER_H_ */ diff --git a/clients/drcachesim/tools/filter/record_filter.cpp b/clients/drcachesim/tools/filter/record_filter.cpp index 1d5cbd3a719..01985ad7b97 100644 --- a/clients/drcachesim/tools/filter/record_filter.cpp +++ b/clients/drcachesim/tools/filter/record_filter.cpp @@ -60,6 +60,7 @@ #include "cache_filter.h" #include "trim_filter.h" #include "type_filter.h" +#include "encoding_filter.h" #undef VPRINT #ifdef DEBUG @@ -143,6 +144,11 @@ record_filter_tool_create(const std::string &output_dir, uint64_t stop_timestamp new dynamorio::drmemtrace::trim_filter_t(trim_before_timestamp, trim_after_timestamp))); } + // ED: always enabling encoding_filter for test purposes. TOREMOVE. + filter_funcs.emplace_back( + std::unique_ptr( + new dynamorio::drmemtrace::encoding_filter_t())); + // TODO i#5675: Add other filters. return new dynamorio::drmemtrace::record_filter_t(output_dir, std::move(filter_funcs), From 710a5d3e3a17ff149fa247a2172337e44a2d51da Mon Sep 17 00:00:00 2001 From: edeiana Date: Wed, 24 Apr 2024 15:45:00 -0700 Subject: [PATCH 02/58] Saving previous work. encoding_filter.h changes instr length and pc in this commit, but does not change branch targets accordingly. --- clients/drcachesim/CMakeLists.txt | 4 +- .../tests/record_filter_unit_tests.cpp | 4 +- .../drcachesim/tools/filter/cache_filter.cpp | 3 +- .../drcachesim/tools/filter/cache_filter.h | 3 +- .../drcachesim/tools/filter/encoding_filter.h | 99 ++++++++++++++++++- clients/drcachesim/tools/filter/null_filter.h | 3 +- .../drcachesim/tools/filter/record_filter.cpp | 8 +- .../drcachesim/tools/filter/record_filter.h | 8 +- clients/drcachesim/tools/filter/trim_filter.h | 3 +- clients/drcachesim/tools/filter/type_filter.h | 3 +- core/ir/instr_shared.c | 15 ++- 11 files changed, 137 insertions(+), 16 deletions(-) diff --git a/clients/drcachesim/CMakeLists.txt b/clients/drcachesim/CMakeLists.txt index 12fbcf07014..ca8715ac169 100644 --- a/clients/drcachesim/CMakeLists.txt +++ b/clients/drcachesim/CMakeLists.txt @@ -393,7 +393,9 @@ add_dependencies(prefetch_analyzer_launcher api_headers) add_executable(record_filter_launcher tools/record_filter_launcher.cpp tests/test_helpers.cpp) -target_link_libraries(record_filter_launcher drmemtrace_analyzer drmemtrace_record_filter) +target_link_libraries(record_filter_launcher drmemtrace_analyzer drmemtrace_record_filter +drmemtrace_raw2trace drcovlib_static drfrontendlib ${static_libc} ${zlib_libs}) +add_dependencies(record_filter_launcher api_headers) append_property_list(TARGET record_filter_launcher COMPILE_DEFINITIONS "NO_HELPER_MAIN") use_DynamoRIO_extension(record_filter_launcher droption) diff --git a/clients/drcachesim/tests/record_filter_unit_tests.cpp b/clients/drcachesim/tests/record_filter_unit_tests.cpp index 0f7dd36975a..0e93b4fc150 100644 --- a/clients/drcachesim/tests/record_filter_unit_tests.cpp +++ b/clients/drcachesim/tests/record_filter_unit_tests.cpp @@ -42,6 +42,7 @@ #include "tools/filter/record_filter.h" #include "tools/filter/trim_filter.h" #include "tools/filter/type_filter.h" +#include "trace_entry.h" #include "zipfile_ostream.h" #include @@ -541,7 +542,8 @@ test_chunk_update() return nullptr; } bool - parallel_shard_filter(trace_entry_t &entry, void *shard_data) override + parallel_shard_filter(trace_entry_t &entry, void *shard_data, + std::vector &last_encoding) override { bool res = true; if (type_is_instr(static_cast(entry.type))) { diff --git a/clients/drcachesim/tools/filter/cache_filter.cpp b/clients/drcachesim/tools/filter/cache_filter.cpp index 6dae8b80ac4..a5b0bdad9fa 100644 --- a/clients/drcachesim/tools/filter/cache_filter.cpp +++ b/clients/drcachesim/tools/filter/cache_filter.cpp @@ -87,7 +87,8 @@ cache_filter_t::parallel_shard_init(memtrace_stream_t *shard_stream, return per_shard; } bool -cache_filter_t::parallel_shard_filter(trace_entry_t &entry, void *shard_data) +cache_filter_t::parallel_shard_filter(trace_entry_t &entry, void *shard_data, + std::vector &last_encoding) { if (entry.type == TRACE_TYPE_MARKER && entry.size == TRACE_MARKER_TYPE_FILETYPE) { if (filter_instrs_) diff --git a/clients/drcachesim/tools/filter/cache_filter.h b/clients/drcachesim/tools/filter/cache_filter.h index 677a027bd11..9eab85c993e 100644 --- a/clients/drcachesim/tools/filter/cache_filter.h +++ b/clients/drcachesim/tools/filter/cache_filter.h @@ -55,7 +55,8 @@ class cache_filter_t : public record_filter_t::record_filter_func_t { parallel_shard_init(memtrace_stream_t *shard_stream, bool partial_trace_filter) override; bool - parallel_shard_filter(trace_entry_t &entry, void *shard_data) override; + parallel_shard_filter(trace_entry_t &entry, void *shard_data, + std::vector &last_encoding) override; bool parallel_shard_exit(void *shard_data) override; diff --git a/clients/drcachesim/tools/filter/encoding_filter.h b/clients/drcachesim/tools/filter/encoding_filter.h index a4ad415caa9..0232d39c962 100644 --- a/clients/drcachesim/tools/filter/encoding_filter.h +++ b/clients/drcachesim/tools/filter/encoding_filter.h @@ -33,8 +33,14 @@ #ifndef _ENCODING_FILTER_H_ #define _ENCODING_FILTER_H_ 1 +#include "utils.h" +#include "dr_api.h" // Must be before trace_entry.h from analysis_tool.h. #include "record_filter.h" #include "trace_entry.h" +#include "utils.h" + +#include +#include namespace dynamorio { namespace drmemtrace { @@ -49,14 +55,83 @@ class encoding_filter_t : public record_filter_t::record_filter_func_t { parallel_shard_init(memtrace_stream_t *shard_stream, bool partial_trace_filter) override { + dcontext_.dcontext = dr_standalone_init(); per_shard_t *per_shard = new per_shard_t; + per_shard->prev_instr_length = 0; + per_shard->prev_pc = 0; return per_shard; } bool - parallel_shard_filter(trace_entry_t &entry, void *shard_data) override + parallel_shard_filter(trace_entry_t &entry, void *shard_data, + std::vector &last_encoding) override { - entry.length[0] = 255; + per_shard_t *per_shard = reinterpret_cast(shard_data); + + /* We have encoding to convert. + * Normally the sequence of trace_entry_t(s) looks like: + * [encoding,]+ instr_with_PC, [read | write]* + * (+ = one or more, * = zero or more) + * If we enter here, trace_entry_t is instr_with_PC. + */ + if (is_any_instr_type(static_cast(entry.type)) && + !last_encoding.empty()) { + const app_pc pc = reinterpret_cast(entry.addr); + byte encoding[MAX_ENCODING_LENGTH]; + memset(encoding, 0, sizeof(encoding)); + uint encoding_size_acc = 0; + for (auto &trace_encoding : last_encoding) { + memcpy(encoding + encoding_size_acc, trace_encoding.encoding, + trace_encoding.size); + encoding_size_acc += trace_encoding.size; + } + + instr_t instr; + instr_init(dcontext_.dcontext, &instr); + app_pc next_pc = decode_from_copy(dcontext_.dcontext, encoding, pc, &instr); + if (next_pc == NULL || !instr_valid(&instr)) { + instr_free(dcontext_.dcontext, &instr); + return false; + } + + instr_t instr_regdeps; + instr_init(dcontext_.dcontext, &instr_regdeps); + instr_convert_to_isa_regdeps(dcontext_.dcontext, &instr, &instr_regdeps); + + byte ALIGN_VAR(4) encoding_regdeps[16]; + app_pc next_pc_regdeps = + instr_encode(dcontext_.dcontext, &instr_regdeps, encoding_regdeps); + + uint encoding_regdeps_length = next_pc_regdeps - encoding_regdeps; + + entry.size = encoding_regdeps_length; + if (per_shard->prev_pc == 0) + per_shard->prev_pc = entry.addr; + old_to_new_pc[entry.addr] = per_shard->prev_pc + per_shard->prev_instr_length; + old_to_new_length[entry.addr] = encoding_regdeps_length; + entry.addr = per_shard->prev_pc + per_shard->prev_instr_length; + per_shard->prev_instr_length = encoding_regdeps_length; + per_shard->prev_pc = entry.addr; + + uint num_encoding_entries_regdeps = + ALIGN_FORWARD(encoding_regdeps_length, 8) / 8; + last_encoding.resize(num_encoding_entries_regdeps); + for (trace_entry_t &te : last_encoding) { + te.type = TRACE_TYPE_ENCODING; + te.size = encoding_regdeps_length < 8 ? encoding_regdeps_length : 8; + memset(te.encoding, 0, 8); + memcpy(te.encoding, encoding_regdeps, te.size); + encoding_regdeps_length -= 8; + } + } else if (is_any_instr_type(static_cast(entry.type)) && + last_encoding.empty()) { + addr_t old_pc = entry.addr; + auto found = old_to_new_pc.find(old_pc); + if (found != old_to_new_pc.end()) { + entry.addr = found->second; + entry.size = old_to_new_length[old_pc]; + } + } return true; } @@ -69,7 +144,25 @@ class encoding_filter_t : public record_filter_t::record_filter_func_t { } private: - struct per_shard_t { }; + struct per_shard_t { + uint prev_instr_length; + addr_t prev_pc; + }; + + struct dcontext_cleanup_last_t { + public: + ~dcontext_cleanup_last_t() + { + if (dcontext != nullptr) + dr_standalone_exit(); + } + void *dcontext = nullptr; + }; + + dcontext_cleanup_last_t dcontext_; + + std::unordered_map old_to_new_pc; + std::unordered_map old_to_new_length; }; } // namespace drmemtrace diff --git a/clients/drcachesim/tools/filter/null_filter.h b/clients/drcachesim/tools/filter/null_filter.h index e76f6e0df44..2c89ab57eb3 100644 --- a/clients/drcachesim/tools/filter/null_filter.h +++ b/clients/drcachesim/tools/filter/null_filter.h @@ -47,7 +47,8 @@ class null_filter_t : public record_filter_t::record_filter_func_t { return nullptr; } bool - parallel_shard_filter(trace_entry_t &entry, void *shard_data) override + parallel_shard_filter(trace_entry_t &entry, void *shard_data, + std::vector &last_encoding) override { return true; } diff --git a/clients/drcachesim/tools/filter/record_filter.cpp b/clients/drcachesim/tools/filter/record_filter.cpp index a33f0d0aba0..c71b4c18e1b 100644 --- a/clients/drcachesim/tools/filter/record_filter.cpp +++ b/clients/drcachesim/tools/filter/record_filter.cpp @@ -82,8 +82,6 @@ namespace dynamorio { namespace drmemtrace { -namespace { - bool is_any_instr_type(trace_type_t type) { @@ -91,6 +89,8 @@ is_any_instr_type(trace_type_t type) type == TRACE_TYPE_INSTR_NO_FETCH; } +namespace { + template std::vector parse_string(const std::string &s, char sep = ',') @@ -744,8 +744,8 @@ record_filter_t::parallel_shard_memref(void *shard_data, const trace_entry_t &in } if (per_shard->enabled) { for (int i = 0; i < static_cast(filters_.size()); ++i) { - if (!filters_[i]->parallel_shard_filter(entry, - per_shard->filter_shard_data[i])) { + if (!filters_[i]->parallel_shard_filter( + entry, per_shard->filter_shard_data[i], per_shard->last_encoding)) { output = false; } if (!filters_[i]->get_error_string().empty()) { diff --git a/clients/drcachesim/tools/filter/record_filter.h b/clients/drcachesim/tools/filter/record_filter.h index f5550b39cbd..1eee061da5d 100644 --- a/clients/drcachesim/tools/filter/record_filter.h +++ b/clients/drcachesim/tools/filter/record_filter.h @@ -53,6 +53,11 @@ namespace dynamorio { namespace drmemtrace { +/* Utility function. + */ +bool +is_any_instr_type(trace_type_t type); + /** * Analysis tool that filters the #trace_entry_t records of an offline * trace. Streams through each shard independenty and parallelly, and @@ -96,7 +101,8 @@ class record_filter_t : public record_analysis_tool_t { * An error is indicated by setting error_string_ to a non-empty value. */ virtual bool - parallel_shard_filter(trace_entry_t &entry, void *shard_data) = 0; + parallel_shard_filter(trace_entry_t &entry, void *shard_data, + std::vector &last_encoding) = 0; /** * Invoked when all #trace_entry_t in a shard have been processed * by parallel_shard_filter(). \p shard_data is same as what was diff --git a/clients/drcachesim/tools/filter/trim_filter.h b/clients/drcachesim/tools/filter/trim_filter.h index 30b8395156f..a33802019ed 100644 --- a/clients/drcachesim/tools/filter/trim_filter.h +++ b/clients/drcachesim/tools/filter/trim_filter.h @@ -69,7 +69,8 @@ class trim_filter_t : public record_filter_t::record_filter_func_t { return per_shard; } bool - parallel_shard_filter(trace_entry_t &entry, void *shard_data) override + parallel_shard_filter(trace_entry_t &entry, void *shard_data, + std::vector &last_encoding) override { per_shard_t *per_shard = reinterpret_cast(shard_data); if (entry.type == TRACE_TYPE_MARKER && diff --git a/clients/drcachesim/tools/filter/type_filter.h b/clients/drcachesim/tools/filter/type_filter.h index 3dda5c0592f..9c33237ef9b 100644 --- a/clients/drcachesim/tools/filter/type_filter.h +++ b/clients/drcachesim/tools/filter/type_filter.h @@ -86,7 +86,8 @@ class type_filter_t : public record_filter_t::record_filter_func_t { return per_shard; } bool - parallel_shard_filter(trace_entry_t &entry, void *shard_data) override + parallel_shard_filter(trace_entry_t &entry, void *shard_data, + std::vector &last_encoding) override { per_shard_t *per_shard = reinterpret_cast(shard_data); if (entry.type == TRACE_TYPE_MARKER && entry.size == TRACE_MARKER_TYPE_FILETYPE) { diff --git a/core/ir/instr_shared.c b/core/ir/instr_shared.c index 500e3d37386..c5ca2ca5251 100644 --- a/core/ir/instr_shared.c +++ b/core/ir/instr_shared.c @@ -3008,16 +3008,21 @@ instr_convert_to_isa_regdeps(void *drcontext, instr_t *instr_real_isa, bool dst_reg_used[REGDEPS_MAX_NUM_REGS]; memset(dst_reg_used, 0, sizeof(dst_reg_used)); uint num_dsts = 0; + bool sentinel = false; // TOREMOVE uint instr_real_num_dsts = (uint)instr_num_dsts(instr_real_isa); for (uint dst_index = 0; dst_index < instr_real_num_dsts; ++dst_index) { opnd_t dst_opnd = instr_get_dst(instr_real_isa, dst_index); uint num_regs_used_by_opnd = (uint)opnd_num_regs_used(dst_opnd); if (opnd_is_memory_reference(dst_opnd)) { + sentinel = true; for (uint opnd_index = 0; opnd_index < num_regs_used_by_opnd; ++opnd_index) { reg_id_t reg = opnd_get_reg_used(dst_opnd, opnd_index); /* Map sub-registers to their containing register. */ reg_id_t reg_canonical = reg_to_pointer_sized(reg); + // TOREMOVE + if (reg_canonical >= 256) + reg_canonical = 255; if (!src_reg_used[reg_canonical]) { ++num_srcs; src_reg_used[reg_canonical] = true; @@ -3029,6 +3034,10 @@ instr_convert_to_isa_regdeps(void *drcontext, instr_t *instr_real_isa, /* Map sub-registers to their containing register. */ reg_id_t reg_canonical = reg_to_pointer_sized(reg); + // TOREMOVE + if (reg_canonical >= 256) + reg_canonical = 255; + if (!dst_reg_used[reg_canonical]) { ++num_dsts; dst_reg_used[reg_canonical] = true; @@ -3058,6 +3067,10 @@ instr_convert_to_isa_regdeps(void *drcontext, instr_t *instr_real_isa, /* Map sub-registers to their containing register. */ reg_id_t reg_canonical = reg_to_pointer_sized(reg); + // TOREMOVE + if (reg_canonical >= 256) + reg_canonical = 255; + if (!src_reg_used[reg_canonical]) { ++num_srcs; src_reg_used[reg_canonical] = true; @@ -4345,4 +4358,4 @@ move_mm_avx512_reg_opcode(bool aligned64) } #endif /* !STANDALONE_DECODER */ -/****************************************************************************/ + /****************************************************************************/ From 7d16393719e0e8d7b44a3043018083286da12af0 Mon Sep 17 00:00:00 2001 From: edeiana Date: Wed, 24 Apr 2024 17:15:11 -0700 Subject: [PATCH 03/58] Reverted back to not changing trace_entry_t length and pc. Code clenup. --- .../drcachesim/tools/filter/encoding_filter.h | 96 +++++++++---------- 1 file changed, 48 insertions(+), 48 deletions(-) diff --git a/clients/drcachesim/tools/filter/encoding_filter.h b/clients/drcachesim/tools/filter/encoding_filter.h index 0232d39c962..f902590c410 100644 --- a/clients/drcachesim/tools/filter/encoding_filter.h +++ b/clients/drcachesim/tools/filter/encoding_filter.h @@ -42,6 +42,13 @@ #include #include +/* We are not exporting the defines in core/ir/isa_regdeps/encoding_common.h, so we + * redefine DR_ISA_REGDEPS alignment requirement here. + */ +#define REGDEPS_ALIGN_BYTES 4 + +#define REGDEPS_MAX_ENCODING_LENGTH 16 + namespace dynamorio { namespace drmemtrace { @@ -56,18 +63,13 @@ class encoding_filter_t : public record_filter_t::record_filter_func_t { bool partial_trace_filter) override { dcontext_.dcontext = dr_standalone_init(); - per_shard_t *per_shard = new per_shard_t; - per_shard->prev_instr_length = 0; - per_shard->prev_pc = 0; - return per_shard; + return nullptr; } bool parallel_shard_filter(trace_entry_t &entry, void *shard_data, std::vector &last_encoding) override { - per_shard_t *per_shard = reinterpret_cast(shard_data); - /* We have encoding to convert. * Normally the sequence of trace_entry_t(s) looks like: * [encoding,]+ instr_with_PC, [read | write]* @@ -76,60 +78,68 @@ class encoding_filter_t : public record_filter_t::record_filter_func_t { */ if (is_any_instr_type(static_cast(entry.type)) && !last_encoding.empty()) { + /* Gather real ISA encoding bytes looping through all previously saved + * encoding bytes in last_encoding. + */ const app_pc pc = reinterpret_cast(entry.addr); byte encoding[MAX_ENCODING_LENGTH]; memset(encoding, 0, sizeof(encoding)); - uint encoding_size_acc = 0; + uint encoding_offset = 0; for (auto &trace_encoding : last_encoding) { - memcpy(encoding + encoding_size_acc, trace_encoding.encoding, + memcpy(encoding + encoding_offset, trace_encoding.encoding, trace_encoding.size); - encoding_size_acc += trace_encoding.size; + encoding_offset += trace_encoding.size; } + /* Genenerate the real ISA instr_t by decoding the encoding bytes. + */ instr_t instr; instr_init(dcontext_.dcontext, &instr); app_pc next_pc = decode_from_copy(dcontext_.dcontext, encoding, pc, &instr); if (next_pc == NULL || !instr_valid(&instr)) { instr_free(dcontext_.dcontext, &instr); + error_string_ = + "Failed to decode instruction " + to_hex_string(entry.addr); return false; } + /* Convert the real ISA instr_t into a regdeps ISA instr_t. + */ instr_t instr_regdeps; instr_init(dcontext_.dcontext, &instr_regdeps); instr_convert_to_isa_regdeps(dcontext_.dcontext, &instr, &instr_regdeps); - byte ALIGN_VAR(4) encoding_regdeps[16]; + /* Obtain regdeps ISA instr_t encoding bytes. + */ + byte ALIGN_VAR(REGDEPS_ALIGN_BYTES) + encoding_regdeps[REGDEPS_MAX_ENCODING_LENGTH]; app_pc next_pc_regdeps = instr_encode(dcontext_.dcontext, &instr_regdeps, encoding_regdeps); - uint encoding_regdeps_length = next_pc_regdeps - encoding_regdeps; - - entry.size = encoding_regdeps_length; - if (per_shard->prev_pc == 0) - per_shard->prev_pc = entry.addr; - old_to_new_pc[entry.addr] = per_shard->prev_pc + per_shard->prev_instr_length; - old_to_new_length[entry.addr] = encoding_regdeps_length; - entry.addr = per_shard->prev_pc + per_shard->prev_instr_length; - per_shard->prev_instr_length = encoding_regdeps_length; - per_shard->prev_pc = entry.addr; - - uint num_encoding_entries_regdeps = - ALIGN_FORWARD(encoding_regdeps_length, 8) / 8; - last_encoding.resize(num_encoding_entries_regdeps); - for (trace_entry_t &te : last_encoding) { - te.type = TRACE_TYPE_ENCODING; - te.size = encoding_regdeps_length < 8 ? encoding_regdeps_length : 8; - memset(te.encoding, 0, 8); - memcpy(te.encoding, encoding_regdeps, te.size); - encoding_regdeps_length -= 8; - } - } else if (is_any_instr_type(static_cast(entry.type)) && - last_encoding.empty()) { - addr_t old_pc = entry.addr; - auto found = old_to_new_pc.find(old_pc); - if (found != old_to_new_pc.end()) { - entry.addr = found->second; - entry.size = old_to_new_length[old_pc]; + /* Compute number of trace_entry_t to contain regdeps ISA encoding. + * Each trace_entry_t record can contain 8 byte encoding. + */ + size_t trace_entry_encoding_size = sizeof(entry.addr); /* == 8 */ + uint regdeps_encoding_length = next_pc_regdeps - encoding_regdeps; + uint num_regdeps_encoding_entries = + ALIGN_FORWARD(regdeps_encoding_length, trace_entry_encoding_size) / + trace_entry_encoding_size; + last_encoding.resize(num_regdeps_encoding_entries); + + /* Copy regdeps ISA encoding, splitting it among the last_encoding + * trace_entry_t records. + */ + uint regdeps_encoding_offset = 0; + for (trace_entry_t &encoding_entry : last_encoding) { + encoding_entry.type = TRACE_TYPE_ENCODING; + encoding_entry.size = regdeps_encoding_length < trace_entry_encoding_size + ? regdeps_encoding_length + : trace_entry_encoding_size; + memset(encoding_entry.encoding, 0, trace_entry_encoding_size); + memcpy(encoding_entry.encoding, + encoding_regdeps + regdeps_encoding_offset, encoding_entry.size); + regdeps_encoding_length -= trace_entry_encoding_size; + regdeps_encoding_offset += encoding_entry.size; } } return true; @@ -138,17 +148,10 @@ class encoding_filter_t : public record_filter_t::record_filter_func_t { bool parallel_shard_exit(void *shard_data) override { - per_shard_t *per_shard = reinterpret_cast(shard_data); - delete per_shard; return true; } private: - struct per_shard_t { - uint prev_instr_length; - addr_t prev_pc; - }; - struct dcontext_cleanup_last_t { public: ~dcontext_cleanup_last_t() @@ -160,9 +163,6 @@ class encoding_filter_t : public record_filter_t::record_filter_func_t { }; dcontext_cleanup_last_t dcontext_; - - std::unordered_map old_to_new_pc; - std::unordered_map old_to_new_length; }; } // namespace drmemtrace From d0f102c9bf38adc8f0e78bd9cbf74811fac49341 Mon Sep 17 00:00:00 2001 From: edeiana Date: Wed, 24 Apr 2024 17:17:30 -0700 Subject: [PATCH 04/58] Reverted back workaround for virtual register remapping. --- core/ir/instr_shared.c | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/core/ir/instr_shared.c b/core/ir/instr_shared.c index c5ca2ca5251..500e3d37386 100644 --- a/core/ir/instr_shared.c +++ b/core/ir/instr_shared.c @@ -3008,21 +3008,16 @@ instr_convert_to_isa_regdeps(void *drcontext, instr_t *instr_real_isa, bool dst_reg_used[REGDEPS_MAX_NUM_REGS]; memset(dst_reg_used, 0, sizeof(dst_reg_used)); uint num_dsts = 0; - bool sentinel = false; // TOREMOVE uint instr_real_num_dsts = (uint)instr_num_dsts(instr_real_isa); for (uint dst_index = 0; dst_index < instr_real_num_dsts; ++dst_index) { opnd_t dst_opnd = instr_get_dst(instr_real_isa, dst_index); uint num_regs_used_by_opnd = (uint)opnd_num_regs_used(dst_opnd); if (opnd_is_memory_reference(dst_opnd)) { - sentinel = true; for (uint opnd_index = 0; opnd_index < num_regs_used_by_opnd; ++opnd_index) { reg_id_t reg = opnd_get_reg_used(dst_opnd, opnd_index); /* Map sub-registers to their containing register. */ reg_id_t reg_canonical = reg_to_pointer_sized(reg); - // TOREMOVE - if (reg_canonical >= 256) - reg_canonical = 255; if (!src_reg_used[reg_canonical]) { ++num_srcs; src_reg_used[reg_canonical] = true; @@ -3034,10 +3029,6 @@ instr_convert_to_isa_regdeps(void *drcontext, instr_t *instr_real_isa, /* Map sub-registers to their containing register. */ reg_id_t reg_canonical = reg_to_pointer_sized(reg); - // TOREMOVE - if (reg_canonical >= 256) - reg_canonical = 255; - if (!dst_reg_used[reg_canonical]) { ++num_dsts; dst_reg_used[reg_canonical] = true; @@ -3067,10 +3058,6 @@ instr_convert_to_isa_regdeps(void *drcontext, instr_t *instr_real_isa, /* Map sub-registers to their containing register. */ reg_id_t reg_canonical = reg_to_pointer_sized(reg); - // TOREMOVE - if (reg_canonical >= 256) - reg_canonical = 255; - if (!src_reg_used[reg_canonical]) { ++num_srcs; src_reg_used[reg_canonical] = true; @@ -4358,4 +4345,4 @@ move_mm_avx512_reg_opcode(bool aligned64) } #endif /* !STANDALONE_DECODER */ - /****************************************************************************/ +/****************************************************************************/ From 3653ee4b0b8f2f5b1f460829f2a5d25767251ddb Mon Sep 17 00:00:00 2001 From: edeiana Date: Thu, 25 Apr 2024 01:56:40 -0700 Subject: [PATCH 05/58] Added -encoding_filter_enabled flag, which also (indirectly) disables the check for encoding size vs instruction length in memref_counter_t (which is a reader_t). --- clients/drcachesim/analyzer_multi.cpp | 3 +- clients/drcachesim/common/options.cpp | 5 ++++ clients/drcachesim/common/options.h | 1 + clients/drcachesim/reader/reader.cpp | 3 +- clients/drcachesim/reader/reader.h | 1 + .../tests/record_filter_unit_tests.cpp | 10 ++++--- .../drcachesim/tools/filter/record_filter.cpp | 28 +++++++++++++------ .../drcachesim/tools/filter/record_filter.h | 4 ++- .../tools/filter/record_filter_create.h | 2 +- .../tools/record_filter_launcher.cpp | 7 ++++- clients/drcachesim/tracer/raw2trace_shared.h | 5 ++++ 11 files changed, 52 insertions(+), 17 deletions(-) diff --git a/clients/drcachesim/analyzer_multi.cpp b/clients/drcachesim/analyzer_multi.cpp index fb71f986f76..2eb0649eb95 100644 --- a/clients/drcachesim/analyzer_multi.cpp +++ b/clients/drcachesim/analyzer_multi.cpp @@ -334,7 +334,8 @@ record_analyzer_multi_t::create_analysis_tool_from_options( op_outdir.get_value(), op_filter_stop_timestamp.get_value(), op_filter_cache_size.get_value(), op_filter_trace_types.get_value(), op_filter_marker_types.get_value(), op_trim_before_timestamp.get_value(), - op_trim_after_timestamp.get_value(), op_verbose.get_value()); + op_trim_after_timestamp.get_value(), op_encoding_filter_enabled.get_value(), + op_verbose.get_value()); } ERRMSG("Usage error: unsupported record analyzer type \"%s\". Only " RECORD_FILTER " is supported.\n", diff --git a/clients/drcachesim/common/options.cpp b/clients/drcachesim/common/options.cpp index 7105192347b..cd170c16061 100644 --- a/clients/drcachesim/common/options.cpp +++ b/clients/drcachesim/common/options.cpp @@ -972,6 +972,11 @@ droption_t "Comma-separated integers for marker types to remove. " "See trace_marker_type_t for the list of marker types."); +droption_t op_encoding_filter_enabled( + DROPTION_SCOPE_FRONTEND, "encoding_filter_enabled", false, + "Enable converting the encoding of instructions to synthetic ISA DR_ISA_REGDEPS.", + "Enable converting the encoding of instructions to synthetic ISA DR_ISA_REGDEPS."); + droption_t op_trim_before_timestamp( DROPTION_SCOPE_ALL, "trim_before_timestamp", 0, 0, (std::numeric_limits::max)(), diff --git a/clients/drcachesim/common/options.h b/clients/drcachesim/common/options.h index 316d1acf063..1435491f091 100644 --- a/clients/drcachesim/common/options.h +++ b/clients/drcachesim/common/options.h @@ -214,6 +214,7 @@ extern dynamorio::droption::droption_t op_filter_stop_timestamp; extern dynamorio::droption::droption_t op_filter_cache_size; extern dynamorio::droption::droption_t op_filter_trace_types; extern dynamorio::droption::droption_t op_filter_marker_types; +extern dynamorio::droption::droption_t op_encoding_filter_enabled; extern dynamorio::droption::droption_t op_trim_before_timestamp; extern dynamorio::droption::droption_t op_trim_after_timestamp; extern dynamorio::droption::droption_t op_abort_on_invariant_error; diff --git a/clients/drcachesim/reader/reader.cpp b/clients/drcachesim/reader/reader.cpp index b938e06dfc6..1b487cef8f7 100644 --- a/clients/drcachesim/reader/reader.cpp +++ b/clients/drcachesim/reader/reader.cpp @@ -210,7 +210,8 @@ reader_t::process_input_entry() ++cur_instr_count_; // Look for encoding bits that belong to this instr. if (last_encoding_.size > 0) { - if (last_encoding_.size != cur_ref_.instr.size) { + if (!ignore_encoding_size_vs_instr_length_check_ && + (last_encoding_.size != cur_ref_.instr.size)) { ERRMSG( "Encoding size %zu != instr size %zu for PC 0x%zx at ord %" PRIu64 " instr %" PRIu64 " last_timestamp=0x%" PRIx64 "\n", diff --git a/clients/drcachesim/reader/reader.h b/clients/drcachesim/reader/reader.h index a10f614d848..81b289ff3c2 100644 --- a/clients/drcachesim/reader/reader.h +++ b/clients/drcachesim/reader/reader.h @@ -276,6 +276,7 @@ class reader_t : public std::iterator, // some thread-based checks may not apply. bool core_sharded_ = false; bool found_filetype_ = false; + bool ignore_encoding_size_vs_instr_length_check_ = false; private: memref_t cur_ref_; diff --git a/clients/drcachesim/tests/record_filter_unit_tests.cpp b/clients/drcachesim/tests/record_filter_unit_tests.cpp index 0e93b4fc150..8977487bcb2 100644 --- a/clients/drcachesim/tests/record_filter_unit_tests.cpp +++ b/clients/drcachesim/tests/record_filter_unit_tests.cpp @@ -90,7 +90,8 @@ class test_record_filter_t : public dynamorio::drmemtrace::record_filter_t { test_record_filter_t(std::vector> filters, uint64_t last_timestamp, bool write_archive = false) : record_filter_t("", std::move(filters), last_timestamp, - /*verbose=*/0) + /*verbose=*/0, + /*ignore_encoding_size_vs_instr_length_check*/ false) , write_archive_(write_archive) { } @@ -1028,9 +1029,10 @@ test_null_filter() // other entries are expected to stay. static constexpr uint64_t stop_timestamp_us = 1; auto record_filter = std::unique_ptr( - new dynamorio::drmemtrace::record_filter_t(output_dir, std::move(filter_funcs), - stop_timestamp_us, - /*verbosity=*/0)); + new dynamorio::drmemtrace::record_filter_t( + output_dir, std::move(filter_funcs), stop_timestamp_us, + /*verbosity=*/0, + /*ignore_encoding_size_vs_instr_length_check=*/false)); std::vector tools; tools.push_back(record_filter.get()); record_analyzer_t record_analyzer(op_trace_dir.get_value(), &tools[0], diff --git a/clients/drcachesim/tools/filter/record_filter.cpp b/clients/drcachesim/tools/filter/record_filter.cpp index c71b4c18e1b..9536ba114de 100644 --- a/clients/drcachesim/tools/filter/record_filter.cpp +++ b/clients/drcachesim/tools/filter/record_filter.cpp @@ -114,7 +114,7 @@ record_filter_tool_create(const std::string &output_dir, uint64_t stop_timestamp int cache_filter_size, const std::string &remove_trace_types, const std::string &remove_marker_types, uint64_t trim_before_timestamp, uint64_t trim_after_timestamp, - unsigned int verbose) + bool encoding_filter_enabled, unsigned int verbose) { std::vector< std::unique_ptr> @@ -144,25 +144,35 @@ record_filter_tool_create(const std::string &output_dir, uint64_t stop_timestamp new dynamorio::drmemtrace::trim_filter_t(trim_before_timestamp, trim_after_timestamp))); } - // ED: always enabling encoding_filter for test purposes. TOREMOVE. - filter_funcs.emplace_back( - std::unique_ptr( - new dynamorio::drmemtrace::encoding_filter_t())); + if (encoding_filter_enabled) { + filter_funcs.emplace_back( + std::unique_ptr( + new dynamorio::drmemtrace::encoding_filter_t())); + } // TODO i#5675: Add other filters. - return new dynamorio::drmemtrace::record_filter_t(output_dir, std::move(filter_funcs), - stop_timestamp, verbose); + /* If we are changing the encoding of trace_entry_t, we will be generating + * discrepancies between encoding size and instruction length. So, we need to tell + * reader_t, which here comes in the form of memref_counter_t, to ignore such + * discrepancies. + */ + bool ignore_encoding_size_vs_instr_length_check = encoding_filter_enabled; + return new dynamorio::drmemtrace::record_filter_t( + output_dir, std::move(filter_funcs), stop_timestamp, verbose, + ignore_encoding_size_vs_instr_length_check); } record_filter_t::record_filter_t( const std::string &output_dir, std::vector> filters, uint64_t stop_timestamp, - unsigned int verbose) + unsigned int verbose, bool ignore_encoding_size_vs_instr_length_check) : output_dir_(output_dir) , filters_(std::move(filters)) , stop_timestamp_(stop_timestamp) , verbosity_(verbose) + , ignore_encoding_size_vs_instr_length_check_( + ignore_encoding_size_vs_instr_length_check) { UNUSED(verbosity_); UNUSED(output_prefix_); @@ -380,6 +390,8 @@ record_filter_t::parallel_shard_init_stream(int shard_index, void *worker_data, per_shard->input_entry_count = 0; per_shard->output_entry_count = 0; per_shard->tid = shard_stream->get_tid(); + per_shard->memref_counter.set_ignore_encoding_size_vs_instr_length_check( + ignore_encoding_size_vs_instr_length_check_); if (shard_type_ == SHARD_BY_CORE) { per_shard->memref_counter.set_core_sharded(true); } diff --git a/clients/drcachesim/tools/filter/record_filter.h b/clients/drcachesim/tools/filter/record_filter.h index 1eee061da5d..2d8707cb49b 100644 --- a/clients/drcachesim/tools/filter/record_filter.h +++ b/clients/drcachesim/tools/filter/record_filter.h @@ -127,7 +127,8 @@ class record_filter_t : public record_analysis_tool_t { // stop_timestamp sets a point beyond which no filtering will occur. record_filter_t(const std::string &output_dir, std::vector> filters, - uint64_t stop_timestamp, unsigned int verbose); + uint64_t stop_timestamp, unsigned int verbose, + bool ignore_encoding_size_vs_instr_length_check); ~record_filter_t() override; bool process_memref(const trace_entry_t &entry) override; @@ -267,6 +268,7 @@ class record_filter_t : public record_analysis_tool_t { std::vector> filters_; uint64_t stop_timestamp_; unsigned int verbosity_; + bool ignore_encoding_size_vs_instr_length_check_; const char *output_prefix_ = "[record_filter]"; // For core-sharded, but used for thread-sharded to simplify the code. std::mutex input2info_mutex_; diff --git a/clients/drcachesim/tools/filter/record_filter_create.h b/clients/drcachesim/tools/filter/record_filter_create.h index ae0665f3fef..5320d0fc8dd 100644 --- a/clients/drcachesim/tools/filter/record_filter_create.h +++ b/clients/drcachesim/tools/filter/record_filter_create.h @@ -68,7 +68,7 @@ record_filter_tool_create(const std::string &output_dir, uint64_t stop_timestamp int cache_filter_size, const std::string &remove_trace_types, const std::string &remove_marker_types, uint64_t trim_before_timestamp, uint64_t trim_after_timestamp, - unsigned int verbose); + bool encoding_filter_enabled, unsigned int verbose); } // namespace drmemtrace } // namespace dynamorio diff --git a/clients/drcachesim/tools/record_filter_launcher.cpp b/clients/drcachesim/tools/record_filter_launcher.cpp index 746481d79d5..a2bf8c3188b 100644 --- a/clients/drcachesim/tools/record_filter_launcher.cpp +++ b/clients/drcachesim/tools/record_filter_launcher.cpp @@ -121,6 +121,10 @@ static droption_t op_trim_after_timestamp( "Removes all records from the first TRACE_MARKER_TYPE_TIMESTAMP marker with " "timestamp larger than the specified value."); +static droption_t op_encoding_filter_enabled( + DROPTION_SCOPE_FRONTEND, "encoding_filter_enabled", false, + "Enable converting the encoding of instructions to synthetic ISA DR_ISA_REGDEPS.", + "Enable converting the encoding of instructions to synthetic ISA DR_ISA_REGDEPS."); } // namespace int @@ -150,7 +154,8 @@ _tmain(int argc, const TCHAR *targv[]) op_output_dir.get_value(), op_stop_timestamp.get_value(), op_cache_filter_size.get_value(), op_remove_trace_types.get_value(), op_remove_marker_types.get_value(), op_trim_before_timestamp.get_value(), - op_trim_after_timestamp.get_value(), op_verbose.get_value())); + op_trim_after_timestamp.get_value(), op_encoding_filter_enabled.get_value(), + op_verbose.get_value())); std::vector tools; tools.push_back(record_filter.get()); diff --git a/clients/drcachesim/tracer/raw2trace_shared.h b/clients/drcachesim/tracer/raw2trace_shared.h index 1fa67a1e3f1..0ef467508cb 100644 --- a/clients/drcachesim/tracer/raw2trace_shared.h +++ b/clients/drcachesim/tracer/raw2trace_shared.h @@ -148,6 +148,11 @@ class memref_counter_t : public reader_t { { core_sharded_ = core_sharded; } + void + set_ignore_encoding_size_vs_instr_length_check(bool ignore) + { + ignore_encoding_size_vs_instr_length_check_ = ignore; + } private: bool saw_pid_ = false; From 8263e766a3ae9e26d0f5478f00fcc0e7060b553b Mon Sep 17 00:00:00 2001 From: edeiana Date: Thu, 25 Apr 2024 02:45:33 -0700 Subject: [PATCH 06/58] Removed unnecessary headers. --- clients/drcachesim/tools/filter/encoding_filter.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/clients/drcachesim/tools/filter/encoding_filter.h b/clients/drcachesim/tools/filter/encoding_filter.h index f902590c410..699a221ef87 100644 --- a/clients/drcachesim/tools/filter/encoding_filter.h +++ b/clients/drcachesim/tools/filter/encoding_filter.h @@ -33,8 +33,6 @@ #ifndef _ENCODING_FILTER_H_ #define _ENCODING_FILTER_H_ 1 -#include "utils.h" -#include "dr_api.h" // Must be before trace_entry.h from analysis_tool.h. #include "record_filter.h" #include "trace_entry.h" #include "utils.h" From 8a121b4a8bcdfd7ef3d95562c6f2ffdbb449d068 Mon Sep 17 00:00:00 2001 From: edeiana Date: Thu, 25 Apr 2024 02:45:52 -0700 Subject: [PATCH 07/58] We now use dynamorio encoding functionality, so we need to link it in record_filter_launcher. --- clients/drcachesim/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/clients/drcachesim/CMakeLists.txt b/clients/drcachesim/CMakeLists.txt index ca8715ac169..95d4c74314d 100644 --- a/clients/drcachesim/CMakeLists.txt +++ b/clients/drcachesim/CMakeLists.txt @@ -398,6 +398,7 @@ drmemtrace_raw2trace drcovlib_static drfrontendlib ${static_libc} ${zlib_libs}) add_dependencies(record_filter_launcher api_headers) append_property_list(TARGET record_filter_launcher COMPILE_DEFINITIONS "NO_HELPER_MAIN") use_DynamoRIO_extension(record_filter_launcher droption) +configure_DynamoRIO_static(record_filter_launcher) # We want to use test_helper's disable_popups() but we have _tmain and so do not want # the test_helper library's main symbol: so we compile ourselves and disable. From cedc3bc748cfb12ccbc76ea6c441eaaf7ba203c2 Mon Sep 17 00:00:00 2001 From: edeiana Date: Thu, 25 Apr 2024 03:03:34 -0700 Subject: [PATCH 08/58] Fixing static dynamorio linking to client not working on mac. --- clients/drcachesim/CMakeLists.txt | 4 +++- clients/drcachesim/tools/filter/encoding_filter.h | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/clients/drcachesim/CMakeLists.txt b/clients/drcachesim/CMakeLists.txt index 95d4c74314d..e09f8be0903 100644 --- a/clients/drcachesim/CMakeLists.txt +++ b/clients/drcachesim/CMakeLists.txt @@ -398,7 +398,9 @@ drmemtrace_raw2trace drcovlib_static drfrontendlib ${static_libc} ${zlib_libs}) add_dependencies(record_filter_launcher api_headers) append_property_list(TARGET record_filter_launcher COMPILE_DEFINITIONS "NO_HELPER_MAIN") use_DynamoRIO_extension(record_filter_launcher droption) -configure_DynamoRIO_static(record_filter_launcher) +if (NOT APPLE) + configure_DynamoRIO_static(record_filter_launcher) +endif() # We want to use test_helper's disable_popups() but we have _tmain and so do not want # the test_helper library's main symbol: so we compile ourselves and disable. diff --git a/clients/drcachesim/tools/filter/encoding_filter.h b/clients/drcachesim/tools/filter/encoding_filter.h index 699a221ef87..dca2aab910e 100644 --- a/clients/drcachesim/tools/filter/encoding_filter.h +++ b/clients/drcachesim/tools/filter/encoding_filter.h @@ -117,7 +117,7 @@ class encoding_filter_t : public record_filter_t::record_filter_func_t { /* Compute number of trace_entry_t to contain regdeps ISA encoding. * Each trace_entry_t record can contain 8 byte encoding. */ - size_t trace_entry_encoding_size = sizeof(entry.addr); /* == 8 */ + uint trace_entry_encoding_size = (uint)sizeof(entry.addr); /* == 8 */ uint regdeps_encoding_length = next_pc_regdeps - encoding_regdeps; uint num_regdeps_encoding_entries = ALIGN_FORWARD(regdeps_encoding_length, trace_entry_encoding_size) / From 064697f44c98a6178e662c8d447a9f130fa0e555 Mon Sep 17 00:00:00 2001 From: edeiana Date: Thu, 25 Apr 2024 03:07:52 -0700 Subject: [PATCH 09/58] Added TODO. --- clients/drcachesim/tools/filter/encoding_filter.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/clients/drcachesim/tools/filter/encoding_filter.h b/clients/drcachesim/tools/filter/encoding_filter.h index dca2aab910e..62f7792c18c 100644 --- a/clients/drcachesim/tools/filter/encoding_filter.h +++ b/clients/drcachesim/tools/filter/encoding_filter.h @@ -68,6 +68,10 @@ class encoding_filter_t : public record_filter_t::record_filter_func_t { parallel_shard_filter(trace_entry_t &entry, void *shard_data, std::vector &last_encoding) override { + /* TODO i#6662: modify trace_entry_t header entry to regdeps ISA, instead of the + * real ISA of the incoming trace. + */ + /* We have encoding to convert. * Normally the sequence of trace_entry_t(s) looks like: * [encoding,]+ instr_with_PC, [read | write]* From edb3d5565187ef8955a2213839c249ff6d2a512f Mon Sep 17 00:00:00 2001 From: edeiana Date: Thu, 25 Apr 2024 03:29:16 -0700 Subject: [PATCH 10/58] Fixed warning as error on windows. --- clients/drcachesim/tools/filter/encoding_filter.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/clients/drcachesim/tools/filter/encoding_filter.h b/clients/drcachesim/tools/filter/encoding_filter.h index 62f7792c18c..191bce9eef7 100644 --- a/clients/drcachesim/tools/filter/encoding_filter.h +++ b/clients/drcachesim/tools/filter/encoding_filter.h @@ -122,9 +122,9 @@ class encoding_filter_t : public record_filter_t::record_filter_func_t { * Each trace_entry_t record can contain 8 byte encoding. */ uint trace_entry_encoding_size = (uint)sizeof(entry.addr); /* == 8 */ - uint regdeps_encoding_length = next_pc_regdeps - encoding_regdeps; + uint regdeps_encoding_size = next_pc_regdeps - encoding_regdeps; uint num_regdeps_encoding_entries = - ALIGN_FORWARD(regdeps_encoding_length, trace_entry_encoding_size) / + ALIGN_FORWARD(regdeps_encoding_size, trace_entry_encoding_size) / trace_entry_encoding_size; last_encoding.resize(num_regdeps_encoding_entries); @@ -134,13 +134,14 @@ class encoding_filter_t : public record_filter_t::record_filter_func_t { uint regdeps_encoding_offset = 0; for (trace_entry_t &encoding_entry : last_encoding) { encoding_entry.type = TRACE_TYPE_ENCODING; - encoding_entry.size = regdeps_encoding_length < trace_entry_encoding_size - ? regdeps_encoding_length + uint size = regdeps_encoding_size < trace_entry_encoding_size + ? regdeps_encoding_size : trace_entry_encoding_size; + encoding_entry.size = (unsigned short)size; memset(encoding_entry.encoding, 0, trace_entry_encoding_size); memcpy(encoding_entry.encoding, encoding_regdeps + regdeps_encoding_offset, encoding_entry.size); - regdeps_encoding_length -= trace_entry_encoding_size; + regdeps_encoding_size -= trace_entry_encoding_size; regdeps_encoding_offset += encoding_entry.size; } } From 8825a91349da6f3126e6108f5537e107b8641fa2 Mon Sep 17 00:00:00 2001 From: edeiana Date: Thu, 25 Apr 2024 03:35:18 -0700 Subject: [PATCH 11/58] Minor comment improvement. --- clients/drcachesim/tools/filter/record_filter.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/clients/drcachesim/tools/filter/record_filter.cpp b/clients/drcachesim/tools/filter/record_filter.cpp index 9536ba114de..29ef7cc93f8 100644 --- a/clients/drcachesim/tools/filter/record_filter.cpp +++ b/clients/drcachesim/tools/filter/record_filter.cpp @@ -155,7 +155,8 @@ record_filter_tool_create(const std::string &output_dir, uint64_t stop_timestamp /* If we are changing the encoding of trace_entry_t, we will be generating * discrepancies between encoding size and instruction length. So, we need to tell * reader_t, which here comes in the form of memref_counter_t, to ignore such - * discrepancies. + * discrepancies. Note that simulators that deal with these filtered traces will + * also have to handle the fact that encoding_size != instruction_length. */ bool ignore_encoding_size_vs_instr_length_check = encoding_filter_enabled; return new dynamorio::drmemtrace::record_filter_t( From d4eb820868b264a8ed0b552fda3a505a24d5b884 Mon Sep 17 00:00:00 2001 From: edeiana Date: Thu, 25 Apr 2024 03:46:58 -0700 Subject: [PATCH 12/58] Another fix of warning as error on windows. --- clients/drcachesim/tools/filter/encoding_filter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clients/drcachesim/tools/filter/encoding_filter.h b/clients/drcachesim/tools/filter/encoding_filter.h index 191bce9eef7..e08df148a71 100644 --- a/clients/drcachesim/tools/filter/encoding_filter.h +++ b/clients/drcachesim/tools/filter/encoding_filter.h @@ -122,7 +122,7 @@ class encoding_filter_t : public record_filter_t::record_filter_func_t { * Each trace_entry_t record can contain 8 byte encoding. */ uint trace_entry_encoding_size = (uint)sizeof(entry.addr); /* == 8 */ - uint regdeps_encoding_size = next_pc_regdeps - encoding_regdeps; + uint regdeps_encoding_size = (uint)(next_pc_regdeps - encoding_regdeps); uint num_regdeps_encoding_entries = ALIGN_FORWARD(regdeps_encoding_size, trace_entry_encoding_size) / trace_entry_encoding_size; From 892462001b0abed50d9e9c73ba987579d12b1595 Mon Sep 17 00:00:00 2001 From: edeiana Date: Mon, 29 Apr 2024 15:25:52 -0700 Subject: [PATCH 13/58] Now modifying the file_type of the trace to regdeps ISA. --- clients/drcachesim/common/trace_entry.h | 24 ++++++++++++------- clients/drcachesim/reader/reader.cpp | 3 ++- .../tests/record_filter_unit_tests.cpp | 10 ++++---- .../drcachesim/tools/filter/encoding_filter.h | 22 +++++++++++++---- .../drcachesim/tools/filter/record_filter.cpp | 21 +++++++++------- .../drcachesim/tools/filter/record_filter.h | 13 +++++----- 6 files changed, 58 insertions(+), 35 deletions(-) diff --git a/clients/drcachesim/common/trace_entry.h b/clients/drcachesim/common/trace_entry.h index 49fd8528604..4dad6f12e7f 100644 --- a/clients/drcachesim/common/trace_entry.h +++ b/clients/drcachesim/common/trace_entry.h @@ -968,19 +968,27 @@ typedef enum { * Each trace shard represents one core and contains interleaved software threads. */ OFFLINE_FILE_TYPE_CORE_SHARDED = 0x10000, + /** + * Trace filtered by the record_filter tool using encodings2regdeps. + */ + OFFLINE_FILE_TYPE_ARCH_REGDEPS = 0x20000, } offline_file_type_t; static inline const char * trace_arch_string(offline_file_type_t type) { - return TESTANY(OFFLINE_FILE_TYPE_ARCH_AARCH64, type) - ? "aarch64" - : (TESTANY(OFFLINE_FILE_TYPE_ARCH_ARM32, type) - ? "arm" - : (TESTANY(OFFLINE_FILE_TYPE_ARCH_X86_32, type) - ? "i386" - : (TESTANY(OFFLINE_FILE_TYPE_ARCH_X86_64, type) ? "x86_64" - : "unspecified"))); + if (TESTANY(OFFLINE_FILE_TYPE_ARCH_AARCH64, type)) + return "aarch64"; + else if (TESTANY(OFFLINE_FILE_TYPE_ARCH_ARM32, type)) + return "arm"; + else if (TESTANY(OFFLINE_FILE_TYPE_ARCH_X86_32, type)) + return "i386"; + else if (TESTANY(OFFLINE_FILE_TYPE_ARCH_X86_64, type)) + return "x86_64"; + else if (TESTANY(OFFLINE_FILE_TYPE_ARCH_REGDEPS, type)) + return "regdeps"; + else + return "unspecified"; } /* We have non-client targets including this header that do not include API diff --git a/clients/drcachesim/reader/reader.cpp b/clients/drcachesim/reader/reader.cpp index 1b487cef8f7..5639a6dad94 100644 --- a/clients/drcachesim/reader/reader.cpp +++ b/clients/drcachesim/reader/reader.cpp @@ -210,7 +210,8 @@ reader_t::process_input_entry() ++cur_instr_count_; // Look for encoding bits that belong to this instr. if (last_encoding_.size > 0) { - if (!ignore_encoding_size_vs_instr_length_check_ && + + if (!TESTANY(OFFLINE_FILE_TYPE_ARCH_REGDEPS, filetype_) && (last_encoding_.size != cur_ref_.instr.size)) { ERRMSG( "Encoding size %zu != instr size %zu for PC 0x%zx at ord %" PRIu64 diff --git a/clients/drcachesim/tests/record_filter_unit_tests.cpp b/clients/drcachesim/tests/record_filter_unit_tests.cpp index 8977487bcb2..0e93b4fc150 100644 --- a/clients/drcachesim/tests/record_filter_unit_tests.cpp +++ b/clients/drcachesim/tests/record_filter_unit_tests.cpp @@ -90,8 +90,7 @@ class test_record_filter_t : public dynamorio::drmemtrace::record_filter_t { test_record_filter_t(std::vector> filters, uint64_t last_timestamp, bool write_archive = false) : record_filter_t("", std::move(filters), last_timestamp, - /*verbose=*/0, - /*ignore_encoding_size_vs_instr_length_check*/ false) + /*verbose=*/0) , write_archive_(write_archive) { } @@ -1029,10 +1028,9 @@ test_null_filter() // other entries are expected to stay. static constexpr uint64_t stop_timestamp_us = 1; auto record_filter = std::unique_ptr( - new dynamorio::drmemtrace::record_filter_t( - output_dir, std::move(filter_funcs), stop_timestamp_us, - /*verbosity=*/0, - /*ignore_encoding_size_vs_instr_length_check=*/false)); + new dynamorio::drmemtrace::record_filter_t(output_dir, std::move(filter_funcs), + stop_timestamp_us, + /*verbosity=*/0)); std::vector tools; tools.push_back(record_filter.get()); record_analyzer_t record_analyzer(op_trace_dir.get_value(), &tools[0], diff --git a/clients/drcachesim/tools/filter/encoding_filter.h b/clients/drcachesim/tools/filter/encoding_filter.h index e08df148a71..9a5fbfdacc7 100644 --- a/clients/drcachesim/tools/filter/encoding_filter.h +++ b/clients/drcachesim/tools/filter/encoding_filter.h @@ -68,15 +68,27 @@ class encoding_filter_t : public record_filter_t::record_filter_func_t { parallel_shard_filter(trace_entry_t &entry, void *shard_data, std::vector &last_encoding) override { - /* TODO i#6662: modify trace_entry_t header entry to regdeps ISA, instead of the - * real ISA of the incoming trace. + /* Modify file_type to regdeps ISA, removing the real ISA of the input trace. */ + trace_type_t entry_type = static_cast(entry.type); + if (entry_type == TRACE_TYPE_MARKER) { + trace_marker_type_t marker_type = + static_cast(entry.size); + if (marker_type == TRACE_MARKER_TYPE_FILETYPE) { + uint64_t marker_value = static_cast(entry.addr); + marker_value &= ~OFFLINE_FILE_TYPE_ARCH_ALL; + marker_value |= OFFLINE_FILE_TYPE_ARCH_REGDEPS; + entry.addr = static_cast(marker_value); + } + } /* We have encoding to convert. * Normally the sequence of trace_entry_t(s) looks like: - * [encoding,]+ instr_with_PC, [read | write]* - * (+ = one or more, * = zero or more) - * If we enter here, trace_entry_t is instr_with_PC. + * [TRACE_TYPE_ENCODING,]+ [TRACE_TYPE_MARKER.TRACE_MARKER_TYPE_BRANCH_TARGET,] + * TRACE_TYPE_INSTR_, [TRACE_TYPE_READ | TRACE_TYPE_WRITE]* + * ([] = zero or one, + = one or more, * = zero or more) + * If we enter here, trace_entry_t is some TRACE_TYPE_INSTR_ for which + * last_encoding already contains its encoding. */ if (is_any_instr_type(static_cast(entry.type)) && !last_encoding.empty()) { diff --git a/clients/drcachesim/tools/filter/record_filter.cpp b/clients/drcachesim/tools/filter/record_filter.cpp index 29ef7cc93f8..835d05cccfa 100644 --- a/clients/drcachesim/tools/filter/record_filter.cpp +++ b/clients/drcachesim/tools/filter/record_filter.cpp @@ -158,23 +158,28 @@ record_filter_tool_create(const std::string &output_dir, uint64_t stop_timestamp * discrepancies. Note that simulators that deal with these filtered traces will * also have to handle the fact that encoding_size != instruction_length. */ - bool ignore_encoding_size_vs_instr_length_check = encoding_filter_enabled; - return new dynamorio::drmemtrace::record_filter_t( - output_dir, std::move(filter_funcs), stop_timestamp, verbose, - ignore_encoding_size_vs_instr_length_check); + return new dynamorio::drmemtrace::record_filter_t(output_dir, std::move(filter_funcs), + stop_timestamp, verbose); } record_filter_t::record_filter_t( const std::string &output_dir, std::vector> filters, uint64_t stop_timestamp, - unsigned int verbose, bool ignore_encoding_size_vs_instr_length_check) + unsigned int verbose) : output_dir_(output_dir) , filters_(std::move(filters)) , stop_timestamp_(stop_timestamp) , verbosity_(verbose) - , ignore_encoding_size_vs_instr_length_check_( - ignore_encoding_size_vs_instr_length_check) { + /* Check if encodings2regdeps filter is present. + */ + encodings2regdeps_ = false; + for (auto &filter : filters) { + if (dynamic_cast(filter.get()) != nullptr) { + encodings2regdeps_ = true; + break; + } + } UNUSED(verbosity_); UNUSED(output_prefix_); } @@ -391,8 +396,6 @@ record_filter_t::parallel_shard_init_stream(int shard_index, void *worker_data, per_shard->input_entry_count = 0; per_shard->output_entry_count = 0; per_shard->tid = shard_stream->get_tid(); - per_shard->memref_counter.set_ignore_encoding_size_vs_instr_length_check( - ignore_encoding_size_vs_instr_length_check_); if (shard_type_ == SHARD_BY_CORE) { per_shard->memref_counter.set_core_sharded(true); } diff --git a/clients/drcachesim/tools/filter/record_filter.h b/clients/drcachesim/tools/filter/record_filter.h index 2d8707cb49b..8a1118ba65c 100644 --- a/clients/drcachesim/tools/filter/record_filter.h +++ b/clients/drcachesim/tools/filter/record_filter.h @@ -127,8 +127,7 @@ class record_filter_t : public record_analysis_tool_t { // stop_timestamp sets a point beyond which no filtering will occur. record_filter_t(const std::string &output_dir, std::vector> filters, - uint64_t stop_timestamp, unsigned int verbose, - bool ignore_encoding_size_vs_instr_length_check); + uint64_t stop_timestamp, unsigned int verbose); ~record_filter_t() override; bool process_memref(const trace_entry_t &entry) override; @@ -255,11 +254,13 @@ class record_filter_t : public record_analysis_tool_t { inline uint64_t add_to_filetype(uint64_t filetype) { - if (stop_timestamp_ != 0) { + if (stop_timestamp_ != 0) filetype |= OFFLINE_FILE_TYPE_BIMODAL_FILTERED_WARMUP; - } - if (shard_type_ == SHARD_BY_CORE) { + if (shard_type_ == SHARD_BY_CORE) filetype |= OFFLINE_FILE_TYPE_CORE_SHARDED; + if (encodings2regdeps_) { + filetype &= ~OFFLINE_FILE_TYPE_ARCH_ALL; + filetype |= OFFLINE_FILE_TYPE_ARCH_REGDEPS; } return filetype; } @@ -268,7 +269,7 @@ class record_filter_t : public record_analysis_tool_t { std::vector> filters_; uint64_t stop_timestamp_; unsigned int verbosity_; - bool ignore_encoding_size_vs_instr_length_check_; + bool encodings2regdeps_; const char *output_prefix_ = "[record_filter]"; // For core-sharded, but used for thread-sharded to simplify the code. std::mutex input2info_mutex_; From 9a8cd021b9c37c9c9d21850df03beb6b704b3d59 Mon Sep 17 00:00:00 2001 From: edeiana Date: Mon, 29 Apr 2024 15:34:40 -0700 Subject: [PATCH 14/58] Improved comments. Now checking return value after encode. --- clients/drcachesim/tools/filter/encoding_filter.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/clients/drcachesim/tools/filter/encoding_filter.h b/clients/drcachesim/tools/filter/encoding_filter.h index 9a5fbfdacc7..d90c3a25269 100644 --- a/clients/drcachesim/tools/filter/encoding_filter.h +++ b/clients/drcachesim/tools/filter/encoding_filter.h @@ -129,6 +129,12 @@ class encoding_filter_t : public record_filter_t::record_filter_func_t { encoding_regdeps[REGDEPS_MAX_ENCODING_LENGTH]; app_pc next_pc_regdeps = instr_encode(dcontext_.dcontext, &instr_regdeps, encoding_regdeps); + if (next_pc_regdeps == NULL) { + instr_free(dcontext_.dcontext, &instr); + error_string_ = + "Failed to encode regdeps instruction " + to_hex_string(entry.addr); + return false; + } /* Compute number of trace_entry_t to contain regdeps ISA encoding. * Each trace_entry_t record can contain 8 byte encoding. From f1fdd2c86bda45cb5adc4b7fc080929afb2e2467 Mon Sep 17 00:00:00 2001 From: edeiana Date: Mon, 29 Apr 2024 16:39:58 -0700 Subject: [PATCH 15/58] Moved is_any_instr_type() to trace_entry.h. --- clients/drcachesim/common/trace_entry.h | 11 +++++++++++ clients/drcachesim/tools/filter/record_filter.cpp | 7 ------- clients/drcachesim/tools/filter/record_filter.h | 5 ----- 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/clients/drcachesim/common/trace_entry.h b/clients/drcachesim/common/trace_entry.h index 4dad6f12e7f..019178e30fc 100644 --- a/clients/drcachesim/common/trace_entry.h +++ b/clients/drcachesim/common/trace_entry.h @@ -665,6 +665,17 @@ type_is_instr(const trace_type_t type) type == TRACE_TYPE_INSTR_UNTAKEN_JUMP; } +/** + * Returns whether the type represents an instruction. + * Includes TRACE_TYPE_INSTR_NO_FETCH and TRACE_TYPE_INSTR_MAYBE_FETCH. + */ +static inline bool +is_any_instr_type(const trace_type_t type) +{ + return type_is_instr(type) || type == TRACE_TYPE_INSTR_MAYBE_FETCH || + type == TRACE_TYPE_INSTR_NO_FETCH; +} + /** Returns whether the type represents the fetch of a branch instruction. */ static inline bool type_is_instr_branch(const trace_type_t type) diff --git a/clients/drcachesim/tools/filter/record_filter.cpp b/clients/drcachesim/tools/filter/record_filter.cpp index 835d05cccfa..9d92b6f8150 100644 --- a/clients/drcachesim/tools/filter/record_filter.cpp +++ b/clients/drcachesim/tools/filter/record_filter.cpp @@ -82,13 +82,6 @@ namespace dynamorio { namespace drmemtrace { -bool -is_any_instr_type(trace_type_t type) -{ - return type_is_instr(type) || type == TRACE_TYPE_INSTR_MAYBE_FETCH || - type == TRACE_TYPE_INSTR_NO_FETCH; -} - namespace { template diff --git a/clients/drcachesim/tools/filter/record_filter.h b/clients/drcachesim/tools/filter/record_filter.h index 8a1118ba65c..ea05551a856 100644 --- a/clients/drcachesim/tools/filter/record_filter.h +++ b/clients/drcachesim/tools/filter/record_filter.h @@ -53,11 +53,6 @@ namespace dynamorio { namespace drmemtrace { -/* Utility function. - */ -bool -is_any_instr_type(trace_type_t type); - /** * Analysis tool that filters the #trace_entry_t records of an offline * trace. Streams through each shard independenty and parallelly, and From 0ef52fa887f83b1c23aeebb69fd292ea46497717 Mon Sep 17 00:00:00 2001 From: edeiana Date: Mon, 29 Apr 2024 17:21:12 -0700 Subject: [PATCH 16/58] Refactoring: renaming of generic encoding_filter to more precise encodings2regdeps. --- clients/drcachesim/CMakeLists.txt | 2 +- clients/drcachesim/analyzer_multi.cpp | 2 +- clients/drcachesim/common/options.cpp | 7 ++++--- clients/drcachesim/common/options.h | 2 +- .../filter/{encoding_filter.h => encodings2regdeps.h} | 0 clients/drcachesim/tools/filter/record_filter.cpp | 6 +++--- clients/drcachesim/tools/filter/record_filter_create.h | 4 +++- clients/drcachesim/tools/record_filter_launcher.cpp | 9 +++++---- 8 files changed, 18 insertions(+), 14 deletions(-) rename clients/drcachesim/tools/filter/{encoding_filter.h => encodings2regdeps.h} (100%) diff --git a/clients/drcachesim/CMakeLists.txt b/clients/drcachesim/CMakeLists.txt index e09f8be0903..84bd4369344 100644 --- a/clients/drcachesim/CMakeLists.txt +++ b/clients/drcachesim/CMakeLists.txt @@ -196,7 +196,7 @@ add_exported_library(drmemtrace_record_filter STATIC tools/filter/cache_filter.h tools/filter/cache_filter.cpp tools/filter/type_filter.h - tools/filter/encoding_filter.h + tools/filter/encodings2regdeps.h tools/filter/null_filter.h) target_link_libraries(drmemtrace_record_filter drmemtrace_simulator) diff --git a/clients/drcachesim/analyzer_multi.cpp b/clients/drcachesim/analyzer_multi.cpp index 2eb0649eb95..045f05bfd6f 100644 --- a/clients/drcachesim/analyzer_multi.cpp +++ b/clients/drcachesim/analyzer_multi.cpp @@ -334,7 +334,7 @@ record_analyzer_multi_t::create_analysis_tool_from_options( op_outdir.get_value(), op_filter_stop_timestamp.get_value(), op_filter_cache_size.get_value(), op_filter_trace_types.get_value(), op_filter_marker_types.get_value(), op_trim_before_timestamp.get_value(), - op_trim_after_timestamp.get_value(), op_encoding_filter_enabled.get_value(), + op_trim_after_timestamp.get_value(), op_encodings2regdeps.get_value(), op_verbose.get_value()); } ERRMSG("Usage error: unsupported record analyzer type \"%s\". Only " RECORD_FILTER diff --git a/clients/drcachesim/common/options.cpp b/clients/drcachesim/common/options.cpp index cd170c16061..d64ccb70f00 100644 --- a/clients/drcachesim/common/options.cpp +++ b/clients/drcachesim/common/options.cpp @@ -972,10 +972,11 @@ droption_t "Comma-separated integers for marker types to remove. " "See trace_marker_type_t for the list of marker types."); -droption_t op_encoding_filter_enabled( - DROPTION_SCOPE_FRONTEND, "encoding_filter_enabled", false, +droption_t op_encodings2regdeps( + DROPTION_SCOPE_FRONTEND, "encodings2regdeps", false, "Enable converting the encoding of instructions to synthetic ISA DR_ISA_REGDEPS.", - "Enable converting the encoding of instructions to synthetic ISA DR_ISA_REGDEPS."); + "When present, converts the encoding of instructions from a real ISA to the " + "DR_ISA_REGDEPS synthetic ISA."); droption_t op_trim_before_timestamp( DROPTION_SCOPE_ALL, "trim_before_timestamp", 0, 0, diff --git a/clients/drcachesim/common/options.h b/clients/drcachesim/common/options.h index 1435491f091..132f974c018 100644 --- a/clients/drcachesim/common/options.h +++ b/clients/drcachesim/common/options.h @@ -214,7 +214,7 @@ extern dynamorio::droption::droption_t op_filter_stop_timestamp; extern dynamorio::droption::droption_t op_filter_cache_size; extern dynamorio::droption::droption_t op_filter_trace_types; extern dynamorio::droption::droption_t op_filter_marker_types; -extern dynamorio::droption::droption_t op_encoding_filter_enabled; +extern dynamorio::droption::droption_t op_encodings2regdeps; extern dynamorio::droption::droption_t op_trim_before_timestamp; extern dynamorio::droption::droption_t op_trim_after_timestamp; extern dynamorio::droption::droption_t op_abort_on_invariant_error; diff --git a/clients/drcachesim/tools/filter/encoding_filter.h b/clients/drcachesim/tools/filter/encodings2regdeps.h similarity index 100% rename from clients/drcachesim/tools/filter/encoding_filter.h rename to clients/drcachesim/tools/filter/encodings2regdeps.h diff --git a/clients/drcachesim/tools/filter/record_filter.cpp b/clients/drcachesim/tools/filter/record_filter.cpp index 9d92b6f8150..0a20357f9c4 100644 --- a/clients/drcachesim/tools/filter/record_filter.cpp +++ b/clients/drcachesim/tools/filter/record_filter.cpp @@ -60,7 +60,7 @@ #include "cache_filter.h" #include "trim_filter.h" #include "type_filter.h" -#include "encoding_filter.h" +#include "encodings2regdeps.h" #undef VPRINT #ifdef DEBUG @@ -107,7 +107,7 @@ record_filter_tool_create(const std::string &output_dir, uint64_t stop_timestamp int cache_filter_size, const std::string &remove_trace_types, const std::string &remove_marker_types, uint64_t trim_before_timestamp, uint64_t trim_after_timestamp, - bool encoding_filter_enabled, unsigned int verbose) + bool encodings2regdeps, unsigned int verbose) { std::vector< std::unique_ptr> @@ -137,7 +137,7 @@ record_filter_tool_create(const std::string &output_dir, uint64_t stop_timestamp new dynamorio::drmemtrace::trim_filter_t(trim_before_timestamp, trim_after_timestamp))); } - if (encoding_filter_enabled) { + if (encodings2regdeps) { filter_funcs.emplace_back( std::unique_ptr( new dynamorio::drmemtrace::encoding_filter_t())); diff --git a/clients/drcachesim/tools/filter/record_filter_create.h b/clients/drcachesim/tools/filter/record_filter_create.h index 5320d0fc8dd..1c38df93570 100644 --- a/clients/drcachesim/tools/filter/record_filter_create.h +++ b/clients/drcachesim/tools/filter/record_filter_create.h @@ -62,13 +62,15 @@ namespace drmemtrace { * up to its first timestamp whose value is greater or equal to this parameter. * @param[in] trim_after_timestamp Trim records after the trace's first timestamp * whose value is greater than this parameter. + * @param[in] encodings2regdeps If true, converts instruction encodings from the real ISA + * of the input trace to the #DR_ISA_REGDEPS synthetic ISA. */ record_analysis_tool_t * record_filter_tool_create(const std::string &output_dir, uint64_t stop_timestamp, int cache_filter_size, const std::string &remove_trace_types, const std::string &remove_marker_types, uint64_t trim_before_timestamp, uint64_t trim_after_timestamp, - bool encoding_filter_enabled, unsigned int verbose); + bool encodings2regdeps, unsigned int verbose); } // namespace drmemtrace } // namespace dynamorio diff --git a/clients/drcachesim/tools/record_filter_launcher.cpp b/clients/drcachesim/tools/record_filter_launcher.cpp index a2bf8c3188b..fb877b2270e 100644 --- a/clients/drcachesim/tools/record_filter_launcher.cpp +++ b/clients/drcachesim/tools/record_filter_launcher.cpp @@ -121,10 +121,11 @@ static droption_t op_trim_after_timestamp( "Removes all records from the first TRACE_MARKER_TYPE_TIMESTAMP marker with " "timestamp larger than the specified value."); -static droption_t op_encoding_filter_enabled( - DROPTION_SCOPE_FRONTEND, "encoding_filter_enabled", false, +droption_t op_encodings2regdeps( + DROPTION_SCOPE_FRONTEND, "encodings2regdeps", false, "Enable converting the encoding of instructions to synthetic ISA DR_ISA_REGDEPS.", - "Enable converting the encoding of instructions to synthetic ISA DR_ISA_REGDEPS."); + "When present, converts the encoding of instructions from a real ISA to the " + "DR_ISA_REGDEPS synthetic ISA."); } // namespace int @@ -154,7 +155,7 @@ _tmain(int argc, const TCHAR *targv[]) op_output_dir.get_value(), op_stop_timestamp.get_value(), op_cache_filter_size.get_value(), op_remove_trace_types.get_value(), op_remove_marker_types.get_value(), op_trim_before_timestamp.get_value(), - op_trim_after_timestamp.get_value(), op_encoding_filter_enabled.get_value(), + op_trim_after_timestamp.get_value(), op_encodings2regdeps.get_value(), op_verbose.get_value())); std::vector tools; tools.push_back(record_filter.get()); From 09c4d15d8c0b952e0ea0390343cc14fae8cabfdf Mon Sep 17 00:00:00 2001 From: edeiana Date: Mon, 29 Apr 2024 17:26:20 -0700 Subject: [PATCH 17/58] Renaming class encoding_filter_t to encodings2regdeps_t. --- clients/drcachesim/tools/filter/encodings2regdeps.h | 4 ++-- clients/drcachesim/tools/filter/record_filter.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/clients/drcachesim/tools/filter/encodings2regdeps.h b/clients/drcachesim/tools/filter/encodings2regdeps.h index d90c3a25269..47c5dfe6462 100644 --- a/clients/drcachesim/tools/filter/encodings2regdeps.h +++ b/clients/drcachesim/tools/filter/encodings2regdeps.h @@ -50,9 +50,9 @@ namespace dynamorio { namespace drmemtrace { -class encoding_filter_t : public record_filter_t::record_filter_func_t { +class encodings2regdeps_t : public record_filter_t::record_filter_func_t { public: - encoding_filter_t() + encodings2regdeps_t() { } diff --git a/clients/drcachesim/tools/filter/record_filter.cpp b/clients/drcachesim/tools/filter/record_filter.cpp index 0a20357f9c4..8ccfdbc9222 100644 --- a/clients/drcachesim/tools/filter/record_filter.cpp +++ b/clients/drcachesim/tools/filter/record_filter.cpp @@ -140,7 +140,7 @@ record_filter_tool_create(const std::string &output_dir, uint64_t stop_timestamp if (encodings2regdeps) { filter_funcs.emplace_back( std::unique_ptr( - new dynamorio::drmemtrace::encoding_filter_t())); + new dynamorio::drmemtrace::encodings2regdeps_t())); } // TODO i#5675: Add other filters. @@ -168,7 +168,7 @@ record_filter_t::record_filter_t( */ encodings2regdeps_ = false; for (auto &filter : filters) { - if (dynamic_cast(filter.get()) != nullptr) { + if (dynamic_cast(filter.get()) != nullptr) { encodings2regdeps_ = true; break; } From 96d0925be48654988c9ea285d74e4beba1aa3b27 Mon Sep 17 00:00:00 2001 From: edeiana Date: Tue, 30 Apr 2024 01:43:52 -0700 Subject: [PATCH 18/58] Fixed memory leak. --- clients/drcachesim/tools/filter/encodings2regdeps.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/clients/drcachesim/tools/filter/encodings2regdeps.h b/clients/drcachesim/tools/filter/encodings2regdeps.h index 47c5dfe6462..0047b75bea5 100644 --- a/clients/drcachesim/tools/filter/encodings2regdeps.h +++ b/clients/drcachesim/tools/filter/encodings2regdeps.h @@ -122,6 +122,7 @@ class encodings2regdeps_t : public record_filter_t::record_filter_func_t { instr_t instr_regdeps; instr_init(dcontext_.dcontext, &instr_regdeps); instr_convert_to_isa_regdeps(dcontext_.dcontext, &instr, &instr_regdeps); + instr_free(dcontext_.dcontext, &instr); /* Obtain regdeps ISA instr_t encoding bytes. */ @@ -129,8 +130,8 @@ class encodings2regdeps_t : public record_filter_t::record_filter_func_t { encoding_regdeps[REGDEPS_MAX_ENCODING_LENGTH]; app_pc next_pc_regdeps = instr_encode(dcontext_.dcontext, &instr_regdeps, encoding_regdeps); + instr_free(dcontext_.dcontext, &instr_regdeps); if (next_pc_regdeps == NULL) { - instr_free(dcontext_.dcontext, &instr); error_string_ = "Failed to encode regdeps instruction " + to_hex_string(entry.addr); return false; From 3ebaf3979f272187f28179c9d260f9510e34ebe9 Mon Sep 17 00:00:00 2001 From: edeiana Date: Tue, 30 Apr 2024 01:44:08 -0700 Subject: [PATCH 19/58] Added encodings2regdeps test (note: record_filter is tested on x86_64 only for various reasons). --- .../tests/record_filter_unit_tests.cpp | 139 +++++++++++++++++- 1 file changed, 134 insertions(+), 5 deletions(-) diff --git a/clients/drcachesim/tests/record_filter_unit_tests.cpp b/clients/drcachesim/tests/record_filter_unit_tests.cpp index 0e93b4fc150..443160645b2 100644 --- a/clients/drcachesim/tests/record_filter_unit_tests.cpp +++ b/clients/drcachesim/tests/record_filter_unit_tests.cpp @@ -42,6 +42,7 @@ #include "tools/filter/record_filter.h" #include "tools/filter/trim_filter.h" #include "tools/filter/type_filter.h" +#include "tools/filter/encodings2regdeps.h" #include "trace_entry.h" #include "zipfile_ostream.h" @@ -171,6 +172,17 @@ struct test_case_t { std::vector output; }; +struct test_entries_t { + /* Represents entry in the input trace. + */ + trace_entry_t entry_test; + /* Represents corresponding expected entry in the output trace. + * XXX i#6662: consider making this into a std::vector to test cases + * where DR_ISA_REGDEPS encoding is longer or shorter than entry_test. + */ + trace_entry_t entry_expected; +}; + static bool local_create_dir(const char *dir) { @@ -288,6 +300,123 @@ process_entries_and_check_result(test_record_filter_t *record_filter, return true; } +bool +process_entries_and_check_result_against_ground_truth( + test_record_filter_t *record_filter, const std::vector &entries) +{ + auto stream = std::unique_ptr(new local_stream_t()); + void *shard_data = + record_filter->parallel_shard_init_stream(0, nullptr, stream.get()); + if (!*record_filter) { + fprintf(stderr, "Filtering init failed: %s\n", + record_filter->get_error_string().c_str()); + return false; + } + // Process each trace entry. + for (int i = 0; i < static_cast(entries.size()); ++i) { + // We need to emulate the stream for the tool. + if (entries[i].entry_test.type == TRACE_TYPE_MARKER && + entries[i].entry_test.size == TRACE_MARKER_TYPE_TIMESTAMP) + stream->set_last_timestamp(entries[i].entry_test.addr); + if (!record_filter->parallel_shard_memref(shard_data, entries[i].entry_test)) { + fprintf(stderr, "Filtering failed on entry %d: %s\n", i, + record_filter->parallel_shard_error(shard_data).c_str()); + return false; + } + } + if (!record_filter->parallel_shard_exit(shard_data) || !*record_filter) { + fprintf(stderr, "Filtering exit failed\n"); + return false; + } + if (!record_filter->print_results()) { + fprintf(stderr, "Filtering results failed\n"); + return false; + } + + std::vector filtered = record_filter->get_output_entries(); + // Verbose output for easier debugging. + fprintf(stderr, "Input:\n"); + for (int i = 0; i < static_cast(entries.size()); ++i) { + fprintf(stderr, " %d: ", i); + print_entry(entries[i].entry_test); + fprintf(stderr, "\n"); + } + fprintf(stderr, "Output:\n"); + for (int i = 0; i < static_cast(filtered.size()); ++i) { + fprintf(stderr, " %d: ", i); + print_entry(filtered[i]); + fprintf(stderr, "\n"); + } + // Check filtered output entries. + for (int i = 0; i < static_cast(entries.size()); ++i) { + if (memcmp(&filtered[i], &entries[i].entry_expected, sizeof(trace_entry_t)) != + 0) { + fprintf(stderr, "Wrong filter result for at pos=%d. Expected: ", i); + print_entry(entries[i].entry_expected); + fprintf(stderr, ", got: "); + print_entry(filtered[i]); + fprintf(stderr, "\n"); + return false; + } + } + return true; +} + +static bool +test_encodings2regdeps_filter() +{ + std::vector entries = { + // Trace shard header. + { { TRACE_TYPE_HEADER, 0, { 0x1 } }, { TRACE_TYPE_HEADER, 0, { 0x1 } } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_VERSION, { 0x2 } }, + { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_VERSION, { 0x2 } } }, + // File type, also modified by the record_filter encodings2regdeps. + { { TRACE_TYPE_MARKER, + TRACE_MARKER_TYPE_FILETYPE, + { OFFLINE_FILE_TYPE_ARCH_X86_64 | OFFLINE_FILE_TYPE_ENCODINGS | + OFFLINE_FILE_TYPE_SYSCALL_NUMBERS | OFFLINE_FILE_TYPE_BLOCKING_SYSCALLS } }, + { TRACE_TYPE_MARKER, + TRACE_MARKER_TYPE_FILETYPE, + { OFFLINE_FILE_TYPE_ARCH_REGDEPS | OFFLINE_FILE_TYPE_ENCODINGS | + OFFLINE_FILE_TYPE_SYSCALL_NUMBERS | + OFFLINE_FILE_TYPE_BLOCKING_SYSCALLS } } }, + { { TRACE_TYPE_THREAD, 0, { 0x4 } }, { TRACE_TYPE_THREAD, 0, { 0x4 } } }, + { { TRACE_TYPE_PID, 0, { 0x5 } }, { TRACE_TYPE_PID, 0, { 0x5 } } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CACHE_LINE_SIZE, { 0x6 } }, + { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CACHE_LINE_SIZE, { 0x6 } } }, + // Unit header. + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 0x7 } }, + { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 0x7 } } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 0x8 } }, + { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 0x8 } } }, + { { TRACE_TYPE_ENCODING, 4, { 0xe78948 } }, + { TRACE_TYPE_ENCODING, 8, { 0x0006090600010011 } } }, + { { TRACE_TYPE_INSTR, 3, { 0x7f6fdd3ec360 } }, + { TRACE_TYPE_INSTR, 3, { 0x7f6fdd3ec360 } } }, + // Trace shard footer. + { { TRACE_TYPE_FOOTER, 0, { 0x0 } }, { TRACE_TYPE_FOOTER, 0, { 0x0 } } }, + }; + + // Construct record_filter_t. + std::vector> filters; + auto encodings2regdeps_filter = std::unique_ptr( + new dynamorio::drmemtrace::encodings2regdeps_t()); + if (encodings2regdeps_filter->get_error_string() != "") { + fprintf(stderr, "Couldn't construct a encodings2regdeps_filter %s", + encodings2regdeps_filter->get_error_string().c_str()); + return false; + } + filters.push_back(std::move(encodings2regdeps_filter)); + auto record_filter = std::unique_ptr( + new test_record_filter_t(std::move(filters), 0)); + if (!process_entries_and_check_result_against_ground_truth(record_filter.get(), + entries)) + return false; + + fprintf(stderr, "test_encodings2regdeps_filter passed\n"); + return true; +} + static bool test_cache_and_type_filter() { @@ -1022,10 +1151,10 @@ test_null_filter() std::unique_ptr(new dynamorio::drmemtrace::null_filter_t()); std::vector> filter_funcs; filter_funcs.push_back(std::move(null_filter)); - // We use a very small stop_timestamp for the record filter. This is to verify that - // we emit the TRACE_MARKER_TYPE_FILTER_ENDPOINT marker for each thread even if it - // starts after the given stop_timestamp. Since the stop_timestamp is so small, all - // other entries are expected to stay. + // We use a very small stop_timestamp for the record filter. This is to verify + // that we emit the TRACE_MARKER_TYPE_FILTER_ENDPOINT marker for each thread even + // if it starts after the given stop_timestamp. Since the stop_timestamp is so + // small, all other entries are expected to stay. static constexpr uint64_t stop_timestamp_us = 1; auto record_filter = std::unique_ptr( new dynamorio::drmemtrace::record_filter_t(output_dir, std::move(filter_funcs), @@ -1107,7 +1236,7 @@ test_main(int argc, const char *argv[]) droption_parser_t::usage_short(DROPTION_SCOPE_ALL).c_str()); } if (!test_cache_and_type_filter() || !test_chunk_update() || !test_trim_filter() || - !test_null_filter() || !test_wait_filter()) + !test_null_filter() || !test_wait_filter() || !test_encodings2regdeps_filter()) return 1; fprintf(stderr, "All done!\n"); return 0; From bfbe0228c742c3dea4eccfd83d3c8d4728ef7547 Mon Sep 17 00:00:00 2001 From: edeiana Date: Tue, 30 Apr 2024 04:22:03 -0700 Subject: [PATCH 20/58] Fixed opcode_mix analyzer to work with DR_ISA_REGDEPS traces. --- clients/drcachesim/tools/opcode_mix.cpp | 37 +++++++++++++++++++------ 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/clients/drcachesim/tools/opcode_mix.cpp b/clients/drcachesim/tools/opcode_mix.cpp index 995c810a578..5231e40e50f 100644 --- a/clients/drcachesim/tools/opcode_mix.cpp +++ b/clients/drcachesim/tools/opcode_mix.cpp @@ -54,6 +54,7 @@ #include "analysis_tool.h" #include "dr_api.h" +#include "dr_ir_encode.h" #include "memref.h" #include "raw2trace.h" #include "raw2trace_directory.h" @@ -165,6 +166,8 @@ opcode_mix_t::parallel_shard_memref(void *shard_data, const memref_t &memref) " but tool built for " + trace_arch_string(build_target_arch_type()); return false; } + if (TESTANY(OFFLINE_FILE_TYPE_ARCH_REGDEPS, memref.marker.marker_value)) + dr_set_isa_mode(dcontext_.dcontext, DR_ISA_REGDEPS, nullptr); } else if (memref.marker.type == TRACE_TYPE_MARKER && memref.marker.marker_type == TRACE_MARKER_TYPE_VECTOR_LENGTH) { #ifdef AARCH64 @@ -232,12 +235,19 @@ opcode_mix_t::parallel_shard_memref(void *shard_data, const memref_t &memref) instr_init(dcontext_.dcontext, &instr); app_pc next_pc = decode_from_copy(dcontext_.dcontext, decode_pc, trace_pc, &instr); - if (next_pc == NULL || !instr_valid(&instr)) { + if (next_pc == NULL) { instr_free(dcontext_.dcontext, &instr); shard->error = "Failed to decode instruction " + to_hex_string(memref.instr.addr); return false; } + if (!TESTANY(OFFLINE_FILE_TYPE_ARCH_REGDEPS, shard->filetype) && + !instr_valid(&instr)) { + instr_free(dcontext_.dcontext, &instr); + shard->error = "Failed to decode instruction (invalid) " + + to_hex_string(memref.instr.addr); + return false; + } opcode = instr_get_opcode(&instr); category = instr_get_category(&instr); shard->worker->opcode_data_cache[trace_pc] = opcode_data_t(opcode, category); @@ -316,6 +326,7 @@ opcode_mix_t::print_results() for (const auto &keyvals : shard.second->category_counts) { total.category_counts[keyvals.first] += keyvals.second; } + total.filetype = shard.second->filetype; } } std::cerr << TOOL_NAME << " results:\n"; @@ -323,9 +334,19 @@ opcode_mix_t::print_results() std::vector> sorted(total.opcode_counts.begin(), total.opcode_counts.end()); std::sort(sorted.begin(), sorted.end(), cmp_val); - for (const auto &keyvals : sorted) { - std::cerr << std::setw(15) << keyvals.second << " : " << std::setw(9) - << decode_opcode_name(keyvals.first) << "\n"; + if (TESTANY(OFFLINE_FILE_TYPE_ARCH_REGDEPS, total.filetype)) { + /* There should be only one opcode: OP_INVALID. + */ + if (sorted.size() != 1) + return false; + + std::cerr << std::setw(15) << sorted[0].second << " : " << std::setw(9) + << "invalid" << "\n"; + } else { + for (const auto &keyvals : sorted) { + std::cerr << std::setw(15) << keyvals.second << " : " << std::setw(9) + << decode_opcode_name(keyvals.first) << "\n"; + } } std::cerr << "\n"; std::cerr << std::setw(15) << total.category_counts.size() @@ -412,16 +433,16 @@ opcode_mix_t::print_interval_results( const auto *snap = reinterpret_cast(base_snap); std::cerr << "ID:" << snap->get_interval_id() << " ending at instruction " << snap->get_instr_count_cumulative() << " has " - << snap->opcode_counts_.size() << " opcodes" - << " and " << snap->category_counts_.size() << " categories.\n"; + << snap->opcode_counts_.size() << " opcodes" << " and " + << snap->category_counts_.size() << " categories.\n"; std::vector> sorted(snap->opcode_counts_.begin(), snap->opcode_counts_.end()); std::sort(sorted.begin(), sorted.end(), cmp_val); for (int i = 0; i < PRINT_TOP_N && i < static_cast(sorted.size()); ++i) { std::cerr << " [" << i + 1 << "]" << " Opcode: " << decode_opcode_name(sorted[i].first) << " (" - << sorted[i].first << ")" - << " Count=" << sorted[i].second << " PKI=" + << sorted[i].first << ")" << " Count=" << sorted[i].second + << " PKI=" << sorted[i].second * 1000.0 / snap->get_instr_count_delta() << "\n"; } From 6950c7b7a1171a7898572d8c140138d0ba2305ff Mon Sep 17 00:00:00 2001 From: edeiana Date: Tue, 30 Apr 2024 04:22:42 -0700 Subject: [PATCH 21/58] Removed unnecessary space. --- clients/drcachesim/reader/reader.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/clients/drcachesim/reader/reader.cpp b/clients/drcachesim/reader/reader.cpp index 5639a6dad94..a2eb017ec2c 100644 --- a/clients/drcachesim/reader/reader.cpp +++ b/clients/drcachesim/reader/reader.cpp @@ -210,7 +210,6 @@ reader_t::process_input_entry() ++cur_instr_count_; // Look for encoding bits that belong to this instr. if (last_encoding_.size > 0) { - if (!TESTANY(OFFLINE_FILE_TYPE_ARCH_REGDEPS, filetype_) && (last_encoding_.size != cur_ref_.instr.size)) { ERRMSG( From 241819c7ee5015b732c458120be5d1687d055675 Mon Sep 17 00:00:00 2001 From: edeiana Date: Tue, 30 Apr 2024 04:36:02 -0700 Subject: [PATCH 22/58] Indentation fix. --- clients/drcachesim/CMakeLists.txt | 2 +- suite/tests/CMakeLists.txt | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/clients/drcachesim/CMakeLists.txt b/clients/drcachesim/CMakeLists.txt index 84bd4369344..2d3d26d53e0 100644 --- a/clients/drcachesim/CMakeLists.txt +++ b/clients/drcachesim/CMakeLists.txt @@ -394,7 +394,7 @@ add_executable(record_filter_launcher tools/record_filter_launcher.cpp tests/test_helpers.cpp) target_link_libraries(record_filter_launcher drmemtrace_analyzer drmemtrace_record_filter -drmemtrace_raw2trace drcovlib_static drfrontendlib ${static_libc} ${zlib_libs}) + drmemtrace_raw2trace drcovlib_static drfrontendlib ${static_libc} ${zlib_libs}) add_dependencies(record_filter_launcher api_headers) append_property_list(TARGET record_filter_launcher COMPILE_DEFINITIONS "NO_HELPER_MAIN") use_DynamoRIO_extension(record_filter_launcher droption) diff --git a/suite/tests/CMakeLists.txt b/suite/tests/CMakeLists.txt index 77723d43877..b761e9d4879 100644 --- a/suite/tests/CMakeLists.txt +++ b/suite/tests/CMakeLists.txt @@ -4674,6 +4674,17 @@ if (BUILD_CLIENTS) # it checks encodings). "opcode_mix") + if (X86 AND X64 AND ZLIB_FOUND) + set(testname "tool.record_filter_encodings2regdeps") + torun_record_filter("${testname}" ${ci_shared_app} + "record_filter_encodings2regdeps" + # We assume the app name starts with "s" here to avoid colliding with + # our output dir, while still letting the single precmd remove both. + "${drcachesim_path}@-simulator_type@record_filter@-encodings2regdeps@-indir@${testname}.s*.dir/trace@-core_sharded@-cores@4@-outdir@${testname}.filtered.dir" + # We run the opcode_mix analyzer to test econdings2regdeps filtered traces. + "opcode_mix") + endif () + if (UNIX) # Windows multi-thread tests are too slow. set(testname "tool.record_filter_bycore_multi") torun_record_filter("${testname}" pthreads.ptsig From 33e3bf02e51a7ae235682300ce0c67c663edbdf1 Mon Sep 17 00:00:00 2001 From: edeiana Date: Tue, 30 Apr 2024 04:41:25 -0700 Subject: [PATCH 23/58] Formatting fixed. --- clients/drcachesim/tools/opcode_mix.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/clients/drcachesim/tools/opcode_mix.cpp b/clients/drcachesim/tools/opcode_mix.cpp index 5231e40e50f..21298db53c5 100644 --- a/clients/drcachesim/tools/opcode_mix.cpp +++ b/clients/drcachesim/tools/opcode_mix.cpp @@ -433,16 +433,16 @@ opcode_mix_t::print_interval_results( const auto *snap = reinterpret_cast(base_snap); std::cerr << "ID:" << snap->get_interval_id() << " ending at instruction " << snap->get_instr_count_cumulative() << " has " - << snap->opcode_counts_.size() << " opcodes" << " and " - << snap->category_counts_.size() << " categories.\n"; + << snap->opcode_counts_.size() << " opcodes" + << " and " << snap->category_counts_.size() << " categories.\n"; std::vector> sorted(snap->opcode_counts_.begin(), snap->opcode_counts_.end()); std::sort(sorted.begin(), sorted.end(), cmp_val); for (int i = 0; i < PRINT_TOP_N && i < static_cast(sorted.size()); ++i) { std::cerr << " [" << i + 1 << "]" << " Opcode: " << decode_opcode_name(sorted[i].first) << " (" - << sorted[i].first << ")" << " Count=" << sorted[i].second - << " PKI=" + << sorted[i].first << ")" + << " Count=" << sorted[i].second << " PKI=" << sorted[i].second * 1000.0 / snap->get_instr_count_delta() << "\n"; } From b61c5b7c1457b009d092c835ae5636329cd89e9e Mon Sep 17 00:00:00 2001 From: edeiana Date: Tue, 30 Apr 2024 04:49:01 -0700 Subject: [PATCH 24/58] Code cleanup. --- clients/drcachesim/tracer/raw2trace_shared.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/clients/drcachesim/tracer/raw2trace_shared.h b/clients/drcachesim/tracer/raw2trace_shared.h index 0ef467508cb..1fa67a1e3f1 100644 --- a/clients/drcachesim/tracer/raw2trace_shared.h +++ b/clients/drcachesim/tracer/raw2trace_shared.h @@ -148,11 +148,6 @@ class memref_counter_t : public reader_t { { core_sharded_ = core_sharded; } - void - set_ignore_encoding_size_vs_instr_length_check(bool ignore) - { - ignore_encoding_size_vs_instr_length_check_ = ignore; - } private: bool saw_pid_ = false; From 05abd7ec99152ab4f4a50fe3b9da0ba6d4699111 Mon Sep 17 00:00:00 2001 From: edeiana Date: Tue, 30 Apr 2024 04:55:43 -0700 Subject: [PATCH 25/58] clang-format pass. --- clients/drcachesim/tools/opcode_mix.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/clients/drcachesim/tools/opcode_mix.cpp b/clients/drcachesim/tools/opcode_mix.cpp index 21298db53c5..02ea5151660 100644 --- a/clients/drcachesim/tools/opcode_mix.cpp +++ b/clients/drcachesim/tools/opcode_mix.cpp @@ -341,7 +341,8 @@ opcode_mix_t::print_results() return false; std::cerr << std::setw(15) << sorted[0].second << " : " << std::setw(9) - << "invalid" << "\n"; + << "invalid" + << "\n"; } else { for (const auto &keyvals : sorted) { std::cerr << std::setw(15) << keyvals.second << " : " << std::setw(9) From 454aac22b36beacd6f078c8cadad4821b677c664 Mon Sep 17 00:00:00 2001 From: edeiana Date: Tue, 30 Apr 2024 04:55:58 -0700 Subject: [PATCH 26/58] Templatex for new encodings2regdeps test. To fix. --- .../tests/record_filter_encodings2regdeps.templatex | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 clients/drcachesim/tests/record_filter_encodings2regdeps.templatex diff --git a/clients/drcachesim/tests/record_filter_encodings2regdeps.templatex b/clients/drcachesim/tests/record_filter_encodings2regdeps.templatex new file mode 100644 index 00000000000..e9e8f316122 --- /dev/null +++ b/clients/drcachesim/tests/record_filter_encodings2regdeps.templatex @@ -0,0 +1,10 @@ +Output .* entries from .* entries. +Schedule stats tool results: +.* +Core #0 schedule: .* +Core #1 schedule: .* +Core #2 schedule: .* +Core #3 schedule: .* +Core #4 schedule: .* +Core #5 schedule: .* +Core #6 schedule: .* From 84fa4803bc607dee62462d0b6427f056e344e7b8 Mon Sep 17 00:00:00 2001 From: edeiana Date: Tue, 30 Apr 2024 05:10:16 -0700 Subject: [PATCH 27/58] Improved comments. Removed unnecessary class attribute in record_filter. --- clients/drcachesim/reader/reader.h | 1 - clients/drcachesim/tools/filter/record_filter.cpp | 14 ++++++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/clients/drcachesim/reader/reader.h b/clients/drcachesim/reader/reader.h index 81b289ff3c2..a10f614d848 100644 --- a/clients/drcachesim/reader/reader.h +++ b/clients/drcachesim/reader/reader.h @@ -276,7 +276,6 @@ class reader_t : public std::iterator, // some thread-based checks may not apply. bool core_sharded_ = false; bool found_filetype_ = false; - bool ignore_encoding_size_vs_instr_length_check_ = false; private: memref_t cur_ref_; diff --git a/clients/drcachesim/tools/filter/record_filter.cpp b/clients/drcachesim/tools/filter/record_filter.cpp index 8ccfdbc9222..4f45eec9c7f 100644 --- a/clients/drcachesim/tools/filter/record_filter.cpp +++ b/clients/drcachesim/tools/filter/record_filter.cpp @@ -138,6 +138,14 @@ record_filter_tool_create(const std::string &output_dir, uint64_t stop_timestamp trim_after_timestamp))); } if (encodings2regdeps) { + /* If we are changing the encoding of trace_entry_t with encodings2regdeps, we + * will be generating discrepancies between encoding size and instruction length. + * So, we need to tell reader_t, which here comes in the form of memref_counter_t, + * to ignore such discrepancies. We do so by adding OFFLINE_FILE_TYPE_ARCH_REGDEPS + * to the file type of the filtered trace. Note that simulators that deal with + * these filtered traces will also have to handle the fact that + * encoding_size != instruction_length. + */ filter_funcs.emplace_back( std::unique_ptr( new dynamorio::drmemtrace::encodings2regdeps_t())); @@ -145,12 +153,6 @@ record_filter_tool_create(const std::string &output_dir, uint64_t stop_timestamp // TODO i#5675: Add other filters. - /* If we are changing the encoding of trace_entry_t, we will be generating - * discrepancies between encoding size and instruction length. So, we need to tell - * reader_t, which here comes in the form of memref_counter_t, to ignore such - * discrepancies. Note that simulators that deal with these filtered traces will - * also have to handle the fact that encoding_size != instruction_length. - */ return new dynamorio::drmemtrace::record_filter_t(output_dir, std::move(filter_funcs), stop_timestamp, verbose); } From 4c7c201150fd544fe6ca13efaba297ff548cd1b6 Mon Sep 17 00:00:00 2001 From: edeiana Date: Wed, 1 May 2024 03:08:19 -0700 Subject: [PATCH 28/58] Added OP_UNDECODED and interface between record_filter and its filters. --- .../record_filter_encodings2regdeps.templatex | 17 +++---- .../tests/record_filter_unit_tests.cpp | 5 +- .../drcachesim/tools/filter/cache_filter.cpp | 5 +- .../drcachesim/tools/filter/cache_filter.h | 5 +- .../tools/filter/encodings2regdeps.h | 26 +++++++---- clients/drcachesim/tools/filter/null_filter.h | 5 +- .../drcachesim/tools/filter/record_filter.cpp | 46 ++++++++++--------- .../drcachesim/tools/filter/record_filter.h | 31 +++++++++++-- clients/drcachesim/tools/filter/trim_filter.h | 5 +- clients/drcachesim/tools/filter/type_filter.h | 5 +- clients/drcachesim/tools/opcode_mix.cpp | 29 +++--------- core/ir/arm/table_encode.c | 4 +- core/ir/instr_shared.c | 7 +++ core/ir/isa_regdeps/decode.c | 6 +++ core/ir/x86/decode_table.c | 2 +- 15 files changed, 120 insertions(+), 78 deletions(-) diff --git a/clients/drcachesim/tests/record_filter_encodings2regdeps.templatex b/clients/drcachesim/tests/record_filter_encodings2regdeps.templatex index e9e8f316122..a9271341d91 100644 --- a/clients/drcachesim/tests/record_filter_encodings2regdeps.templatex +++ b/clients/drcachesim/tests/record_filter_encodings2regdeps.templatex @@ -1,10 +1,11 @@ +Hello, world! + +Trace invariant checks passed + Output .* entries from .* entries. -Schedule stats tool results: + +Opcode mix tool results: + + *[0-9]* : total executed instructions + *[0-9]* : .* -Core #0 schedule: .* -Core #1 schedule: .* -Core #2 schedule: .* -Core #3 schedule: .* -Core #4 schedule: .* -Core #5 schedule: .* -Core #6 schedule: .* diff --git a/clients/drcachesim/tests/record_filter_unit_tests.cpp b/clients/drcachesim/tests/record_filter_unit_tests.cpp index 443160645b2..e14a94c5ad9 100644 --- a/clients/drcachesim/tests/record_filter_unit_tests.cpp +++ b/clients/drcachesim/tests/record_filter_unit_tests.cpp @@ -671,8 +671,9 @@ test_chunk_update() return nullptr; } bool - parallel_shard_filter(trace_entry_t &entry, void *shard_data, - std::vector &last_encoding) override + parallel_shard_filter( + trace_entry_t &entry, void *shard_data, + record_filter_t::record_filter_info_t &record_filter_info) override { bool res = true; if (type_is_instr(static_cast(entry.type))) { diff --git a/clients/drcachesim/tools/filter/cache_filter.cpp b/clients/drcachesim/tools/filter/cache_filter.cpp index a5b0bdad9fa..470bc3b958f 100644 --- a/clients/drcachesim/tools/filter/cache_filter.cpp +++ b/clients/drcachesim/tools/filter/cache_filter.cpp @@ -87,8 +87,9 @@ cache_filter_t::parallel_shard_init(memtrace_stream_t *shard_stream, return per_shard; } bool -cache_filter_t::parallel_shard_filter(trace_entry_t &entry, void *shard_data, - std::vector &last_encoding) +cache_filter_t::parallel_shard_filter( + trace_entry_t &entry, void *shard_data, + record_filter_t::record_filter_info_t &record_filter_info) { if (entry.type == TRACE_TYPE_MARKER && entry.size == TRACE_MARKER_TYPE_FILETYPE) { if (filter_instrs_) diff --git a/clients/drcachesim/tools/filter/cache_filter.h b/clients/drcachesim/tools/filter/cache_filter.h index 9eab85c993e..d94152b6c19 100644 --- a/clients/drcachesim/tools/filter/cache_filter.h +++ b/clients/drcachesim/tools/filter/cache_filter.h @@ -55,8 +55,9 @@ class cache_filter_t : public record_filter_t::record_filter_func_t { parallel_shard_init(memtrace_stream_t *shard_stream, bool partial_trace_filter) override; bool - parallel_shard_filter(trace_entry_t &entry, void *shard_data, - std::vector &last_encoding) override; + parallel_shard_filter( + trace_entry_t &entry, void *shard_data, + record_filter_t::record_filter_info_t &record_filter_info) override; bool parallel_shard_exit(void *shard_data) override; diff --git a/clients/drcachesim/tools/filter/encodings2regdeps.h b/clients/drcachesim/tools/filter/encodings2regdeps.h index 0047b75bea5..46d84e2537b 100644 --- a/clients/drcachesim/tools/filter/encodings2regdeps.h +++ b/clients/drcachesim/tools/filter/encodings2regdeps.h @@ -65,9 +65,12 @@ class encodings2regdeps_t : public record_filter_t::record_filter_func_t { } bool - parallel_shard_filter(trace_entry_t &entry, void *shard_data, - std::vector &last_encoding) override + parallel_shard_filter( + trace_entry_t &entry, void *shard_data, + record_filter_t::record_filter_info_t &record_filter_info) override { + std::vector *last_encoding = record_filter_info.last_encoding; + /* Modify file_type to regdeps ISA, removing the real ISA of the input trace. */ trace_type_t entry_type = static_cast(entry.type); @@ -76,8 +79,7 @@ class encodings2regdeps_t : public record_filter_t::record_filter_func_t { static_cast(entry.size); if (marker_type == TRACE_MARKER_TYPE_FILETYPE) { uint64_t marker_value = static_cast(entry.addr); - marker_value &= ~OFFLINE_FILE_TYPE_ARCH_ALL; - marker_value |= OFFLINE_FILE_TYPE_ARCH_REGDEPS; + marker_value = add_to_filetype(marker_value); entry.addr = static_cast(marker_value); } } @@ -91,7 +93,7 @@ class encodings2regdeps_t : public record_filter_t::record_filter_func_t { * last_encoding already contains its encoding. */ if (is_any_instr_type(static_cast(entry.type)) && - !last_encoding.empty()) { + !last_encoding->empty()) { /* Gather real ISA encoding bytes looping through all previously saved * encoding bytes in last_encoding. */ @@ -99,7 +101,7 @@ class encodings2regdeps_t : public record_filter_t::record_filter_func_t { byte encoding[MAX_ENCODING_LENGTH]; memset(encoding, 0, sizeof(encoding)); uint encoding_offset = 0; - for (auto &trace_encoding : last_encoding) { + for (auto &trace_encoding : *last_encoding) { memcpy(encoding + encoding_offset, trace_encoding.encoding, trace_encoding.size); encoding_offset += trace_encoding.size; @@ -145,13 +147,13 @@ class encodings2regdeps_t : public record_filter_t::record_filter_func_t { uint num_regdeps_encoding_entries = ALIGN_FORWARD(regdeps_encoding_size, trace_entry_encoding_size) / trace_entry_encoding_size; - last_encoding.resize(num_regdeps_encoding_entries); + last_encoding->resize(num_regdeps_encoding_entries); /* Copy regdeps ISA encoding, splitting it among the last_encoding * trace_entry_t records. */ uint regdeps_encoding_offset = 0; - for (trace_entry_t &encoding_entry : last_encoding) { + for (trace_entry_t &encoding_entry : *last_encoding) { encoding_entry.type = TRACE_TYPE_ENCODING; uint size = regdeps_encoding_size < trace_entry_encoding_size ? regdeps_encoding_size @@ -173,6 +175,14 @@ class encodings2regdeps_t : public record_filter_t::record_filter_func_t { return true; } + uint64_t + add_to_filetype(uint64_t filetype) override + { + filetype &= ~OFFLINE_FILE_TYPE_ARCH_ALL; + filetype |= OFFLINE_FILE_TYPE_ARCH_REGDEPS; + return filetype; + } + private: struct dcontext_cleanup_last_t { public: diff --git a/clients/drcachesim/tools/filter/null_filter.h b/clients/drcachesim/tools/filter/null_filter.h index 2c89ab57eb3..9ebf5377bd4 100644 --- a/clients/drcachesim/tools/filter/null_filter.h +++ b/clients/drcachesim/tools/filter/null_filter.h @@ -47,8 +47,9 @@ class null_filter_t : public record_filter_t::record_filter_func_t { return nullptr; } bool - parallel_shard_filter(trace_entry_t &entry, void *shard_data, - std::vector &last_encoding) override + parallel_shard_filter( + trace_entry_t &entry, void *shard_data, + record_filter_t::record_filter_info_t &record_filter_info) override { return true; } diff --git a/clients/drcachesim/tools/filter/record_filter.cpp b/clients/drcachesim/tools/filter/record_filter.cpp index 4f45eec9c7f..f95a3e62cd8 100644 --- a/clients/drcachesim/tools/filter/record_filter.cpp +++ b/clients/drcachesim/tools/filter/record_filter.cpp @@ -166,15 +166,6 @@ record_filter_t::record_filter_t( , stop_timestamp_(stop_timestamp) , verbosity_(verbose) { - /* Check if encodings2regdeps filter is present. - */ - encodings2regdeps_ = false; - for (auto &filter : filters) { - if (dynamic_cast(filter.get()) != nullptr) { - encodings2regdeps_ = true; - break; - } - } UNUSED(verbosity_); UNUSED(output_prefix_); } @@ -403,6 +394,7 @@ record_filter_t::parallel_shard_init_stream(int shard_index, void *worker_data, success_ = false; } } + per_shard->record_filter_info.last_encoding = &per_shard->last_encoding; std::lock_guard guard(shard_map_mutex_); shard_map_[shard_index] = per_shard; return reinterpret_cast(per_shard); @@ -605,7 +597,10 @@ record_filter_t::process_chunk_encodings(per_shard_t *per_shard, trace_entry_t & // XXX: What if there is a filter removing all encodings but only // to the stop point, so a partial remove that does not change // the filetype? For now we do not support that, and we re-add - // encodings at chunk boundaries regardless. + // encodings at chunk boundaries regardless. Note that filters that modify + // encodings (even if they add or remove trace_entry_t records) do not incurr in + // this problem and we don't need support for partial removal of encodings in this + // case. An example of such filters is encodings2regdeps_t. if (TESTANY(OFFLINE_FILE_TYPE_ENCODINGS, per_shard->filetype) && per_shard->cur_chunk_pcs.find(entry.addr) == per_shard->cur_chunk_pcs.end()) { if (per_shard->per_input == nullptr) @@ -622,14 +617,19 @@ record_filter_t::process_chunk_encodings(per_shard_t *per_shard, trace_entry_t & per_shard->chunk_ordinal, per_shard->cur_refs); // Sanity check that the encoding size is correct. const auto &enc = per_shard->per_input->pc2encoding[entry.addr]; - size_t enc_sz = 0; - // Since all but the last entry are fixed-size we could avoid a loop - // but the loop is easier to read and we have just 1 or 2 iters. - for (const auto &record : enc) - enc_sz += record.size; - if (enc_sz != entry.size) { - return "New-chunk encoding size " + std::to_string(enc_sz) + - " != instr size " + std::to_string(entry.size); + /* OFFLINE_FILE_TYPE_ARCH_REGDEPS traces have encodings with size != ifetch. + * It's a design choice, not an error, hence we avoid this sanity check. + */ + if (!TESTANY(OFFLINE_FILE_TYPE_ARCH_REGDEPS, per_shard->filetype)) { + size_t enc_sz = 0; + // Since all but the last entry are fixed-size we could avoid a loop + // but the loop is easier to read and we have just 1 or 2 iters. + for (const auto &record : enc) + enc_sz += record.size; + if (enc_sz != entry.size) { + return "New-chunk encoding size " + std::to_string(enc_sz) + + " != instr size " + std::to_string(entry.size); + } } if (!write_trace_entries(per_shard, enc)) { return "Failed to write"; @@ -658,7 +658,10 @@ record_filter_t::process_delayed_encodings(per_shard_t *per_shard, trace_entry_t } else if (TESTANY(OFFLINE_FILE_TYPE_ENCODINGS, per_shard->filetype)) { // Output if we have encodings that haven't yet been output, and // there is no filter removing all encodings (we don't support - // partial encoding removal). + // partial encoding removal). Note that filters that modify encodings (even if + // they add or remove trace_entry_t records) do not incurr in this problem and we + // don't need support for partial removal of encodings in this case. An example + // of such filters is encodings2regdeps_t. // We check prev_was_output to rule out filtered-out encodings // (we record all encodings for new-chunk insertion). if (!per_shard->last_encoding.empty() && per_shard->prev_was_output) { @@ -755,8 +758,9 @@ record_filter_t::parallel_shard_memref(void *shard_data, const trace_entry_t &in } if (per_shard->enabled) { for (int i = 0; i < static_cast(filters_.size()); ++i) { - if (!filters_[i]->parallel_shard_filter( - entry, per_shard->filter_shard_data[i], per_shard->last_encoding)) { + if (!filters_[i]->parallel_shard_filter(entry, + per_shard->filter_shard_data[i], + per_shard->record_filter_info)) { output = false; } if (!filters_[i]->get_error_string().empty()) { diff --git a/clients/drcachesim/tools/filter/record_filter.h b/clients/drcachesim/tools/filter/record_filter.h index ea05551a856..f61492ed89b 100644 --- a/clients/drcachesim/tools/filter/record_filter.h +++ b/clients/drcachesim/tools/filter/record_filter.h @@ -61,6 +61,13 @@ namespace drmemtrace { */ class record_filter_t : public record_analysis_tool_t { public: + /** + * Interface for the record_filter to share data with its filters. + */ + struct record_filter_info_t { + std::vector *last_encoding; + }; + /** * The base class for a single filter. */ @@ -94,10 +101,12 @@ class record_filter_t : public record_analysis_tool_t { * the trace if other filter tools are present, and may include changes * made by other tools. * An error is indicated by setting error_string_ to a non-empty value. + * \p record_filter_info is the interface used by record_filter to + * share data with its filters. */ virtual bool parallel_shard_filter(trace_entry_t &entry, void *shard_data, - std::vector &last_encoding) = 0; + record_filter_info_t &record_filter_info) = 0; /** * Invoked when all #trace_entry_t in a shard have been processed * by parallel_shard_filter(). \p shard_data is same as what was @@ -115,6 +124,17 @@ class record_filter_t : public record_analysis_tool_t { return error_string_; } + /** + * If a filter modifies the file type of a trace, its changes should be made here, + * so they are visible to the record_filter even if the #trace_entry_t containing + * the file type marker is not modified. + */ + virtual uint64_t + add_to_filetype(uint64_t filetype) + { + return filetype; + } + protected: std::string error_string_; }; @@ -189,6 +209,7 @@ class record_filter_t : public record_analysis_tool_t { trace_entry_t last_written_record; // Cached value updated on context switches. per_input_t *per_input = nullptr; + record_filter_info_t record_filter_info; }; virtual std::string @@ -253,9 +274,10 @@ class record_filter_t : public record_analysis_tool_t { filetype |= OFFLINE_FILE_TYPE_BIMODAL_FILTERED_WARMUP; if (shard_type_ == SHARD_BY_CORE) filetype |= OFFLINE_FILE_TYPE_CORE_SHARDED; - if (encodings2regdeps_) { - filetype &= ~OFFLINE_FILE_TYPE_ARCH_ALL; - filetype |= OFFLINE_FILE_TYPE_ARCH_REGDEPS; + /* If filters modify the file type, add their changes here. + */ + for (auto &filter : filters_) { + filetype = filter->add_to_filetype(filetype); } return filetype; } @@ -264,7 +286,6 @@ class record_filter_t : public record_analysis_tool_t { std::vector> filters_; uint64_t stop_timestamp_; unsigned int verbosity_; - bool encodings2regdeps_; const char *output_prefix_ = "[record_filter]"; // For core-sharded, but used for thread-sharded to simplify the code. std::mutex input2info_mutex_; diff --git a/clients/drcachesim/tools/filter/trim_filter.h b/clients/drcachesim/tools/filter/trim_filter.h index a33802019ed..0a7c464347b 100644 --- a/clients/drcachesim/tools/filter/trim_filter.h +++ b/clients/drcachesim/tools/filter/trim_filter.h @@ -69,8 +69,9 @@ class trim_filter_t : public record_filter_t::record_filter_func_t { return per_shard; } bool - parallel_shard_filter(trace_entry_t &entry, void *shard_data, - std::vector &last_encoding) override + parallel_shard_filter( + trace_entry_t &entry, void *shard_data, + record_filter_t::record_filter_info_t &record_filter_info) override { per_shard_t *per_shard = reinterpret_cast(shard_data); if (entry.type == TRACE_TYPE_MARKER && diff --git a/clients/drcachesim/tools/filter/type_filter.h b/clients/drcachesim/tools/filter/type_filter.h index 9c33237ef9b..48d6aaef1db 100644 --- a/clients/drcachesim/tools/filter/type_filter.h +++ b/clients/drcachesim/tools/filter/type_filter.h @@ -86,8 +86,9 @@ class type_filter_t : public record_filter_t::record_filter_func_t { return per_shard; } bool - parallel_shard_filter(trace_entry_t &entry, void *shard_data, - std::vector &last_encoding) override + parallel_shard_filter( + trace_entry_t &entry, void *shard_data, + record_filter_t::record_filter_info_t &record_filter_info) override { per_shard_t *per_shard = reinterpret_cast(shard_data); if (entry.type == TRACE_TYPE_MARKER && entry.size == TRACE_MARKER_TYPE_FILETYPE) { diff --git a/clients/drcachesim/tools/opcode_mix.cpp b/clients/drcachesim/tools/opcode_mix.cpp index 02ea5151660..3465c3ef382 100644 --- a/clients/drcachesim/tools/opcode_mix.cpp +++ b/clients/drcachesim/tools/opcode_mix.cpp @@ -166,6 +166,9 @@ opcode_mix_t::parallel_shard_memref(void *shard_data, const memref_t &memref) " but tool built for " + trace_arch_string(build_target_arch_type()); return false; } + /* If we are dealing with a regdeps trace, we need to set the dcontext ISA mode + * to the correct synthetic ISA (i.e., DR_ISA_REGDEPS). + */ if (TESTANY(OFFLINE_FILE_TYPE_ARCH_REGDEPS, memref.marker.marker_value)) dr_set_isa_mode(dcontext_.dcontext, DR_ISA_REGDEPS, nullptr); } else if (memref.marker.type == TRACE_TYPE_MARKER && @@ -235,19 +238,12 @@ opcode_mix_t::parallel_shard_memref(void *shard_data, const memref_t &memref) instr_init(dcontext_.dcontext, &instr); app_pc next_pc = decode_from_copy(dcontext_.dcontext, decode_pc, trace_pc, &instr); - if (next_pc == NULL) { + if (next_pc == NULL || !instr_valid(&instr)) { instr_free(dcontext_.dcontext, &instr); shard->error = "Failed to decode instruction " + to_hex_string(memref.instr.addr); return false; } - if (!TESTANY(OFFLINE_FILE_TYPE_ARCH_REGDEPS, shard->filetype) && - !instr_valid(&instr)) { - instr_free(dcontext_.dcontext, &instr); - shard->error = "Failed to decode instruction (invalid) " + - to_hex_string(memref.instr.addr); - return false; - } opcode = instr_get_opcode(&instr); category = instr_get_category(&instr); shard->worker->opcode_data_cache[trace_pc] = opcode_data_t(opcode, category); @@ -334,20 +330,9 @@ opcode_mix_t::print_results() std::vector> sorted(total.opcode_counts.begin(), total.opcode_counts.end()); std::sort(sorted.begin(), sorted.end(), cmp_val); - if (TESTANY(OFFLINE_FILE_TYPE_ARCH_REGDEPS, total.filetype)) { - /* There should be only one opcode: OP_INVALID. - */ - if (sorted.size() != 1) - return false; - - std::cerr << std::setw(15) << sorted[0].second << " : " << std::setw(9) - << "invalid" - << "\n"; - } else { - for (const auto &keyvals : sorted) { - std::cerr << std::setw(15) << keyvals.second << " : " << std::setw(9) - << decode_opcode_name(keyvals.first) << "\n"; - } + for (const auto &keyvals : sorted) { + std::cerr << std::setw(15) << keyvals.second << " : " << std::setw(9) + << decode_opcode_name(keyvals.first) << "\n"; } std::cerr << "\n"; std::cerr << std::setw(15) << total.category_counts.size() diff --git a/core/ir/arm/table_encode.c b/core/ir/arm/table_encode.c index 6a95c899ae1..7d78f5c167d 100644 --- a/core/ir/arm/table_encode.c +++ b/core/ir/arm/table_encode.c @@ -51,7 +51,9 @@ const op_to_instr_info_t op_instr[] = { /* Format is: {A32, T32, T32.it} */ /* OP_INVALID */ {NULL, NULL, NULL}, - /* OP_UNDECODED */ {NULL, NULL, NULL}, + /* OP_UNDECODED */ {(instr_info_t[]){{1, 0xffffff, 0, "", 0, 0, 0, 0, 0, 0, 0, 0}}, + (instr_info_t[]){{1, 0xffffff, 0, "", 0, 0, 0, 0, 0, 0, 0, 0}}, + (instr_info_t[]){{1, 0xffffff, 0, "", 0, 0, 0, 0, 0, 0, 0, 0}}}, /* OP_CONTD */ {NULL, NULL, NULL}, /* OP_LABEL */ {NULL, NULL, NULL}, diff --git a/core/ir/instr_shared.c b/core/ir/instr_shared.c index fc88d7897b3..794307b94fd 100644 --- a/core/ir/instr_shared.c +++ b/core/ir/instr_shared.c @@ -49,6 +49,7 @@ * this macro magic is unnecessary. * http://msdn.microsoft.com/en-us/library/xa0d9ste.aspx */ +#include "opcode_api.h" #define INSTR_INLINE extern inline #include "../globals.h" @@ -3142,6 +3143,12 @@ instr_convert_to_isa_regdeps(void *drcontext, instr_t *instr_real_isa, */ instr_set_operands_valid(instr_regdeps_isa, true); + /* Set opcode as OP_UNDECODED, so routines like instr_valid() can still work. + * We can't use instr_set_opcode() because of its CLIENT_ASSERT when setting the + * opcode to OP_UNDECODED or OP_INVALID. + */ + instr_regdeps_isa->opcode = OP_UNDECODED; + /* Set converted instruction ISA mode to be DR_ISA_REGDEPS. */ instr_set_isa_mode(instr_regdeps_isa, DR_ISA_REGDEPS); diff --git a/core/ir/isa_regdeps/decode.c b/core/ir/isa_regdeps/decode.c index 2ac0a8a7f7f..e5afec31b33 100644 --- a/core/ir/isa_regdeps/decode.c +++ b/core/ir/isa_regdeps/decode.c @@ -144,6 +144,12 @@ decode_isa_regdeps(dcontext_t *dcontext, byte *encoded_instr, instr_t *instr) */ instr_set_operands_valid(instr, true); + /* Set opcode as OP_UNDECODED, so routines like instr_valid() can still work. + * We can't use instr_set_opcode() because of its CLIENT_ASSERT when setting the + * opcode to OP_UNDECODED or OP_INVALID. + */ + instr->opcode = OP_UNDECODED; + /* Set decoded instruction ISA mode to be synthetic. */ instr_set_isa_mode(instr, DR_ISA_REGDEPS); diff --git a/core/ir/x86/decode_table.c b/core/ir/x86/decode_table.c index a9bc1393d54..36b26e0c5ce 100644 --- a/core/ir/x86/decode_table.c +++ b/core/ir/x86/decode_table.c @@ -75,7 +75,7 @@ const instr_info_t * const op_instr[] = { /* OP_INVALID */ NULL, - /* OP_UNDECODED */ NULL, + /* OP_UNDECODED */ (instr_info_t[]){{1, 0xffffff, 0, "", 0, 0, 0, 0, 0, 0, 0, 0}}, /* OP_CONTD */ NULL, /* OP_LABEL */ NULL, From 599fb047f8a94be22a1f1ea2fa3937d044375db7 Mon Sep 17 00:00:00 2001 From: edeiana Date: Wed, 1 May 2024 04:01:51 -0700 Subject: [PATCH 29/58] Addressed minor PR feedback. --- clients/drcachesim/CMakeLists.txt | 3 +-- clients/drcachesim/common/options.cpp | 10 +++++--- clients/drcachesim/common/trace_entry.h | 16 ++++++++----- clients/drcachesim/reader/reader.cpp | 4 ++++ .../tests/record_filter_unit_tests.cpp | 8 +++---- .../tools/filter/encodings2regdeps.h | 23 ++++++++++++------- .../drcachesim/tools/filter/record_filter.cpp | 8 ------- .../tools/record_filter_launcher.cpp | 10 +++++--- 8 files changed, 48 insertions(+), 34 deletions(-) diff --git a/clients/drcachesim/CMakeLists.txt b/clients/drcachesim/CMakeLists.txt index 2d3d26d53e0..5caf7413f03 100644 --- a/clients/drcachesim/CMakeLists.txt +++ b/clients/drcachesim/CMakeLists.txt @@ -393,8 +393,7 @@ add_dependencies(prefetch_analyzer_launcher api_headers) add_executable(record_filter_launcher tools/record_filter_launcher.cpp tests/test_helpers.cpp) -target_link_libraries(record_filter_launcher drmemtrace_analyzer drmemtrace_record_filter - drmemtrace_raw2trace drcovlib_static drfrontendlib ${static_libc} ${zlib_libs}) +target_link_libraries(record_filter_launcher drmemtrace_analyzer drmemtrace_record_filter) add_dependencies(record_filter_launcher api_headers) append_property_list(TARGET record_filter_launcher COMPILE_DEFINITIONS "NO_HELPER_MAIN") use_DynamoRIO_extension(record_filter_launcher droption) diff --git a/clients/drcachesim/common/options.cpp b/clients/drcachesim/common/options.cpp index d64ccb70f00..28c01b960d3 100644 --- a/clients/drcachesim/common/options.cpp +++ b/clients/drcachesim/common/options.cpp @@ -972,11 +972,15 @@ droption_t "Comma-separated integers for marker types to remove. " "See trace_marker_type_t for the list of marker types."); +/* XXX i#6369: we should partition our options by tool. This one should belong to the + * record_filter partition. For now we add the filter_ prefix to options that should be + * used in conjunction with record_filter. + */ droption_t op_encodings2regdeps( - DROPTION_SCOPE_FRONTEND, "encodings2regdeps", false, + DROPTION_SCOPE_FRONTEND, "filter_encodings2regdeps", false, "Enable converting the encoding of instructions to synthetic ISA DR_ISA_REGDEPS.", - "When present, converts the encoding of instructions from a real ISA to the " - "DR_ISA_REGDEPS synthetic ISA."); + "This option is intended to be used with record_filter. When present, it converts " + "the encoding of instructions from a real ISA to the DR_ISA_REGDEPS synthetic ISA."); droption_t op_trim_before_timestamp( DROPTION_SCOPE_ALL, "trim_before_timestamp", 0, 0, diff --git a/clients/drcachesim/common/trace_entry.h b/clients/drcachesim/common/trace_entry.h index 019178e30fc..b7f423bc13c 100644 --- a/clients/drcachesim/common/trace_entry.h +++ b/clients/drcachesim/common/trace_entry.h @@ -666,8 +666,9 @@ type_is_instr(const trace_type_t type) } /** - * Returns whether the type represents an instruction. - * Includes TRACE_TYPE_INSTR_NO_FETCH and TRACE_TYPE_INSTR_MAYBE_FETCH. + * Returns whether \p type represents any type of instruction record whether an + * instruction fetch or operation hint. This is a superset of type_is_instr() and includes + * TRACE_TYPE_INSTR_NO_FETCH. */ static inline bool is_any_instr_type(const trace_type_t type) @@ -900,9 +901,6 @@ typedef enum { OFFLINE_FILE_TYPE_ARCH_ARM32 = 0x10, /**< Recorded on ARM (32-bit). */ OFFLINE_FILE_TYPE_ARCH_X86_32 = 0x20, /**< Recorded on x86 (32-bit). */ OFFLINE_FILE_TYPE_ARCH_X86_64 = 0x40, /**< Recorded on x86 (64-bit). */ - OFFLINE_FILE_TYPE_ARCH_ALL = OFFLINE_FILE_TYPE_ARCH_AARCH64 | - OFFLINE_FILE_TYPE_ARCH_ARM32 | OFFLINE_FILE_TYPE_ARCH_X86_32 | - OFFLINE_FILE_TYPE_ARCH_X86_64, /**< All possible architecture types. */ /** * Instruction addresses filtered online. * Note: this file type may transition to non-filtered. If so, the transition is @@ -980,9 +978,15 @@ typedef enum { */ OFFLINE_FILE_TYPE_CORE_SHARDED = 0x10000, /** - * Trace filtered by the record_filter tool using encodings2regdeps. + * Trace filtered by the record_filter tool using -filter_encodings2regdeps. */ OFFLINE_FILE_TYPE_ARCH_REGDEPS = 0x20000, + /** + * All possible architecture types, including synthetic ones. + */ + OFFLINE_FILE_TYPE_ARCH_ALL = OFFLINE_FILE_TYPE_ARCH_AARCH64 | + OFFLINE_FILE_TYPE_ARCH_ARM32 | OFFLINE_FILE_TYPE_ARCH_X86_32 | + OFFLINE_FILE_TYPE_ARCH_X86_64 | OFFLINE_FILE_TYPE_ARCH_REGDEPS, } offline_file_type_t; static inline const char * diff --git a/clients/drcachesim/reader/reader.cpp b/clients/drcachesim/reader/reader.cpp index a2eb017ec2c..bbb55233295 100644 --- a/clients/drcachesim/reader/reader.cpp +++ b/clients/drcachesim/reader/reader.cpp @@ -210,6 +210,10 @@ reader_t::process_input_entry() ++cur_instr_count_; // Look for encoding bits that belong to this instr. if (last_encoding_.size > 0) { + /* OFFLINE_FILE_TYPE_ARCH_REGDEPS traces have encodings with + * size != ifetch. It's a design choice, not an error, hence + * we avoid this sanity check. + */ if (!TESTANY(OFFLINE_FILE_TYPE_ARCH_REGDEPS, filetype_) && (last_encoding_.size != cur_ref_.instr.size)) { ERRMSG( diff --git a/clients/drcachesim/tests/record_filter_unit_tests.cpp b/clients/drcachesim/tests/record_filter_unit_tests.cpp index e14a94c5ad9..b328b2bf9f7 100644 --- a/clients/drcachesim/tests/record_filter_unit_tests.cpp +++ b/clients/drcachesim/tests/record_filter_unit_tests.cpp @@ -1152,10 +1152,10 @@ test_null_filter() std::unique_ptr(new dynamorio::drmemtrace::null_filter_t()); std::vector> filter_funcs; filter_funcs.push_back(std::move(null_filter)); - // We use a very small stop_timestamp for the record filter. This is to verify - // that we emit the TRACE_MARKER_TYPE_FILTER_ENDPOINT marker for each thread even - // if it starts after the given stop_timestamp. Since the stop_timestamp is so - // small, all other entries are expected to stay. + // We use a very small stop_timestamp for the record filter. This is to verify that + // we emit the TRACE_MARKER_TYPE_FILTER_ENDPOINT marker for each thread even if it + // starts after the given stop_timestamp. Since the stop_timestamp is so small, all + // other entries are expected to stay. static constexpr uint64_t stop_timestamp_us = 1; auto record_filter = std::unique_ptr( new dynamorio::drmemtrace::record_filter_t(output_dir, std::move(filter_funcs), diff --git a/clients/drcachesim/tools/filter/encodings2regdeps.h b/clients/drcachesim/tools/filter/encodings2regdeps.h index 46d84e2537b..4543f753361 100644 --- a/clients/drcachesim/tools/filter/encodings2regdeps.h +++ b/clients/drcachesim/tools/filter/encodings2regdeps.h @@ -50,6 +50,13 @@ namespace dynamorio { namespace drmemtrace { +/* This filter changes the encoding of trace_entry_t and generates discrepancies between + * encoding size and instruction length. So, we need to tell reader_t, which here comes in + * the form of memref_counter_t used in record_filter, to ignore such discrepancies. We do + * so by adding OFFLINE_FILE_TYPE_ARCH_REGDEPS to the file type of the filtered trace. + * Note that simulators that deal with these filtered traces will also have to handle the + * fact that encoding_size != instruction_length. + */ class encodings2regdeps_t : public record_filter_t::record_filter_func_t { public: encodings2regdeps_t() @@ -140,10 +147,12 @@ class encodings2regdeps_t : public record_filter_t::record_filter_func_t { } /* Compute number of trace_entry_t to contain regdeps ISA encoding. - * Each trace_entry_t record can contain 8 byte encoding. + * Each trace_entry_t record can contain pointer-sized byte encoding + * (i.e., 4 bytes for 32 bits architectures and 8 bytes for 64 bits). */ - uint trace_entry_encoding_size = (uint)sizeof(entry.addr); /* == 8 */ - uint regdeps_encoding_size = (uint)(next_pc_regdeps - encoding_regdeps); + uint trace_entry_encoding_size = static_cast(sizeof(entry.addr)); + uint regdeps_encoding_size = + static_cast(next_pc_regdeps - encoding_regdeps); uint num_regdeps_encoding_entries = ALIGN_FORWARD(regdeps_encoding_size, trace_entry_encoding_size) / trace_entry_encoding_size; @@ -155,14 +164,12 @@ class encodings2regdeps_t : public record_filter_t::record_filter_func_t { uint regdeps_encoding_offset = 0; for (trace_entry_t &encoding_entry : *last_encoding) { encoding_entry.type = TRACE_TYPE_ENCODING; - uint size = regdeps_encoding_size < trace_entry_encoding_size - ? regdeps_encoding_size - : trace_entry_encoding_size; - encoding_entry.size = (unsigned short)size; + uint size = std::min(regdeps_encoding_size, trace_entry_encoding_size); + encoding_entry.size = static_cast(size); memset(encoding_entry.encoding, 0, trace_entry_encoding_size); memcpy(encoding_entry.encoding, encoding_regdeps + regdeps_encoding_offset, encoding_entry.size); - regdeps_encoding_size -= trace_entry_encoding_size; + regdeps_encoding_size -= encoding_entry.size; regdeps_encoding_offset += encoding_entry.size; } } diff --git a/clients/drcachesim/tools/filter/record_filter.cpp b/clients/drcachesim/tools/filter/record_filter.cpp index f95a3e62cd8..75a5e894b7c 100644 --- a/clients/drcachesim/tools/filter/record_filter.cpp +++ b/clients/drcachesim/tools/filter/record_filter.cpp @@ -138,14 +138,6 @@ record_filter_tool_create(const std::string &output_dir, uint64_t stop_timestamp trim_after_timestamp))); } if (encodings2regdeps) { - /* If we are changing the encoding of trace_entry_t with encodings2regdeps, we - * will be generating discrepancies between encoding size and instruction length. - * So, we need to tell reader_t, which here comes in the form of memref_counter_t, - * to ignore such discrepancies. We do so by adding OFFLINE_FILE_TYPE_ARCH_REGDEPS - * to the file type of the filtered trace. Note that simulators that deal with - * these filtered traces will also have to handle the fact that - * encoding_size != instruction_length. - */ filter_funcs.emplace_back( std::unique_ptr( new dynamorio::drmemtrace::encodings2regdeps_t())); diff --git a/clients/drcachesim/tools/record_filter_launcher.cpp b/clients/drcachesim/tools/record_filter_launcher.cpp index fb877b2270e..58ba606baa5 100644 --- a/clients/drcachesim/tools/record_filter_launcher.cpp +++ b/clients/drcachesim/tools/record_filter_launcher.cpp @@ -121,11 +121,15 @@ static droption_t op_trim_after_timestamp( "Removes all records from the first TRACE_MARKER_TYPE_TIMESTAMP marker with " "timestamp larger than the specified value."); +/* XXX i#6369: we should partition our options by tool. This one should belong to the + * record_filter partition. For now we add the filter_ prefix to options that should be + * used in conjunction with record_filter. + */ droption_t op_encodings2regdeps( - DROPTION_SCOPE_FRONTEND, "encodings2regdeps", false, + DROPTION_SCOPE_FRONTEND, "filter_encodings2regdeps", false, "Enable converting the encoding of instructions to synthetic ISA DR_ISA_REGDEPS.", - "When present, converts the encoding of instructions from a real ISA to the " - "DR_ISA_REGDEPS synthetic ISA."); + "This option is intended to be used with record_filter. When present, it converts " + "the encoding of instructions from a real ISA to the DR_ISA_REGDEPS synthetic ISA."); } // namespace int From 19c06a618ba7ad91f167939bdb9f81aeea756b48 Mon Sep 17 00:00:00 2001 From: edeiana Date: Wed, 1 May 2024 04:31:34 -0700 Subject: [PATCH 30/58] Updated version. Added release compatibility-breaking comment. --- .github/workflows/ci-docs.yml | 2 +- .github/workflows/ci-package.yml | 12 ++++++------ CMakeLists.txt | 2 +- api/docs/release.dox | 4 ++++ 4 files changed, 12 insertions(+), 8 deletions(-) diff --git a/.github/workflows/ci-docs.yml b/.github/workflows/ci-docs.yml index 2ba40d9a760..70ced1ce403 100644 --- a/.github/workflows/ci-docs.yml +++ b/.github/workflows/ci-docs.yml @@ -90,7 +90,7 @@ jobs: # We only use a non-zero build # when making multiple manual builds in one day. run: | if test -z "${{ github.event.inputs.version }}"; then - export VERSION_NUMBER=10.90.$((`git log -n 1 --format=%ct` / (60*60*24))) + export VERSION_NUMBER=10.91.$((`git log -n 1 --format=%ct` / (60*60*24))) else export VERSION_NUMBER=${{ github.event.inputs.version }} fi diff --git a/.github/workflows/ci-package.yml b/.github/workflows/ci-package.yml index 9bff2302ddc..abf38d6078e 100644 --- a/.github/workflows/ci-package.yml +++ b/.github/workflows/ci-package.yml @@ -103,7 +103,7 @@ jobs: # We only use a non-zero build # when making multiple manual builds in one day. run: | if test -z "${{ github.event.inputs.version }}"; then - export VERSION_NUMBER=10.90.$((`git log -n 1 --format=%ct` / (60*60*24))) + export VERSION_NUMBER=10.91.$((`git log -n 1 --format=%ct` / (60*60*24))) else export VERSION_NUMBER=${{ github.event.inputs.version }} fi @@ -195,7 +195,7 @@ jobs: # XXX: See x86 job comments on sharing the default ver# with CMakeLists.txt. run: | if test -z "${{ github.event.inputs.version }}"; then - export VERSION_NUMBER=10.90.$((`git log -n 1 --format=%ct` / (60*60*24))) + export VERSION_NUMBER=10.91.$((`git log -n 1 --format=%ct` / (60*60*24))) else export VERSION_NUMBER=${{ github.event.inputs.version }} fi @@ -283,7 +283,7 @@ jobs: # XXX: See x86 job comments on sharing the default ver# with CMakeLists.txt. run: | if test -z "${{ github.event.inputs.version }}"; then - export VERSION_NUMBER=10.90.$((`git log -n 1 --format=%ct` / (60*60*24))) + export VERSION_NUMBER=10.91.$((`git log -n 1 --format=%ct` / (60*60*24))) else export VERSION_NUMBER=${{ github.event.inputs.version }} fi @@ -371,7 +371,7 @@ jobs: # XXX: See x86 job comments on sharing the default ver# with CMakeLists.txt. run: | if test -z "${{ github.event.inputs.version }}"; then - export VERSION_NUMBER=10.90.$((`git log -n 1 --format=%ct` / (60*60*24))) + export VERSION_NUMBER=10.91.$((`git log -n 1 --format=%ct` / (60*60*24))) else export VERSION_NUMBER=${{ github.event.inputs.version }} fi @@ -451,7 +451,7 @@ jobs: # XXX: See x86 job comments on sharing the default ver# with CMakeLists.txt. run: | if test -z "${{ github.event.inputs.version }}"; then - export VERSION_NUMBER=10.90.$((`git log -n 1 --format=%ct` / (60*60*24))) + export VERSION_NUMBER=10.91.$((`git log -n 1 --format=%ct` / (60*60*24))) else export VERSION_NUMBER=${{ github.event.inputs.version }} fi @@ -536,7 +536,7 @@ jobs: # XXX: See x86 job comments on sharing the default ver# with CMakeLists.txt. run: | if test -z "${{ github.event.inputs.version }}"; then - export VERSION_NUMBER="10.90.$((`git log -n 1 --format=%ct` / (60*60*24)))" + export VERSION_NUMBER="10.91.$((`git log -n 1 --format=%ct` / (60*60*24)))" export PREFIX="cronbuild-" else export VERSION_NUMBER=${{ github.event.inputs.version }} diff --git a/CMakeLists.txt b/CMakeLists.txt index 2463024ee8c..8463e376f99 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -575,7 +575,7 @@ endif (EXISTS "${PROJECT_SOURCE_DIR}/.svn") # N.B.: When updating this, update all the default versions in ci-package.yml # and ci-docs.yml. We should find a way to share (xref i#1565). -set(VERSION_NUMBER_DEFAULT "10.90.${VERSION_NUMBER_PATCHLEVEL}") +set(VERSION_NUMBER_DEFAULT "10.91.${VERSION_NUMBER_PATCHLEVEL}") # do not store the default VERSION_NUMBER in the cache to prevent a stale one # from preventing future version updates in a pre-existing build dir set(VERSION_NUMBER "" CACHE STRING "Version number: leave empty for default") diff --git a/api/docs/release.dox b/api/docs/release.dox index f108f45f4a5..bef0b002a41 100644 --- a/api/docs/release.dox +++ b/api/docs/release.dox @@ -150,6 +150,10 @@ changes: - Changed the type of the AArch64 #dr_mcontext_t members svep and ffr to #dr_svep_t. This breaks binary compatibility with clients that were built against versions of DynamoRIO before this change. + - Changed #dynamorio::drmemtrace::record_filter_t::record_filter_func_t + parallel_shard_filter() interface. Added a new parameter of type + #dynamorio::drmemtrace::record_filter_info_t that allows record_filter to share data + with its filters. Further non-compatibility-affecting changes include: - Added DWARF-5 support to the drsyms library by linking in 4 static libraries From f31811c16535768b8e6ee6fb2b9ec1feeb210e7a Mon Sep 17 00:00:00 2001 From: edeiana Date: Wed, 1 May 2024 04:40:21 -0700 Subject: [PATCH 31/58] Fix arm warning as error. --- core/ir/arm/table_encode.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/core/ir/arm/table_encode.c b/core/ir/arm/table_encode.c index 7d78f5c167d..f15caec49e9 100644 --- a/core/ir/arm/table_encode.c +++ b/core/ir/arm/table_encode.c @@ -51,9 +51,9 @@ const op_to_instr_info_t op_instr[] = { /* Format is: {A32, T32, T32.it} */ /* OP_INVALID */ {NULL, NULL, NULL}, - /* OP_UNDECODED */ {(instr_info_t[]){{1, 0xffffff, 0, "", 0, 0, 0, 0, 0, 0, 0, 0}}, - (instr_info_t[]){{1, 0xffffff, 0, "", 0, 0, 0, 0, 0, 0, 0, 0}}, - (instr_info_t[]){{1, 0xffffff, 0, "", 0, 0, 0, 0, 0, 0, 0, 0}}}, + /* OP_UNDECODED */ (const op_to_instr_info_t){(const instr_info_t[]){{1, 0xffffff, 0, "", 0, 0, 0, 0, 0, 0, 0, 0}}, + (const instr_info_t[]){{1, 0xffffff, 0, "", 0, 0, 0, 0, 0, 0, 0, 0}}, + (const instr_info_t[]){{1, 0xffffff, 0, "", 0, 0, 0, 0, 0, 0, 0, 0}}}, /* OP_CONTD */ {NULL, NULL, NULL}, /* OP_LABEL */ {NULL, NULL, NULL}, From 5fcc6c14f47383d1d375a52c297064401a91281f Mon Sep 17 00:00:00 2001 From: edeiana Date: Wed, 1 May 2024 04:46:54 -0700 Subject: [PATCH 32/58] Fixed doxygen link in release doc. --- api/docs/release.dox | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/api/docs/release.dox b/api/docs/release.dox index bef0b002a41..ff6cfca5182 100644 --- a/api/docs/release.dox +++ b/api/docs/release.dox @@ -152,8 +152,8 @@ changes: DynamoRIO before this change. - Changed #dynamorio::drmemtrace::record_filter_t::record_filter_func_t parallel_shard_filter() interface. Added a new parameter of type - #dynamorio::drmemtrace::record_filter_info_t that allows record_filter to share data - with its filters. + #dynamorio::drmemtrace::record_filter_t::record_filter_info_t that allows record_filter + to share data with its filters. Further non-compatibility-affecting changes include: - Added DWARF-5 support to the drsyms library by linking in 4 static libraries From 201df94a74c62bfae6e6970c51ef8df273fbfba2 Mon Sep 17 00:00:00 2001 From: edeiana Date: Wed, 1 May 2024 05:38:36 -0700 Subject: [PATCH 33/58] From -encodings2regdeps to -filter_encodings2regdeps in test. --- suite/tests/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/suite/tests/CMakeLists.txt b/suite/tests/CMakeLists.txt index b761e9d4879..bc66f354c33 100644 --- a/suite/tests/CMakeLists.txt +++ b/suite/tests/CMakeLists.txt @@ -4680,7 +4680,7 @@ if (BUILD_CLIENTS) "record_filter_encodings2regdeps" # We assume the app name starts with "s" here to avoid colliding with # our output dir, while still letting the single precmd remove both. - "${drcachesim_path}@-simulator_type@record_filter@-encodings2regdeps@-indir@${testname}.s*.dir/trace@-core_sharded@-cores@4@-outdir@${testname}.filtered.dir" + "${drcachesim_path}@-simulator_type@record_filter@-filter_encodings2regdeps@-indir@${testname}.s*.dir/trace@-core_sharded@-cores@4@-outdir@${testname}.filtered.dir" # We run the opcode_mix analyzer to test econdings2regdeps filtered traces. "opcode_mix") endif () From a31cd26203a0d5a8fa2a69ac9b56071f78e42b1c Mon Sep 17 00:00:00 2001 From: edeiana Date: Wed, 1 May 2024 05:39:28 -0700 Subject: [PATCH 34/58] Doxygen could not resolve links. --- api/docs/release.dox | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/api/docs/release.dox b/api/docs/release.dox index ff6cfca5182..40793ceebc7 100644 --- a/api/docs/release.dox +++ b/api/docs/release.dox @@ -150,9 +150,9 @@ changes: - Changed the type of the AArch64 #dr_mcontext_t members svep and ffr to #dr_svep_t. This breaks binary compatibility with clients that were built against versions of DynamoRIO before this change. - - Changed #dynamorio::drmemtrace::record_filter_t::record_filter_func_t + - Changed dynamorio::drmemtrace::record_filter_t::record_filter_func_t parallel_shard_filter() interface. Added a new parameter of type - #dynamorio::drmemtrace::record_filter_t::record_filter_info_t that allows record_filter + dynamorio::drmemtrace::record_filter_t::record_filter_info_t that allows record_filter to share data with its filters. Further non-compatibility-affecting changes include: From d763c30f8ff94fb8c616eae10b7bafe2441a23eb Mon Sep 17 00:00:00 2001 From: edeiana Date: Wed, 1 May 2024 05:39:50 -0700 Subject: [PATCH 35/58] Fixed test, using existing infrastructure. --- .../tests/record_filter_unit_tests.cpp | 117 ++++-------------- 1 file changed, 22 insertions(+), 95 deletions(-) diff --git a/clients/drcachesim/tests/record_filter_unit_tests.cpp b/clients/drcachesim/tests/record_filter_unit_tests.cpp index b328b2bf9f7..9fb6bdf8b9d 100644 --- a/clients/drcachesim/tests/record_filter_unit_tests.cpp +++ b/clients/drcachesim/tests/record_filter_unit_tests.cpp @@ -172,17 +172,6 @@ struct test_case_t { std::vector output; }; -struct test_entries_t { - /* Represents entry in the input trace. - */ - trace_entry_t entry_test; - /* Represents corresponding expected entry in the output trace. - * XXX i#6662: consider making this into a std::vector to test cases - * where DR_ISA_REGDEPS encoding is longer or shorter than entry_test. - */ - trace_entry_t entry_expected; -}; - static bool local_create_dir(const char *dir) { @@ -300,101 +289,40 @@ process_entries_and_check_result(test_record_filter_t *record_filter, return true; } -bool -process_entries_and_check_result_against_ground_truth( - test_record_filter_t *record_filter, const std::vector &entries) -{ - auto stream = std::unique_ptr(new local_stream_t()); - void *shard_data = - record_filter->parallel_shard_init_stream(0, nullptr, stream.get()); - if (!*record_filter) { - fprintf(stderr, "Filtering init failed: %s\n", - record_filter->get_error_string().c_str()); - return false; - } - // Process each trace entry. - for (int i = 0; i < static_cast(entries.size()); ++i) { - // We need to emulate the stream for the tool. - if (entries[i].entry_test.type == TRACE_TYPE_MARKER && - entries[i].entry_test.size == TRACE_MARKER_TYPE_TIMESTAMP) - stream->set_last_timestamp(entries[i].entry_test.addr); - if (!record_filter->parallel_shard_memref(shard_data, entries[i].entry_test)) { - fprintf(stderr, "Filtering failed on entry %d: %s\n", i, - record_filter->parallel_shard_error(shard_data).c_str()); - return false; - } - } - if (!record_filter->parallel_shard_exit(shard_data) || !*record_filter) { - fprintf(stderr, "Filtering exit failed\n"); - return false; - } - if (!record_filter->print_results()) { - fprintf(stderr, "Filtering results failed\n"); - return false; - } - - std::vector filtered = record_filter->get_output_entries(); - // Verbose output for easier debugging. - fprintf(stderr, "Input:\n"); - for (int i = 0; i < static_cast(entries.size()); ++i) { - fprintf(stderr, " %d: ", i); - print_entry(entries[i].entry_test); - fprintf(stderr, "\n"); - } - fprintf(stderr, "Output:\n"); - for (int i = 0; i < static_cast(filtered.size()); ++i) { - fprintf(stderr, " %d: ", i); - print_entry(filtered[i]); - fprintf(stderr, "\n"); - } - // Check filtered output entries. - for (int i = 0; i < static_cast(entries.size()); ++i) { - if (memcmp(&filtered[i], &entries[i].entry_expected, sizeof(trace_entry_t)) != - 0) { - fprintf(stderr, "Wrong filter result for at pos=%d. Expected: ", i); - print_entry(entries[i].entry_expected); - fprintf(stderr, ", got: "); - print_entry(filtered[i]); - fprintf(stderr, "\n"); - return false; - } - } - return true; -} - static bool test_encodings2regdeps_filter() { - std::vector entries = { + std::vector entries = { // Trace shard header. - { { TRACE_TYPE_HEADER, 0, { 0x1 } }, { TRACE_TYPE_HEADER, 0, { 0x1 } } }, - { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_VERSION, { 0x2 } }, - { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_VERSION, { 0x2 } } }, - // File type, also modified by the record_filter encodings2regdeps. + { { TRACE_TYPE_HEADER, 0, { 0x1 } }, true, { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_VERSION, { 0x2 } }, true, { true } }, + // File type, modified by the record_filter encodings2regdeps. { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_FILETYPE, { OFFLINE_FILE_TYPE_ARCH_X86_64 | OFFLINE_FILE_TYPE_ENCODINGS | OFFLINE_FILE_TYPE_SYSCALL_NUMBERS | OFFLINE_FILE_TYPE_BLOCKING_SYSCALLS } }, - { TRACE_TYPE_MARKER, + true, + { false } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_FILETYPE, { OFFLINE_FILE_TYPE_ARCH_REGDEPS | OFFLINE_FILE_TYPE_ENCODINGS | - OFFLINE_FILE_TYPE_SYSCALL_NUMBERS | - OFFLINE_FILE_TYPE_BLOCKING_SYSCALLS } } }, - { { TRACE_TYPE_THREAD, 0, { 0x4 } }, { TRACE_TYPE_THREAD, 0, { 0x4 } } }, - { { TRACE_TYPE_PID, 0, { 0x5 } }, { TRACE_TYPE_PID, 0, { 0x5 } } }, + OFFLINE_FILE_TYPE_SYSCALL_NUMBERS | OFFLINE_FILE_TYPE_BLOCKING_SYSCALLS } }, + false, + { true } }, + { { TRACE_TYPE_THREAD, 0, { 0x4 } }, true, { true } }, + { { TRACE_TYPE_PID, 0, { 0x5 } }, true, { true } }, { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CACHE_LINE_SIZE, { 0x6 } }, - { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CACHE_LINE_SIZE, { 0x6 } } }, + true, + { true } }, // Unit header. - { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 0x7 } }, - { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 0x7 } } }, - { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 0x8 } }, - { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 0x8 } } }, - { { TRACE_TYPE_ENCODING, 4, { 0xe78948 } }, - { TRACE_TYPE_ENCODING, 8, { 0x0006090600010011 } } }, - { { TRACE_TYPE_INSTR, 3, { 0x7f6fdd3ec360 } }, - { TRACE_TYPE_INSTR, 3, { 0x7f6fdd3ec360 } } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 0x7 } }, true, { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 0x8 } }, true, { true } }, + // Encoding, modified by the record_filter encodings2regdeps. + { { TRACE_TYPE_ENCODING, 4, { 0xe78948 } }, true, { false } }, + { { TRACE_TYPE_ENCODING, 8, { 0x0006090600010011 } }, false, { true } }, + { { TRACE_TYPE_INSTR, 3, { 0x7f6fdd3ec360 } }, true, { true } }, // Trace shard footer. - { { TRACE_TYPE_FOOTER, 0, { 0x0 } }, { TRACE_TYPE_FOOTER, 0, { 0x0 } } }, + { { TRACE_TYPE_FOOTER, 0, { 0x0 } }, true, { true } }, }; // Construct record_filter_t. @@ -409,8 +337,7 @@ test_encodings2regdeps_filter() filters.push_back(std::move(encodings2regdeps_filter)); auto record_filter = std::unique_ptr( new test_record_filter_t(std::move(filters), 0)); - if (!process_entries_and_check_result_against_ground_truth(record_filter.get(), - entries)) + if (!process_entries_and_check_result(record_filter.get(), entries, 0)) return false; fprintf(stderr, "test_encodings2regdeps_filter passed\n"); From 1fe93cb27ec588e6a328ceb39d2b3899daa96f6f Mon Sep 17 00:00:00 2001 From: edeiana Date: Wed, 1 May 2024 05:48:31 -0700 Subject: [PATCH 36/58] Testing arm fix for opcode name. --- core/ir/arm/table_encode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/ir/arm/table_encode.c b/core/ir/arm/table_encode.c index f15caec49e9..2b086f60003 100644 --- a/core/ir/arm/table_encode.c +++ b/core/ir/arm/table_encode.c @@ -51,9 +51,9 @@ const op_to_instr_info_t op_instr[] = { /* Format is: {A32, T32, T32.it} */ /* OP_INVALID */ {NULL, NULL, NULL}, - /* OP_UNDECODED */ (const op_to_instr_info_t){(const instr_info_t[]){{1, 0xffffff, 0, "", 0, 0, 0, 0, 0, 0, 0, 0}}, + /* OP_UNDECODED */ {NULL, NULL, NULL},/*(const op_to_instr_info_t){(const instr_info_t[]){{1, 0xffffff, 0, "", 0, 0, 0, 0, 0, 0, 0, 0}}, (const instr_info_t[]){{1, 0xffffff, 0, "", 0, 0, 0, 0, 0, 0, 0, 0}}, - (const instr_info_t[]){{1, 0xffffff, 0, "", 0, 0, 0, 0, 0, 0, 0, 0}}}, + (const instr_info_t[]){{1, 0xffffff, 0, "", 0, 0, 0, 0, 0, 0, 0, 0}}},*/ /* OP_CONTD */ {NULL, NULL, NULL}, /* OP_LABEL */ {NULL, NULL, NULL}, From 7d40b2a6622184d0a5e68cbad7e6f157f3318932 Mon Sep 17 00:00:00 2001 From: edeiana Date: Wed, 1 May 2024 06:17:46 -0700 Subject: [PATCH 37/58] Fixed tests. --- clients/drcachesim/tools/opcode_mix.cpp | 14 +++++++++----- suite/tests/api/ir_regdeps.c | 2 +- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/clients/drcachesim/tools/opcode_mix.cpp b/clients/drcachesim/tools/opcode_mix.cpp index 3465c3ef382..c334ee51d5c 100644 --- a/clients/drcachesim/tools/opcode_mix.cpp +++ b/clients/drcachesim/tools/opcode_mix.cpp @@ -158,7 +158,11 @@ opcode_mix_t::parallel_shard_memref(void *shard_data, const memref_t &memref) if (memref.marker.type == TRACE_TYPE_MARKER && memref.marker.marker_type == TRACE_MARKER_TYPE_FILETYPE) { shard->filetype = static_cast(memref.marker.marker_value); - if (TESTANY(OFFLINE_FILE_TYPE_ARCH_ALL, memref.marker.marker_value) && + /* We remove OFFLINE_FILE_TYPE_ARCH_REGDEPS from this check since DR_ISA_REGDEPS + * is not a real ISA and can coexist with any real architecture. + */ + if (TESTANY(OFFLINE_FILE_TYPE_ARCH_ALL | ~OFFLINE_FILE_TYPE_ARCH_REGDEPS, + memref.marker.marker_value) && !TESTANY(build_target_arch_type(), memref.marker.marker_value)) { shard->error = std::string("Architecture mismatch: trace recorded on ") + trace_arch_string(static_cast( @@ -419,16 +423,16 @@ opcode_mix_t::print_interval_results( const auto *snap = reinterpret_cast(base_snap); std::cerr << "ID:" << snap->get_interval_id() << " ending at instruction " << snap->get_instr_count_cumulative() << " has " - << snap->opcode_counts_.size() << " opcodes" - << " and " << snap->category_counts_.size() << " categories.\n"; + << snap->opcode_counts_.size() << " opcodes" << " and " + << snap->category_counts_.size() << " categories.\n"; std::vector> sorted(snap->opcode_counts_.begin(), snap->opcode_counts_.end()); std::sort(sorted.begin(), sorted.end(), cmp_val); for (int i = 0; i < PRINT_TOP_N && i < static_cast(sorted.size()); ++i) { std::cerr << " [" << i + 1 << "]" << " Opcode: " << decode_opcode_name(sorted[i].first) << " (" - << sorted[i].first << ")" - << " Count=" << sorted[i].second << " PKI=" + << sorted[i].first << ")" << " Count=" << sorted[i].second + << " PKI=" << sorted[i].second * 1000.0 / snap->get_instr_count_delta() << "\n"; } diff --git a/suite/tests/api/ir_regdeps.c b/suite/tests/api/ir_regdeps.c index 5c2f0a30bfb..e4a9edf1914 100644 --- a/suite/tests/api/ir_regdeps.c +++ b/suite/tests/api/ir_regdeps.c @@ -84,7 +84,7 @@ test_instr_encode_decode_synthetic(void *dc, instr_t *instr) /* Check that we do not have an opcode for the converted instruction. */ - ASSERT(instr_get_opcode(instr_synthetic_converted) == OP_INVALID); + ASSERT(instr_get_opcode(instr_synthetic_converted) == OP_UNDECODED); /* Encode the synthetic instruction. */ From 86bd2f850613b8f03439b4e27025ae70a2bf69bc Mon Sep 17 00:00:00 2001 From: edeiana Date: Wed, 1 May 2024 06:27:59 -0700 Subject: [PATCH 38/58] Fixed encodings2regdeps opcode_mix analyzer test. --- clients/drcachesim/tools/opcode_mix.cpp | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/clients/drcachesim/tools/opcode_mix.cpp b/clients/drcachesim/tools/opcode_mix.cpp index c334ee51d5c..58c4c75533e 100644 --- a/clients/drcachesim/tools/opcode_mix.cpp +++ b/clients/drcachesim/tools/opcode_mix.cpp @@ -54,7 +54,6 @@ #include "analysis_tool.h" #include "dr_api.h" -#include "dr_ir_encode.h" #include "memref.h" #include "raw2trace.h" #include "raw2trace_directory.h" @@ -161,7 +160,7 @@ opcode_mix_t::parallel_shard_memref(void *shard_data, const memref_t &memref) /* We remove OFFLINE_FILE_TYPE_ARCH_REGDEPS from this check since DR_ISA_REGDEPS * is not a real ISA and can coexist with any real architecture. */ - if (TESTANY(OFFLINE_FILE_TYPE_ARCH_ALL | ~OFFLINE_FILE_TYPE_ARCH_REGDEPS, + if (TESTANY(OFFLINE_FILE_TYPE_ARCH_ALL & ~OFFLINE_FILE_TYPE_ARCH_REGDEPS, memref.marker.marker_value) && !TESTANY(build_target_arch_type(), memref.marker.marker_value)) { shard->error = std::string("Architecture mismatch: trace recorded on ") + @@ -326,7 +325,6 @@ opcode_mix_t::print_results() for (const auto &keyvals : shard.second->category_counts) { total.category_counts[keyvals.first] += keyvals.second; } - total.filetype = shard.second->filetype; } } std::cerr << TOOL_NAME << " results:\n"; @@ -423,16 +421,16 @@ opcode_mix_t::print_interval_results( const auto *snap = reinterpret_cast(base_snap); std::cerr << "ID:" << snap->get_interval_id() << " ending at instruction " << snap->get_instr_count_cumulative() << " has " - << snap->opcode_counts_.size() << " opcodes" << " and " - << snap->category_counts_.size() << " categories.\n"; + << snap->opcode_counts_.size() << " opcodes" + << " and " << snap->category_counts_.size() << " categories.\n"; std::vector> sorted(snap->opcode_counts_.begin(), snap->opcode_counts_.end()); std::sort(sorted.begin(), sorted.end(), cmp_val); for (int i = 0; i < PRINT_TOP_N && i < static_cast(sorted.size()); ++i) { std::cerr << " [" << i + 1 << "]" << " Opcode: " << decode_opcode_name(sorted[i].first) << " (" - << sorted[i].first << ")" << " Count=" << sorted[i].second - << " PKI=" + << sorted[i].first << ")" + << " Count=" << sorted[i].second << " PKI=" << sorted[i].second * 1000.0 / snap->get_instr_count_delta() << "\n"; } From 831d0a6f5dd5c1a71816e56174b3b05b7f0ae37e Mon Sep 17 00:00:00 2001 From: edeiana Date: Wed, 1 May 2024 06:40:36 -0700 Subject: [PATCH 39/58] Reverted changed to OP_UNDECODED name for arm. --- core/ir/arm/table_encode.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/core/ir/arm/table_encode.c b/core/ir/arm/table_encode.c index 2b086f60003..6a95c899ae1 100644 --- a/core/ir/arm/table_encode.c +++ b/core/ir/arm/table_encode.c @@ -51,9 +51,7 @@ const op_to_instr_info_t op_instr[] = { /* Format is: {A32, T32, T32.it} */ /* OP_INVALID */ {NULL, NULL, NULL}, - /* OP_UNDECODED */ {NULL, NULL, NULL},/*(const op_to_instr_info_t){(const instr_info_t[]){{1, 0xffffff, 0, "", 0, 0, 0, 0, 0, 0, 0, 0}}, - (const instr_info_t[]){{1, 0xffffff, 0, "", 0, 0, 0, 0, 0, 0, 0, 0}}, - (const instr_info_t[]){{1, 0xffffff, 0, "", 0, 0, 0, 0, 0, 0, 0, 0}}},*/ + /* OP_UNDECODED */ {NULL, NULL, NULL}, /* OP_CONTD */ {NULL, NULL, NULL}, /* OP_LABEL */ {NULL, NULL, NULL}, From 91fca5fffa6ae132a97fe5cb92ab34b99d8a1768 Mon Sep 17 00:00:00 2001 From: edeiana Date: Wed, 1 May 2024 12:34:14 -0700 Subject: [PATCH 40/58] Moved include to its correct place, not the top of the file like clangd wants. --- core/ir/instr_shared.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/ir/instr_shared.c b/core/ir/instr_shared.c index 794307b94fd..904e8aaf9b3 100644 --- a/core/ir/instr_shared.c +++ b/core/ir/instr_shared.c @@ -49,7 +49,6 @@ * this macro magic is unnecessary. * http://msdn.microsoft.com/en-us/library/xa0d9ste.aspx */ -#include "opcode_api.h" #define INSTR_INLINE extern inline #include "../globals.h" @@ -59,6 +58,7 @@ #include "../link.h" #include "decode.h" #include "decode_fast.h" +#include "opcode_api.h" #include "opnd.h" #include "instr_create_shared.h" /* FIXME i#1551: refactor this file and avoid this x86-specific include in base arch/ */ From f12c3a80e83f34ffcd61620aa3d8589b605158c6 Mon Sep 17 00:00:00 2001 From: edeiana Date: Wed, 1 May 2024 18:16:27 -0700 Subject: [PATCH 41/58] Addressing PR feedback. --- api/docs/release.dox | 10 +++++++--- clients/drcachesim/CMakeLists.txt | 3 +++ clients/drcachesim/common/options.cpp | 2 +- clients/drcachesim/common/trace_entry.h | 5 ++++- clients/drcachesim/reader/reader.cpp | 12 ++++++------ clients/drcachesim/tools/filter/encodings2regdeps.h | 4 ++-- clients/drcachesim/tools/filter/record_filter.cpp | 4 ++-- clients/drcachesim/tools/filter/record_filter.h | 10 +++++++--- .../drcachesim/tools/filter/record_filter_create.h | 3 ++- clients/drcachesim/tools/record_filter_launcher.cpp | 2 +- core/ir/x86/decode_table.c | 2 +- 11 files changed, 36 insertions(+), 21 deletions(-) diff --git a/api/docs/release.dox b/api/docs/release.dox index 40793ceebc7..960443e0685 100644 --- a/api/docs/release.dox +++ b/api/docs/release.dox @@ -150,10 +150,10 @@ changes: - Changed the type of the AArch64 #dr_mcontext_t members svep and ffr to #dr_svep_t. This breaks binary compatibility with clients that were built against versions of DynamoRIO before this change. - - Changed dynamorio::drmemtrace::record_filter_t::record_filter_func_t + - Changed #dynamorio::drmemtrace::record_filter_t::record_filter_func_t parallel_shard_filter() interface. Added a new parameter of type - dynamorio::drmemtrace::record_filter_t::record_filter_info_t that allows record_filter - to share data with its filters. + #dynamorio::drmemtrace::record_filter_t::record_filter_info_t that allows + #dynamorio::drmemtrace::record_filter_t to share data with its filters. Further non-compatibility-affecting changes include: - Added DWARF-5 support to the drsyms library by linking in 4 static libraries @@ -230,6 +230,10 @@ Further non-compatibility-affecting changes include: purpose of preserving register dependencies. - Added instr_convert_to_isa_regdeps() API that converts an #instr_t from a real ISA (e.g., #DR_ISA_AMD64) to the #DR_ISA_REGDEPS synthetic ISA. + - Added encodings2regdeps_t filter to #dynamorio::drmemtrace::record_filter_t to generate + #DR_ISA_REGDEPS traces. + - Added #dynamorio::drmemtrace::OFFLINE_FILE_TYPE_ARCH_REGDEPS file type for + #DR_ISA_REGDEPS traces. **************************************************
diff --git a/clients/drcachesim/CMakeLists.txt b/clients/drcachesim/CMakeLists.txt index 5caf7413f03..75bb43e3cb4 100644 --- a/clients/drcachesim/CMakeLists.txt +++ b/clients/drcachesim/CMakeLists.txt @@ -192,6 +192,7 @@ add_exported_library(drmemtrace_simulator STATIC ) add_exported_library(drmemtrace_record_filter STATIC + tools/filter/record_filter.h tools/filter/record_filter.cpp tools/filter/cache_filter.h tools/filter/cache_filter.cpp @@ -358,6 +359,8 @@ install_client_nonDR_header(drmemtrace simulator/cache_simulator_create.h) install_client_nonDR_header(drmemtrace simulator/tlb_simulator_create.h) install_client_nonDR_header(drmemtrace tools/view_create.h) install_client_nonDR_header(drmemtrace tools/func_view_create.h) +install_client_nonDR_header(drmemtrace tools/filter/record_filter_create.h) +install_client_nonDR_header(drmemtrace tools/filter/record_filter.h) # TODO i#6412: Create a separate directory for non-tracer headers so that # we can more cleanly separate tracer and raw2trace code. install_client_nonDR_header(drmemtrace tracer/raw2trace.h) diff --git a/clients/drcachesim/common/options.cpp b/clients/drcachesim/common/options.cpp index 28c01b960d3..39ab5112f6f 100644 --- a/clients/drcachesim/common/options.cpp +++ b/clients/drcachesim/common/options.cpp @@ -979,7 +979,7 @@ droption_t droption_t op_encodings2regdeps( DROPTION_SCOPE_FRONTEND, "filter_encodings2regdeps", false, "Enable converting the encoding of instructions to synthetic ISA DR_ISA_REGDEPS.", - "This option is intended to be used with record_filter. When present, it converts " + "This option is for -simulator_type " RECORD_FILTER ". When present, it converts " "the encoding of instructions from a real ISA to the DR_ISA_REGDEPS synthetic ISA."); droption_t op_trim_before_timestamp( diff --git a/clients/drcachesim/common/trace_entry.h b/clients/drcachesim/common/trace_entry.h index b7f423bc13c..5b26180ba5c 100644 --- a/clients/drcachesim/common/trace_entry.h +++ b/clients/drcachesim/common/trace_entry.h @@ -668,7 +668,7 @@ type_is_instr(const trace_type_t type) /** * Returns whether \p type represents any type of instruction record whether an * instruction fetch or operation hint. This is a superset of type_is_instr() and includes - * TRACE_TYPE_INSTR_NO_FETCH. + * #TRACE_TYPE_INSTR_NO_FETCH. */ static inline bool is_any_instr_type(const trace_type_t type) @@ -979,6 +979,9 @@ typedef enum { OFFLINE_FILE_TYPE_CORE_SHARDED = 0x10000, /** * Trace filtered by the record_filter tool using -filter_encodings2regdeps. + * The encodings2regdeps filter replaces real ISA encodings with #DR_ISA_REGDEPS + * encodings. Note that these encoding changes do not update the instruction length, + * hence encoding size and instruction fetch size may not match. */ OFFLINE_FILE_TYPE_ARCH_REGDEPS = 0x20000, /** diff --git a/clients/drcachesim/reader/reader.cpp b/clients/drcachesim/reader/reader.cpp index bbb55233295..93489f6c631 100644 --- a/clients/drcachesim/reader/reader.cpp +++ b/clients/drcachesim/reader/reader.cpp @@ -210,12 +210,12 @@ reader_t::process_input_entry() ++cur_instr_count_; // Look for encoding bits that belong to this instr. if (last_encoding_.size > 0) { - /* OFFLINE_FILE_TYPE_ARCH_REGDEPS traces have encodings with - * size != ifetch. It's a design choice, not an error, hence - * we avoid this sanity check. - */ - if (!TESTANY(OFFLINE_FILE_TYPE_ARCH_REGDEPS, filetype_) && - (last_encoding_.size != cur_ref_.instr.size)) { + if (last_encoding_.size != cur_ref_.instr.size && + /* OFFLINE_FILE_TYPE_ARCH_REGDEPS traces have encodings with + * size != ifetch. It's a design choice, not an error, hence + * we avoid this sanity check for these traces. + */ + !TESTANY(OFFLINE_FILE_TYPE_ARCH_REGDEPS, filetype_)) { ERRMSG( "Encoding size %zu != instr size %zu for PC 0x%zx at ord %" PRIu64 " instr %" PRIu64 " last_timestamp=0x%" PRIx64 "\n", diff --git a/clients/drcachesim/tools/filter/encodings2regdeps.h b/clients/drcachesim/tools/filter/encodings2regdeps.h index 4543f753361..0b78a832568 100644 --- a/clients/drcachesim/tools/filter/encodings2regdeps.h +++ b/clients/drcachesim/tools/filter/encodings2regdeps.h @@ -86,7 +86,7 @@ class encodings2regdeps_t : public record_filter_t::record_filter_func_t { static_cast(entry.size); if (marker_type == TRACE_MARKER_TYPE_FILETYPE) { uint64_t marker_value = static_cast(entry.addr); - marker_value = add_to_filetype(marker_value); + marker_value = update_filetype(marker_value); entry.addr = static_cast(marker_value); } } @@ -183,7 +183,7 @@ class encodings2regdeps_t : public record_filter_t::record_filter_func_t { } uint64_t - add_to_filetype(uint64_t filetype) override + update_filetype(uint64_t filetype) override { filetype &= ~OFFLINE_FILE_TYPE_ARCH_ALL; filetype |= OFFLINE_FILE_TYPE_ARCH_REGDEPS; diff --git a/clients/drcachesim/tools/filter/record_filter.cpp b/clients/drcachesim/tools/filter/record_filter.cpp index 75a5e894b7c..979ca62acd5 100644 --- a/clients/drcachesim/tools/filter/record_filter.cpp +++ b/clients/drcachesim/tools/filter/record_filter.cpp @@ -590,7 +590,7 @@ record_filter_t::process_chunk_encodings(per_shard_t *per_shard, trace_entry_t & // to the stop point, so a partial remove that does not change // the filetype? For now we do not support that, and we re-add // encodings at chunk boundaries regardless. Note that filters that modify - // encodings (even if they add or remove trace_entry_t records) do not incurr in + // encodings (even if they add or remove trace_entry_t records) do not incur in // this problem and we don't need support for partial removal of encodings in this // case. An example of such filters is encodings2regdeps_t. if (TESTANY(OFFLINE_FILE_TYPE_ENCODINGS, per_shard->filetype) && @@ -651,7 +651,7 @@ record_filter_t::process_delayed_encodings(per_shard_t *per_shard, trace_entry_t // Output if we have encodings that haven't yet been output, and // there is no filter removing all encodings (we don't support // partial encoding removal). Note that filters that modify encodings (even if - // they add or remove trace_entry_t records) do not incurr in this problem and we + // they add or remove trace_entry_t records) do not incur in this problem and we // don't need support for partial removal of encodings in this case. An example // of such filters is encodings2regdeps_t. // We check prev_was_output to rule out filtered-out encodings diff --git a/clients/drcachesim/tools/filter/record_filter.h b/clients/drcachesim/tools/filter/record_filter.h index f61492ed89b..c8fa1c8b879 100644 --- a/clients/drcachesim/tools/filter/record_filter.h +++ b/clients/drcachesim/tools/filter/record_filter.h @@ -65,6 +65,10 @@ class record_filter_t : public record_analysis_tool_t { * Interface for the record_filter to share data with its filters. */ struct record_filter_info_t { + /** + * Stores the encoding of an instructions, which may be split among more than one + * #trace_entry_t, hence the vector. + */ std::vector *last_encoding; }; @@ -127,10 +131,10 @@ class record_filter_t : public record_analysis_tool_t { /** * If a filter modifies the file type of a trace, its changes should be made here, * so they are visible to the record_filter even if the #trace_entry_t containing - * the file type marker is not modified. + * the file type marker is not modified by the filter. */ virtual uint64_t - add_to_filetype(uint64_t filetype) + update_filetype(uint64_t filetype) { return filetype; } @@ -277,7 +281,7 @@ class record_filter_t : public record_analysis_tool_t { /* If filters modify the file type, add their changes here. */ for (auto &filter : filters_) { - filetype = filter->add_to_filetype(filetype); + filetype = filter->update_filetype(filetype); } return filetype; } diff --git a/clients/drcachesim/tools/filter/record_filter_create.h b/clients/drcachesim/tools/filter/record_filter_create.h index 1c38df93570..7e3b750b595 100644 --- a/clients/drcachesim/tools/filter/record_filter_create.h +++ b/clients/drcachesim/tools/filter/record_filter_create.h @@ -62,8 +62,9 @@ namespace drmemtrace { * up to its first timestamp whose value is greater or equal to this parameter. * @param[in] trim_after_timestamp Trim records after the trace's first timestamp * whose value is greater than this parameter. - * @param[in] encodings2regdeps If true, converts instruction encodings from the real ISA + * @param[in] encodings2regdeps If true, converts instruction encodings from the real ISA * of the input trace to the #DR_ISA_REGDEPS synthetic ISA. + * @param[in] verbose Verbosity level for notifications. */ record_analysis_tool_t * record_filter_tool_create(const std::string &output_dir, uint64_t stop_timestamp, diff --git a/clients/drcachesim/tools/record_filter_launcher.cpp b/clients/drcachesim/tools/record_filter_launcher.cpp index 58ba606baa5..53adf2ca1d8 100644 --- a/clients/drcachesim/tools/record_filter_launcher.cpp +++ b/clients/drcachesim/tools/record_filter_launcher.cpp @@ -128,7 +128,7 @@ static droption_t op_trim_after_timestamp( droption_t op_encodings2regdeps( DROPTION_SCOPE_FRONTEND, "filter_encodings2regdeps", false, "Enable converting the encoding of instructions to synthetic ISA DR_ISA_REGDEPS.", - "This option is intended to be used with record_filter. When present, it converts " + "This option is for -simulator_type record_filter. When present, it converts " "the encoding of instructions from a real ISA to the DR_ISA_REGDEPS synthetic ISA."); } // namespace diff --git a/core/ir/x86/decode_table.c b/core/ir/x86/decode_table.c index 36b26e0c5ce..b88cbbc3e76 100644 --- a/core/ir/x86/decode_table.c +++ b/core/ir/x86/decode_table.c @@ -75,7 +75,7 @@ const instr_info_t * const op_instr[] = { /* OP_INVALID */ NULL, - /* OP_UNDECODED */ (instr_info_t[]){{1, 0xffffff, 0, "", 0, 0, 0, 0, 0, 0, 0, 0}}, + /* OP_UNDECODED */ (instr_info_t[]){{OP_UNDECODED, 0xffffff, 0, "", 0, 0, 0, 0, 0, 0, 0, 0}}, /* OP_CONTD */ NULL, /* OP_LABEL */ NULL, From 807df45f29066cc7936ea587dca46314040cc088 Mon Sep 17 00:00:00 2001 From: edeiana Date: Wed, 1 May 2024 18:44:11 -0700 Subject: [PATCH 42/58] Attempt to fix doxygen. --- clients/drcachesim/tools/filter/record_filter.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clients/drcachesim/tools/filter/record_filter.h b/clients/drcachesim/tools/filter/record_filter.h index c8fa1c8b879..c9cf1d0db7f 100644 --- a/clients/drcachesim/tools/filter/record_filter.h +++ b/clients/drcachesim/tools/filter/record_filter.h @@ -98,7 +98,7 @@ class record_filter_t : public record_analysis_tool_t { * Invoked for each #trace_entry_t in the shard. It returns * whether or not this \p entry should be included in the result * trace. \p shard_data is same as what was returned by - * parallel_shard_init(). The given \p entry is included in the result + * parallel_shard_init. The given \p entry is included in the result * trace iff all provided #record_filter_func_t return true. The * \p entry parameter can also be modified by the record_filter_func_t. * The passed \p entry is not guaranteed to be the original one from @@ -131,7 +131,7 @@ class record_filter_t : public record_analysis_tool_t { /** * If a filter modifies the file type of a trace, its changes should be made here, * so they are visible to the record_filter even if the #trace_entry_t containing - * the file type marker is not modified by the filter. + * the file type marker is not modified directly by the filter. */ virtual uint64_t update_filetype(uint64_t filetype) From 0e5d075abca81d11c3fa0aabbcdf2baeb6bf70e6 Mon Sep 17 00:00:00 2001 From: edeiana Date: Wed, 1 May 2024 18:48:11 -0700 Subject: [PATCH 43/58] Attempt to fix doxygen 2. --- clients/drcachesim/tools/filter/record_filter.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clients/drcachesim/tools/filter/record_filter.h b/clients/drcachesim/tools/filter/record_filter.h index c9cf1d0db7f..ff634ba9374 100644 --- a/clients/drcachesim/tools/filter/record_filter.h +++ b/clients/drcachesim/tools/filter/record_filter.h @@ -98,8 +98,8 @@ class record_filter_t : public record_analysis_tool_t { * Invoked for each #trace_entry_t in the shard. It returns * whether or not this \p entry should be included in the result * trace. \p shard_data is same as what was returned by - * parallel_shard_init. The given \p entry is included in the result - * trace iff all provided #record_filter_func_t return true. The + * parallel_shard_init(). The given \p entry is included in the result + * trace iff all provided record_filter_func_t return true. The * \p entry parameter can also be modified by the record_filter_func_t. * The passed \p entry is not guaranteed to be the original one from * the trace if other filter tools are present, and may include changes From 8f483e93ccd36d633bd623c9ef434d3c02ef480c Mon Sep 17 00:00:00 2001 From: edeiana Date: Wed, 1 May 2024 18:55:10 -0700 Subject: [PATCH 44/58] Handling opcode name of OP_INVALID and OP_UNDECODED for x86. Returning the same name as other arches (i.e., "", ""). --- core/ir/x86/decode.c | 7 +++++++ core/ir/x86/decode_table.c | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/core/ir/x86/decode.c b/core/ir/x86/decode.c index caceafd62c2..f608afea2db 100644 --- a/core/ir/x86/decode.c +++ b/core/ir/x86/decode.c @@ -2791,6 +2791,13 @@ const char * decode_opcode_name(int opcode) { const instr_info_t *info = op_instr[opcode]; + if (info == NULL) { + switch (opcode) { + case OP_INVALID: return ""; + case OP_UNDECODED: return ""; + default: return ""; + } + } return info->name; } diff --git a/core/ir/x86/decode_table.c b/core/ir/x86/decode_table.c index b88cbbc3e76..a9bc1393d54 100644 --- a/core/ir/x86/decode_table.c +++ b/core/ir/x86/decode_table.c @@ -75,7 +75,7 @@ const instr_info_t * const op_instr[] = { /* OP_INVALID */ NULL, - /* OP_UNDECODED */ (instr_info_t[]){{OP_UNDECODED, 0xffffff, 0, "", 0, 0, 0, 0, 0, 0, 0, 0}}, + /* OP_UNDECODED */ NULL, /* OP_CONTD */ NULL, /* OP_LABEL */ NULL, From fc509426cd686931a31e5030ef9e75c47b56e7b1 Mon Sep 17 00:00:00 2001 From: edeiana Date: Thu, 2 May 2024 02:22:10 -0700 Subject: [PATCH 45/58] Fixed doxygen comment. --- clients/drcachesim/tools/filter/record_filter.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/clients/drcachesim/tools/filter/record_filter.h b/clients/drcachesim/tools/filter/record_filter.h index ff634ba9374..50a76e23268 100644 --- a/clients/drcachesim/tools/filter/record_filter.h +++ b/clients/drcachesim/tools/filter/record_filter.h @@ -97,10 +97,10 @@ class record_filter_t : public record_analysis_tool_t { /** * Invoked for each #trace_entry_t in the shard. It returns * whether or not this \p entry should be included in the result - * trace. \p shard_data is same as what was returned by - * parallel_shard_init(). The given \p entry is included in the result - * trace iff all provided record_filter_func_t return true. The - * \p entry parameter can also be modified by the record_filter_func_t. + * trace. \p shard_data is same as what was returned by parallel_shard_init(). + * The given \p entry is included in the result trace iff all provided + * #dynamorio::drmemtrace::record_filter_t::record_filter_func_t return true. + * The \p entry parameter can also be modified by the record_filter_func_t. * The passed \p entry is not guaranteed to be the original one from * the trace if other filter tools are present, and may include changes * made by other tools. From 0e0a9b9aba4979ebc4f33f02acc6c859930bb200 Mon Sep 17 00:00:00 2001 From: edeiana Date: Thu, 2 May 2024 02:23:47 -0700 Subject: [PATCH 46/58] Added test entries to check different conditions. --- .../drcachesim/tests/record_filter_unit_tests.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/clients/drcachesim/tests/record_filter_unit_tests.cpp b/clients/drcachesim/tests/record_filter_unit_tests.cpp index 9fb6bdf8b9d..2512bbbebce 100644 --- a/clients/drcachesim/tests/record_filter_unit_tests.cpp +++ b/clients/drcachesim/tests/record_filter_unit_tests.cpp @@ -321,6 +321,20 @@ test_encodings2regdeps_filter() { { TRACE_TYPE_ENCODING, 4, { 0xe78948 } }, true, { false } }, { { TRACE_TYPE_ENCODING, 8, { 0x0006090600010011 } }, false, { true } }, { { TRACE_TYPE_INSTR, 3, { 0x7f6fdd3ec360 } }, true, { true } }, + // Same instr same chunk. + { { TRACE_TYPE_INSTR, 3, { 0x7f6fdd3ec360 } }, true, { true } }, + // Unit header. + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 0xc } }, true, { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 0xd } }, true, { true } }, + // Duplicated encoding across chunk boundary. + { { TRACE_TYPE_ENCODING, 4, { 0xe78948 } }, true, { false } }, + { { TRACE_TYPE_ENCODING, 8, { 0x0006090600010011 } }, false, { true } }, + { { TRACE_TYPE_INSTR, 3, { 0x7f6fdd3ec360 } }, true, { true } }, + // Unit header. + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 0xe } }, true, { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 0xf } }, true, { true } }, + // Same instr in different chunk. + { { TRACE_TYPE_INSTR, 3, { 0x7f6fdd3ec360 } }, true, { true } }, // Trace shard footer. { { TRACE_TYPE_FOOTER, 0, { 0x0 } }, true, { true } }, }; From 3c416cb4fa6b303bf0f06f886d0adafdae06b52c Mon Sep 17 00:00:00 2001 From: edeiana Date: Thu, 2 May 2024 02:45:54 -0700 Subject: [PATCH 47/58] Attempt to fix macos build. --- clients/drcachesim/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clients/drcachesim/CMakeLists.txt b/clients/drcachesim/CMakeLists.txt index 75bb43e3cb4..a54a955e5a8 100644 --- a/clients/drcachesim/CMakeLists.txt +++ b/clients/drcachesim/CMakeLists.txt @@ -199,7 +199,7 @@ add_exported_library(drmemtrace_record_filter STATIC tools/filter/type_filter.h tools/filter/encodings2regdeps.h tools/filter/null_filter.h) -target_link_libraries(drmemtrace_record_filter drmemtrace_simulator) +target_link_libraries(drmemtrace_record_filter drmemtrace_simulator drdecode) add_exported_library(directory_iterator STATIC common/directory_iterator.cpp) add_dependencies(directory_iterator api_headers) From db81af6b210c861c72c0dbced1668da5cd2cfefa Mon Sep 17 00:00:00 2001 From: edeiana Date: Thu, 2 May 2024 03:21:44 -0700 Subject: [PATCH 48/58] Attempt to fix macos build 2. --- clients/drcachesim/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clients/drcachesim/CMakeLists.txt b/clients/drcachesim/CMakeLists.txt index a54a955e5a8..f80caccc73d 100644 --- a/clients/drcachesim/CMakeLists.txt +++ b/clients/drcachesim/CMakeLists.txt @@ -199,7 +199,7 @@ add_exported_library(drmemtrace_record_filter STATIC tools/filter/type_filter.h tools/filter/encodings2regdeps.h tools/filter/null_filter.h) -target_link_libraries(drmemtrace_record_filter drmemtrace_simulator drdecode) +target_link_libraries(drmemtrace_record_filter drmemtrace_simulator dynamorio) add_exported_library(directory_iterator STATIC common/directory_iterator.cpp) add_dependencies(directory_iterator api_headers) From 5b20bb5413d2ef8a363ae487169f8c763dd5a685 Mon Sep 17 00:00:00 2001 From: edeiana Date: Thu, 2 May 2024 14:53:42 -0700 Subject: [PATCH 49/58] Now using configure_DynamoRIO_standalone() for drmemtrace_record_filter. --- clients/drcachesim/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/clients/drcachesim/CMakeLists.txt b/clients/drcachesim/CMakeLists.txt index f80caccc73d..a57061d83a1 100644 --- a/clients/drcachesim/CMakeLists.txt +++ b/clients/drcachesim/CMakeLists.txt @@ -173,6 +173,7 @@ target_link_libraries(drmemtrace_invariant_checker drdecode) configure_DynamoRIO_standalone(drmemtrace_opcode_mix) configure_DynamoRIO_standalone(drmemtrace_view) configure_DynamoRIO_standalone(drmemtrace_invariant_checker) +configure_DynamoRIO_standalone(drmemtrace_record_filter) # We combine the cache and TLB simulators as they share code already. add_exported_library(drmemtrace_simulator STATIC @@ -199,7 +200,7 @@ add_exported_library(drmemtrace_record_filter STATIC tools/filter/type_filter.h tools/filter/encodings2regdeps.h tools/filter/null_filter.h) -target_link_libraries(drmemtrace_record_filter drmemtrace_simulator dynamorio) +target_link_libraries(drmemtrace_record_filter drmemtrace_simulator) add_exported_library(directory_iterator STATIC common/directory_iterator.cpp) add_dependencies(directory_iterator api_headers) From b35faf46758574248a445224877c7f1fe01d8e24 Mon Sep 17 00:00:00 2001 From: edeiana Date: Thu, 2 May 2024 15:52:48 -0700 Subject: [PATCH 50/58] Build fix, use before def in cmakelist. --- clients/drcachesim/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clients/drcachesim/CMakeLists.txt b/clients/drcachesim/CMakeLists.txt index a57061d83a1..538a866ebce 100644 --- a/clients/drcachesim/CMakeLists.txt +++ b/clients/drcachesim/CMakeLists.txt @@ -173,7 +173,6 @@ target_link_libraries(drmemtrace_invariant_checker drdecode) configure_DynamoRIO_standalone(drmemtrace_opcode_mix) configure_DynamoRIO_standalone(drmemtrace_view) configure_DynamoRIO_standalone(drmemtrace_invariant_checker) -configure_DynamoRIO_standalone(drmemtrace_record_filter) # We combine the cache and TLB simulators as they share code already. add_exported_library(drmemtrace_simulator STATIC @@ -201,6 +200,7 @@ add_exported_library(drmemtrace_record_filter STATIC tools/filter/encodings2regdeps.h tools/filter/null_filter.h) target_link_libraries(drmemtrace_record_filter drmemtrace_simulator) +configure_DynamoRIO_standalone(drmemtrace_record_filter) add_exported_library(directory_iterator STATIC common/directory_iterator.cpp) add_dependencies(directory_iterator api_headers) From 2509adf361fb2999ea2d392a10a14ccc846224f4 Mon Sep 17 00:00:00 2001 From: edeiana Date: Thu, 2 May 2024 15:53:14 -0700 Subject: [PATCH 51/58] Fixed chunking in encodings2regdeps unit test. --- .../tests/record_filter_unit_tests.cpp | 41 +++++++++++++------ 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/clients/drcachesim/tests/record_filter_unit_tests.cpp b/clients/drcachesim/tests/record_filter_unit_tests.cpp index 2512bbbebce..0c91eb4783b 100644 --- a/clients/drcachesim/tests/record_filter_unit_tests.cpp +++ b/clients/drcachesim/tests/record_filter_unit_tests.cpp @@ -292,6 +292,9 @@ process_entries_and_check_result(test_record_filter_t *record_filter, static bool test_encodings2regdeps_filter() { + constexpr addr_t PC = 0x7f6fdd3ec360; + constexpr addr_t ENCODING_REAL_ISA = 0xe78948; + constexpr addr_t ENCODING_REGDEPS_ISA = 0x0006090600010011; std::vector entries = { // Trace shard header. { { TRACE_TYPE_HEADER, 0, { 0x1 } }, true, { true } }, @@ -314,27 +317,41 @@ test_encodings2regdeps_filter() { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CACHE_LINE_SIZE, { 0x6 } }, true, { true } }, - // Unit header. + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CHUNK_INSTR_COUNT, { 0x3 } }, + true, + { true } }, + + // Chunk 1. { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 0x7 } }, true, { true } }, { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 0x8 } }, true, { true } }, // Encoding, modified by the record_filter encodings2regdeps. - { { TRACE_TYPE_ENCODING, 4, { 0xe78948 } }, true, { false } }, - { { TRACE_TYPE_ENCODING, 8, { 0x0006090600010011 } }, false, { true } }, - { { TRACE_TYPE_INSTR, 3, { 0x7f6fdd3ec360 } }, true, { true } }, - // Same instr same chunk. - { { TRACE_TYPE_INSTR, 3, { 0x7f6fdd3ec360 } }, true, { true } }, - // Unit header. + { { TRACE_TYPE_ENCODING, 4, { ENCODING_REAL_ISA } }, true, { false } }, + { { TRACE_TYPE_ENCODING, 8, { ENCODING_REGDEPS_ISA } }, false, { true } }, + { { TRACE_TYPE_INSTR, 3, { PC } }, true, { true } }, + { { TRACE_TYPE_INSTR, 3, { PC } }, true, { true } }, + { { TRACE_TYPE_INSTR, 3, { PC } }, true, { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CHUNK_FOOTER, { 0 } }, true, { false } }, + + // Chunk 2. + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_RECORD_ORDINAL, { 10 } }, + true, + { false } }, { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 0xc } }, true, { true } }, { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 0xd } }, true, { true } }, // Duplicated encoding across chunk boundary. - { { TRACE_TYPE_ENCODING, 4, { 0xe78948 } }, true, { false } }, - { { TRACE_TYPE_ENCODING, 8, { 0x0006090600010011 } }, false, { true } }, - { { TRACE_TYPE_INSTR, 3, { 0x7f6fdd3ec360 } }, true, { true } }, - // Unit header. + { { TRACE_TYPE_ENCODING, 4, { ENCODING_REAL_ISA } }, true, { false } }, + { { TRACE_TYPE_ENCODING, 8, { ENCODING_REGDEPS_ISA } }, false, { true } }, + { { TRACE_TYPE_INSTR, 3, { PC } }, true, { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CHUNK_FOOTER, { 0 } }, true, { false } }, + + // Chunk 3. + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_RECORD_ORDINAL, { 10 } }, + true, + { false } }, { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 0xe } }, true, { true } }, { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 0xf } }, true, { true } }, // Same instr in different chunk. - { { TRACE_TYPE_INSTR, 3, { 0x7f6fdd3ec360 } }, true, { true } }, + { { TRACE_TYPE_INSTR, 3, { PC } }, true, { true } }, // Trace shard footer. { { TRACE_TYPE_FOOTER, 0, { 0x0 } }, true, { true } }, }; From fc0a74444c5d72ac367fb9e0638e931927c2aff1 Mon Sep 17 00:00:00 2001 From: edeiana Date: Fri, 3 May 2024 15:10:17 -0700 Subject: [PATCH 52/58] Fixed test_encodings2regdeps_filter(). --- .../tests/record_filter_unit_tests.cpp | 33 +++++++++++-------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/clients/drcachesim/tests/record_filter_unit_tests.cpp b/clients/drcachesim/tests/record_filter_unit_tests.cpp index 0c91eb4783b..8585127a1bd 100644 --- a/clients/drcachesim/tests/record_filter_unit_tests.cpp +++ b/clients/drcachesim/tests/record_filter_unit_tests.cpp @@ -325,32 +325,37 @@ test_encodings2regdeps_filter() { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 0x7 } }, true, { true } }, { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 0x8 } }, true, { true } }, // Encoding, modified by the record_filter encodings2regdeps. - { { TRACE_TYPE_ENCODING, 4, { ENCODING_REAL_ISA } }, true, { false } }, + { { TRACE_TYPE_ENCODING, 3, { ENCODING_REAL_ISA } }, true, { false } }, { { TRACE_TYPE_ENCODING, 8, { ENCODING_REGDEPS_ISA } }, false, { true } }, { { TRACE_TYPE_INSTR, 3, { PC } }, true, { true } }, { { TRACE_TYPE_INSTR, 3, { PC } }, true, { true } }, { { TRACE_TYPE_INSTR, 3, { PC } }, true, { true } }, - { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CHUNK_FOOTER, { 0 } }, true, { false } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CHUNK_FOOTER, { 0 } }, true, { true } }, // Chunk 2. - { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_RECORD_ORDINAL, { 10 } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_RECORD_ORDINAL, { 0xa } }, true, - { false } }, - { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 0xc } }, true, { true } }, - { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 0xd } }, true, { true } }, + { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 0x7 } }, true, { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 0x8 } }, true, { true } }, // Duplicated encoding across chunk boundary. - { { TRACE_TYPE_ENCODING, 4, { ENCODING_REAL_ISA } }, true, { false } }, + { { TRACE_TYPE_ENCODING, 3, { ENCODING_REAL_ISA } }, true, { false } }, { { TRACE_TYPE_ENCODING, 8, { ENCODING_REGDEPS_ISA } }, false, { true } }, { { TRACE_TYPE_INSTR, 3, { PC } }, true, { true } }, - { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CHUNK_FOOTER, { 0 } }, true, { false } }, + { { TRACE_TYPE_INSTR, 3, { PC } }, true, { true } }, + { { TRACE_TYPE_INSTR, 3, { PC } }, true, { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CHUNK_FOOTER, { 1 } }, true, { true } }, // Chunk 3. - { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_RECORD_ORDINAL, { 10 } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_RECORD_ORDINAL, { 0xe } }, true, - { false } }, - { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 0xe } }, true, { true } }, - { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 0xf } }, true, { true } }, - // Same instr in different chunk. + { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 0x7 } }, true, { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 0x8 } }, true, { true } }, + // Same instr in different chunk. We expect record_filter to add + // ENCODING_REGDEPS_ISA here (not ENCODING_REAL_ISA). + { { TRACE_TYPE_ENCODING, 8, { ENCODING_REGDEPS_ISA } }, false, { true } }, + { { TRACE_TYPE_INSTR, 3, { PC } }, true, { true } }, { { TRACE_TYPE_INSTR, 3, { PC } }, true, { true } }, // Trace shard footer. { { TRACE_TYPE_FOOTER, 0, { 0x0 } }, true, { true } }, @@ -367,7 +372,7 @@ test_encodings2regdeps_filter() } filters.push_back(std::move(encodings2regdeps_filter)); auto record_filter = std::unique_ptr( - new test_record_filter_t(std::move(filters), 0)); + new test_record_filter_t(std::move(filters), 0, /*write_archive=*/true)); if (!process_entries_and_check_result(record_filter.get(), entries, 0)) return false; From 7a042722a090024656e84ce92ce4f9fc953d41ad Mon Sep 17 00:00:00 2001 From: edeiana Date: Fri, 3 May 2024 15:20:55 -0700 Subject: [PATCH 53/58] Updated comment on chunk_footer, whose value is the chunk ordinal. --- clients/drcachesim/common/trace_entry.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/clients/drcachesim/common/trace_entry.h b/clients/drcachesim/common/trace_entry.h index 5b26180ba5c..447d3eb95c7 100644 --- a/clients/drcachesim/common/trace_entry.h +++ b/clients/drcachesim/common/trace_entry.h @@ -476,8 +476,10 @@ typedef enum { TRACE_MARKER_TYPE_CHUNK_INSTR_COUNT, /** - * Marks the end of a chunk. The final chunk does not have such a marker - * but instead relies on the #TRACE_TYPE_FOOTER entry. + * Marks the end of a chunk. The value of this marker is the chunk ordinal, which + * is an increasing counter set by #dynamorio::drmemtrace::record_filter. + * The final chunk does not have such a marker but instead relies on the + * #TRACE_TYPE_FOOTER entry. */ TRACE_MARKER_TYPE_CHUNK_FOOTER, From 0f76ec8f799727542ed7a5ac6055cc83acf664b5 Mon Sep 17 00:00:00 2001 From: edeiana Date: Fri, 3 May 2024 15:40:29 -0700 Subject: [PATCH 54/58] Fixed doxygen comment on chunk_footer. --- clients/drcachesim/common/trace_entry.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/clients/drcachesim/common/trace_entry.h b/clients/drcachesim/common/trace_entry.h index 447d3eb95c7..d6c51bb3987 100644 --- a/clients/drcachesim/common/trace_entry.h +++ b/clients/drcachesim/common/trace_entry.h @@ -477,7 +477,8 @@ typedef enum { /** * Marks the end of a chunk. The value of this marker is the chunk ordinal, which - * is an increasing counter set by #dynamorio::drmemtrace::record_filter. + * is an increasing counter set by #dynamorio::drmemtrace::record_filter_t starting + * from 0. * The final chunk does not have such a marker but instead relies on the * #TRACE_TYPE_FOOTER entry. */ From 6b95e4d178704e29f87d67234a2ba3d05b8c707d Mon Sep 17 00:00:00 2001 From: edeiana Date: Sun, 5 May 2024 13:46:42 -0700 Subject: [PATCH 55/58] Removed comment on marker value of TRACE_MARKER_TYPE_CHUNK_FOOTER. --- clients/drcachesim/common/trace_entry.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/clients/drcachesim/common/trace_entry.h b/clients/drcachesim/common/trace_entry.h index d6c51bb3987..5b26180ba5c 100644 --- a/clients/drcachesim/common/trace_entry.h +++ b/clients/drcachesim/common/trace_entry.h @@ -476,11 +476,8 @@ typedef enum { TRACE_MARKER_TYPE_CHUNK_INSTR_COUNT, /** - * Marks the end of a chunk. The value of this marker is the chunk ordinal, which - * is an increasing counter set by #dynamorio::drmemtrace::record_filter_t starting - * from 0. - * The final chunk does not have such a marker but instead relies on the - * #TRACE_TYPE_FOOTER entry. + * Marks the end of a chunk. The final chunk does not have such a marker + * but instead relies on the #TRACE_TYPE_FOOTER entry. */ TRACE_MARKER_TYPE_CHUNK_FOOTER, From 4ffe2d1e232020c36cefa2afed04bf27fc3c77f0 Mon Sep 17 00:00:00 2001 From: edeiana Date: Mon, 6 May 2024 01:35:57 -0700 Subject: [PATCH 56/58] Added memset for result reproducibility. --- clients/drcachesim/tools/filter/encodings2regdeps.h | 1 + 1 file changed, 1 insertion(+) diff --git a/clients/drcachesim/tools/filter/encodings2regdeps.h b/clients/drcachesim/tools/filter/encodings2regdeps.h index 0b78a832568..fac93767151 100644 --- a/clients/drcachesim/tools/filter/encodings2regdeps.h +++ b/clients/drcachesim/tools/filter/encodings2regdeps.h @@ -137,6 +137,7 @@ class encodings2regdeps_t : public record_filter_t::record_filter_func_t { */ byte ALIGN_VAR(REGDEPS_ALIGN_BYTES) encoding_regdeps[REGDEPS_MAX_ENCODING_LENGTH]; + memset(encoding_regdeps, 0, sizeof(encoding_regdeps)); app_pc next_pc_regdeps = instr_encode(dcontext_.dcontext, &instr_regdeps, encoding_regdeps); instr_free(dcontext_.dcontext, &instr_regdeps); From 3d617abd3f03eac70c6880f4ec422863f4da5faf Mon Sep 17 00:00:00 2001 From: edeiana Date: Mon, 6 May 2024 01:36:19 -0700 Subject: [PATCH 57/58] Added test for when real ISA encoding has more (or less) trace_entry_t records than regdeps ISA encoding. --- .../tests/record_filter_unit_tests.cpp | 75 +++++++++++++++---- 1 file changed, 61 insertions(+), 14 deletions(-) diff --git a/clients/drcachesim/tests/record_filter_unit_tests.cpp b/clients/drcachesim/tests/record_filter_unit_tests.cpp index 8585127a1bd..2454a6edba5 100644 --- a/clients/drcachesim/tests/record_filter_unit_tests.cpp +++ b/clients/drcachesim/tests/record_filter_unit_tests.cpp @@ -293,13 +293,25 @@ static bool test_encodings2regdeps_filter() { constexpr addr_t PC = 0x7f6fdd3ec360; + constexpr addr_t PC2 = 0x7f6fdd3eb1f7; + // constexpr addr_t PC2 = 0x00007f6fdd3eb1f7; + constexpr addr_t PC3 = 0x7f6fdd3eb21a; constexpr addr_t ENCODING_REAL_ISA = 0xe78948; + constexpr addr_t ENCODING_REAL_ISA_2_PART1 = 0x841f0f66; + constexpr addr_t ENCODING_REAL_ISA_2_PART2 = 0x0; + constexpr addr_t ENCODING_REAL_ISA_3 = 0xab48f3; constexpr addr_t ENCODING_REGDEPS_ISA = 0x0006090600010011; + constexpr addr_t ENCODING_REGDEPS_ISA_2 = 0x0000020400004010; + constexpr addr_t ENCODING_REGDEPS_ISA_3_PART1 = 0x0209030600001042; + constexpr addr_t ENCODING_REGDEPS_ISA_3_PART2 = 0x0000000000220903; std::vector entries = { - // Trace shard header. + /* Trace shard header. + */ { { TRACE_TYPE_HEADER, 0, { 0x1 } }, true, { true } }, { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_VERSION, { 0x2 } }, true, { true } }, - // File type, modified by the record_filter encodings2regdeps. + /* File type, modified by record_filter encodings2regdeps to add + * OFFLINE_FILE_TYPE_ARCH_REGDEPS. + */ { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_FILETYPE, { OFFLINE_FILE_TYPE_ARCH_X86_64 | OFFLINE_FILE_TYPE_ENCODINGS | @@ -321,10 +333,14 @@ test_encodings2regdeps_filter() true, { true } }, - // Chunk 1. + /* Chunk 1. + */ { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 0x7 } }, true, { true } }, { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 0x8 } }, true, { true } }, - // Encoding, modified by the record_filter encodings2regdeps. + /* Encoding, modified by the record_filter encodings2regdeps. + * encoding real ISA size == encoding regdeps ISA size + * (in terms of trace_entry_t). + */ { { TRACE_TYPE_ENCODING, 3, { ENCODING_REAL_ISA } }, true, { false } }, { { TRACE_TYPE_ENCODING, 8, { ENCODING_REGDEPS_ISA } }, false, { true } }, { { TRACE_TYPE_INSTR, 3, { PC } }, true, { true } }, @@ -332,13 +348,15 @@ test_encodings2regdeps_filter() { { TRACE_TYPE_INSTR, 3, { PC } }, true, { true } }, { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CHUNK_FOOTER, { 0 } }, true, { true } }, - // Chunk 2. + /* Chunk 2. + */ { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_RECORD_ORDINAL, { 0xa } }, true, { true } }, { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 0x7 } }, true, { true } }, { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 0x8 } }, true, { true } }, - // Duplicated encoding across chunk boundary. + /* Duplicated encoding across chunk boundary. + */ { { TRACE_TYPE_ENCODING, 3, { ENCODING_REAL_ISA } }, true, { false } }, { { TRACE_TYPE_ENCODING, 8, { ENCODING_REGDEPS_ISA } }, false, { true } }, { { TRACE_TYPE_INSTR, 3, { PC } }, true, { true } }, @@ -346,22 +364,45 @@ test_encodings2regdeps_filter() { { TRACE_TYPE_INSTR, 3, { PC } }, true, { true } }, { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CHUNK_FOOTER, { 1 } }, true, { true } }, - // Chunk 3. + /* Chunk 3. + */ { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_RECORD_ORDINAL, { 0xe } }, true, { true } }, { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 0x7 } }, true, { true } }, { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 0x8 } }, true, { true } }, - // Same instr in different chunk. We expect record_filter to add - // ENCODING_REGDEPS_ISA here (not ENCODING_REAL_ISA). - { { TRACE_TYPE_ENCODING, 8, { ENCODING_REGDEPS_ISA } }, false, { true } }, - { { TRACE_TYPE_INSTR, 3, { PC } }, true, { true } }, - { { TRACE_TYPE_INSTR, 3, { PC } }, true, { true } }, - // Trace shard footer. + /* encoding real ISA size > encoding regdeps ISA size + */ + { { TRACE_TYPE_ENCODING, 8, { ENCODING_REAL_ISA_2_PART1 } }, true, { false } }, + { { TRACE_TYPE_ENCODING, 1, { ENCODING_REAL_ISA_2_PART2 } }, true, { false } }, + { { TRACE_TYPE_ENCODING, 8, { ENCODING_REGDEPS_ISA_2 } }, false, { true } }, + { { TRACE_TYPE_INSTR, 9, { PC2 } }, true, { true } }, + { { TRACE_TYPE_INSTR, 9, { PC2 } }, true, { true } }, + { { TRACE_TYPE_INSTR, 9, { PC2 } }, true, { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CHUNK_FOOTER, { 2 } }, true, { true } }, + + /* Chunk 4. + */ + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_RECORD_ORDINAL, { 0x12 } }, + true, + { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP, { 0x7 } }, true, { true } }, + { { TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID, { 0x8 } }, true, { true } }, + /* encoding real ISA size < encoding regdeps ISA size + */ + { { TRACE_TYPE_ENCODING, 3, { ENCODING_REAL_ISA_3 } }, true, { false } }, + { { TRACE_TYPE_ENCODING, 8, { ENCODING_REGDEPS_ISA_3_PART1 } }, false, { true } }, + { { TRACE_TYPE_ENCODING, 4, { ENCODING_REGDEPS_ISA_3_PART2 } }, false, { true } }, + { { TRACE_TYPE_INSTR, 3, { PC3 } }, true, { true } }, + { { TRACE_TYPE_INSTR, 3, { PC3 } }, true, { true } }, + + /* Trace shard footer. + */ { { TRACE_TYPE_FOOTER, 0, { 0x0 } }, true, { true } }, }; - // Construct record_filter_t. + /* Construct encodings2regdeps_filter. + */ std::vector> filters; auto encodings2regdeps_filter = std::unique_ptr( new dynamorio::drmemtrace::encodings2regdeps_t()); @@ -371,8 +412,14 @@ test_encodings2regdeps_filter() return false; } filters.push_back(std::move(encodings2regdeps_filter)); + + /* Construct record_filter_t. + */ auto record_filter = std::unique_ptr( new test_record_filter_t(std::move(filters), 0, /*write_archive=*/true)); + + /* Run the test. + */ if (!process_entries_and_check_result(record_filter.get(), entries, 0)) return false; From bc7e4a00a91a07b65983731fc746e055937c18bb Mon Sep 17 00:00:00 2001 From: edeiana Date: Mon, 6 May 2024 02:03:03 -0700 Subject: [PATCH 58/58] Renaming encodings2regdeps* to encodings2regdeps_filter* for consistency with other filters. --- api/docs/release.dox | 4 ++-- clients/drcachesim/CMakeLists.txt | 2 +- clients/drcachesim/tests/record_filter_unit_tests.cpp | 4 ++-- .../{encodings2regdeps.h => encodings2regdeps_filter.h} | 4 ++-- clients/drcachesim/tools/filter/record_filter.cpp | 8 ++++---- 5 files changed, 11 insertions(+), 11 deletions(-) rename clients/drcachesim/tools/filter/{encodings2regdeps.h => encodings2regdeps_filter.h} (98%) diff --git a/api/docs/release.dox b/api/docs/release.dox index 960443e0685..a407e16edfb 100644 --- a/api/docs/release.dox +++ b/api/docs/release.dox @@ -230,8 +230,8 @@ Further non-compatibility-affecting changes include: purpose of preserving register dependencies. - Added instr_convert_to_isa_regdeps() API that converts an #instr_t from a real ISA (e.g., #DR_ISA_AMD64) to the #DR_ISA_REGDEPS synthetic ISA. - - Added encodings2regdeps_t filter to #dynamorio::drmemtrace::record_filter_t to generate - #DR_ISA_REGDEPS traces. + - Added encodings2regdeps_filter_t filter to #dynamorio::drmemtrace::record_filter_t to + generate #DR_ISA_REGDEPS traces. - Added #dynamorio::drmemtrace::OFFLINE_FILE_TYPE_ARCH_REGDEPS file type for #DR_ISA_REGDEPS traces. diff --git a/clients/drcachesim/CMakeLists.txt b/clients/drcachesim/CMakeLists.txt index 538a866ebce..9ec0d0ddc6c 100644 --- a/clients/drcachesim/CMakeLists.txt +++ b/clients/drcachesim/CMakeLists.txt @@ -197,7 +197,7 @@ add_exported_library(drmemtrace_record_filter STATIC tools/filter/cache_filter.h tools/filter/cache_filter.cpp tools/filter/type_filter.h - tools/filter/encodings2regdeps.h + tools/filter/encodings2regdeps_filter.h tools/filter/null_filter.h) target_link_libraries(drmemtrace_record_filter drmemtrace_simulator) configure_DynamoRIO_standalone(drmemtrace_record_filter) diff --git a/clients/drcachesim/tests/record_filter_unit_tests.cpp b/clients/drcachesim/tests/record_filter_unit_tests.cpp index 2454a6edba5..c20a10a3034 100644 --- a/clients/drcachesim/tests/record_filter_unit_tests.cpp +++ b/clients/drcachesim/tests/record_filter_unit_tests.cpp @@ -42,7 +42,7 @@ #include "tools/filter/record_filter.h" #include "tools/filter/trim_filter.h" #include "tools/filter/type_filter.h" -#include "tools/filter/encodings2regdeps.h" +#include "tools/filter/encodings2regdeps_filter.h" #include "trace_entry.h" #include "zipfile_ostream.h" @@ -405,7 +405,7 @@ test_encodings2regdeps_filter() */ std::vector> filters; auto encodings2regdeps_filter = std::unique_ptr( - new dynamorio::drmemtrace::encodings2regdeps_t()); + new dynamorio::drmemtrace::encodings2regdeps_filter_t()); if (encodings2regdeps_filter->get_error_string() != "") { fprintf(stderr, "Couldn't construct a encodings2regdeps_filter %s", encodings2regdeps_filter->get_error_string().c_str()); diff --git a/clients/drcachesim/tools/filter/encodings2regdeps.h b/clients/drcachesim/tools/filter/encodings2regdeps_filter.h similarity index 98% rename from clients/drcachesim/tools/filter/encodings2regdeps.h rename to clients/drcachesim/tools/filter/encodings2regdeps_filter.h index fac93767151..cf521c176ab 100644 --- a/clients/drcachesim/tools/filter/encodings2regdeps.h +++ b/clients/drcachesim/tools/filter/encodings2regdeps_filter.h @@ -57,9 +57,9 @@ namespace drmemtrace { * Note that simulators that deal with these filtered traces will also have to handle the * fact that encoding_size != instruction_length. */ -class encodings2regdeps_t : public record_filter_t::record_filter_func_t { +class encodings2regdeps_filter_t : public record_filter_t::record_filter_func_t { public: - encodings2regdeps_t() + encodings2regdeps_filter_t() { } diff --git a/clients/drcachesim/tools/filter/record_filter.cpp b/clients/drcachesim/tools/filter/record_filter.cpp index 979ca62acd5..2afd74b9fa4 100644 --- a/clients/drcachesim/tools/filter/record_filter.cpp +++ b/clients/drcachesim/tools/filter/record_filter.cpp @@ -60,7 +60,7 @@ #include "cache_filter.h" #include "trim_filter.h" #include "type_filter.h" -#include "encodings2regdeps.h" +#include "encodings2regdeps_filter.h" #undef VPRINT #ifdef DEBUG @@ -140,7 +140,7 @@ record_filter_tool_create(const std::string &output_dir, uint64_t stop_timestamp if (encodings2regdeps) { filter_funcs.emplace_back( std::unique_ptr( - new dynamorio::drmemtrace::encodings2regdeps_t())); + new dynamorio::drmemtrace::encodings2regdeps_filter_t())); } // TODO i#5675: Add other filters. @@ -592,7 +592,7 @@ record_filter_t::process_chunk_encodings(per_shard_t *per_shard, trace_entry_t & // encodings at chunk boundaries regardless. Note that filters that modify // encodings (even if they add or remove trace_entry_t records) do not incur in // this problem and we don't need support for partial removal of encodings in this - // case. An example of such filters is encodings2regdeps_t. + // case. An example of such filters is encodings2regdeps_filter_t. if (TESTANY(OFFLINE_FILE_TYPE_ENCODINGS, per_shard->filetype) && per_shard->cur_chunk_pcs.find(entry.addr) == per_shard->cur_chunk_pcs.end()) { if (per_shard->per_input == nullptr) @@ -653,7 +653,7 @@ record_filter_t::process_delayed_encodings(per_shard_t *per_shard, trace_entry_t // partial encoding removal). Note that filters that modify encodings (even if // they add or remove trace_entry_t records) do not incur in this problem and we // don't need support for partial removal of encodings in this case. An example - // of such filters is encodings2regdeps_t. + // of such filters is encodings2regdeps_filter_t. // We check prev_was_output to rule out filtered-out encodings // (we record all encodings for new-chunk insertion). if (!per_shard->last_encoding.empty() && per_shard->prev_was_output) {