diff --git a/CMakeLists.txt b/CMakeLists.txt index 9a29c61..4af59b1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,6 +16,9 @@ add_library(gs_patterns_core SHARED gsnv_patterns.cpp gspin_patterns.h gspin_patterns.cpp + errors.h + config.h + config.cpp ) add_executable( gs_patterns diff --git a/TUNING_PARAMETERS.md b/TUNING_PARAMETERS.md new file mode 100644 index 0000000..df92032 --- /dev/null +++ b/TUNING_PARAMETERS.md @@ -0,0 +1,80 @@ +# Usage: + +`./gs_patterns [options] <trace.gz> [<binary>|-nv]` + +Options accept both `--opt value` and `--opt=value` formats. +Argument parsing stops at `--`. + +> For large values, you can use shell arithmetic. +Example: `--max-pattern-size=$((2**24))` + +# Configuration Options: + +## Triggers: + +| Option | Short | Description | Default | Range | +|:---:|:---:|:---|:---:|:---:| +|`--per-sample`|`-ps`|Min memory operations before printing a progress dot|10000000 (≈2\*\*23)|2\*\*10 - 2\*\*30| + +## Info Parameters: + +| Option | Short | Description | Default | Range | +|:---:|:---:|:---|:---:|:---:| +|`--cache-line-size` |`-cls`|Cache line size (in bytes) to consider. 
|2\*\*6 |2\*\*4 - 2\*\*9 | +|`--trace-buffer-size` |`-tbs`|Number of trace entries to read at once (limits read buffer memory)|2\*\*10 |2\*\*0 - 2\*\*20 | +|`--iaddr-per-window` |`-iw` |Instruction window size (static iaddrs), prevents 1st pass slowdown|2\*\*10|2\*\*4 - 2\*\*12 | +|`--max-gather-scatter`|`-mgs`|Max number of unique gather/scatter iaddrs to track|8096 (≈2\*\*13) |2\*\*1 - 2\*\*14 | +|`--histogram-bounds` |`-hb` |Bound for the stride histogram (total bins = 2\*bounds+3)|2\*\*9 |2\*\*4 - 2\*\*12 | + +## Pattern Parameters: + +| Option | Short | Description | Default | Range | +|:---:|:---:|:---|:---:|:---:| +|`--min-accesses-threshold` |`-mat`|Min number of accesses a pattern must have to be considered|2\*\*10 |2\*\*4 - 2\*\*14 | +|`--unique-distances-threshold`|`-udt`|Min number of unique strides, used to identify complex patterns|15 (≈2\*\*4) |2\*\*0 - 2\*\*7 | +|`--out-threshold` |`-ot` |Fraction (0.0-1.0) of accesses "out of bounds" to consider a pattern as complex|0.5 |0.0 - 1.0 | +|`--top-patterns` |`-tp` |Number of top patterns to keep |10 |1 - 100 | +|`--initial-pattern-size` |`-ips`|Min *initial* size (indices) to allocate for a pattern|2\*\*15 |2\*\*10 - 2\*\*31| +|`--max-pattern-size` |`-mps`|Max size (indices) a pattern can grow to (prevents OOM)|2\*\*30|2\*\*10 - 2\*\*31| +|`--max-line-length` |`-mll`|Max buffer size (chars) for reading source code lines (addr2line)|2\*\*10 |2\*\*10 - 2\*\*13 | + +# Other flags: + +| Option | Explanation | +|:---:|:---| +| -nv | Interpret trace as NVBit (CUDA) trace | +| -v | Verbose logging | +| -ow | Enable one-warp mode (sets the `one_warp` config flag) | +| -h | Show usage instructions | + +# Invocation: + +## For Pin/DynamoRIO traces: + +``` +./gs_patterns <trace.gz> <binary> +``` + +## For NVBit (CUDA kernels): + +``` +./gs_patterns <trace.gz> -nv +``` + +# Examples: + +``` +./gs_patterns app.pin.trace.gz ./app_with_symbols + +./gs_patterns kernel.nvbit.trace.gz -nv +``` + +# Notes: + +• Trace file must be gzipped (`.gz`) — not `tar.gz`. 
+ +• For Pin/DynamoRIO, the `<binary>` should be compiled with symbols (e.g., `-g`). + +• For NVBit, compile CUDA kernels with line info (`--generate-line-info`). + +• See `nvbit_tracing/README.md` for extracting compatible CUDA traces. diff --git a/config.cpp b/config.cpp new file mode 100644 index 0000000..5627417 --- /dev/null +++ b/config.cpp @@ -0,0 +1,361 @@ +#include +#include +#include + +#include "config.h" +#include "errors.h" + +namespace gs_patterns +{ + // setters + void Config::set_per_sample(size_t per_sample) + { + if (per_sample < MIN_PER_SAMPLE || per_sample > MAX_PER_SAMPLE ) { + throw GSError("Invalid per_sample"); + } + _per_sample = per_sample; + } + + void Config::set_cache_line_size(size_t cache_line_size) + { + if (cache_line_size < MIN_CACHE_LINE_SIZE || cache_line_size > MAX_CACHE_LINE_SIZE ) { + throw gs_patterns::GSError("Invalid cache_line_size"); + } + // if (!isPowerOf2(cache_line_size)) { + // throw GSError("cache_line_size must be power of 2"); + // } + _cache_line_size = cache_line_size; + } + + void Config::set_trace_buffer_size(size_t trace_buffer_size) + { + if (trace_buffer_size < MIN_TRACE_BUFFER_SIZE || trace_buffer_size > MAX_TRACE_BUFFER_SIZE ) { + throw GSError("Invalid trace_buffer_size"); + } + _trace_buffer_size = trace_buffer_size; + } + + void Config::set_iaddr_per_window(size_t iaddr_per_window) + { + if (iaddr_per_window < MIN_IADDR_PER_WINDOW || iaddr_per_window > MAX_IADDR_PER_WINDOW ) { + throw GSError("Invalid iaddr_per_window"); + } + _iaddr_per_window = iaddr_per_window; + } + + void Config::set_max_gather_scatter(size_t max_gather_scatter) + { + if (max_gather_scatter < MIN_MAX_GATHER_SCATTER || max_gather_scatter > MAX_MAX_GATHER_SCATTER ) { + throw GSError("Invalid max_gather_scatter"); + } + _max_gather_scatter = max_gather_scatter; + } + + void Config::set_histogram_bounds(size_t histogram_bounds) + { + if (histogram_bounds < MIN_HISTOGRAM_BOUNDS || histogram_bounds > MAX_HISTOGRAM_BOUNDS ) { + throw 
GSError("Invalid histogram_bounds"); + } + _histogram_bounds = histogram_bounds; + } + + void Config::set_min_accesses_threshold(size_t min_accesses_threshold) + { + if (min_accesses_threshold < MIN_ACCESSES_THRESHOLD || min_accesses_threshold > MAX_ACCESSES_THRESHOLD ) { + throw GSError("Invalid min_accesses_threshold"); + } + _min_accesses_threshold = min_accesses_threshold; + } + + void Config::set_unique_distances_threshold(size_t unique_distances_threshold) + { + if (unique_distances_threshold < MIN_UNIQUE_DISTANCES_THRESHOLD || unique_distances_threshold > MAX_UNIQUE_DISTANCES_THRESHOLD ) { + throw GSError("Invalid unique_distances_threshold"); + } + _unique_distances_threshold = unique_distances_threshold; + } + + void Config::set_out_threshold(double out_threshold) + { + if (out_threshold < MIN_OUT_THRESHOLD || out_threshold > MAX_OUT_THRESHOLD ) { + throw GSError("Invalid out_threshold"); + } + _out_threshold = out_threshold; + } + + void Config::set_top_patterns(size_t top_patterns) + { + if (top_patterns < MIN_TOP_PATTERNS || top_patterns > MAX_TOP_PATTERNS ) { + throw GSError("Invalid top_patterns"); + } + _top_patterns = top_patterns; + } + + void Config::set_initial_pattern_size(size_t initial_pattern_size) + { + if (initial_pattern_size < MIN_PATTERN_SIZE || initial_pattern_size > MAX_PATTERN_SIZE ) { + throw GSError("Invalid initial_pattern_size"); + } + _initial_pattern_size = initial_pattern_size; + } + + void Config::set_max_pattern_size(size_t max_pattern_size) + { + if (max_pattern_size < MIN_PATTERN_SIZE || max_pattern_size > MAX_PATTERN_SIZE ) { + throw GSError("Invalid max_pattern_size"); + } + _max_pattern_size = max_pattern_size; + } + + void Config::set_max_line_length(size_t max_line_length) + { + if (max_line_length < MIN_MAX_LINE_LENGTH || max_line_length > MAX_MAX_LINE_LENGTH ) { + throw GSError("Invalid max_line_length"); + } + _max_line_length = max_line_length; + } + void Config::parseArgs(int argc, char* argv[]) + { + _verbose = 
false; + _use_gs_nv = false; + _one_warp = false; + _positional_args.clear(); + + for (int i = 1; i < argc; ++i) { + std::string arg = argv[i]; + + // Ignore empty tokens + if (arg.empty()) { + continue; + } + + // End-of-options marker + if (arg == "--") { + // Add all subsequent args as positional + for (int j = i + 1; j < argc; ++j) + { + _positional_args.push_back(argv[j]); + } + break; // stop parsing + } + + // if it doesn't start with "-", it's positional + if (arg[0] != '-') + { + _positional_args.push_back(arg); + continue; + } + + // A lone "-" (conventional stdin/stdout placeholder) is not supported yet: it is skipped + if (arg == "-") { + // it could be kept as a positional if stdin support is ever implemented: + // _positional_args.push_back(arg); + continue; + } + + // operational flags (no value) + if (arg == "-nv") + { + _use_gs_nv = true; + continue; + } + if (arg == "-v") + { + _verbose = true; + continue; + } + if (arg == "-ow") + { + _one_warp = true; + continue; + } + if (arg == "--help" || arg == "-h") // a bare "help" can never reach this point: it is collected as a positional above + { + printHelp(argv[0]); + std::exit(0); + } + + // options with value + std::string value; + std::size_t eq = arg.find('='); + if (eq != std::string::npos) { + // '=' was found: '--opt=value' form + value = arg.substr(eq + 1); + arg.erase(eq); // keep only the option name for the ladder comparison below + if (value.empty()) { + throw GSError("Missing value for " + arg); + } + } else { + // '=' wasn't found + if (i + 1 >= argc) { + // no argument left to use as the value + throw GSError("Missing value for " + arg); + } + // '--opt value' form: the next argument is consumed as the value + value = argv[++i]; + } + + try { + if (arg == "--per-sample" || arg == "-ps") { + set_per_sample(std::stoull(value)); + } + else if (arg == "--cache-line-size" || arg == "-cls") { + set_cache_line_size(std::stoull(value)); + } + else if (arg == "--trace-buffer-size" || arg == "-tbs") { + set_trace_buffer_size(std::stoull(value)); + } + else if (arg == "--iaddr-per-window" || arg == "-iw") { + set_iaddr_per_window(std::stoull(value)); + } + else if (arg == "--max-gather-scatter" || arg == 
"-mgs") { + set_max_gather_scatter(std::stoull(value)); + } + else if (arg == "--histogram-bounds" || arg == "-hb") { + set_histogram_bounds(std::stoull(value)); + } + else if (arg == "--min-accesses-threshold" || arg == "-mat") { + set_min_accesses_threshold(std::stoull(value)); + } + else if (arg == "--unique-distances-threshold" || arg == "-udt") { + set_unique_distances_threshold(std::stoull(value)); + } + else if (arg == "--out-threshold" || arg == "-ot") { + set_out_threshold(std::stod(value)); + } + else if (arg == "--top-patterns" || arg == "-tp") { + set_top_patterns(std::stoull(value)); + } + else if (arg == "--initial-pattern-size" || arg == "-ips") { + set_initial_pattern_size(std::stoull(value)); + } + else if (arg == "--max-pattern-size" || arg == "-mps") { + set_max_pattern_size(std::stoull(value)); + } + else if (arg == "--max-line-length" || arg == "-mll") { + set_max_line_length(std::stoull(value)); + } + else { + throw GSError("Unknown configuration argument: " + arg); + } + } catch (const std::invalid_argument&) { + throw GSError("Invalid value for " + arg + ": " + value); + } catch (const std::out_of_range&) { + throw GSError("Value out of range for " + arg + ": " + value); + } + } + } + + void Config::printHelp(const char* program_name) + { + const Config& cfg = get_instance(); + constexpr int option_width = 45; + + // --- Usage header (generic) --- + std::cout << "\nUsage:\n" + << " " << program_name << " [options] <trace.gz> [<binary>|-nv]\n\n" + << " Options accept both '--opt value' and '--opt=value' formats.\n" + << " Argument parsing stops at '--'.\n\n"; + // Could add, if implemented in the future + // A lone '-' is treated as stdin/stdout. 
+ // current parser skips '-', but treating it as stdin/stdout is not yet implemented + + // --- Invocation (from README) --- + std::cout << "Invocation:\n" + << " For Pin/DynamoRIO traces:\n" + << " " << program_name << " <trace.gz> <binary>\n" + << " For NVBit (CUDA kernels):\n" + << " " << program_name << " <trace.gz> -nv\n\n"; + + // --- Examples --- + std::cout << "Examples:\n" + << " " << program_name << " app.pin.trace.gz ./app_with_symbols\n" + << " " << program_name << " kernel.nvbit.trace.gz -nv\n\n"; + + // --- Notes / prerequisites --- + std::cout << "Notes:\n" + << " • Trace file must be gzipped ('.gz') — not 'tar.gz'.\n" + << " • For Pin/DynamoRIO, the <binary> should be compiled with symbols (e.g., -g).\n" + << " • For NVBit, compile CUDA kernels with line info (--generate-line-info).\n" + << " • See nvbit_tracing/README.md for extracting compatible CUDA traces.\n\n"; + + // --- Configuration Options --- + std::cout << "Configuration Options:\n\n"; + std::cout << "Triggers:\n"; + std::cout << " " << std::left << std::setw(option_width) << "--per-sample, -ps " + << "Memory operations before printing a progress dot (default: " << cfg.get_per_sample() << ")\n"; + std::cout << " " << std::left << std::setw(option_width) << "" + << "Range: [" << MIN_PER_SAMPLE << ", " << MAX_PER_SAMPLE << "]\n\n"; + + std::cout << "Info Parameters:\n"; + std::cout << " " << std::left << std::setw(option_width) << "--cache-line-size, -cls " + << "Cache line size in bytes (default: " << cfg.get_cache_line_size() << ")\n"; + std::cout << " " << std::left << std::setw(option_width) << "" + << "Range: [" << MIN_CACHE_LINE_SIZE << ", " << MAX_CACHE_LINE_SIZE << "]\n"; + std::cout << " " << std::left << std::setw(option_width) << "--trace-buffer-size, -tbs " + << "Trace buffer size (default: " << cfg.get_trace_buffer_size() << ")\n"; + std::cout << " " << std::left << std::setw(option_width) << "" + << "Range: [" << MIN_TRACE_BUFFER_SIZE << ", " << MAX_TRACE_BUFFER_SIZE << "]\n"; + std::cout << " " << std::left << 
std::setw(option_width) << "--iaddr-per-window, -iw " + << "Instruction window size (default: " << cfg.get_iaddr_per_window() << ")\n"; + std::cout << " " << std::left << std::setw(option_width) << "" + << "Range: [" << MIN_IADDR_PER_WINDOW << ", " << MAX_IADDR_PER_WINDOW << "]\n"; + std::cout << " " << std::left << std::setw(option_width) << "--max-gather-scatter, -mgs " + << "Max number of unique gather/scatter iaddrs to track (default: " << cfg.get_max_gather_scatter() << ")\n"; + std::cout << " " << std::left << std::setw(option_width) << "" + << "Range: [" << MIN_MAX_GATHER_SCATTER << ", " << MAX_MAX_GATHER_SCATTER << "]\n"; + std::cout << " " << std::left << std::setw(option_width) << "--histogram-bounds, -hb " + << "Bound for the stride histogram (default: " << cfg.get_histogram_bounds() << ")\n"; + std::cout << " " << std::left << std::setw(option_width) << "" + << "Range: [" << MIN_HISTOGRAM_BOUNDS << ", " << MAX_HISTOGRAM_BOUNDS << "]\n\n"; + + std::cout << "Pattern Parameters:\n"; + std::cout << " " << std::left << std::setw(option_width) << "--min-accesses-threshold, -mat " + << "Min number of accesses a pattern must have to be considered (default: " << cfg.get_min_accesses_threshold() << ")\n"; + std::cout << " " << std::left << std::setw(option_width) << "" + << "Range: [" << MIN_ACCESSES_THRESHOLD << ", " << MAX_ACCESSES_THRESHOLD << "]\n"; + std::cout << " " << std::left << std::setw(option_width) << "--unique-distances-threshold, -udt " + << "Min number of unique strides a pattern must have to be considered (default: " << cfg.get_unique_distances_threshold() << ")\n"; + std::cout << " " << std::left << std::setw(option_width) << "" + << "Range: [" << MIN_UNIQUE_DISTANCES_THRESHOLD << ", " << MAX_UNIQUE_DISTANCES_THRESHOLD << "]\n"; + std::cout << " " << std::left << std::setw(option_width) << "--out-threshold, -ot " + << "Percentage (0.0-1.0) of accesses out of bounds to consider a pattern as complex (default: " << cfg.get_out_threshold() << ")\n"; 
+ std::cout << " " << std::left << std::setw(option_width) << "" + << "Range: [" << MIN_OUT_THRESHOLD << ", " << MAX_OUT_THRESHOLD << "]\n"; + std::cout << " " << std::left << std::setw(option_width) << "--top-patterns, -tp " + << "Number of top patterns to keep (default: " << cfg.get_top_patterns() << ")\n"; + std::cout << " " << std::left << std::setw(option_width) << "" + << "Range: [" << MIN_TOP_PATTERNS << ", " << MAX_TOP_PATTERNS << "]\n"; + std::cout << " " << std::left << std::setw(option_width) << "--initial-pattern-size, -ips " + << "Min initial size (indices) to allocate for a pattern (default: " << cfg.get_initial_pattern_size() << ")\n"; + std::cout << " " << std::left << std::setw(option_width) << "" + << "Range: [" << MIN_PATTERN_SIZE << ", " << MAX_PATTERN_SIZE << "]\n"; + std::cout << " " << std::left << std::setw(option_width) << "--max-pattern-size, -mps " + << "Maximum size (indices) a pattern can grow to (default: " << cfg.get_max_pattern_size() << ")\n"; + std::cout << " " << std::left << std::setw(option_width) << "" + << "Range: [" << MIN_PATTERN_SIZE << ", " << MAX_PATTERN_SIZE << "]\n"; + std::cout << " " << std::left << std::setw(option_width) << "--max-line-length, -mll " + << "Max buffer size (chars) for reading source code lines (default: " << cfg.get_max_line_length() << ")\n"; + std::cout << " " << std::left << std::setw(option_width) << "" + << "Range: [" << MIN_MAX_LINE_LENGTH << ", " << MAX_MAX_LINE_LENGTH << "]\n\n"; + + // std::cout << "Note: Most numeric values must be powers of 2.\n" + // << " Exceptions: out-threshold, num-unique-distances, and top-patterns.\n\n"; + + // --- How it works --- + std::cout << "How gs_patterns works:\n" + << " • Detects gather/scatter (g/s) by finding repeated instruction addresses (loops)\n" + << " that correspond to memory instructions (scalar or vector).\n" + << " • Pass 1: ranks top g/s instructions and filters out trivial access patterns.\n" + << " • Pass 2: focuses on those top g/s; records 
normalized address array indices\n" + << " to a binary file and a spatter YAML file.\n\n"; + + // --- Quick flags reminder (non-config); keep in sync with the flag checks in parseArgs --- + std::cout << "Other flags:\n" + << " -nv Interpret trace as NVBit (CUDA) trace.\n" + << " -v Verbose logging.\n" + << " -ow Enable one-warp mode.\n -h Show usage instructions.\n\n"; + } + + +} // namespace gs_patterns \ No newline at end of file diff --git a/config.h b/config.h new file mode 100644 index 0000000..43557a9 --- /dev/null +++ b/config.h @@ -0,0 +1,152 @@ +#pragma once + +// Standard headers are included unconditionally: they must not depend on whether SYMBOLS_ONLY was predefined +#include <cstddef> +#include <string> +#include <vector> + +// symbol lookup options +#if !defined(SYMBOLS_ONLY) +#define SYMBOLS_ONLY 1 //Filter out instructions that have no symbol +#endif + +#if !defined(VBITS) +# define VBITS (512L) +# define VBYTES (VBITS/8) +#endif + +namespace gs_patterns +{ + class Config + { + public: + Config(const Config&) = delete; + Config& operator=(const Config&) = delete; + + static Config& get_instance() + { + static Config instance; + return instance; + } + + void parseArgs(int argc, char* argv[]); + void printHelp(const char* program_name); + + + // expose compile-time choices for run time inspection + static constexpr size_t vector_bits = VBITS; + static constexpr size_t vector_bytes = VBYTES; + static constexpr bool symbols_only = (SYMBOLS_ONLY != 0); + + // getters implemented in line for performance + [[nodiscard]] bool get_verbose() const {return _verbose;} + [[nodiscard]] bool get_use_gs_nv() const {return _use_gs_nv;} + [[nodiscard]] bool get_one_warp() const {return _one_warp;} + [[nodiscard]] const std::vector<std::string>& get_positional_args() const {return _positional_args;} + [[nodiscard]] size_t get_per_sample() const { return _per_sample; } + [[nodiscard]] size_t get_cache_line_size() const { return _cache_line_size; } + [[nodiscard]] size_t get_trace_buffer_size() const { return _trace_buffer_size; } + [[nodiscard]] size_t get_iaddr_per_window() const { return _iaddr_per_window; } + [[nodiscard]] size_t get_max_gather_scatter() const { return _max_gather_scatter; } + 
[[nodiscard]] size_t get_histogram_bounds() const { return _histogram_bounds; } + [[nodiscard]] size_t get_histogram_bounds_alloc() const { return 2 * _histogram_bounds + 3; } + [[nodiscard]] size_t get_min_accesses_threshold() const { return _min_accesses_threshold; } + [[nodiscard]] size_t get_unique_distances_threshold() const { return _unique_distances_threshold; } + [[nodiscard]] double get_out_threshold() const { return _out_threshold; } + [[nodiscard]] size_t get_top_patterns() const { return _top_patterns; } + [[nodiscard]] size_t get_initial_pattern_size() const { return _initial_pattern_size; } + [[nodiscard]] size_t get_max_pattern_size() const { return _max_pattern_size; } + [[nodiscard]] size_t get_max_line_length() const { return _max_line_length; } + + // setters + void set_per_sample(size_t per_sample); + void set_cache_line_size(size_t cache_line_size); + void set_trace_buffer_size(size_t trace_buffer_size); + void set_iaddr_per_window(size_t iaddr_per_window); + void set_max_gather_scatter(size_t max_gather_scatter); + void set_histogram_bounds(size_t histogram_bounds); + void set_min_accesses_threshold(size_t min_accesses_threshold); + void set_unique_distances_threshold(size_t unique_distances_threshold); + void set_out_threshold(double out_threshold); + void set_top_patterns(size_t top_patterns); + void set_initial_pattern_size(size_t initial_pattern_size); + void set_max_pattern_size(size_t max_pattern_size); + void set_max_line_length(size_t max_line_length); + + private: + Config() = default; + + // static bool isPowerOf2(const size_t n) + // { + // // positive powers of two are always like: 1000... 
+ // // if n = 1000...; (n - 1) = 0111...; AND of both is zero + // return n > 0 && (n & (n - 1)) == 0; + // } + // existing options + bool _verbose = false; + bool _use_gs_nv = false; + bool _one_warp = false; + std::vector _positional_args; + + // triggers + size_t _per_sample = 10000000; + + // info + size_t _cache_line_size = 64; + size_t _trace_buffer_size = 1LL << 10; //trace reading buffer size + size_t _iaddr_per_window = 1024; //number of iaddrs per window + size_t _max_gather_scatter = 8096; //max number for gathers and scatters + size_t _histogram_bounds = 512; //histogram positive max + + // patterns + size_t _min_accesses_threshold = 1024; //Threshold for number of accesses + size_t _unique_distances_threshold = 15; //Threshold for number of unique distances + double _out_threshold = 0.5; //Threshold for percentage of distances at boundaries of histogram + size_t _top_patterns = 10; //Final gather / scatters to keep + size_t _initial_pattern_size = 1 << 15; + size_t _max_pattern_size = 1 << 30; //Max number of indices recorded per gather/scatter + + size_t _max_line_length = 1024; + + + // -- Validation boundaries -- + // Triggers + static constexpr size_t MIN_PER_SAMPLE = 1LL << 10; // Minimum number of memory operations before printing a progress dot. + static constexpr size_t MAX_PER_SAMPLE = 1LL << 30; // Maximum number of memory operations before printing a progress dot (avoids impractically large values). + + // Info + static constexpr size_t MIN_CACHE_LINE_SIZE = 16; // Smallest cache line size (in bytes) to consider. + static constexpr size_t MAX_CACHE_LINE_SIZE = 512; // Largest cache line size (in bytes) to consider (e.g., for specialized hardware). + + static constexpr size_t MIN_TRACE_BUFFER_SIZE = 1; // Smallest number of trace entries to read at once (at least 1). + static constexpr size_t MAX_TRACE_BUFFER_SIZE = 1LL << 20; // Largest number of trace entries to read at once (~1M), limits memory for the read buffer. 
+ + static constexpr size_t MIN_IADDR_PER_WINDOW = 16; // Smallest "window" of unique instruction addresses to analyze for 1st pass. + static constexpr size_t MAX_IADDR_PER_WINDOW = 1LL << 12; // Largest instruction window (4,096), prevents extreme 1st pass slowdown. + + static constexpr size_t MIN_MAX_GATHER_SCATTER = 2; // Minimum number of unique gather/scatter iaddrs to track (at least 2). + static constexpr size_t MAX_MAX_GATHER_SCATTER = 1LL << 14; // Maximum number of unique gather/scatter iaddrs (16,384) to track across the whole trace. + + static constexpr size_t MIN_HISTOGRAM_BOUNDS = 16; // Smallest positive/negative bound for the stride histogram. + static constexpr size_t MAX_HISTOGRAM_BOUNDS = 1LL << 12; // Largest bound (4,096), controls memory/size of the stride histogram (total bins = 2*bounds+3). + + // Patterns + static constexpr size_t MIN_ACCESSES_THRESHOLD = 16; // Minimum number of accesses a pattern must have to be considered. + static constexpr size_t MAX_ACCESSES_THRESHOLD = 1LL << 14; // Maximum number of accesses (16,384) to require for a pattern. + + static constexpr size_t MIN_UNIQUE_DISTANCES_THRESHOLD = 1; // Minimum number of unique strides (distances) to trigger filtering (at least 1). + static constexpr size_t MAX_UNIQUE_DISTANCES_THRESHOLD = 128; // Maximum number of unique strides, used to identify complex patterns. + + static constexpr double MIN_OUT_THRESHOLD = 0.0; // Minimum percentage (0%) of accesses allowed "out of bounds" of the histogram. + static constexpr double MAX_OUT_THRESHOLD = 1.0; // Maximum percentage (100%) of accesses allowed "out of bounds". + + static constexpr size_t MIN_TOP_PATTERNS = 1; // Minimum number of top gather/scatter patterns to save (at least 1). + static constexpr size_t MAX_TOP_PATTERNS = 100; // Maximum number of top gather/scatter patterns to save. + + static constexpr size_t MIN_PATTERN_SIZE = 1LL << 10; // Minimum *initial* size (1,024 indices) to allocate for storing a pattern. 
+ static constexpr size_t MAX_PATTERN_SIZE = 1LL << 31; // Absolute *maximum* size (2B indices) a pattern can grow to, prevents OOM errors. + + static constexpr size_t MIN_MAX_LINE_LENGTH = 1LL << 10; // Minimum buffer size (in chars) for reading source code lines (addr2line). + static constexpr size_t MAX_MAX_LINE_LENGTH = 1LL << 13; // Maximum buffer size (8,192 chars) for reading source code lines. + }; +} \ No newline at end of file diff --git a/errors.h b/errors.h new file mode 100644 index 0000000..90e11ec --- /dev/null +++ b/errors.h @@ -0,0 +1,38 @@ +#pragma once +#include +#include + +namespace gs_patterns +{ + class GSError : public std::exception + { + public: + explicit GSError (std::string reason) : _reason(std::move(reason)) { } + ~GSError() override = default; + + [[nodiscard]] const char * what() const noexcept override { return _reason.c_str(); } + private: + std::string _reason; + }; + + class GSFileError : public GSError + { + public: + explicit GSFileError (std::string reason) : GSError(std::move(reason)) { } + ~GSFileError() override = default; + }; + + class GSDataError : public GSError + { + public: + explicit GSDataError (std::string reason) : GSError(std::move(reason)) { } + ~GSDataError() override = default; + }; + + class GSAllocError : public GSError + { + public: + explicit GSAllocError (std::string reason) : GSError(std::move(reason)) { } + ~GSAllocError() override = default; + }; +} \ No newline at end of file diff --git a/gs_patterns.h b/gs_patterns.h index f74b67c..e315440 100644 --- a/gs_patterns.h +++ b/gs_patterns.h @@ -4,33 +4,21 @@ #include #include #include +#include #include +#include +#include + +#include "errors.h" +#include "config.h" //symbol lookup options #if !defined(SYMBOLS_ONLY) #define SYMBOLS_ONLY 1 //Filter out instructions that have no symbol #endif -//triggers -#define PERSAMPLE 10000000 - //info -#define CLSIZE (64) //cacheline bytes -#define NBUFS (1LL<<10) //trace reading buffer size -#define IWINDOW (1024) 
//number of iaddrs per window -#define NGS (8096) //max number for gathers and scatters -#define OBOUNDS (512) //histogram positive max -#define OBOUNDS_ALLOC (2*OBOUNDS + 3) - -//patterns -#define USTRIDES 1024 //Threshold for number of accesses -#define NSTRIDES 15 //Threshold for number of unique distances -#define OUTTHRESH (0.5) //Threshold for percentage of distances at boundaries of histogram -#define NTOP (10) //Final gather / scatters to keep -#define INITIAL_PSIZE (1<<15) -#define MAX_PSIZE (1<<30) //Max number of indices recorded per gather/scatter - -#define MAX_LINE_LENGTH 1024 +// #define CLSIZE (64) //cacheline bytes - Unused - available via Config::get_instance().get_cache_line_size() #if !defined(VBITS) # define VBITS (512L) @@ -43,62 +31,32 @@ namespace gs_patterns typedef enum { GATHER=0, SCATTER } mem_access_type; typedef enum { VECTOR=0, CTA } mem_instr_type; - class GSError : public std::exception - { - public: - GSError (const std::string & reason) : _reason(reason) { } - ~GSError() {} - const char * what() const noexcept override { return _reason.c_str(); } - private: - std::string _reason; - }; - - class GSFileError : public GSError - { - public: - GSFileError (const std::string & reason) : GSError(reason) { } - ~GSFileError() {} - }; - - class GSDataError : public GSError - { - public: - GSDataError (const std::string & reason) : GSError(reason) { } - ~GSDataError() {} - }; - - class GSAllocError : public GSError - { - public: - GSAllocError (const std::string & reason) : GSError(reason) { } - ~GSAllocError() {} - }; class InstrAddrAdapter { public: - InstrAddrAdapter() { } - virtual ~InstrAddrAdapter() { } - - virtual bool is_valid() const = 0; - virtual bool is_mem_instr() const = 0; - virtual bool is_other_instr() const = 0; - virtual mem_access_type get_mem_access_type() const = 0; - virtual mem_instr_type get_mem_instr_type() const = 0; - - virtual size_t get_size() const = 0; - virtual addr_t get_base_addr() const = 0; - virtual 
addr_t get_address() const = 0; - virtual addr_t get_iaddr() const = 0; - virtual addr_t get_maddr() const = 0; - virtual unsigned short get_type() const = 0; // must be 0 for GATHER, 1 for SCATTER !! - virtual int64_t get_max_access_size() const = 0; - - virtual bool is_gather() const + InstrAddrAdapter() = default; + virtual ~InstrAddrAdapter() = default; + + [[nodiscard]] virtual bool is_valid() const = 0; + [[nodiscard]] virtual bool is_mem_instr() const = 0; + [[nodiscard]] virtual bool is_other_instr() const = 0; + [[nodiscard]] virtual mem_access_type get_mem_access_type() const = 0; + [[nodiscard]] virtual mem_instr_type get_mem_instr_type() const = 0; + + [[nodiscard]] virtual size_t get_size() const = 0; + [[nodiscard]] virtual addr_t get_base_addr() const = 0; + [[nodiscard]] virtual addr_t get_address() const = 0; + [[nodiscard]] virtual addr_t get_iaddr() const = 0; + [[nodiscard]] virtual addr_t get_maddr() const = 0; + [[nodiscard]] virtual unsigned short get_type() const = 0; // must be 0 for GATHER, 1 for SCATTER !! + [[nodiscard]] virtual int64_t get_max_access_size() const = 0; + + [[nodiscard]] virtual bool is_gather() const { return (is_valid() && is_mem_instr() && GATHER == get_mem_access_type()) ? true : false; } - virtual bool is_scatter() const + [[nodiscard]] virtual bool is_scatter() const { return (is_valid() && is_mem_instr() && SCATTER == get_mem_access_type()) ? 
true : false; } virtual void output(std::ostream & os) const = 0; @@ -109,14 +67,57 @@ namespace gs_patterns class Metrics { + private: + const size_t _initial_size; + const size_t _top_patterns; + const size_t _max_gather_scatter; + const size_t _max_line_length; + const size_t _max_pattern_size; + + std::unique_ptr srcline; + + mem_access_type _mType; + public: - Metrics(mem_access_type mType) : _mType(mType), _pattern_sizes(NTOP) + // Proxy class for 2D array access + // This class is public so it can be the return type of get_srcline() + + class SrcLine2D { + char* base; + size_t max_len; + public: + SrcLine2D(char* ptr, size_t ml) : base(ptr), max_len(ml) {} + + char* get(size_t j) { + return base + (j * max_len); + } + }; + explicit Metrics(mem_access_type mType, + size_t initial_size = Config::get_instance().get_initial_pattern_size(), + size_t top_patterns = Config::get_instance().get_top_patterns(), + size_t max_gather_scatter = Config::get_instance().get_max_gather_scatter(), + size_t max_line_length = Config::get_instance().get_max_line_length(), + size_t max_pattern_size = Config::get_instance().get_max_pattern_size() + ) + : _initial_size{initial_size}, + _top_patterns{top_patterns}, + _max_gather_scatter{max_gather_scatter}, + _max_line_length{max_line_length}, + _max_pattern_size{max_pattern_size}, + srcline(std::make_unique(2 * _max_gather_scatter * _max_line_length)), + _mType(mType), + offset(std::make_unique(_top_patterns)), + size(std::make_unique(_top_patterns)), + tot(std::make_unique(_top_patterns)), + top(std::make_unique(_top_patterns)), + top_idx(std::make_unique(_top_patterns)), + patterns(_top_patterns) { try { - for (int j = 0; j < NTOP; j++) { - patterns[j] = new int64_t[INITIAL_PSIZE]; - _pattern_sizes[j] = INITIAL_PSIZE; + for (int j = 0; j < _top_patterns; j++) + { + patterns[j].resize(_initial_size); } } catch (const std::exception & ex) @@ -125,33 +126,22 @@ namespace gs_patterns } } - ~Metrics() - { - for (int i = 0; i < NTOP; 
i++) { - delete [] patterns[i]; - } + ~Metrics() = default; - delete [] srcline; - } - - size_t get_pattern_size(int pattern_index) { - return _pattern_sizes[pattern_index]; + [[nodiscard]] size_t get_pattern_size(int pattern_index) const + { + return patterns[pattern_index].size(); } bool grow(int pattern_index) { try { - size_t old_size = _pattern_sizes[pattern_index]; + size_t old_size = patterns[pattern_index].size(); size_t new_size = old_size * 2; - if (new_size > MAX_PSIZE) { + if (new_size > _max_pattern_size) { return false; } - int64_t *tmp = new int64_t[new_size]; - memcpy(tmp, patterns[pattern_index], old_size * sizeof(int64_t)); - - delete [] patterns[pattern_index]; - patterns[pattern_index] = tmp; - _pattern_sizes[pattern_index] = new_size; + patterns[pattern_index].resize(new_size); return true; } @@ -163,12 +153,14 @@ namespace gs_patterns Metrics(const Metrics &) = delete; Metrics & operator=(const Metrics & right) = delete; - std::string type_as_string() { return !_mType ? "GATHER" : "SCATTER"; } - std::string getName() { return !_mType ? "Gather" : "Scatter"; } - std::string getShortName() { return !_mType ? "G" : "S"; } - std::string getShortNameLower() { return !_mType ? "g" : "s"; } + [[nodiscard]] std::string type_as_string() const { return !_mType ? "GATHER" : "SCATTER"; } + [[nodiscard]] std::string getName() const { return !_mType ? "Gather" : "Scatter"; } + [[nodiscard]] std::string getShortName() const { return !_mType ? "G" : "S"; } + [[nodiscard]] std::string getShortNameLower() const { return !_mType ? 
"g" : "s"; } - auto get_srcline() { return srcline[_mType]; } + SrcLine2D get_srcline() { + return SrcLine2D(srcline.get() + (_mType * _max_gather_scatter * _max_line_length), _max_line_length); + } int ntop = 0; int64_t iaddrs_nosym = 0; @@ -176,45 +168,44 @@ namespace gs_patterns int64_t iaddrs_sym = 0; int64_t indices_sym = 0; double cnt = 0.0; - int offset[NTOP] = {0}; - int size[NTOP] = {0}; - - addr_t tot[NTOP] = {0}; - addr_t top[NTOP] = {0}; - addr_t top_idx[NTOP] = {0}; - - int64_t* patterns[NTOP] = {0}; - private: - char (*srcline)[NGS][MAX_LINE_LENGTH] = new char[2][NGS][MAX_LINE_LENGTH]; - - mem_access_type _mType; + std::unique_ptr offset; + std::unique_ptr size; - std::vector _pattern_sizes; + std::unique_ptr tot; + std::unique_ptr top; + std::unique_ptr top_idx; + std::vector> patterns; }; class InstrInfo { public: - InstrInfo(mem_access_type mType) : _mType(mType) { } - ~InstrInfo() { - delete [] iaddrs; - delete [] icnt; - delete [] occ; - } + explicit InstrInfo( + mem_access_type mType, + size_t max_gather_scatter = Config::get_instance().get_max_gather_scatter()) + : + _mType(mType), + _max_gather_scatter(max_gather_scatter), + _iaddrs(std::make_unique(2 * _max_gather_scatter)), + _icnt(std::make_unique(2 * _max_gather_scatter)), + _occ(std::make_unique(2 * _max_gather_scatter)) + {} + ~InstrInfo() = default; InstrInfo(const InstrInfo &) = delete; InstrInfo & operator=(const InstrInfo & right) = delete; - addr_t* get_iaddrs() { return iaddrs[_mType]; } - int64_t* get_icnt() { return icnt[_mType]; } - int64_t* get_occ() { return occ[_mType]; } + addr_t* get_iaddrs() { return &_iaddrs[_mType * _max_gather_scatter]; } + int64_t* get_icnt() { return &_icnt[_mType * _max_gather_scatter]; } + int64_t* get_occ() { return &_occ[_mType * _max_gather_scatter]; } private: - addr_t (*iaddrs)[NGS] = new addr_t[2][NGS]; - int64_t (*icnt)[NGS] = new int64_t[2][NGS]; //vector instances - int64_t (*occ)[NGS] = new int64_t[2][NGS]; //load/store instances + const 
size_t _max_gather_scatter; + std::unique_ptr _iaddrs; + std::unique_ptr _icnt; //vector instances + std::unique_ptr _occ; //load/store instances mem_access_type _mType; }; @@ -244,42 +235,38 @@ namespace gs_patterns class InstrWindow { public: - InstrWindow() { + explicit InstrWindow(size_t iaddr_per_window = Config::get_instance().get_iaddr_per_window()) + : _iaddr_per_window(iaddr_per_window), + _w_iaddrs{std::make_unique(2 * _iaddr_per_window)}, + _w_bytes {std::make_unique(2 * _iaddr_per_window)}, + _w_maddr {std::make_unique(2 * _iaddr_per_window * MAX_ACCESS_SIZE)}, + _w_cnt {std::make_unique(2 * _iaddr_per_window)} + { // First dimension is 0=GATHER/1=SCATTER - _w_iaddrs = new int64_t[2][IWINDOW]; - _w_bytes = new int64_t[2][IWINDOW]; - _w_maddr = new int64_t[2][IWINDOW][MAX_ACCESS_SIZE]; - _w_cnt = new int64_t[2][IWINDOW]; - init(); } - virtual ~InstrWindow() { - delete [] _w_iaddrs; - delete [] _w_bytes; - delete [] _w_maddr; - delete [] _w_cnt; - } + virtual ~InstrWindow() = default; void init() { for (int w = 0; w < 2; w++) { - for (int i = 0; i < IWINDOW; i++) { - _w_iaddrs[w][i] = -1; - _w_bytes[w][i] = 0; - _w_cnt[w][i] = 0; + for (int i = 0; i < _iaddr_per_window; i++) { + w_iaddrs(w, i) = -1; + w_bytes(w, i) = 0; + w_cnt(w, i) = 0; for (uint64_t j = 0; j < MAX_ACCESS_SIZE; j++) - _w_maddr[w][i][j] = -1; + w_maddr(w, i, j) = -1; } } } void reset(int w) { - for (int i = 0; i < IWINDOW; i++) { - _w_iaddrs[w][i] = -1; - _w_bytes[w][i] = 0; - _w_cnt[w][i] = 0; + for (int i = 0; i < _iaddr_per_window; i++) { + w_iaddrs(w, i) = -1; + w_bytes(w, i) = 0; + w_cnt(w, i) = 0; for (uint64_t j = 0; j < MAX_ACCESS_SIZE; j++) - _w_maddr[w][i][j] = -1; + w_maddr(w, i, j) = -1; } } @@ -292,34 +279,48 @@ namespace gs_patterns InstrWindow(const InstrWindow &) = delete; InstrWindow & operator=(const InstrWindow & right) = delete; - int64_t & w_iaddrs(int32_t i, int32_t j) { return _w_iaddrs[i][j]; } - int64_t & w_bytes(int32_t i, int32_t j) { return _w_bytes[i][j]; } - 
int64_t & w_maddr(int32_t i, int32_t j, int32_t k) { return _w_maddr[i][j][k]; } - int64_t & w_cnt(int32_t i, int32_t j) { return _w_cnt[i][j]; } + int64_t & w_iaddrs(int32_t i, int32_t j) + { + return _w_iaddrs[i * _iaddr_per_window + j]; + } + int64_t & w_bytes(int32_t i, int32_t j) + { + return _w_bytes[i * _iaddr_per_window + j]; + } + int64_t & w_maddr(int32_t i, int32_t j, int32_t k) + { + return _w_maddr[i * _iaddr_per_window * MAX_ACCESS_SIZE + j * MAX_ACCESS_SIZE + k]; + } + int64_t & w_cnt(int32_t i, int32_t j) + { + return _w_cnt[i * _iaddr_per_window + j]; + } + [[nodiscard]] size_t get_window_size() const { return _iaddr_per_window; } addr_t & get_iaddr() { return iaddr; } int64_t & get_maddr_prev() { return maddr_prev; } int64_t & get_maddr() { return maddr; } private: + const size_t _iaddr_per_window; // First dimension is 0=GATHER/1=SCATTER - int64_t (*_w_iaddrs)[IWINDOW]; - int64_t (*_w_bytes)[IWINDOW]; - int64_t (*_w_maddr)[IWINDOW][MAX_ACCESS_SIZE]; - int64_t (*_w_cnt)[IWINDOW]; + std::unique_ptr _w_iaddrs; + std::unique_ptr _w_bytes; + std::unique_ptr _w_maddr; + std::unique_ptr _w_cnt; // State which must be carried with each call to handle a trace - addr_t iaddr; - int64_t maddr_prev; - int64_t maddr; + addr_t iaddr = -1; + int64_t maddr_prev = -1; + int64_t maddr = -1; }; template class MemPatterns { public: - MemPatterns() { } - virtual ~MemPatterns() { }; + MemPatterns() = default; + virtual ~MemPatterns() = default; MemPatterns(const MemPatterns &) = delete; MemPatterns & operator=(const MemPatterns &) = delete; diff --git a/gs_patterns_core.cpp b/gs_patterns_core.cpp index b9b0354..36134f2 100644 --- a/gs_patterns_core.cpp +++ b/gs_patterns_core.cpp @@ -7,6 +7,7 @@ #include "utils.h" #include "gs_patterns.h" +#include "config.h" namespace gs_patterns { @@ -14,43 +15,44 @@ namespace gs_patterns_core { using namespace gs_patterns; - void translate_iaddr(const std::string & binary, char * source_line, addr_t iaddr) + void 
translate_iaddr(const std::string & binary, char * source_line, addr_t iaddr, size_t max_line_length) { - char path[MAX_LINE_LENGTH]; - char cmd[MAX_LINE_LENGTH]; + auto path = std::make_unique(max_line_length); + auto cmd = std::make_unique(max_line_length); + FILE *fp; - sprintf(cmd, "addr2line -e %s 0x%lx", binary.c_str(), iaddr); + snprintf(cmd.get(), max_line_length, "addr2line -e %s 0x%lx", binary.c_str(), iaddr); /* Open the command for reading. */ - fp = popen(cmd, "r"); + fp = popen(cmd.get(), "r"); if (NULL == fp) { throw GSError("Failed to run command"); } /* Read the output a line at a time - output it. */ - while (fgets(path, sizeof(path), fp) != NULL) { - strcpy(source_line, path); + while (fgets(path.get(), static_cast(max_line_length), fp) != NULL) { + strcpy(source_line, path.get()); source_line[strcspn(source_line, "\n")] = 0; } /* close */ pclose(fp); + } - return; - } - - void create_metrics_file(FILE * fp, FILE * fp2, const std::string & file_prefix, Metrics & target_metrics, bool & first_spatter) + void create_metrics_file(FILE * fp, FILE * fp2, const std::string & file_prefix, Metrics & target_metrics, bool & first_spatter, + size_t unique_distances_threshold, double out_threshold, size_t min_accesses_threshold, size_t histogram_bounds, + size_t histogram_bounds_alloc) { int i = 0; int j = 0; //Create stride histogram and create spatter int sidx; - int firstgs = 1; + int firstgs = 1; int unique_strides; - int64_t hbin = 0; - int64_t n_stride[OBOUNDS_ALLOC]; + int64_t hbin = 0; + auto n_stride = std::make_unique(histogram_bounds_alloc); double outbounds; if (file_prefix.empty()) throw GSFileError ("Empty file prefix provided."); @@ -62,26 +64,32 @@ namespace gs_patterns_core printf("***************************************************************************************\n"); unique_strides = 0; - for (j = 0; j < OBOUNDS_ALLOC; j++) + for (j = 0; j < histogram_bounds_alloc; j++) n_stride[j] = 0; for (j = 1; j < target_metrics.offset[i]; j++) { - 
sidx = target_metrics.patterns[i][j] - target_metrics.patterns[i][j - 1] + OBOUNDS + 1; + sidx = target_metrics.patterns[i][j] - target_metrics.patterns[i][j - 1] + histogram_bounds + 1; sidx = (sidx < 1) ? 0 : sidx; - sidx = (sidx > OBOUNDS_ALLOC - 1) ? OBOUNDS_ALLOC - 1 : sidx; + sidx = (sidx > histogram_bounds_alloc - 1) ? histogram_bounds_alloc - 1 : sidx; n_stride[sidx]++; } - for (j = 0; j < OBOUNDS_ALLOC; j++) { + for (j = 0; j < histogram_bounds_alloc; j++) { if (n_stride[j] > 0) { unique_strides++; } } - outbounds = (double) (n_stride[0] + n_stride[OBOUNDS_ALLOC-1]) / (double) target_metrics.offset[i]; + outbounds = (double) (n_stride[0] + n_stride[histogram_bounds_alloc-1]) / (double) target_metrics.offset[i]; - if (((unique_strides > NSTRIDES) || (outbounds > OUTTHRESH) && (target_metrics.offset[i] > USTRIDES ) )) { - //if (true) { + bool has_too_few_instances = (target_metrics.offset[i] < min_accesses_threshold ); // FILTER 4 ("Less than 1024 instances") + bool is_not_complex = (unique_strides < unique_distances_threshold); // FILTER 5 ("Less than 6 unique index distances... + bool is_not_out_of_bounds = (outbounds < out_threshold); // FILTER 5 ...and less than 50% out of bounds distances") + bool exclude = ( + (has_too_few_instances) || (is_not_complex && is_not_out_of_bounds) + ); // Fails Filter 4 or fails Filter 5 + + if (!exclude) { if (firstgs) { firstgs = 0; @@ -100,80 +108,80 @@ namespace gs_patterns_core // file_prefix + "." + target_metrics.getShortNameLower().c_str() + "." + std::to_string(i) + "." 
+ \ // std::to_string(target_metrics.size[i]) + "B.sbin"; - fp_bin = fopen(bin_name, "w"); - if (NULL == fp_bin) - throw GSFileError("Could not open " + std::string(bin_name) + "!"); - - printf("%sIADDR -- %p\n", target_metrics.getShortName().c_str(), (void*) target_metrics.top[i]); - printf("SRCLINE -- %s\n", target_metrics.get_srcline()[target_metrics.top_idx[i]]); - printf("GATHER %c -- %6.3f%c (%4ld-bit chunks)\n", - '%', 100.0 * (double) target_metrics.tot[i] / target_metrics.cnt, '%', VBITS); - printf("DTYPE -- %d bytes\n", target_metrics.size[i]); - printf("NINDICES -- %d\n", target_metrics.offset[i]); - printf("INDICES:\n"); - - int64_t nlcnt = 0; - for (j = 0; j < target_metrics.offset[i]; j++) { - - if (j <= 49) { - printf("%10ld ", target_metrics.patterns[i][j]); - fflush(stdout); - if (( ++nlcnt % 10) == 0) - printf("\n"); - - } else if (j >= (target_metrics.offset[i] - 50)) { - printf("%10ld ", target_metrics.patterns[i][j]); - fflush(stdout); - if (( ++nlcnt % 10) == 0) - printf("\n"); - - } else if (j == 50) - printf("...\n"); - } - printf("\n"); - printf("DIST HISTOGRAM --\n"); - - hbin = 0; - for(j=0; j= (target_metrics.offset[i] - 50)) { + printf("%10ld ", target_metrics.patterns[i][j]); + fflush(stdout); + if (( ++nlcnt % 10) == 0) + printf("\n"); + + } else if (j == 50) + printf("...\n"); + } + printf("\n"); + printf("DIST HISTOGRAM --\n"); + + hbin = 0; + int64_t print_bounds = (int64_t)histogram_bounds; + for(j=0; j #include "gs_patterns.h" +#include "config.h" namespace gs_patterns { namespace gs_patterns_core { - void translate_iaddr(const std::string & binary, char * source_line, addr_t iaddr); + void translate_iaddr(const std::string & binary, char * source_line, addr_t iaddr, size_t max_line_length); template - void handle_trace_entry(MemPatterns & mp, const InstrAddrAdapter & ia) + void handle_trace_entry(MemPatterns & mp, const InstrAddrAdapter & ia, size_t max_gather_scatter, size_t per_sample) { int i, j, k, w = 0; int w_rw_idx; // 
Index into instruction window first dimension (RW: 0=Gather(R) or 1=Scatter(W)) @@ -63,14 +64,14 @@ namespace gs_patterns_core //printf("M DRTRACE -- iaddr: %016lx addr: %016lx cl_start: %d bytes: %d\n", // iw.iaddr, ia.get_address(), ia.get_address() % 64, ia.get_size()); - if ((++trace_info.mcnt % PERSAMPLE) == 0) { + if ((++trace_info.mcnt % per_sample) == 0) { printf("."); fflush(stdout); } //is iaddr in window w_idx = -1; - for (i = 0; i < IWINDOW; i++) { + for (i = 0; i < iw.get_window_size(); i++) { //new iaddr if (iw.w_iaddrs(w_rw_idx, i) == -1) { @@ -94,7 +95,7 @@ namespace gs_patterns_core // i = each window for (w = 0; w < 2; w++) { // 2 - for (i = 0; i < IWINDOW; i++) { // 1024 + for (i = 0; i < iw.get_window_size(); i++) { // 1024 if (iw.w_iaddrs(w,i) == -1) break; @@ -126,7 +127,7 @@ namespace gs_patterns_core if (gs == -1) { InstrInfo & target_iinfo = (w == 0) ? gather_iinfo : scatter_iinfo; - for(k=0; k - void create_spatter_file(MemPatterns & mp, const std::string & file_prefix) + void create_spatter_file(MemPatterns & mp, const std::string & file_prefix, size_t unique_distances_threshold, double out_threshold, size_t min_accesses_threshold, + size_t histogram_bounds, size_t histogram_bounds_alloc) { // Create spatter file FILE *fp, *fp2; @@ -301,9 +309,10 @@ namespace gs_patterns_core fprintf(fp2, "#iaddr, sourceline, type size bytes, g/s, nindices, final percentage of g/s\n"); bool first_spatter = true; - create_metrics_file(fp, fp2, file_prefix, mp.get_gather_metrics(), first_spatter); + create_metrics_file(fp, fp2, file_prefix, mp.get_gather_metrics(), first_spatter, unique_distances_threshold, out_threshold, min_accesses_threshold, histogram_bounds, histogram_bounds_alloc); - create_metrics_file(fp, fp2, file_prefix, mp.get_scatter_metrics(), first_spatter); + create_metrics_file(fp, fp2, file_prefix, mp.get_scatter_metrics(), first_spatter, unique_distances_threshold, out_threshold, min_accesses_threshold, histogram_bounds, + 
histogram_bounds_alloc); // Footer fprintf(fp, " ]"); diff --git a/gs_patterns_main.cpp b/gs_patterns_main.cpp index 3794c28..e8082bb 100644 --- a/gs_patterns_main.cpp +++ b/gs_patterns_main.cpp @@ -17,44 +17,38 @@ using namespace gs_patterns::gs_patterns_core; using namespace gs_patterns::gsnv_patterns; using namespace gs_patterns::gspin_patterns; -void usage (const std::string & prog_name) -{ - std::cerr << "Usage: " << prog_name << " \n" - << " " << prog_name << " -nv [-ow] [-v]" << std::endl; -} +// explained in helper function in config.cpp +// void usage (const std::string & prog_name) +// { +// std::cerr << "Usage: " << prog_name << " \n" +// << " " << prog_name << " -nv [-ow] [-v]" << std::endl; +// } int main(int argc, char ** argv) { try { - bool use_gs_nv = false; - bool verbose = false; - bool one_warp = false; - for (int i = 0; i < argc; i++) { - if (std::string(argv[i]) == "-nv") { - use_gs_nv = true; - } - else if (std::string(argv[i]) == "-v") { - verbose = true; - } - else if (std::string(argv[i]) == "-ow") { - one_warp = true; - } - } + // Parse configuration arguments first + Config& config = Config::get_instance(); + config.parseArgs(argc, argv); + bool use_gs_nv = config.get_use_gs_nv(); + bool verbose = config.get_verbose(); + bool one_warp = config.get_one_warp(); + const auto& positional_args = config.get_positional_args(); size_t pos = std::string(argv[0]).find_last_of("/"); std::string prog_name = std::string(argv[0]).substr(pos+1); - if (argc < 3) { - usage(prog_name); - throw GSError("Invalid program arguments"); - } - if (use_gs_nv) { + if (positional_args.empty()) { + config.printHelp(prog_name.c_str()); + throw GSError("Missing required argument."); + } + MemPatternsForNV mp; - mp.set_trace_file(argv[1]); + mp.set_trace_file(positional_args[0]); const char * config_file = std::getenv(GSNV_CONFIG_FILE); if (config_file) { @@ -73,10 +67,15 @@ int main(int argc, char ** argv) } else { + if (positional_args.size() < 2) { + 
config.printHelp(prog_name.c_str()); + throw GSError("Missing required and arguments."); + } + MemPatternsForPin mp; - mp.set_trace_file(argv[1]); - mp.set_binary_file(argv[2]); + mp.set_trace_file(positional_args[0]); + mp.set_binary_file(positional_args[1]); if (verbose) mp.set_log_level(1); // ----------------- Process Traces ----------------- diff --git a/gsnv_patterns.cpp b/gsnv_patterns.cpp index 11e84e5..83dfb41 100644 --- a/gsnv_patterns.cpp +++ b/gsnv_patterns.cpp @@ -81,7 +81,7 @@ int tline_read_maps(gzFile fp, trace_map_entry_t * val, trace_map_entry_t **p_va return 1; } -int tline_read(gzFile fp, mem_access_t * val, mem_access_t **p_val, int *edx) +int tline_read(gzFile fp, mem_access_t * val, mem_access_t **p_val, int *edx, size_t trace_buffer_size) { int idx; @@ -89,11 +89,11 @@ int tline_read(gzFile fp, mem_access_t * val, mem_access_t **p_val, int *edx) idx = (*edx) / sizeof(mem_access_t); //first read if (NULL == *p_val) { - *edx = gzread(fp, val, sizeof(mem_access_t) * NBUFS); + *edx = gzread(fp, val, sizeof(mem_access_t) * trace_buffer_size); *p_val = val; } else if (*p_val == &val[idx]) { - *edx = gzread(fp, val, sizeof(mem_access_t) * NBUFS); + *edx = gzread(fp, val, sizeof(mem_access_t) * trace_buffer_size); *p_val = val; } @@ -128,7 +128,7 @@ InstrInfo & MemPatternsForNV::get_iinfo(mem_access_type m) void MemPatternsForNV::handle_trace_entry(const InstrAddrAdapter & ia) { // Call libgs_patterns - gs_patterns_core::handle_trace_entry(*this, ia); + gs_patterns_core::handle_trace_entry(*this, ia, _max_gather_scatter, _per_sample); const InstrAddrAdapterForNV &ianv = dynamic_cast (ia); #ifdef USE_VECTOR_FOR_SECOND_PASS @@ -162,17 +162,17 @@ void MemPatternsForNV::generate_patterns() // ----------------- Create Spatter File ----------------- - create_spatter_file(*this, get_file_prefix()); + create_spatter_file(*this, get_file_prefix(),_unique_distances_threshold, _out_threshold, _min_accesses_threshold, _histogram_bounds, 
_histogram_bounds_alloc); } void MemPatternsForNV::update_metrics() { // Get top gathers - get_gather_metrics().ntop = get_top_target(get_gather_iinfo(), get_gather_metrics()); + get_gather_metrics().ntop = get_top_target(get_gather_iinfo(), get_gather_metrics(), _top_patterns, _max_gather_scatter); // Get top scatters - get_scatter_metrics().ntop = get_top_target(get_scatter_iinfo(), get_scatter_metrics()); + get_scatter_metrics().ntop = get_top_target(get_scatter_iinfo(), get_scatter_metrics(), _top_patterns, _max_gather_scatter); // ----------------- Second Pass ----------------- @@ -327,8 +327,8 @@ void MemPatternsForNV::process_traces() uint64_t lines_read = 0; uint64_t pos = 0; mem_access_t * p_trace = NULL; - mem_access_t trace_buff[NBUFS]; // was static (1024 bytes) - while (tline_read(fp_trace, trace_buff, &p_trace, &iret)) + auto trace_buff = std::make_unique(_trace_buffer_size); + while (tline_read(fp_trace, trace_buff.get(), &p_trace, &iret, _trace_buffer_size)) { // Decode trace t_line = p_trace; @@ -397,7 +397,7 @@ double MemPatternsForNV::update_source_lines_from_binary(mem_access_type mType) InstrInfo & target_iinfo = get_iinfo(mType); Metrics & target_metrics = get_metrics(mType); - for (int k = 0; k < NGS; k++) { + for (int k = 0; k < _max_gather_scatter; k++) { if (0 == target_iinfo.get_iaddrs()[k]) { break; @@ -405,9 +405,10 @@ double MemPatternsForNV::update_source_lines_from_binary(mem_access_type mType) std::string line; line = addr_to_line(target_iinfo.get_iaddrs()[k]); - strncpy(target_metrics.get_srcline()[k], line.c_str(), MAX_LINE_LENGTH-1); - - if (std::string(target_metrics.get_srcline()[k]).empty()) + strncpy(target_metrics.get_srcline().get(k), line.c_str(), _max_line_length-1); + // for safety if line size is bigger than max_line_length + target_metrics.get_srcline().get(k)[_max_line_length - 1] = '\0'; + if (std::string(target_metrics.get_srcline().get(k)).empty()) target_iinfo.get_icnt()[k] = 0; target_cnt += 
target_iinfo.get_icnt()[k]; @@ -425,8 +426,8 @@ void MemPatternsForNV::process_second_pass() // State carried thru addr_t iaddr; int64_t maddr; - addr_t gather_base[NTOP] = {0}; - addr_t scatter_base[NTOP] = {0}; + std::unique_ptr gather_base(new addr_t[_top_patterns]()); + std::unique_ptr scatter_base(new addr_t[_top_patterns]()); bool breakout = false; printf("\nSecond pass to fill gather / scatter subtraces\n"); @@ -438,7 +439,7 @@ void MemPatternsForNV::process_second_pass() InstrAddrAdapter & ia = *itr; breakout = ::handle_2nd_pass_trace_entry(ia, get_gather_metrics(), get_scatter_metrics(), - iaddr, maddr, mcnt, gather_base, scatter_base); + iaddr, maddr, mcnt, gather_base, scatter_base, _per_sample); if (breakout) { break; } @@ -457,7 +458,7 @@ void MemPatternsForNV::process_second_pass() { InstrAddrAdapterForNV ia(const_cast(ta[i])); breakout = handle_2nd_pass_trace_entry(ia, get_gather_metrics(), get_scatter_metrics(), - iaddr, maddr, mcnt, gather_base, scatter_base); + iaddr, maddr, mcnt, gather_base.get(), scatter_base.get(), _per_sample); count_read++; if (breakout) break; diff --git a/gsnv_patterns.h b/gsnv_patterns.h index 133beb9..2c3e2c0 100644 --- a/gsnv_patterns.h +++ b/gsnv_patterns.h @@ -19,6 +19,7 @@ #include "gs_patterns.h" #include "gs_patterns_core.h" #include "utils.h" +#include "config.h" // Enable to use a vector for storing trace data for use by second pass (if not defined data is stored to a temp file //#define USE_VECTOR_FOR_SECOND_PASS 1 @@ -117,11 +118,34 @@ namespace gsnv_patterns static constexpr const char * GSNV_ONE_WARP_MODE = "GSNV_ONE_WARP_MODE"; - MemPatternsForNV(): _metrics(GATHER, SCATTER), + explicit MemPatternsForNV( + size_t per_sample = Config::get_instance().get_per_sample(), + size_t histogram_bounds = Config::get_instance().get_histogram_bounds(), + size_t histogram_bounds_alloc = Config::get_instance().get_histogram_bounds_alloc(), + size_t trace_buffer_size = Config::get_instance().get_trace_buffer_size(), + 
size_t top_patterns = Config::get_instance().get_top_patterns(), + size_t max_gather_scatter = Config::get_instance().get_max_gather_scatter(), + size_t max_line_length = Config::get_instance().get_max_line_length(), + size_t unique_distances_threshold = Config::get_instance().get_unique_distances_threshold(), + double out_threshold = Config::get_instance().get_out_threshold(), + size_t min_accesses_threshold = Config::get_instance().get_min_accesses_threshold() + ): + _per_sample(per_sample), + _histogram_bounds(histogram_bounds), + _histogram_bounds_alloc(histogram_bounds_alloc), + _unique_distances_threshold(unique_distances_threshold), + _out_threshold(out_threshold), + _min_accesses_threshold(min_accesses_threshold), + _trace_buffer_size(trace_buffer_size), + _top_patterns(top_patterns), + _max_line_length(max_line_length), + _max_gather_scatter(max_gather_scatter), + _metrics(GATHER, SCATTER), _iinfo(GATHER, SCATTER), _target_opcodes { "LD", "ST", "LDS", "STS", "LDG", "STG" } { } + virtual ~MemPatternsForNV() override { } void handle_trace_entry(const InstrAddrAdapter & ia) override; @@ -213,7 +237,16 @@ namespace gsnv_patterns bool convert_to_trace_entry(const mem_access_t & ma, bool ignore_partial_warps, std::vector & te_list); private: - + size_t _per_sample; + size_t _histogram_bounds; + size_t _histogram_bounds_alloc; + size_t _unique_distances_threshold; + double _out_threshold; + size_t _min_accesses_threshold; + size_t _trace_buffer_size; + size_t _top_patterns; + size_t _max_line_length; + size_t _max_gather_scatter; std::pair _metrics; std::pair _iinfo; TraceInfo _trace_info; diff --git a/gspin_patterns.cpp b/gspin_patterns.cpp index 7aae7b9..0aab465 100644 --- a/gspin_patterns.cpp +++ b/gspin_patterns.cpp @@ -11,6 +11,9 @@ #include "gs_patterns.h" #include "gs_patterns_core.h" #include "gspin_patterns.h" + +#include + #include "utils.h" namespace gs_patterns @@ -20,7 +23,7 @@ namespace gspin_patterns using namespace gs_patterns::gs_patterns_core; 
-int drline_read(gzFile fp, trace_entry_t * val, trace_entry_t ** p_val, int * edx) +int drline_read(gzFile fp, trace_entry_t * val, trace_entry_t ** p_val, int * edx, size_t trace_buffer_size) { int idx; @@ -28,11 +31,11 @@ int drline_read(gzFile fp, trace_entry_t * val, trace_entry_t ** p_val, int * ed idx = (*edx) / sizeof(trace_entry_t); //first read if (NULL == *p_val) { - *edx = gzread(fp, val, sizeof(trace_entry_t) * NBUFS); + *edx = gzread(fp, val, sizeof(trace_entry_t) * trace_buffer_size); *p_val = val; } else if (*p_val == &val[idx]) { - *edx = gzread(fp, val, sizeof(trace_entry_t) * NBUFS); + *edx = gzread(fp, val, sizeof(trace_entry_t) * trace_buffer_size); *p_val = val; } @@ -67,7 +70,7 @@ InstrInfo & MemPatternsForPin::get_iinfo(mem_access_type m) void MemPatternsForPin::handle_trace_entry(const InstrAddrAdapter & ia) { // Call libgs_patterns - gs_patterns_core::handle_trace_entry(*this, ia); + gs_patterns_core::handle_trace_entry(*this, ia, _max_gather_scatter, _per_sample); } void MemPatternsForPin::generate_patterns() @@ -82,7 +85,8 @@ void MemPatternsForPin::generate_patterns() // ----------------- Create Spatter File ----------------- - create_spatter_file(*this, get_file_prefix()); + create_spatter_file(*this, get_file_prefix(), _unique_distances_threshold, _out_threshold, _min_accesses_threshold, _histogram_bounds, + _histogram_bounds_alloc); } @@ -99,10 +103,10 @@ void MemPatternsForPin::update_metrics() } // Get top gathers - get_gather_metrics().ntop = get_top_target(get_gather_iinfo(), get_gather_metrics()); + get_gather_metrics().ntop = get_top_target(get_gather_iinfo(), get_gather_metrics(), _top_patterns, _max_gather_scatter); // Get top scatters - get_scatter_metrics().ntop = get_top_target(get_scatter_iinfo(), get_scatter_metrics()); + get_scatter_metrics().ntop = get_top_target(get_scatter_iinfo(), get_scatter_metrics(), _top_patterns, _max_gather_scatter); // ----------------- Second Pass ----------------- @@ -135,15 +139,16 @@ 
double MemPatternsForPin::update_source_lines_from_binary(mem_access_type mType) Metrics & target_metrics = get_metrics(mType); //Check it is not a library - for (int k = 0; k < NGS; k++) { + for (int k = 0; k < _max_gather_scatter; k++) { if (0 == target_iinfo.get_iaddrs()[k]) { break; } #if SYMBOLS_ONLY - translate_iaddr(get_binary_file_name(), target_metrics.get_srcline()[k], target_iinfo.get_iaddrs()[k]); - if (startswith(target_metrics.get_srcline()[k], "?")) { + translate_iaddr(get_binary_file_name(), target_metrics.get_srcline().get(k), target_iinfo.get_iaddrs()[k], + _max_line_length); + if (startswith(target_metrics.get_srcline().get(k), "?")) { target_iinfo.get_icnt()[k] = 0; target_metrics.iaddrs_nosym++; target_metrics.indices_nosym += target_iinfo.get_occ()[k]; @@ -182,10 +187,10 @@ void MemPatternsForPin::process_traces() uint64_t lines_read = 0; trace_entry_t *p_drtrace = NULL; - trace_entry_t drtrace[NBUFS]; // was static (1024 bytes) + auto drtrace = std::make_unique(_trace_buffer_size); // was static (1024 bytes) - while (drline_read(fp_drtrace, drtrace, &p_drtrace, &iret)) { + while (drline_read(fp_drtrace, drtrace.get(), &p_drtrace, &iret, _trace_buffer_size)) { //decode drtrace drline = p_drtrace; @@ -217,25 +222,26 @@ void MemPatternsForPin::process_second_pass(gzFile & fp_drtrace) int iret = 0; trace_entry_t *drline; - // State carried thru + // State carried through addr_t iaddr; int64_t maddr; - addr_t gather_base[NTOP] = {0}; - addr_t scatter_base[NTOP] = {0}; + auto gather_base = std::make_unique(_top_patterns); + auto scatter_base = std::make_unique(_top_patterns); + bool breakout = false; printf("\nSecond pass to fill gather / scatter subtraces\n"); fflush(stdout); trace_entry_t *p_drtrace = NULL; - trace_entry_t drtrace[NBUFS]; // was static (1024 bytes) + auto drtrace = std::make_unique(_trace_buffer_size); // was static (1024 bytes) - while (drline_read(fp_drtrace, drtrace, &p_drtrace, &iret) && !breakout) { + while 
(drline_read(fp_drtrace, drtrace.get(), &p_drtrace, &iret, _trace_buffer_size) && !breakout) { //decode drtrace drline = p_drtrace; breakout = handle_2nd_pass_trace_entry(InstrAddrAdapterForPin(drline), get_gather_metrics(), get_scatter_metrics(), - iaddr, maddr, mcnt, gather_base, scatter_base); + iaddr, maddr, mcnt, gather_base.get(), scatter_base.get(), _per_sample); p_drtrace++; } diff --git a/gspin_patterns.h b/gspin_patterns.h index 78d64de..f236128 100644 --- a/gspin_patterns.h +++ b/gspin_patterns.h @@ -87,8 +87,31 @@ namespace gspin_patterns class MemPatternsForPin : public MemPatterns { public: - MemPatternsForPin() : _metrics(GATHER, SCATTER), - _iinfo(GATHER, SCATTER) { } + MemPatternsForPin( + size_t max_line_length = Config::get_instance().get_max_line_length(), + size_t per_sample = Config::get_instance().get_per_sample(), + size_t histogram_bounds = Config::get_instance().get_histogram_bounds(), + size_t histogram_bounds_alloc = Config::get_instance().get_histogram_bounds_alloc(), + size_t unique_distances_threshold = Config::get_instance().get_unique_distances_threshold(), + double out_threshold = Config::get_instance().get_out_threshold(), + size_t min_accesses_threshold = Config::get_instance().get_min_accesses_threshold(), + size_t trace_buffer_size = Config::get_instance().get_trace_buffer_size(), + size_t max_gather_scatter = Config::get_instance().get_max_gather_scatter(), + size_t top_patterns = Config::get_instance().get_top_patterns() + ): + _max_line_length(max_line_length), + _per_sample(per_sample), + _histogram_bounds(histogram_bounds), + _histogram_bounds_alloc(histogram_bounds_alloc), + _unique_distances_threshold(unique_distances_threshold), + _out_threshold(out_threshold), + _min_accesses_threshold(min_accesses_threshold), + _trace_buffer_size(trace_buffer_size), + _top_patterns(top_patterns), + _max_gather_scatter(max_gather_scatter), + _metrics(GATHER, SCATTER), + _iinfo(GATHER, SCATTER) { } + virtual ~MemPatternsForPin() 
override { } void handle_trace_entry(const InstrAddrAdapter & ia) override; @@ -124,6 +147,16 @@ namespace gspin_patterns void process_second_pass(gzFile & fp_drtrace); private: + size_t _max_line_length; + size_t _per_sample; + size_t _histogram_bounds; + size_t _histogram_bounds_alloc; + size_t _unique_distances_threshold; + double _out_threshold; + size_t _min_accesses_threshold; + size_t _trace_buffer_size; + size_t _top_patterns; + size_t _max_gather_scatter; std::pair _metrics; std::pair _iinfo; TraceInfo _trace_info;