"""Kerncraft static analytical performance modeling framework and tool.""" -__version__ = '0.8.15' +__version__ = "0.8.15" from .kerncraft import main + __main__ = main # To trigger travis deployment to pypi, do the following: # 1. Increment __version___ # 2. commit to RRZE-HPC/kerncraft's master branch

kerncraft/cacheprediction.py#L14

'collections.defaultdict' imported but unused (F401)

/home/runner/work/kerncraft/kerncraft/kerncraft/__init__.py#L12

def get_header_path() -> str: """Return local folder path of header files.""" import os - return os.path.abspath(os.path.dirname(os.path.realpath(__file__))) + '/headers/' + + return os.path.abspath(os.path.dirname(os.path.realpath(__file__))) + "/headers/"

kerncraft/cacheprediction.py#L68

'...'.format(...) has unused arguments at position(s): 1 (F523)

/home/runner/work/kerncraft/kerncraft/examples/machine-files/plot_machine_file.py#L5

import matplotlib.pyplot as plt from matplotlib.ticker import EngFormatter from kerncraft import machinemodel -kernel_colors = 'bgrcmyk' +kernel_colors = "bgrcmyk" + def main(): mm = machinemodel.MachineModel(sys.argv[1]) - kernels = sorted(mm['benchmarks']['kernels']) - cache_levels = sorted(mm['benchmarks']['measurements']) + kernels = sorted(mm["benchmarks"]["kernels"]) + cache_levels = sorted(mm["benchmarks"]["measurements"]) fig, axs = plt.subplots(len(cache_levels), 1, figsize=(7, 14), tight_layout=True) lines = {} for i, cache_level in enumerate(cache_levels): max_bw = 0 max_bw_core = 0 axs[i].set_title(cache_level) formatter1 = EngFormatter(places=0) # , sep="\N{THIN SPACE}") # U+2009 axs[i].yaxis.set_major_formatter(formatter1) - if cache_level == 'L1': + if cache_level == "L1": axs[i].set_ylabel("Bandwidth [B/s]") else: axs[i].set_ylabel("Bandwidth [B/s]\n(incl. write-allocate)") - axs[i].set_xlabel('cores') + axs[i].set_xlabel("cores") # axs[i].set_xscale('log') for ki, kernel in enumerate(kernels): - if cache_level == 'L1': + if cache_level == "L1": # L1 does not have write-allocate, so everything is measured correctly factor = 1.0 else: - measurement_kernel_info = mm['benchmarks']['kernels'][kernel] - factor = (float(measurement_kernel_info['read streams']['bytes']) + - 2.0 * float(measurement_kernel_info['write streams']['bytes']) - - float(measurement_kernel_info['read+write streams']['bytes'])) / \ - (float(measurement_kernel_info['read streams']['bytes']) + - float(measurement_kernel_info['write streams']['bytes'])) + measurement_kernel_info = mm["benchmarks"]["kernels"][kernel] + factor = ( + float(measurement_kernel_info["read streams"]["bytes"]) + + 2.0 * float(measurement_kernel_info["write streams"]["bytes"]) + - float(measurement_kernel_info["read+write streams"]["bytes"]) + ) / ( + float(measurement_kernel_info["read streams"]["bytes"]) + + float(measurement_kernel_info["write streams"]["bytes"]) + ) - for SMT in 
mm['benchmarks']['measurements'][cache_level]: + for SMT in mm["benchmarks"]["measurements"][cache_level]: measurements = [ - bw*factor - for bw in mm['benchmarks']['measurements'][cache_level][SMT]['results'][kernel]] - max_bw = max(measurements+[max_bw]) + bw * factor + for bw in mm["benchmarks"]["measurements"][cache_level][SMT][ + "results" + ][kernel] + ] + max_bw = max(measurements + [max_bw]) max_bw_core = max(max_bw_core, measurements[0]) - lines[kernel], = axs[i].plot( + (lines[kernel],) = axs[i].plot( range(1, 1 + len(measurements)), measurements, - linestyle=['-', '--', '..', '-.'][SMT-1], - color=kernel_colors[ki]) + linestyle=["-", "--", "..", "-."][SMT - 1], + color=kernel_colors[ki], + ) axs[i].set_xlim(1) - axs[i].axhline(max_bw, color='black') - axs[i].axhline(max_bw_core, color='black') - axs[i].set_yticks(np.append(axs[i].get_yticks(), [float(max_bw), float(max_bw_core)])) - axs[i].set_xticks(range(1, 1+len(measurements))) - fig.legend(lines.values(), lines.keys(), 'lower center', ncol=10) - fig.savefig(sys.argv[1]+'.pdf') - #plt.show() + axs[i].axhline(max_bw, color="black") + axs[i].axhline(max_bw_core, color="black") + axs[i].set_yticks( + np.append(axs[i].get_yticks(), [float(max_bw), float(max_bw_core)]) + ) + axs[i].set_xticks(range(1, 1 + len(measurements))) + fig.legend(lines.values(), lines.keys(), "lower center", ncol=10) + fig.savefig(sys.argv[1] + ".pdf") + # plt.show() -if __name__ == '__main__': +if __name__ == "__main__": main()

kerncraft/cacheprediction.py#L297

Ambiguous variable name 'l' (E741)

/home/runner/work/kerncraft/kerncraft/kerncraft/cachetile.py#L10

from .machinemodel import MachineModel from .prefixedunit import PrefixedUnit def create_parser(): - parser = argparse.ArgumentParser(description='Find optimal tiling sizes using the ECMData ' - 'model.') - parser.add_argument('--machine', '-m', type=argparse.FileType('r'), required=True, - help='Path to machine description yaml file.') - parser.add_argument('--define', '-D', nargs=2, metavar=('KEY', 'VALUE'), default=[], - action='append', - help='Define fixed constants. Values must be integer.') - parser.add_argument('--min-block-length', '-b', type=int, metavar='MIN', default=100) - parser.add_argument('--verbose', '-v', action='count', default=0, - help='Increases verbosity level.') - parser.add_argument('--cores', '-c', metavar='CORES', type=int, default=1, - help='Number of cores to be used in parallel. (default: 1)') - parser.add_argument('description_file', metavar='FILE', type=argparse.FileType(), - help='File with loop kernel description in YAML') + parser = argparse.ArgumentParser( + description="Find optimal tiling sizes using the ECMData " "model." + ) + parser.add_argument( + "--machine", + "-m", + type=argparse.FileType("r"), + required=True, + help="Path to machine description yaml file.", + ) + parser.add_argument( + "--define", + "-D", + nargs=2, + metavar=("KEY", "VALUE"), + default=[], + action="append", + help="Define fixed constants. Values must be integer.", + ) + parser.add_argument( + "--min-block-length", "-b", type=int, metavar="MIN", default=100 + ) + parser.add_argument( + "--verbose", "-v", action="count", default=0, help="Increases verbosity level." + ) + parser.add_argument( + "--cores", + "-c", + metavar="CORES", + type=int, + default=1, + help="Number of cores to be used in parallel. 
(default: 1)", + ) + parser.add_argument( + "description_file", + metavar="FILE", + type=argparse.FileType(), + help="File with loop kernel description in YAML", + ) return parser def simulate(kernel, model, define_dict, blocking_constant, blocking_length): """Setup and execute model with given blocking length"""

kerncraft/cacheprediction.py#L304

Ambiguous variable name 'l' (E741)

kerncraft/cacheprediction.py#L360

Local variable 'indices' is assigned to but never used (F841)

/home/runner/work/kerncraft/kerncraft/kerncraft/cachetile.py#L38

kernel.set_constant(k, v) kernel.set_constant(blocking_constant, blocking_length) model.analyze() - return sum([cy for dscr, cy in model.results['cycles']]) + return sum([cy for dscr, cy in model.results["cycles"]]) def run(parser, args): # machine information # Read machine description machine = MachineModel(args.machine.name) # process kernel description description = str(args.description_file.read()) - yaml = ruamel.yaml.YAML(typ='unsafe') + yaml = ruamel.yaml.YAML(typ="unsafe") yaml.register_class(PrefixedUnit) kernel = KernelDescription(yaml.load(description)) # Add constants from define arguments define_dict = {}

kerncraft/cacheprediction.py#L360

Ambiguous variable name 'l' (E741)

/home/runner/work/kerncraft/kerncraft/kerncraft/cachetile.py#L68

var_type, var_size = var_info for size in var_size: for s in size.atoms(sympy.Symbol): if s.name not in define_dict: undefined_constants.add(s) - assert len(undefined_constants) == 1, "There are multiple or none undefined constants {!r}. " \ + assert len(undefined_constants) == 1, ( + "There are multiple or none undefined constants {!r}. " "Exactly one must be undefined.".format(undefined_constants) + ) blocking_constant = undefined_constants.pop() if args.verbose >= 1: print("blocking constant:", blocking_constant)

kerncraft/cacheprediction.py#L411

Missing whitespace after ',' (E231)

/home/runner/work/kerncraft/kerncraft/kerncraft/cachetile.py#L81

min_length = args.min_block_length min_runtime = simulate(kernel, model, define_dict, blocking_constant, min_length) # determain max search length # upper bound: number of floats that fit into the last level cache - max_length = int(machine['memory hierarchy'][-2]['size per group'])//4 + max_length = int(machine["memory hierarchy"][-2]["size per group"]) // 4 if args.verbose >= 1: print("upper search bound:", max_length) - length = min_length*3 + length = min_length * 3 while length < max_length: runtime = simulate(kernel, model, define_dict, blocking_constant, length) if args.verbose >= 1: - print("min", min_length, min_runtime, "current", length, runtime, "max", max_length) + print( + "min", + min_length, + min_runtime, + "current", + length, + runtime, + "max", + max_length, + ) # Increase search window if runtime > min_runtime: max_length = length # and break else:

kerncraft/cacheprediction.py#L445

Continuation line over-indented for visual indent (E127)

/home/runner/work/kerncraft/kerncraft/kerncraft/cachetile.py#L104

length = (max_length - min_length) // 2 + min_length # Execute simulation runtime = simulate(kernel, model, define_dict, blocking_constant, length) if args.verbose >= 1: - print("min", min_length, min_runtime, "current", length, runtime, "max", max_length) + print( + "min", + min_length, + min_runtime, + "current", + length, + runtime, + "max", + max_length, + ) # Narrow search area if runtime <= min_runtime: min_runtime = runtime min_length = length

kerncraft/cacheprediction.py#L533

Line too long (120 > 100 characters) (E501)

/home/runner/work/kerncraft/kerncraft/kerncraft/cachetile.py#L120

print("found for {}:".format(blocking_constant)) print(length) sys.exit(0) else: if args.verbose: - print("nothing found. exceeded search window and not change in performance found.") + print( + "nothing found. exceeded search window and not change in performance found." + ) sys.exit(1) def main(): # Create and populate parser

kerncraft/cacheprediction.py#L548

Local variable 'elements_per_cacheline' is assigned to but never used (F841)

/home/runner/work/kerncraft/kerncraft/kerncraft/cachetile.py#L135

# BUSINESS LOGIC IS FOLLOWING run(parser, args) -if __name__ == '__main__': +if __name__ == "__main__": main()

/home/runner/work/kerncraft/kerncraft/kerncraft/intervals.py#L6

"""Very simple interval implementation for integers (might also work on floats).""" def __init__(self, *args, **kwargs): """If keywords *sane* is True (default: False), checks will not be done on given data.""" self.data = list(args) - if not kwargs.get('sane', False): + if not kwargs.get("sane", False): self.data = [d for d in self.data if d[1] > d[0]] self._enforce_order() self._enforce_no_overlap() def _enforce_order(self):

kerncraft/cacheprediction.py#L554

Local variable 'inner_index' is assigned to but never used (F841)

/home/runner/work/kerncraft/kerncraft/kerncraft/intervals.py#L18

self.data.sort(key=lambda d: d[0]) def _enforce_no_overlap(self, start_at=0): """Enforce that no ranges overlap in internal storage.""" i = start_at - while i+1 < len(self.data): - if self.data[i][1] >= self.data[i+1][0]: + while i + 1 < len(self.data): + if self.data[i][1] >= self.data[i + 1][0]: # beginning of i+1-th range is contained in i-th range - if self.data[i][1] < self.data[i+1][1]: + if self.data[i][1] < self.data[i + 1][1]: # i+1-th range is longer, thus enlarge i-th range - self.data[i][1] = self.data[i+1][1] + self.data[i][1] = self.data[i + 1][1] # removed contained range - del self.data[i+1] + del self.data[i + 1] i += 1 def __and__(self, other): """Combine two intervals, under the assumption that they are sane.""" - return Intervals(*(self.data+other.data)) + return Intervals(*(self.data + other.data)) def __len__(self): """Return sum of range lengths.""" - return int(sum(upper-lower for (lower, upper) in self.data)) + return int(sum(upper - lower for (lower, upper) in self.data)) def __contains__(self, needle): """Return True if needle is contained in intervals.""" return any(lower <= needle < upper for (lower, upper) in self.data) def __repr__(self): """Return string representation of object.""" - return str(self.__class__) + '(' + ', '.join([list.__repr__(d) for d in self.data]) + ')' + return ( + str(self.__class__) + + "(" + + ", ".join([list.__repr__(d) for d in self.data]) + + ")" + ) def __eq__(self, other): """Return True if other contains exactly the same interval regions.""" return self.data == other.data

kerncraft/cacheprediction.py#L555

Local variable 'inner_increment' is assigned to but never used (F841)

/home/runner/work/kerncraft/kerncraft/kerncraft/iaca_get.py#L10

import platform from urllib.request import urlopen url_dict = { - 'v3.0': { - 'mac': 'https://software.intel.com/content/dam/develop/external/us/en/protected/iaca-version-v3.0-mac.zip', - 'lin64': 'https://software.intel.com/content/dam/develop/external/us/en/protected/iaca-version-v3.0-lin64.zip', + "v3.0": { + "mac": "https://software.intel.com/content/dam/develop/external/us/en/protected/iaca-version-v3.0-mac.zip", + "lin64": "https://software.intel.com/content/dam/develop/external/us/en/protected/iaca-version-v3.0-lin64.zip", }, - 'v2.3': { - 'mac': 'https://software.intel.com/content/dam/develop/external/us/en/protected/iaca-version-2.3-mac.zip', - 'lin64': 'https://software.intel.com/content/dam/develop/external/us/en/protected/iaca-version-2.3-lin64.zip', + "v2.3": { + "mac": "https://software.intel.com/content/dam/develop/external/us/en/protected/iaca-version-2.3-mac.zip", + "lin64": "https://software.intel.com/content/dam/develop/external/us/en/protected/iaca-version-2.3-lin64.zip", }, - 'v2.2': { - 'mac': 'https://software.intel.com/content/dam/develop/external/us/en/protected/iaca-version-2.2-mac.zip', - 'lin64': 'https://software.intel.com/content/dam/develop/external/us/en/protected/iaca-version-2.2-lin64.zip', + "v2.2": { + "mac": "https://software.intel.com/content/dam/develop/external/us/en/protected/iaca-version-2.2-mac.zip", + "lin64": "https://software.intel.com/content/dam/develop/external/us/en/protected/iaca-version-2.2-lin64.zip", }, - 'v2.1': { - 'mac': 'https://software.intel.com/content/dam/develop/external/us/en/protected/iaca-version-2.1-mac64.zip', - 'lin64': 'https://software.intel.com/content/dam/develop/external/us/en/protected/iaca-version-2.1-lin64.zip', - } + "v2.1": { + "mac": "https://software.intel.com/content/dam/develop/external/us/en/protected/iaca-version-2.1-mac64.zip", + "lin64": "https://software.intel.com/content/dam/develop/external/us/en/protected/iaca-version-2.1-lin64.zip", + }, } + class TemporaryDirectory: def 
__enter__(self): self.tempdir = tempfile.mkdtemp() return self.tempdir

kerncraft/cacheprediction.py#L561

Too many blank lines (2) (E303)

/home/runner/work/kerncraft/kerncraft/kerncraft/iaca_get.py#L38

def __exit__(self, type_, value, traceback): shutil.rmtree(self.tempdir) def get_os(): - os_map = {'Darwin': 'mac', 'Linux': 'lin64'} + os_map = {"Darwin": "mac", "Linux": "lin64"} system = platform.system() - assert system in os_map, "Unsupported operating system (platform.system() should return " \ - "Linux or Darwin)." + assert system in os_map, ( + "Unsupported operating system (platform.system() should return " + "Linux or Darwin)." + ) return os_map[system] def search_path(): """Return potential locations of IACA installation.""" operating_system = get_os() # 1st choice: in ~/.kerncraft/iaca-{} # 2nd choice: in package directory / iaca-{} - return [os.path.expanduser("~/.kerncraft/iaca/{}/".format(operating_system)), - os.path.abspath(os.path.dirname(os.path.realpath(__file__))) + '/iaca/{}/'.format( - operating_system)] + return [ + os.path.expanduser("~/.kerncraft/iaca/{}/".format(operating_system)), + os.path.abspath(os.path.dirname(os.path.realpath(__file__))) + + "/iaca/{}/".format(operating_system), + ] def find_iaca(): """Return (hopefully) valid installation of IACA.""" - requires = ['iaca2.2', 'iaca2.3', 'iaca3.0'] + requires = ["iaca2.2", "iaca2.3", "iaca3.0"] for path in search_path(): - path += 'bin/' + path += "bin/" valid = True for r in requires: if not os.path.exists(path + r): valid = False break if valid: return path - raise RuntimeError("No IACA installation found in {}. Run iaca_get command to fix this issue." - "".format(search_path())) + raise RuntimeError( + "No IACA installation found in {}. Run iaca_get command to fix this issue." 
+ "".format(search_path()) + ) def main(): try: path = find_iaca() - print('IACA already installed at', path) - if '--force' in sys.argv: - sys.argv.remove('--force') + print("IACA already installed at", path) + if "--force" in sys.argv: + sys.argv.remove("--force") else: - print('For forced installation add --force') + print("For forced installation add --force") sys.exit() except RuntimeError: pass - if len(sys.argv) < 2 or sys.argv[1] != \ - "--I-accept-the-Intel-What-If-Pre-Release-License-Agreement-and-please-take-my-soul": - print("Go to https://software.intel.com/protected-download/267266/157552 and read the" - "Intel Pre-Release License Agreement.") + if ( + len(sys.argv) < 2 + or sys.argv[1] + != "--I-accept-the-Intel-What-If-Pre-Release-License-Agreement-and-please-take-my-soul" + ): + print( + "Go to https://software.intel.com/protected-download/267266/157552 and read the" + "Intel Pre-Release License Agreement." + ) print("") - print("Add " - "--I-accept-the-Intel-What-If-Pre-Release-License-Agreement-and-please-take-my-soul" - " for installation of IACA.") + print( + "Add " + "--I-accept-the-Intel-What-If-Pre-Release-License-Agreement-and-please-take-my-soul" + " for installation of IACA." + ) sys.exit(1) if len(sys.argv) >= 3: - assert sys.argv[2] in ['lin64', 'mac'] + assert sys.argv[2] in ["lin64", "mac"] operating_system = sys.argv[2] else: operating_system = get_os() # Locate and create IACA base directory, in reverse server order base_dir = None for path in reversed(search_path()): - print("Trying " + path + ": ", end='', file=sys.stderr) + print("Trying " + path + ": ", end="", file=sys.stderr) try: os.makedirs(path) base_dir = path break except PermissionError:

kerncraft/cacheprediction.py#L650

Too many blank lines (2) (E303)

/home/runner/work/kerncraft/kerncraft/kerncraft/iaca_get.py#L115

except OSError: # Directory already exists print("already exists.", file=sys.stderr) continue if base_dir is None: - print('Aborted.', file=sys.stderr) + print("Aborted.", file=sys.stderr) sys.exit(1) else: print("selected.", file=sys.stderr) - print("IACA v2.1 (for manual use - only version analyzing latency):", file=sys.stderr) - if operating_system == 'mac': - operating_system_temp = 'mac64' + print( + "IACA v2.1 (for manual use - only version analyzing latency):", file=sys.stderr + ) + if operating_system == "mac": + operating_system_temp = "mac64" else: operating_system_temp = operating_system - url = url_dict['v2.1'][operating_system] + url = url_dict["v2.1"][operating_system] print("Downloading", url, "...", file=sys.stderr) zfile = zipfile.ZipFile(BytesIO(urlopen(url).read())) - members = [n - for n in zfile.namelist() - if '/.' not in n and n.startswith('iaca-{:}/'.format(operating_system_temp))] + members = [ + n + for n in zfile.namelist() + if "/." not in n and n.startswith("iaca-{:}/".format(operating_system_temp)) + ] # Exctract to temp folder and copy to correct directory print("Extracting...", file=sys.stderr) with TemporaryDirectory() as tempdir: zfile.extractall(tempdir, members=members) - shutil.copytree(tempdir + '/iaca-{}'.format(operating_system_temp), base_dir + 'v2.1') + shutil.copytree( + tempdir + "/iaca-{}".format(operating_system_temp), base_dir + "v2.1" + ) # Correct permissions of executables print("Correcting permissions of binary...") - st = os.stat(base_dir + 'v2.1/bin/iaca') - os.chmod( - base_dir + 'v2.1/bin/iaca', - st.st_mode | stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH - ) - st = os.stat(base_dir + 'v2.1/bin/iaca.sh') - os.chmod( - base_dir + 'v2.1/bin/iaca.sh', - st.st_mode | stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH + st = os.stat(base_dir + "v2.1/bin/iaca") + os.chmod( + base_dir + "v2.1/bin/iaca", + st.st_mode | stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH, + ) + st = os.stat(base_dir + "v2.1/bin/iaca.sh") + os.chmod( + 
base_dir + "v2.1/bin/iaca.sh", + st.st_mode | stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH, ) # Fix iaca.sh print("Fixing iaca.sh...", file=sys.stderr) - iaca_sh = open(base_dir + 'v2.1/bin/iaca.sh').read() - iaca_sh = iaca_sh.replace('realpath', 'readlink -f', 1) - iaca_sh = iaca_sh.replace('mypath=`pwd`', 'mypath=`dirname $0`', 1) - iaca_sh = iaca_sh.replace('path=$(cd "$(dirname "$0")"; pwd)', - 'script=`readlink -f $0`\n\tpath=`dirname "$script"`', 1) - open(base_dir + 'v2.1/bin/iaca.sh', 'w').write(iaca_sh) - print("IACA v2.1 installed to", os.getcwd() + '/' + base_dir + 'v2.1', file=sys.stderr) + iaca_sh = open(base_dir + "v2.1/bin/iaca.sh").read() + iaca_sh = iaca_sh.replace("realpath", "readlink -f", 1) + iaca_sh = iaca_sh.replace("mypath=`pwd`", "mypath=`dirname $0`", 1) + iaca_sh = iaca_sh.replace( + 'path=$(cd "$(dirname "$0")"; pwd)', + 'script=`readlink -f $0`\n\tpath=`dirname "$script"`', + 1, + ) + open(base_dir + "v2.1/bin/iaca.sh", "w").write(iaca_sh) + print( + "IACA v2.1 installed to", os.getcwd() + "/" + base_dir + "v2.1", file=sys.stderr + ) print("IACA v2.2 (for NHM and WSM support):", file=sys.stderr) - url = url_dict['v2.2'][operating_system] + url = url_dict["v2.2"][operating_system] print("Downloading", url, "...", file=sys.stderr) zfile = zipfile.ZipFile(BytesIO(urlopen(url).read())) - members = [n - for n in zfile.namelist() - if '/.' not in n and n.startswith('iaca-{:}/'.format(operating_system))] + members = [ + n + for n in zfile.namelist() + if "/." 
not in n and n.startswith("iaca-{:}/".format(operating_system)) + ] # Exctract to temp folder and copy to correct directory print("Extracting...", file=sys.stderr) with TemporaryDirectory() as tempdir: zfile.extractall(tempdir, members=members) - shutil.copytree(tempdir + '/iaca-{}'.format(operating_system), base_dir + 'v2.2') + shutil.copytree( + tempdir + "/iaca-{}".format(operating_system), base_dir + "v2.2" + ) # Correct permissions of executables print("Correcting permissions of binary...") - st = os.stat(base_dir + 'v2.2/bin/iaca') - os.chmod( - base_dir + 'v2.2/bin/iaca', - st.st_mode | stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH - ) - st = os.stat(base_dir + 'v2.2/bin/iaca.sh') - os.chmod( - base_dir + 'v2.2/bin/iaca.sh', - st.st_mode | stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH + st = os.stat(base_dir + "v2.2/bin/iaca") + os.chmod( + base_dir + "v2.2/bin/iaca", + st.st_mode | stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH, + ) + st = os.stat(base_dir + "v2.2/bin/iaca.sh") + os.chmod( + base_dir + "v2.2/bin/iaca.sh", + st.st_mode | stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH, ) # Fix iaca.sh print("Fixing iaca.sh...", file=sys.stderr) - iaca_sh = open(base_dir + 'v2.2/bin/iaca.sh').read() - iaca_sh = iaca_sh.replace('realpath', 'readlink -f', 1) - iaca_sh = iaca_sh.replace('mypath=`pwd`', 'mypath=`dirname $0`', 1) - iaca_sh = iaca_sh.replace('path=$(cd "$(dirname "$0")"; pwd)', - 'script=`readlink -f $0`\n\tpath=`dirname "$script"`', 1) - open(base_dir + 'v2.2/bin/iaca.sh', 'w').write(iaca_sh) - print("IACA v2.2 installed to", os.getcwd() + '/' + base_dir + 'v2.2', file=sys.stderr) + iaca_sh = open(base_dir + "v2.2/bin/iaca.sh").read() + iaca_sh = iaca_sh.replace("realpath", "readlink -f", 1) + iaca_sh = iaca_sh.replace("mypath=`pwd`", "mypath=`dirname $0`", 1) + iaca_sh = iaca_sh.replace( + 'path=$(cd "$(dirname "$0")"; pwd)', + 'script=`readlink -f $0`\n\tpath=`dirname "$script"`', + 1, + ) + open(base_dir + "v2.2/bin/iaca.sh", "w").write(iaca_sh) + print( + 
"IACA v2.2 installed to", os.getcwd() + "/" + base_dir + "v2.2", file=sys.stderr + ) print("IACA v2.3 (for SNB and IVY support):", file=sys.stderr) - url = url_dict['v2.3'][operating_system] + url = url_dict["v2.3"][operating_system] print("Downloading", url, "...", file=sys.stderr) zfile = zipfile.ZipFile(BytesIO(urlopen(url).read())) - members = [n - for n in zfile.namelist() - if '/.' not in n and n.startswith('iaca-{:}/'.format(operating_system))] + members = [ + n + for n in zfile.namelist() + if "/." not in n and n.startswith("iaca-{:}/".format(operating_system)) + ] # Exctract to temp folder and copy to correct directory print("Extracting...", file=sys.stderr) with TemporaryDirectory() as tempdir: zfile.extractall(tempdir, members=members) - shutil.copytree(tempdir + '/iaca-{}'.format(operating_system), base_dir + 'v2.3') + shutil.copytree( + tempdir + "/iaca-{}".format(operating_system), base_dir + "v2.3" + ) # Correct permissions of executables print("Correcting permissions of binary...") - st = os.stat(base_dir + 'v2.3/bin/iaca') - os.chmod( - base_dir + 'v2.3/bin/iaca', - st.st_mode | stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH - ) - st = os.stat(base_dir + 'v2.3/bin/iaca.sh') - os.chmod( - base_dir + 'v2.3/bin/iaca.sh', - st.st_mode | stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH + st = os.stat(base_dir + "v2.3/bin/iaca") + os.chmod( + base_dir + "v2.3/bin/iaca", + st.st_mode | stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH, + ) + st = os.stat(base_dir + "v2.3/bin/iaca.sh") + os.chmod( + base_dir + "v2.3/bin/iaca.sh", + st.st_mode | stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH, ) # Fix iaca.sh print("Fixing iaca.sh...", file=sys.stderr) - iaca_sh = open(base_dir + 'v2.3/bin/iaca.sh').read() - iaca_sh = iaca_sh.replace('realpath', 'readlink -f', 1) - iaca_sh = iaca_sh.replace('mypath=`pwd`', 'mypath=`dirname $0`', 1) - iaca_sh = iaca_sh.replace('path=$(cd "$(dirname "$0")"; pwd)', - 'script=`readlink -f $0`\n\tpath=`dirname "$script"`', 1) - open(base_dir + 
'v2.3/bin/iaca.sh', 'w').write(iaca_sh) - print("IACA v2.3 installed to", os.getcwd() + '/' + base_dir + 'v2.3', file=sys.stderr) + iaca_sh = open(base_dir + "v2.3/bin/iaca.sh").read() + iaca_sh = iaca_sh.replace("realpath", "readlink -f", 1) + iaca_sh = iaca_sh.replace("mypath=`pwd`", "mypath=`dirname $0`", 1) + iaca_sh = iaca_sh.replace( + 'path=$(cd "$(dirname "$0")"; pwd)', + 'script=`readlink -f $0`\n\tpath=`dirname "$script"`', + 1, + ) + open(base_dir + "v2.3/bin/iaca.sh", "w").write(iaca_sh) + print( + "IACA v2.3 installed to", os.getcwd() + "/" + base_dir + "v2.3", file=sys.stderr + ) print("IACA v3.0 (for HSW, BDW, SKL and SKX support):", file=sys.stderr) - url = url_dict['v3.0'][operating_system] + url = url_dict["v3.0"][operating_system] print("Downloading", url, "...", file=sys.stderr) zfile = zipfile.ZipFile(BytesIO(urlopen(url).read())) - members = [n - for n in zfile.namelist() - if '/.' not in n and n.startswith('iaca-{:}/'.format(operating_system))] + members = [ + n + for n in zfile.namelist() + if "/." 
not in n and n.startswith("iaca-{:}/".format(operating_system)) + ] # Exctract to temp folder and copy to correct directory print("Extracting...", file=sys.stderr) with TemporaryDirectory() as tempdir: zfile.extractall(tempdir, members=members) - shutil.copytree(tempdir + '/iaca-{}'.format(operating_system), base_dir + 'v3.0') + shutil.copytree( + tempdir + "/iaca-{}".format(operating_system), base_dir + "v3.0" + ) print("Correcting permissions of binary...", file=sys.stderr) - st = os.stat(base_dir + 'v3.0/iaca') - os.chmod( - base_dir + 'v3.0/iaca', - st.st_mode | stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH - ) - print("IACA v3.0 installed to", os.getcwd() + '/' + base_dir + 'v3.0', file=sys.stderr) + st = os.stat(base_dir + "v3.0/iaca") + os.chmod( + base_dir + "v3.0/iaca", st.st_mode | stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH + ) + print( + "IACA v3.0 installed to", os.getcwd() + "/" + base_dir + "v3.0", file=sys.stderr + ) # Create unified bin directory to access both operating_systems - os.mkdir(base_dir + 'bin') - os.symlink('../v2.1/bin/iaca.sh', base_dir + 'bin/iaca2.1') - os.symlink('../v2.2/bin/iaca.sh', base_dir + 'bin/iaca2.2') - os.symlink('../v2.3/bin/iaca.sh', base_dir + 'bin/iaca2.3') - os.symlink('../v3.0/iaca', base_dir + 'bin/iaca3.0') - print('export PATH=' + base_dir + 'bin/:$PATH') - - -if __name__ == '__main__': + os.mkdir(base_dir + "bin") + os.symlink("../v2.1/bin/iaca.sh", base_dir + "bin/iaca2.1") + os.symlink("../v2.2/bin/iaca.sh", base_dir + "bin/iaca2.2") + os.symlink("../v2.3/bin/iaca.sh", base_dir + "bin/iaca2.3") + os.symlink("../v3.0/iaca", base_dir + "bin/iaca3.0") + print("export PATH=" + base_dir + "bin/:$PATH") + + +if __name__ == "__main__": main()

kerncraft/cacheprediction.py#L672

Continuation line over-indented for visual indent (E127)

/home/runner/work/kerncraft/kerncraft/kerncraft/kerncraft.py#L33

Numbers can either be evenly distributed in a linear space (if *log* is False) or in a log space (if *log* is True). If *log* is True, base is used to define the log space basis. If *endpoint* is True, *stop* will be the last retruned value, as long as *num* >= 2. """ - assert type(start) is int and type(stop) is int and type(num) is int, \ - "start, stop and num need to be intergers" + assert ( + type(start) is int and type(stop) is int and type(num) is int + ), "start, stop and num need to be intergers" assert num >= 2, "num has to be atleast 2" if log: start = math.log(start, base) stop = math.log(stop, base)

kerncraft/cacheprediction.py#L695

Blank line contains whitespace (W293)

kerncraft/cacheprediction.py#L711

Continuation line under-indented for visual indent (E128)

/home/runner/work/kerncraft/kerncraft/kerncraft/kerncraft.py#L96

if base is given, the integers are evenly spaced on that base (default: 10) """ def __call__(self, parser, namespace, values, option_string=None): """Execute action.""" - message = '' + message = "" if len(values) != 2: - message = 'requires 2 arguments' + message = "requires 2 arguments" else: - m = re.match(r'(?P<start>\d+)(?:-(?P<stop>\d+)(?::(?P<num>\d+)' - r'(:?(?P<log>log)(:?(?P<base>\d+))?)?)?)?', - values[1]) + m = re.match( + r"(?P<start>\d+)(?:-(?P<stop>\d+)(?::(?P<num>\d+)" + r"(:?(?P<log>log)(:?(?P<base>\d+))?)?)?)?", + values[1], + ) if m: gd = m.groupdict() - if gd['stop'] is None: - values[1] = [int(gd['start'])] - elif gd['num'] is None: - values[1] = list(range(int(gd['start']), int(gd['stop']) + 1)) + if gd["stop"] is None: + values[1] = [int(gd["start"])] + elif gd["num"] is None: + values[1] = list(range(int(gd["start"]), int(gd["stop"]) + 1)) else: - log = gd['log'] is not None - base = int(gd['base']) if gd['base'] is not None else 10 - values[1] = list(space( - int(gd['start']), int(gd['stop']), int(gd['num']), log=log, base=base)) + log = gd["log"] is not None + base = int(gd["base"]) if gd["base"] is not None else 10 + values[1] = list( + space( + int(gd["start"]), + int(gd["stop"]), + int(gd["num"]), + log=log, + base=base, + ) + ) else: - message = 'second argument must match: start[-stop[:num[log[base]]]]' + message = "second argument must match: start[-stop[:num[log[base]]]]" if message: raise argparse.ArgumentError(self, message) if hasattr(namespace, self.dest):

kerncraft/iaca_get.py#L6

're' imported but unused (F401)

/home/runner/work/kerncraft/kerncraft/kerncraft/kerncraft.py#L128

setattr(namespace, self.dest, [values]) class VersionAction(argparse.Action): """Reimplementation of the version action, because argparse's version outputs to stderr.""" - def __init__(self, option_strings, version, dest=argparse.SUPPRESS, - default=argparse.SUPPRESS, - help="show program's version number and exit"): + + def __init__( + self, + option_strings, + version, + dest=argparse.SUPPRESS, + default=argparse.SUPPRESS, + help="show program's version number and exit", + ): super(VersionAction, self).__init__( option_strings=option_strings, dest=dest, default=default, nargs=0, - help=help) + help=help, + ) self.version = version def __call__(self, parser, namespace, values, option_string=None): print(parser.prog, self.version) parser.exit() def create_parser(): """Return argparse parser.""" parser = argparse.ArgumentParser( - description='Analytical performance modelling and benchmarking toolkit.', - epilog='For help, examples, documentation and bug reports go to:\nhttps://github.com' - '/RRZE-HPC/kerncraft\nLicense: AGPLv3',) - parser.add_argument('--version', action=VersionAction, version='{}'.format(__version__)) - parser.add_argument('--machine', '-m', type=argparse.FileType('r'), required=True, - help='Path to machine description yaml file.') - parser.add_argument('--pmodel', '-p', choices=models.__all__, required=True, action='append', - default=[], help='Performance model to apply') - parser.add_argument('-D', '--define', nargs=2, metavar=('KEY', 'VALUE'), default=[], - action=AppendStringRange, - help='Define constant to be used in C code. Values must be integer or ' - 'match start-stop[:num[log[base]]]. If range is given, all ' - 'permutation s will be tested. Overwrites constants from testcase ' - 'file. Key can be . 
for default value for all used constants.') - parser.add_argument('--verbose', '-v', action='count', default=0, - help='Increases verbosity level.') - parser.add_argument('code_file', metavar='FILE', type=argparse.FileType(), - help='File with loop kernel C code') - parser.add_argument('--asm-block', metavar='BLOCK', default='auto', - help='Number of ASM block to mark for IACA, "auto" for automatic ' - 'selection or "manual" for interactiv selection.') - parser.add_argument('--pointer-increment', metavar='INCR', default='auto', type=int_or_str, - help='Increment of store pointer within one ASM block in bytes. If "auto": ' - 'automatic detection, error on failure to detect, if ' - '"auto_with_manual_fallback": fallback to manual input, or if ' - '"manual": always prompt user.') - parser.add_argument('--store', metavar='PICKLE', type=argparse.FileType('a+b'), - help='Addes results to PICKLE file for later processing.') - parser.add_argument('--json', metavar='JSON', type=argparse.FileType('a+'), - help='Stores result as JSON file for later processing') - parser.add_argument('--unit', '-u', choices=['cy/CL', 'cy/It', 'It/s', 'FLOP/s'], - help='Select the output unit, defaults to model specific if not given.') - parser.add_argument('--cores', '-c', metavar='CORES', type=int, default=1, - help='Number of cores to be used in parallel. (default: 1) ' - 'ECM model will consider the scaling of the last level cache and ' - 'predict the overall performance in addition to single-core behavior. 
' - 'The benchmark mode will run the code with OpenMP on as many physical ' - 'cores.') - parser.add_argument('--kernel-description', action='store_true', - help='Use kernel description instead of analyzing the kernel code.') - parser.add_argument('--clean-intermediates', action='store_true', - help='If set, will delete all intermediate files after completion.') + description="Analytical performance modelling and benchmarking toolkit.", + epilog="For help, examples, documentation and bug reports go to:\nhttps://github.com" + "/RRZE-HPC/kerncraft\nLicense: AGPLv3", + ) + parser.add_argument( + "--version", action=VersionAction, version="{}".format(__version__) + ) + parser.add_argument( + "--machine", + "-m", + type=argparse.FileType("r"), + required=True, + help="Path to machine description yaml file.", + ) + parser.add_argument( + "--pmodel", + "-p", + choices=models.__all__, + required=True, + action="append", + default=[], + help="Performance model to apply", + ) + parser.add_argument( + "-D", + "--define", + nargs=2, + metavar=("KEY", "VALUE"), + default=[], + action=AppendStringRange, + help="Define constant to be used in C code. Values must be integer or " + "match start-stop[:num[log[base]]]. If range is given, all " + "permutation s will be tested. Overwrites constants from testcase " + "file. Key can be . for default value for all used constants.", + ) + parser.add_argument( + "--verbose", "-v", action="count", default=0, help="Increases verbosity level." + ) + parser.add_argument( + "code_file", + metavar="FILE", + type=argparse.FileType(), + help="File with loop kernel C code", + ) + parser.add_argument( + "--asm-block", + metavar="BLOCK", + default="auto", + help='Number of ASM block to mark for IACA, "auto" for automatic ' + 'selection or "manual" for interactiv selection.', + ) + parser.add_argument( + "--pointer-increment", + metavar="INCR", + default="auto", + type=int_or_str, + help='Increment of store pointer within one ASM block in bytes. 
If "auto": ' + "automatic detection, error on failure to detect, if " + '"auto_with_manual_fallback": fallback to manual input, or if ' + '"manual": always prompt user.', + ) + parser.add_argument( + "--store", + metavar="PICKLE", + type=argparse.FileType("a+b"), + help="Addes results to PICKLE file for later processing.", + ) + parser.add_argument( + "--json", + metavar="JSON", + type=argparse.FileType("a+"), + help="Stores result as JSON file for later processing", + ) + parser.add_argument( + "--unit", + "-u", + choices=["cy/CL", "cy/It", "It/s", "FLOP/s"], + help="Select the output unit, defaults to model specific if not given.", + ) + parser.add_argument( + "--cores", + "-c", + metavar="CORES", + type=int, + default=1, + help="Number of cores to be used in parallel. (default: 1) " + "ECM model will consider the scaling of the last level cache and " + "predict the overall performance in addition to single-core behavior. " + "The benchmark mode will run the code with OpenMP on as many physical " + "cores.", + ) + parser.add_argument( + "--kernel-description", + action="store_true", + help="Use kernel description instead of analyzing the kernel code.", + ) + parser.add_argument( + "--clean-intermediates", + action="store_true", + help="If set, will delete all intermediate files after completion.", + ) # Needed for ECM, ECMData and RooflineFLOP models: - parser.add_argument('--cache-predictor', '-P', choices=['LC', 'SIM'], default='SIM', - help='Change cache predictor to use, options are LC (layer conditions) and ' - 'SIM (cache simulation with pycachesim), default is SIM.') + parser.add_argument( + "--cache-predictor", + "-P", + choices=["LC", "SIM"], + default="SIM", + help="Change cache predictor to use, options are LC (layer conditions) and " + "SIM (cache simulation with pycachesim), default is SIM.", + ) # Needed for ECM, RooflineASM and Benchmark models: - parser.add_argument('--compiler', '-C', type=str, default=None, - help='Compiler to use, default is 
first in machine description file.') - parser.add_argument('--compiler-flags', type=str, default=None, - help='Compiler flags to use. If not set, flags are taken from machine ' - 'description file (-std=c99 is always added).') + parser.add_argument( + "--compiler", + "-C", + type=str, + default=None, + help="Compiler to use, default is first in machine description file.", + ) + parser.add_argument( + "--compiler-flags", + type=str, + default=None, + help="Compiler flags to use. If not set, flags are taken from machine " + "description file (-std=c99 is always added).", + ) # Needed for ECM and RooflineASM models: - parser.add_argument('--incore-model', '-i', type=str, default="OSACA", - help='In-core model to use, default is first in machine description file.') + parser.add_argument( + "--incore-model", + "-i", + type=str, + default="OSACA", + help="In-core model to use, default is first in machine description file.", + ) for m in models.__all__: - ag = parser.add_argument_group('arguments for ' + m + ' model', getattr(models, m).name) + ag = parser.add_argument_group( + "arguments for " + m + " model", getattr(models, m).name + ) getattr(models, m).configure_arggroup(ag) return parser def check_arguments(args, parser): """ Check arguments passed by user that are not checked by argparse itself. Also register files for closing. 
""" - if args.asm_block not in ['auto', 'manual']: + if args.asm_block not in ["auto", "manual"]: try: args.asm_block = int(args.asm_block) except ValueError: parser.error('--asm-block can only be "auto", "manual" or an integer') # Set default unit depending on performance model requested if not args.unit: - if 'RooflineFLOP' in args.pmodel or 'RooflineASM' in args.pmodel or 'RooflineIACA' in args.pmodel: - args.unit = 'FLOP/s' + if ( + "RooflineFLOP" in args.pmodel + or "RooflineASM" in args.pmodel + or "RooflineIACA" in args.pmodel + ): + args.unit = "FLOP/s" else: - args.unit = 'cy/CL' + args.unit = "cy/CL" # Register all opened files for closing at exit. if args.store: atexit.register(args.store.close) if args.json:

kerncraft/iaca_get.py#L16

Line too long (115 > 100 characters) (E501)

kerncraft/iaca_get.py#L17

Line too long (119 > 100 characters) (E501)

/home/runner/work/kerncraft/kerncraft/kerncraft/kerncraft.py#L243

if args.machine: atexit.register(args.machine.close) def to_tuple(x): - '''Transform nested lists (and tuple) in purely nested tuples.''' + """Transform nested lists (and tuple) in purely nested tuples.""" if isinstance(x, (list, tuple)): if len(x) >= 2: return tuple(to_tuple(x[:1]) + to_tuple(x[1:])) elif len(x) == 1: return (to_tuple(x[0]),)

kerncraft/iaca_get.py#L20

Line too long (114 > 100 characters) (E501)

/home/runner/work/kerncraft/kerncraft/kerncraft/kerncraft.py#L261

identifier = [] for k in sorted(args.__dict__): if k in kwargs: identifier.append((k, kwargs[k])) continue - if k in ['verbose', 'store', 'unit', 'clean_intermediates']: + if k in ["verbose", "store", "unit", "clean_intermediates"]: # Ignore these, as they do not change the outcome continue v = args.__dict__[k] if isinstance(v, list): v = to_tuple(v)

kerncraft/iaca_get.py#L21

Line too long (118 > 100 characters) (E501)

/home/runner/work/kerncraft/kerncraft/kerncraft/kerncraft.py#L274

if isinstance(v, io.IOBase): v = v.name identifier.append((k, v)) return tuple(identifier) + def jsonify_obj(obj): - #print("jsonify {}".format(str(obj) if len(str(obj)) < 15 else str(obj)[:12] + "...")) + # print("jsonify {}".format(str(obj) if len(str(obj)) < 15 else str(obj)[:12] + "...")) # if obj is str, int, or float, keep it this way if isinstance(obj, str) or isinstance(obj, int) or isinstance(obj, float): return obj # if obj is list, use recursion elif isinstance(obj, list) or isinstance(obj, tuple):

kerncraft/iaca_get.py#L24

Line too long (114 > 100 characters) (E501)

/home/runner/work/kerncraft/kerncraft/kerncraft/kerncraft.py#L290

return tuple(new_list) return new_list # if obj is dict, use recursion elif isinstance(obj, dict): new_dict = {} - for k,v in obj.items(): + for k, v in obj.items(): # key must be one element - k = str(k) if not (isinstance(k, str) or isinstance(k, int) or isinstance(k, float)) else k + k = ( + str(k) + if not ( + isinstance(k, str) or isinstance(k, int) or isinstance(k, float) + ) + else k + ) new_dict[k] = jsonify_obj(v) return new_dict else: return str(obj)

kerncraft/iaca_get.py#L25

Line too long (118 > 100 characters) (E501)

/home/runner/work/kerncraft/kerncraft/kerncraft/kerncraft.py#L321

# process kernel if not args.kernel_description: code = str(args.code_file.read()) args.code_file.close() code = clean_code(code) - kernel = KernelCode(code, filename=args.code_file.name, machine=machine, - keep_intermediates=not args.clean_intermediates) + kernel = KernelCode( + code, + filename=args.code_file.name, + machine=machine, + keep_intermediates=not args.clean_intermediates, + ) else: description = str(args.code_file.read()) args.code_file.close() - yaml = ruamel.yaml.YAML(typ='unsafe') + yaml = ruamel.yaml.YAML(typ="unsafe") yaml.register_class(PrefixedUnit) kernel = KernelDescription(yaml.load(description), machine=machine) - loop_indices = set([symbol_pos_int(l['index']) for l in kernel.get_loop_stack()]) + loop_indices = set([symbol_pos_int(l["index"]) for l in kernel.get_loop_stack()]) # define constants required_consts = [v[1] for v in kernel.variables.values() if v[1] is not None] - required_consts += [[l['start'], l['stop']] for l in kernel.get_loop_stack()] + required_consts += [[l["start"], l["stop"]] for l in kernel.get_loop_stack()] required_consts += [i for a in kernel.sources.values() for i in a if i is not None] - required_consts += [i for a in kernel.destinations.values() for i in a if i is not None] + required_consts += [ + i for a in kernel.destinations.values() for i in a if i is not None + ] # split into individual consts required_consts = [i for l in required_consts for i in l] required_consts = set([i for l in required_consts for i in l.free_symbols]) # remove loop indices required_consts -= loop_indices

kerncraft/iaca_get.py#L28

Line too long (116 > 100 characters) (E501)

kerncraft/iaca_get.py#L29

Line too long (118 > 100 characters) (E501)

/home/runner/work/kerncraft/kerncraft/kerncraft/kerncraft.py#L347

if len(required_consts) > 0: # build defines permutations define_dict = OrderedDict() args.define.sort() # Prefill with default value, if any is given - if '.' in [n for n,v in args.define]: - default_const_values = dict(args.define)['.'] + if "." in [n for n, v in args.define]: + default_const_values = dict(args.define)["."] for name in required_consts: name = str(name) define_dict[str(name)] = [[str(name), v] for v in default_const_values] for name, values in args.define: if name not in [str(n) for n in required_consts]:

kerncraft/iaca_get.py#L33

Expected 2 blank lines, found 1 (E302)

/home/runner/work/kerncraft/kerncraft/kerncraft/kerncraft.py#L364

for v in values: if v not in define_dict[name]: define_dict[name].append([name, v]) define_product = list(itertools.product(*list(define_dict.values()))) # Check that all consts have been defined - if set(required_consts).difference(set([symbol_pos_int(k) for k in define_dict.keys()])): - raise ValueError("Not all constants have been defined. Required are: {}".format( - required_consts)) + if set(required_consts).difference( + set([symbol_pos_int(k) for k in define_dict.keys()]) + ): + raise ValueError( + "Not all constants have been defined. Required are: {}".format( + required_consts + ) + ) else: define_product = [{}] for define in define_product: # Reset state of kernel

kerncraft/incore_model.py#L8

'copy.copy' imported but unused (F401)

/home/runner/work/kerncraft/kerncraft/kerncraft/kerncraft.py#L380

for k, v in define: kernel.set_constant(k, v) for model_name in uniquify(args.pmodel): # print header - print('{:^80}'.format(' kerncraft '), file=output_file) - print('{:<40}{:>40}'.format(args.code_file.name, '-m ' + args.machine.name), - file=output_file) - print(' '.join(['-D {} {}'.format(k, v) for k, v in define]), file=output_file) - print('{:-^80}'.format(' ' + model_name + ' '), file=output_file) + print("{:^80}".format(" kerncraft "), file=output_file) + print( + "{:<40}{:>40}".format(args.code_file.name, "-m " + args.machine.name), + file=output_file, + ) + print( + " ".join(["-D {} {}".format(k, v) for k, v in define]), file=output_file + ) + print("{:-^80}".format(" " + model_name + " "), file=output_file) if args.verbose > 1: if not args.kernel_description: kernel.print_kernel_code(output_file=output_file) - print('', file=output_file) + print("", file=output_file) kernel.print_variables_info(output_file=output_file) kernel.print_kernel_info(output_file=output_file) if args.verbose > 0: kernel.print_constants_info(output_file=output_file)

kerncraft/incore_model.py#L10

'pprint.pformat' imported but unused (F401)

/home/runner/work/kerncraft/kerncraft/kerncraft/kerncraft.py#L402

model.analyze() model.report(output_file=output_file) # Add results to storage result_identifier = identifier_from_arguments( - args, define=to_tuple(define), pmodel=model_name) + args, define=to_tuple(define), pmodel=model_name + ) result_storage[result_identifier] = model.results - print('', file=output_file) + print("", file=output_file) # Save storage to file (if requested) if args.store: - temp_name = args.store.name + '.tmp' - with open(temp_name, 'wb+') as f: + temp_name = args.store.name + ".tmp" + with open(temp_name, "wb+") as f: pickle.dump(result_storage, f) shutil.move(temp_name, args.store.name) if args.json: - temp_name = args.json.name + '.tmp' + temp_name = args.json.name + ".tmp" json_dict = jsonify_obj(result_storage) - with open(temp_name, 'w+') as f: + with open(temp_name, "w+") as f: json.dump(json_dict, f, indent=4) shutil.move(temp_name, args.json.name) return result_storage

kerncraft/incore_model.py#L10

'pprint.pprint' imported but unused (F401)

/home/runner/work/kerncraft/kerncraft/kerncraft/kerncraft.py#L437

# BUSINESS LOGIC IS FOLLOWING run(parser, args) -if __name__ == '__main__': +if __name__ == "__main__": main()

kerncraft/incore_model.py#L12

'textwrap' imported but unused (F401)

/home/runner/work/kerncraft/kerncraft/kerncraft/cacheprediction.py#L22

def uneven_tuple_cmp(a, b): length_diff = max(len(a), len(b)) - min(len(a), len(b)) if len(a) < len(b): - a = (0,)*length_diff + a + a = (0,) * length_diff + a elif len(b) < len(a): - b = (0,)*length_diff + b + b = (0,) * length_diff + b if a > b: return 1 elif a < b: return -1 else:

kerncraft/incore_model.py#L14

'io' imported but unused (F401)

/home/runner/work/kerncraft/kerncraft/kerncraft/cacheprediction.py#L62

e = e.expand() key = [] # split into terms terms, gens = e.as_terms() - assert gens == [first_s] or first_s is None and gens == [], \ - "Expression was split into unusable terms: {}, expected.".format(gens, first_s) + assert ( + gens == [first_s] or first_s is None and gens == [] + ), "Expression was split into unusable terms: {}, expected.".format(gens, first_s) # extract exponent and coefficient for term, (coeff, cpart, ncpart) in terms: coeff_real, coeff_imag = coeff assert coeff_imag == 0, "Not supporting imaginary coefficients." # Sort order: exponent (cpart), factor

/home/runner/work/kerncraft/kerncraft/kerncraft/cacheprediction.py#L75

key[0] = (key[0][0], key[0][1]) # build key key.sort(reverse=True) # add missing exponent, coefficient tuples i = 0 - for exponent in reversed(range(key[0][0]+1)): + for exponent in reversed(range(key[0][0] + 1)): if len(key) > i and key[i][0] == exponent: i += 1 continue else: key[i:i] = [(exponent, 0.0)]

kerncraft/incore_model.py#L22

'osaca.semantics.MachineModel' imported but unused (F401)

/home/runner/work/kerncraft/kerncraft/kerncraft/cacheprediction.py#L110

return 0 if isinstance(dimension_factor, sympy.Symbol): return 1 # Replace all free symbols with one: if not dimension_factor.free_symbols: - raise ValueError("dimension_factor is neither a number, a symbol nor an expression based " - "on symbols.") + raise ValueError( + "dimension_factor is neither a number, a symbol nor an expression based " + "on symbols." + ) free_symbols = list(dimension_factor.free_symbols) for s in free_symbols[1:]: dimension_factor = dimension_factor.subs(s, free_symbols[0]) if isinstance(dimension_factor, sympy.Pow): return dimension_factor.as_base_exp()[1]

kerncraft/incore_model.py#L33

Indentation is not a multiple of 4 (E111)

kerncraft/incore_model.py#L117

Continuation line under-indented for visual indent (E128)

/home/runner/work/kerncraft/kerncraft/kerncraft/cacheprediction.py#L172

if c: coefficients[factor_idx] += c break # Test: reassemble original expression - if expr != reduce(operator.add, [c*f for c, f in zip(coefficients, dimension_factors)], 0): - raise ValueError("Unable to split expression and reproduce from coefficients and factors: " - "{!r} with {!r}".format(terms, dimension_factors)) + if expr != reduce( + operator.add, [c * f for c, f in zip(coefficients, dimension_factors)], 0 + ): + raise ValueError( + "Unable to split expression and reproduce from coefficients and factors: " + "{!r} with {!r}".format(terms, dimension_factors) + ) return tuple(coefficients) def canonical_relational(rel):

kerncraft/incore_model.py#L160

Line too long (113 > 100 characters) (E501)

/home/runner/work/kerncraft/kerncraft/kerncraft/cacheprediction.py#L260

def __init__(self, kernel, machine, cores=1, symbolic=False): """Initialize layer condition based predictor from kernel and machine object.""" CachePredictor.__init__(self, kernel, machine, cores=cores) if isinstance(kernel, KernelCode): # Make use of caching for symbolic LC representation: - file_name = 'LC_analysis.pickle.lzma' + file_name = "LC_analysis.pickle.lzma" file_path = kernel.get_intermediate_location( - file_name, machine_and_compiler_dependent=False, other_dependencies=[str(cores)]) + file_name, + machine_and_compiler_dependent=False, + other_dependencies=[str(cores)], + ) lock_mode, lock_fp = kernel.lock_intermediate(file_path) if lock_mode == fcntl.LOCK_SH: # use cache self.results = compress_pickle.load(file_path) lock_fp.close() # release lock

kerncraft/incore_model.py#L169

Line too long (113 > 100 characters) (E501)

/home/runner/work/kerncraft/kerncraft/kerncraft/cacheprediction.py#L282

if not symbolic: self.desymbolize() def desymbolize(self): """Evaluate LCs and remove symbols""" - for i, options in enumerate(self.results['cache']): + for i, options in enumerate(self.results["cache"]): for o in options: - if self.kernel.subs_consts(o['condition']): - self.results['cache'][i] = o + if self.kernel.subs_consts(o["condition"]): + self.results["cache"][i] = o break def build_symbolic_LCs(self): # check that layer conditions can be applied on this kernel: # 1. All iterations may only have a step width of 1 loop_stack = list(self.kernel.get_loop_stack()) - if any([l['increment'] != 1 for l in loop_stack]): - raise ValueError("Can not apply layer condition, since not all loops are of step " - "length 1.") + if any([l["increment"] != 1 for l in loop_stack]): + raise ValueError( + "Can not apply layer condition, since not all loops are of step " + "length 1." + ) # 2. The order of iterations must be reflected in the order of indices in all array # references containing the inner loop index. If the inner loop index is not part of the # reference, the reference is simply ignored - index_order = [symbol_pos_int(l['index']) for l in loop_stack] - for var_name, arefs in chain(self.kernel.sources.items(), self.kernel.destinations.items()): + index_order = [symbol_pos_int(l["index"]) for l in loop_stack] + for var_name, arefs in chain( + self.kernel.sources.items(), self.kernel.destinations.items() + ): try: if next(iter(arefs)) is None: # Anything that is a scalar may be ignored continue except StopIteration:

kerncraft/incore_model.py#L170

Multiple spaces after keyword (E271)

/home/runner/work/kerncraft/kerncraft/kerncraft/cacheprediction.py#L319

# Terms without any indices can be treat as constant offsets and are acceptable if not idx: continue if len(idx) != 1: - raise ValueError("Only one loop counter may appear per term. " - "Problematic term: {}.".format(t)) + raise ValueError( + "Only one loop counter may appear per term. " + "Problematic term: {}.".format(t) + ) else: # len(idx) == 1 idx = idx.pop() # Check that number of multiplication match access order of iterator - pow_dict = {k: v for k, v in t.as_powers_dict().items() - if k != idx} + pow_dict = { + k: v for k, v in t.as_powers_dict().items() if k != idx + } stride_dim = sum(pow_dict.values()) error = False try: - if loop_stack[-stride_dim-1]['index'] != idx.name: + if loop_stack[-stride_dim - 1]["index"] != idx.name: error = True except IndexError: error = True if error: - raise ValueError("Number of multiplications in index term does not " - "match loop counter order. " - "Problematic term: {}.".format(t)) + raise ValueError( + "Number of multiplications in index term does not " + "match loop counter order. " + "Problematic term: {}.".format(t) + ) # 3. 
Indices may only increase with one - inner_index = symbol_pos_int(loop_stack[-1]['index']) - inner_increment = loop_stack[-1]['increment'] - for aref in chain(chain(*self.kernel.sources.values()), - chain(*self.kernel.destinations.values())): + inner_index = symbol_pos_int(loop_stack[-1]["index"]) + inner_increment = loop_stack[-1]["increment"] + for aref in chain( + chain(*self.kernel.sources.values()), + chain(*self.kernel.destinations.values()), + ): if aref is None: continue for expr in aref: - diff = expr.subs(inner_index, 1+inner_increment) - expr.subs(inner_index, 1) + diff = expr.subs(inner_index, 1 + inner_increment) - expr.subs( + inner_index, 1 + ) if diff != 0 and diff != 1: # TODO support -1 aswell - raise ValueError("Can not apply layer condition, array references may not " - "increment more then one per iteration.") + raise ValueError( + "Can not apply layer condition, array references may not " + "increment more then one per iteration." + ) # FIXME handle multiple datatypes element_size = self.kernel.datatypes_size[self.kernel.datatype] indices = list([symbol_pos_int(l[0]) for l in self.kernel._loop_stack]) sympy_accesses = self.kernel.compile_sympy_accesses()

kerncraft/incore_model.py#L177

Multiple spaces before operator (E221)

/home/runner/work/kerncraft/kerncraft/kerncraft/cacheprediction.py#L369

array_dimensions = self.kernel.variables[var_name][1] # Skipping scalars if array_dimensions is None: continue for dimension in range(len(array_dimensions)): - dimension_factors.append(reduce(operator.mul, array_dimensions[dimension+1:], - sympy.Integer(1))) + dimension_factors.append( + reduce( + operator.mul, + array_dimensions[dimension + 1 :], + sympy.Integer(1), + ) + ) for a in sympy_accesses[var_name]: o = split_sympy_access_in_dim_offset(a, dimension_factors) accesses[var_name].append(o) # Skip non-variable offsets, where acs is [None, None, None] (or similar) or only made # up from constant offsets if not any(accesses[var_name]) or not any( - [a == inner_index or a.coeff(inner_index) != 0 - for a in chain.from_iterable(accesses[var_name])]): + [ + a == inner_index or a.coeff(inner_index) != 0 + for a in chain.from_iterable(accesses[var_name]) + ] + ): continue destinations.update( - [(var_name, tuple(r)) for r in self.kernel.destinations.get(var_name, [])]) + [ + (var_name, tuple(r)) + for r in self.kernel.destinations.get(var_name, []) + ] + ) acs = list(accesses[var_name]) # If accesses are of unequal length, pad with leading zero elements max_dims = max(map(len, acs)) for i in range(len(acs)): if len(acs[i]) < max_dims: - acs[i] = (sympy.Integer(0),)*(max_dims-len(acs[i])) + acs[i] + acs[i] = (sympy.Integer(0),) * (max_dims - len(acs[i])) + acs[i] # Sort accesses by decreasing order acs.sort(reverse=True) # Transform back into sympy expressions for i in range(len(acs)): - acs[i] = reduce(sympy.Add, [f*df for f, df in zip(acs[i], dimension_factors)]) + acs[i] = reduce( + sympy.Add, [f * df for f, df in zip(acs[i], dimension_factors)] + ) # Create reuse distances by substracting accesses pairwise in decreasing order - distances += [(acs[i-1]-acs[i]).simplify() for i in range(1, len(acs))] + distances += [(acs[i - 1] - acs[i]).simplify() for i in range(1, len(acs))] # Add infinity for each array distances.append(sympy.oo) # Sort distances by decreasing 
order distances.sort(reverse=True, key=sympy_expr_abs_distance_key) # Create copy of distances in bytes: - distances_bytes = [d*element_size for d in distances] + distances_bytes = [d * element_size for d in distances] # CAREFUL! From here on we are working in byte offsets and not in indices anymore. # converting access sets to lists, otherwise pprint will fail during obligatory sorting step - results = {'accesses': {k: sorted(list(v), key=cmp_to_key(uneven_tuple_cmp)) - for k,v in accesses.items()}, - 'distances': distances, - 'destinations': destinations, - 'distances_bytes': distances_bytes, - 'cache': []} - - sum_array_sizes = sum(self.kernel.array_sizes(in_bytes=True, subs_consts=False).values()) + results = { + "accesses": { + k: sorted(list(v), key=cmp_to_key(uneven_tuple_cmp)) + for k, v in accesses.items() + }, + "distances": distances, + "destinations": destinations, + "distances_bytes": distances_bytes, + "cache": [], + } + + sum_array_sizes = sum( + self.kernel.array_sizes(in_bytes=True, subs_consts=False).values() + ) for c in self.machine.get_cachesim(self.cores).levels(with_mem=False): # Assuming increasing order of cache sizes options = [] # Full caching - options.append({ - 'condition': canonical_relational(c.size() > sum_array_sizes), - 'hits': len(distances), - 'misses': 0, - 'evicts': 0, - 'tail': sympy.oo, - }) - - for tail in sorted(set([d.simplify().expand() for d in distances_bytes]), reverse=True, - key=sympy_expr_abs_distance_key): + options.append( + { + "condition": canonical_relational(c.size() > sum_array_sizes), + "hits": len(distances), + "misses": 0, + "evicts": 0, + "tail": sympy.oo, + } + ) + + for tail in sorted( + set([d.simplify().expand() for d in distances_bytes]), + reverse=True, + key=sympy_expr_abs_distance_key, + ): # Assuming decreasing order of tails # Ignoring infinity tail: if tail is sympy.oo: continue cache_requirement = ( # Sum of inter-access caches - sum([d for d in distances_bytes - if 
sympy_expr_abs_distance_key(d) <= sympy_expr_abs_distance_key(tail)] - ) + + sum( + [ + d + for d in distances_bytes + if sympy_expr_abs_distance_key(d) + <= sympy_expr_abs_distance_key(tail) + ] + ) + + # Tails - tail*len([d for d in distances_bytes - if sympy_expr_abs_distance_key(d) > - sympy_expr_abs_distance_key(tail)])) + tail + * len( + [ + d + for d in distances_bytes + if sympy_expr_abs_distance_key(d) + > sympy_expr_abs_distance_key(tail) + ] + ) + ) condition = canonical_relational(cache_requirement <= c.size()) hits = len( - [d for d in distances_bytes - if sympy_expr_abs_distance_key(d) <= sympy_expr_abs_distance_key(tail)]) + [ + d + for d in distances_bytes + if sympy_expr_abs_distance_key(d) + <= sympy_expr_abs_distance_key(tail) + ] + ) misses = len( - [d for d in distances_bytes - if sympy_expr_abs_distance_key(d) > sympy_expr_abs_distance_key(tail)]) + [ + d + for d in distances_bytes + if sympy_expr_abs_distance_key(d) + > sympy_expr_abs_distance_key(tail) + ] + ) # Resulting analysis - options.append({ - 'condition': condition, - 'hits': hits, - 'misses': misses, - 'evicts': len(destinations), - 'tail': tail}) + options.append( + { + "condition": condition, + "hits": hits, + "misses": misses, + "evicts": len(destinations), + "tail": tail, + } + ) # If we encountered a True condition, break to not include multiple such. 
if isinstance(condition, BooleanTrue): break - if not isinstance(options[-1]['condition'], BooleanTrue): + if not isinstance(options[-1]["condition"], BooleanTrue): # Fallback: no condition matched - options.append({ - 'condition': True, - 'hits': 0, - 'misses': len(distances), - 'evicts': len(destinations), - 'tail': 0 - }) - - results['cache'].append(options) + options.append( + { + "condition": True, + "hits": 0, + "misses": len(distances), + "evicts": len(destinations), + "tail": 0, + } + ) + + results["cache"].append(options) self.results = results def get_loads(self): """Return a list with number of loaded cache lines per memory hierarchy level.""" # TODO FIXME L1 loads need to be derived from accesses - return [float('nan')]+[c['misses'] for c in self.results['cache']] + return [float("nan")] + [c["misses"] for c in self.results["cache"]] def get_hits(self): """Return a list with number of hit cache lines per memory hierarchy level.""" # At last level, all previous misses are hits - return [c['hits'] for c in self.results['cache']]+[self.results['cache'][-1]['misses']] + return [c["hits"] for c in self.results["cache"]] + [ + self.results["cache"][-1]["misses"] + ] def get_misses(self): """Return a list with number of missed cache lines per memory hierarchy level.""" # At last level, there are no misses - return [c['misses'] for c in self.results['cache']]+[0] + return [c["misses"] for c in self.results["cache"]] + [0] def get_stores(self): """Return a list with number of stored cache lines per memory hierarchy level.""" # TODO FIXME L1 stores need to be derived from accesses - return [float('nan')]+[c['evicts'] for c in self.results['cache']] + return [float("nan")] + [c["evicts"] for c in self.results["cache"]] def get_evicts(self): """Return a list with number of evicted cache lines per memory hierarchy level.""" # At last level, there are no evicts - return [c['evicts'] for c in self.results['cache']]+[0] + return [c["evicts"] for c in 
self.results["cache"]] + [0] def get_infos(self): """Return verbose information about the predictor.""" return self.results class CacheSimulationPredictor(CachePredictor): """Predictor class based on layer condition analysis.""" + def __init__(self, kernel, machine, cores=1): """Initialize cache simulation based predictor from kernel and machine object.""" CachePredictor.__init__(self, kernel, machine, cores) if isinstance(kernel, KernelCode): # Make use of caching for symbolic LC representation: - file_name = 'CSIM_analysis.pickle.lzma' + file_name = "CSIM_analysis.pickle.lzma" file_path = kernel.get_intermediate_location( - file_name, machine_and_compiler_dependent=False, - other_dependencies=[str(cores)]+[str(t) for t in self.kernel.constants.items()]) + file_name, + machine_and_compiler_dependent=False, + other_dependencies=[str(cores)] + + [str(t) for t in self.kernel.constants.items()], + ) lock_mode, lock_fp = kernel.lock_intermediate(file_path) if lock_mode == fcntl.LOCK_SH: # use cache cache = compress_pickle.load(file_path) lock_fp.close() # release lock - self.first_dim_factor = cache['first_dim_factor'] - self.stats = cache['stats'] - self.pretty_stats = cache['pretty_stats'] + self.first_dim_factor = cache["first_dim_factor"] + self.stats = cache["stats"] + self.pretty_stats = cache["pretty_stats"] else: # lock_mode == fcntl.LOCK_EX # needs update self.simulate() compress_pickle.dump( - {'first_dim_factor': self.first_dim_factor, 'stats': self.stats, 'pretty_stats': self.pretty_stats}, - file_path) + { + "first_dim_factor": self.first_dim_factor, + "stats": self.stats, + "pretty_stats": self.pretty_stats, + }, + file_path, + ) lock_fp.close() # release lock else: # No caching support without filename for kernel code self.simulate()

kerncraft/incore_model.py#L185

Multiple spaces before operator (E221)

/home/runner/work/kerncraft/kerncraft/kerncraft/cacheprediction.py#L542

# Get the machine's cache model and simulator self.csim = self.machine.get_cachesim(self.cores) # FIXME handle multiple datatypes element_size = self.kernel.datatypes_size[self.kernel.datatype] - cacheline_size = self.machine['cacheline size'] + cacheline_size = self.machine["cacheline size"] elements_per_cacheline = int(cacheline_size // element_size) - iterations_per_cacheline = (sympy.Integer(self.machine['cacheline size']) / - sympy.Integer(self.kernel.bytes_per_iteration)) + iterations_per_cacheline = sympy.Integer( + self.machine["cacheline size"] + ) / sympy.Integer(self.kernel.bytes_per_iteration) # Gathering some loop information: inner_loop = list(self.kernel.get_loop_stack(subs_consts=True))[-1] - inner_index = symbol_pos_int(inner_loop['index']) - inner_increment = inner_loop['increment'] # Calculate the number of iterations for warm-up + inner_index = symbol_pos_int(inner_loop["index"]) + inner_increment = inner_loop[ + "increment" + ] # Calculate the number of iterations for warm-up total_length = self.kernel.iteration_length() max_iterations = self.kernel.subs_consts(total_length) max_cache_size = sum([c.size() for c in self.csim.levels(with_mem=False)]) - # Warmup # Phase 1: # define warmup interval boundaries max_steps = 100

kerncraft/incore_model.py#L257

Block comment should start with '# ' (E265)

/home/runner/work/kerncraft/kerncraft/kerncraft/cacheprediction.py#L574

warmup_iteration = max_iterations complete_sweep = True # print("warmup_iteration1", warmup_iteration) offsets = self.kernel.compile_global_offsets( - iteration=range(prev_warmup_iteration, warmup_iteration)) + iteration=range(prev_warmup_iteration, warmup_iteration) + ) self.csim.loadstore(offsets, length=element_size) invalid_entries = self.csim.count_invalid_entries() # TODO more intelligent break criteria based on change of invalid entries might be # useful for early termination. # print("invalid_entries", invalid_entries)

kerncraft/incore_model.py#L257

Line too long (120 > 100 characters) (E501)

kerncraft/incore_model.py#L264

Blank line contains whitespace (W293)

/home/runner/work/kerncraft/kerncraft/kerncraft/cacheprediction.py#L597

complete_sweep = True prev_warmup_iteration = warmup_iteration warmup_iteration = max_iterations # print("warmup_iteration2", warmup_iteration, end="; ") offsets = self.kernel.compile_global_offsets( - iteration=range(prev_warmup_iteration, warmup_iteration)) + iteration=range(prev_warmup_iteration, warmup_iteration) + ) self.csim.loadstore(offsets, length=element_size) warmup_iteration = 0 if not complete_sweep and invalid_entries > 0: - print("Warning: Unable to perform complete sweep nor initialize cache completely. " - "This might introduce inaccuracies (additional cache misses) in the cache " - "prediction.") + print( + "Warning: Unable to perform complete sweep nor initialize cache completely. " + "This might introduce inaccuracies (additional cache misses) in the cache " + "prediction." + ) # Phase 3: # Iterate to safe handover point prev_warmup_iteration = warmup_iteration - warmup_iteration = self._align_iteration_with_cl_boundary(warmup_iteration, subtract=False) + warmup_iteration = self._align_iteration_with_cl_boundary( + warmup_iteration, subtract=False + ) if warmup_iteration != prev_warmup_iteration: # print("warmup_iteration3", warmup_iteration) offsets = self.kernel.compile_global_offsets( - iteration=range(prev_warmup_iteration, warmup_iteration)) + iteration=range(prev_warmup_iteration, warmup_iteration) + ) self.csim.loadstore(offsets, length=element_size) # Reset stats to conclude warm-up phase self.csim.reset_stats() # Benchmark - bench_iteration = self._align_iteration_with_cl_boundary(min( - warmup_iteration + 100000, max_iterations - 1)) + bench_iteration = self._align_iteration_with_cl_boundary( + min(warmup_iteration + 100000, max_iterations - 1) + ) # print("bench_iteration", bench_iteration) - first_dim_factor = float((bench_iteration - warmup_iteration) / iterations_per_cacheline) + first_dim_factor = float( + (bench_iteration - warmup_iteration) / iterations_per_cacheline + ) # If end point is less than 100 cacheline away, warn user 
of inaccuracy if not complete_sweep and first_dim_factor < 1000: - print("Warning: benchmark iterations are very low ({} CL). This may lead to inaccurate " - "cache predictions.".format(first_dim_factor)) + print( + "Warning: benchmark iterations are very low ({} CL). This may lead to inaccurate " + "cache predictions.".format(first_dim_factor) + ) # Compile access needed for one cache-line offsets = self.kernel.compile_global_offsets( - iteration=range(warmup_iteration, bench_iteration)) + iteration=range(warmup_iteration, bench_iteration) + ) # Run cache simulation self.csim.loadstore(offsets, length=element_size) # FIXME compile_global_offsets should already expand to element_size # use stats to build results

kerncraft/incore_model.py#L283

Ambiguous variable name 'l' (E741)

/home/runner/work/kerncraft/kerncraft/kerncraft/cacheprediction.py#L644

self.csim.print_stats(file=sio) pretty_stats = sio.getvalue() sio.close() self.pretty_stats = pretty_stats - def _align_iteration_with_cl_boundary(self, iteration, subtract=True): """Align iteration with cacheline boundary.""" # FIXME handle multiple datatypes element_size = self.kernel.datatypes_size[self.kernel.datatype] - cacheline_size = self.machine['cacheline size'] + cacheline_size = self.machine["cacheline size"] elements_per_cacheline = int(cacheline_size // element_size) # Gathering some loop information: inner_loop = list(self.kernel.get_loop_stack(subs_consts=True))[-1] - inner_increment = inner_loop['increment'] + inner_increment = inner_loop["increment"] # do this by aligning either writes (preferred) or reads # Assumption: writes (and reads) increase linearly o = self.kernel.compile_global_offsets(iteration=iteration)[0] if len(o[1]):

kerncraft/incore_model.py#L286

Block comment should start with '# ' (E265)

/home/runner/work/kerncraft/kerncraft/kerncraft/cacheprediction.py#L666

first_offset = min(o[1]) else: # we use reads first_offset = min(o[0]) - diff = first_offset - \ - (int(first_offset) >> self.csim.first_level.cl_bits << self.csim.first_level.cl_bits) + diff = first_offset - ( + int(first_offset) + >> self.csim.first_level.cl_bits + << self.csim.first_level.cl_bits + ) if diff == 0: return int(iteration) elif subtract: return int(iteration - (diff // element_size) // inner_increment) else: - return int(iteration + (elements_per_cacheline - diff // element_size) - // inner_increment) + return int( + iteration + + (elements_per_cacheline - diff // element_size) // inner_increment + ) def get_loads(self): """Return a list with number of loaded cache lines per memory hierarchy level.""" - return [self.stats[cache_level]['LOAD_count'] / self.first_dim_factor - for cache_level in range(len(self.machine['memory hierarchy']))] + return [ + self.stats[cache_level]["LOAD_count"] / self.first_dim_factor + for cache_level in range(len(self.machine["memory hierarchy"])) + ] def get_hits(self): """Return a list with number of hit cache lines per memory hierarchy level.""" - return [self.stats[cache_level]['HIT_count']/self.first_dim_factor - for cache_level in range(len(self.machine['memory hierarchy']))] + return [ + self.stats[cache_level]["HIT_count"] / self.first_dim_factor + for cache_level in range(len(self.machine["memory hierarchy"])) + ] def get_misses(self): """Return a list with number of missed cache lines per memory hierarchy level.""" - return [self.stats[cache_level]['MISS_count']/self.first_dim_factor - for cache_level in range(len(self.machine['memory hierarchy']))] - + return [ + self.stats[cache_level]["MISS_count"] / self.first_dim_factor + for cache_level in range(len(self.machine["memory hierarchy"])) + ] + def get_stores(self): """Return a list with number of stored cache lines per memory hierarchy level.""" - return [self.stats[cache_level]['STORE_count']/self.first_dim_factor - for cache_level in 
range(len(self.machine['memory hierarchy']))] + return [ + self.stats[cache_level]["STORE_count"] / self.first_dim_factor + for cache_level in range(len(self.machine["memory hierarchy"])) + ] def get_evicts(self): """Return a list with number of evicted cache lines per memory hierarchy level.""" - return [self.stats[cache_level]['EVICT_count']/self.first_dim_factor - for cache_level in range(len(self.machine['memory hierarchy']))] + return [ + self.stats[cache_level]["EVICT_count"] / self.first_dim_factor + for cache_level in range(len(self.machine["memory hierarchy"])) + ] def get_infos(self): """Return verbose information about the predictor.""" first_dim_factor = self.first_dim_factor - infos = {'memory hierarchy': [], 'cache stats': self.stats, - 'cachelines in stats': first_dim_factor, - 'cache pretty output': self.pretty_stats} - for cache_level, cache_info in list(enumerate(self.machine['memory hierarchy'])): - infos['memory hierarchy'].append({ - 'index': len(infos['memory hierarchy']), - 'level': '{}'.format(cache_info['level']), - 'total loads': self.stats[cache_level]['LOAD_byte']/first_dim_factor, - 'total misses': self.stats[cache_level]['MISS_byte']/first_dim_factor, - 'total hits': self.stats[cache_level]['HIT_byte']/first_dim_factor, - 'total stores': self.stats[cache_level]['STORE_byte']/first_dim_factor, - 'total evicts': self.stats[cache_level]['EVICT_byte']/first_dim_factor, - 'total lines load': self.stats[cache_level]['LOAD_count']/first_dim_factor, - 'total lines misses': self.stats[cache_level]['MISS_count']/first_dim_factor, - 'total lines hits': self.stats[cache_level]['HIT_count']/first_dim_factor, - 'total lines stores': self.stats[cache_level]['STORE_count']/first_dim_factor, - 'total lines evicts': self.stats[cache_level]['EVICT_count']/first_dim_factor, - 'cycles': None, - }) + infos = { + "memory hierarchy": [], + "cache stats": self.stats, + "cachelines in stats": first_dim_factor, + "cache pretty output": self.pretty_stats, + } + 
for cache_level, cache_info in list( + enumerate(self.machine["memory hierarchy"]) + ): + infos["memory hierarchy"].append( + { + "index": len(infos["memory hierarchy"]), + "level": "{}".format(cache_info["level"]), + "total loads": self.stats[cache_level]["LOAD_byte"] + / first_dim_factor, + "total misses": self.stats[cache_level]["MISS_byte"] + / first_dim_factor, + "total hits": self.stats[cache_level]["HIT_byte"] + / first_dim_factor, + "total stores": self.stats[cache_level]["STORE_byte"] + / first_dim_factor, + "total evicts": self.stats[cache_level]["EVICT_byte"] + / first_dim_factor, + "total lines load": self.stats[cache_level]["LOAD_count"] + / first_dim_factor, + "total lines misses": self.stats[cache_level]["MISS_count"] + / first_dim_factor, + "total lines hits": self.stats[cache_level]["HIT_count"] + / first_dim_factor, + "total lines stores": self.stats[cache_level]["STORE_count"] + / first_dim_factor, + "total lines evicts": self.stats[cache_level]["EVICT_count"] + / first_dim_factor, + "cycles": None, + } + ) return infos

kerncraft/incore_model.py#L287

Line too long (104 > 100 characters) (E501)

/home/runner/work/kerncraft/kerncraft/kerncraft/models/__init__.py#L10

from .layer_condition import LC from .base import PerformanceModel RooflineIACA = RooflineASM # for downward compatability -__all__ = ['ECM', 'ECMData', 'ECMCPU', 'RooflineFLOP', 'RooflineASM', 'Benchmark', 'LC', - 'PerformanceModel', 'RooflineIACA'] +__all__ = [ + "ECM", + "ECMData", + "ECMCPU", + "RooflineFLOP", + "RooflineASM", + "Benchmark", + "LC", + "PerformanceModel", + "RooflineIACA", +]

/home/runner/work/kerncraft/kerncraft/kerncraft/models/base.py#L1

#!/usr/bin/env python3 """Performance model base class.""" + class PerformanceModel: """Base class for performance models""" + # The name of the performance model (no abreviatation) name = "performance-model name" @classmethod def configure_arggroup(cls, parser):

/home/runner/work/kerncraft/kerncraft/kerncraft/incore_model.py#L28

from kerncraft import iaca_get, __version__ def itemsEqual(lst): - return lst[1:] == lst[:-1] + return lst[1:] == lst[:-1] class IncoreModel: - def __init__(self, isa='x86'): + def __init__(self, isa="x86"): isa class IACA(IncoreModel): pass

/home/runner/work/kerncraft/kerncraft/kerncraft/incore_model.py#L50

pass class ISA: @staticmethod - def get_isa(isa='x86'): - if isa.lower() == 'x86': + def get_isa(isa="x86"): + if isa.lower() == "x86": return x86 - elif isa.lower() == 'aarch64': + elif isa.lower() == "aarch64": return AArch64 @staticmethod def compute_block_metric(block): """Compute sortable metric to rank blocks."""

/home/runner/work/kerncraft/kerncraft/kerncraft/incore_model.py#L93

class x86(ISA): @staticmethod def compute_block_metric(block): """Return comparable metric on block information.""" - register_class_usage = {'zmm': [], 'ymm': [], 'xmm': []} + register_class_usage = {"zmm": [], "ymm": [], "xmm": []} packed_instruction_ctr, avx_instruction_ctr, instruction_ctr = 0, 0, 0 # Analyze code to determine metric for line in block: # Skip non-instruction lines (e.g., comments) if line.instruction is None:

/home/runner/work/kerncraft/kerncraft/kerncraft/incore_model.py#L111

if isinstance(op, RegisterOperand): if op.name.startswith(prefix): register_class_usage[prefix].append(op.name) # Identify and count packed and avx instructions - if re.match(r"^[v]?(movu|mul|add|sub|div|fmadd(132|213|231)?)[h]?p[ds]", - line.instruction): - if line.instruction.startswith('v'): + if re.match( + r"^[v]?(movu|mul|add|sub|div|fmadd(132|213|231)?)[h]?p[ds]", + line.instruction, + ): + if line.instruction.startswith("v"): avx_instruction_ctr += 1 packed_instruction_ctr += 1 # Build metric - return (packed_instruction_ctr, avx_instruction_ctr, - len(set(register_class_usage['zmm'])), - len(set(register_class_usage['ymm'])), - len(set(register_class_usage['xmm'])), - instruction_ctr) + return ( + packed_instruction_ctr, + avx_instruction_ctr, + len(set(register_class_usage["zmm"])), + len(set(register_class_usage["ymm"])), + len(set(register_class_usage["xmm"])), + instruction_ctr, + ) @staticmethod def get_pointer_increment(block): """Return pointer increment.""" increments = {}

/home/runner/work/kerncraft/kerncraft/kerncraft/incore_model.py#L138

# Skip non-instruction lines (e.g., comments) if line.instruction is None: continue # Extract destination references, ignoring var(%rip) - dst_mem_references = [op for op in line.semantic_operands["destination"] - if isinstance(op, MemoryOperand) and - (op.base is None or op.base.name != 'rip')] + dst_mem_references = [ + op + for op in line.semantic_operands["destination"] + if isinstance(op, MemoryOperand) + and (op.base is None or op.base.name != "rip") + ] if dst_mem_references: if not stores_only: stores_only = True mem_references = [] mem_references += dst_mem_references # If no destination references were found sofar, include source references if not stores_only: - mem_references += [op for op in line.semantic_operands["source"] - if isinstance(op, MemoryOperand)] - if re.match(r'^inc[bwlq]?$', line.instruction): + mem_references += [ + op + for op in line.semantic_operands["source"] + if isinstance(op, MemoryOperand) + ] + if re.match(r"^inc[bwlq]?$", line.instruction): reg = line.operands[0].name modified_registers.append(reg) increments[reg] = 1 - elif re.match(r'^add[bwlq]?$', line.instruction) and isinstance(line.operands[0], ImmediateOperand) \ - and isinstance(line.operands[1], RegisterOperand): + elif ( + re.match(r"^add[bwlq]?$", line.instruction) + and isinstance(line.operands[0], ImmediateOperand) + and isinstance(line.operands[1], RegisterOperand) + ): reg = line.operands[1].name increments[reg] = int(line.operands[0].value) modified_registers.append(reg) - elif re.match(r'^dec[bwlq]?$', line.instruction): + elif re.match(r"^dec[bwlq]?$", line.instruction): reg = line.operands[0].name modified_registers.append(reg) increments[reg] = -1 - elif re.match(r'^sub[bwlq]?$', line.instruction) and isinstance(line.operands[0], ImmediateOperand) \ - and isinstance(line.operands[1], RegisterOperand): + elif ( + re.match(r"^sub[bwlq]?$", line.instruction) + and isinstance(line.operands[0], ImmediateOperand) + and isinstance(line.operands[1], RegisterOperand) 
+ ): reg = line.operands[1].name modified_registers.append(reg) increments[reg] = -int(line.operands[0].value) - elif re.match(r'^lea[bwlq]?$', line.instruction): + elif re.match(r"^lea[bwlq]?$", line.instruction): # `lea 1(%r11), %r11` is the same as `add $1, %r11` - if line.operands[0].base is not None and \ - line.operands[0].base.name == line.operands[1].name and \ - line.operands[0].index is None: + if ( + line.operands[0].base is not None + and line.operands[0].base.name == line.operands[1].name + and line.operands[0].index is None + ): reg = line.operands[1].name modified_registers.append(reg) - increments[reg] = int( - line.operands[0].offset.value) + increments[reg] = int(line.operands[0].offset.value) # `lea 1(,%r11), %r11` is the same as `add $1, %r11` - if line.operands[0].index is not None and \ - line.operands[0].index.name == line.operands[1].name and \ - line.operands[0].base is None: + if ( + line.operands[0].index is not None + and line.operands[0].index.name == line.operands[1].name + and line.operands[0].base is None + ): reg = line.operands[1].name modified_registers.append(reg) - increments[reg] = int( - line.operands[0].offset.value) + increments[reg] = int(line.operands[0].offset.value) # deduce loop increment from memory index register pointer_increment = None # default -> can not decide, let user choose possible_idx_regs = None if mem_references: # we found memory references to work with - possible_idx_regs = list(set(increments.keys()).intersection( - set([mref.base.name for mref in mem_references if mref.base is not None] + - [mref.index.name for mref in mem_references if mref.index is not None]))) + possible_idx_regs = list( + set(increments.keys()).intersection( + set( + [ + mref.base.name + for mref in mem_references + if mref.base is not None + ] + + [ + mref.index.name + for mref in mem_references + if mref.index is not None + ] + ) + ) + ) for mref in mem_references: for reg in list(possible_idx_regs): # Only consider references 
with two registers, where one could be an # index if None not in [mref.base, mref.index]: # One needs to mach, other registers will be excluded - if not ((mref.base is not None and reg == mref.base.name) or - (mref.index is not None and reg == mref.index.name)): + if not ( + (mref.base is not None and reg == mref.base.name) + or (mref.index is not None and reg == mref.index.name) + ): # reg can not be it possible_idx_regs.remove(reg) idx_reg = None if len(possible_idx_regs) == 1: # good, exactly one register was found idx_reg = possible_idx_regs[0] elif possible_idx_regs and itemsEqual( - [increments[pidxreg] for pidxreg in possible_idx_regs]): + [increments[pidxreg] for pidxreg in possible_idx_regs] + ): # multiple were option found, but all have the same increment # use first match: idx_reg = possible_idx_regs[0] if idx_reg and modified_registers.count(idx_reg) == 1: - mem_scales = [mref.scale for mref in mem_references - if (mref.index is not None and idx_reg == mref.index.name) or - (mref.base is not None and idx_reg == mref.base.name)] + mem_scales = [ + mref.scale + for mref in mem_references + if (mref.index is not None and idx_reg == mref.index.name) + or (mref.base is not None and idx_reg == mref.base.name) + ] if itemsEqual(mem_scales): # good, all scales are equal pointer_increment = mem_scales[0] * increments[idx_reg]

/home/runner/work/kerncraft/kerncraft/kerncraft/incore_model.py#L244

for line in block: # Skip non-instruction lines (e.g., comments) if line.instruction is None: continue # Counting basic arithmetic insstructions - if line.instruction in ['fmul', 'fdiv', 'fadd', 'fsub']: + if line.instruction in ["fmul", "fdiv", "fadd", "fsub"]: farithmetic_ctr += 1 - elif line.instruction in ['add', 'sub', 'mul']: + elif line.instruction in ["add", "sub", "mul"]: iarithmetic_ctr += 1 # Counting use of vector registers for op in line.operands: - if isinstance(op, RegisterOperand) and op.prefix is not None and op.prefix in 'zv': + if ( + isinstance(op, RegisterOperand) + and op.prefix is not None + and op.prefix in "zv" + ): vector_ctr += 1 - #if isinstance(op, RegisterOperand) and 'range' in op.register and op.register.range[0].prefix in 'zv': + # if isinstance(op, RegisterOperand) and 'range' in op.register and op.register.range[0].prefix in 'zv': # vector_ctr += 1 # Count all instructions instruction_ctr += 1 # Build metric return (vector_ctr, farithmetic_ctr, iarithmetic_ctr, instruction_ctr) - + @staticmethod def normalize_to_register_str(register): if register is None: return None prefix = register.prefix - if prefix in 'wx': - prefix = 'x' + if prefix in "wx": + prefix = "x" return prefix + register.name @staticmethod def get_pointer_increment(block): """Return pointer increment."""

Run linters

The following actions use node12, which is deprecated and will be forced to run on node16: actions/checkout@v2, actions/setup-python@v1, wearerequired/lint-action@v1. For more info: https://github.blog/changelog/2023-06-13-github-actions-all-actions-will-run-on-node16-instead-of-node12-by-default/

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Merged master #98

Summary

Merged master #98

Jobs

Run details

lint.yml

Annotations