diff --git a/.gitignore b/.gitignore
index 2faf325a44c..9aeb9c57de8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -118,6 +118,9 @@ Untitled*.ipynb
 # GitLab CI generated files
 gitlab-build-docker.log
 
+# airspeed velocity
+/.asv
+
 # Byte-compiled / optimized / DLL files
 __pycache__/
 **/__pycache__
diff --git a/asv.conf.json b/asv.conf.json
new file mode 100644
index 00000000000..abf30504242
--- /dev/null
+++ b/asv.conf.json
@@ -0,0 +1,18 @@
+{
+    "version": 1,
+    "project": "sagemath",
+    "project_url": "https://sagemath.org",
+    "repo": ".",
+    "environment_type": "conda",
+    "matrix": {},
+    "conda_channels": ["conda-forge", "default"],
+    "build_command": [
+        "echo 'Rebuilding sage for benchmarks is not supported yet. Use `asv run --environment existing --quick` in an environment with a built SageMath instead. Add --set-commit-hash `git rev-parse HEAD` to store timings in the database in .asv/. Note that to actually make this work, the entire installation workflow with conda would need to be added to asv.conf.json and the matrix there populated.'"
+    ],
+    "install_command": [],
+    "uninstall_command": [],
+    "env_dir": ".asv/env",
+    "results_dir": ".asv/results",
+    "html_dir": ".asv/html",
+    "benchmark_dir": "src/sage/benchmark/"
+}
diff --git a/src/bin/sage-runtests b/src/bin/sage-runtests
index ed1e56953ff..2881df8eeeb 100755
--- a/src/bin/sage-runtests
+++ b/src/bin/sage-runtests
@@ -97,6 +97,8 @@ if __name__ == "__main__":
                         help="path to a json dictionary for timings and failure status for each file from previous runs; it will be updated in this run")
     parser.add_argument("--baseline_stats_path", "--baseline-stats-path", default=None,
                         help="path to a json dictionary for timings and failure status for each file, to be used as a baseline; it will not be updated")
+    parser.add_argument("--asv_stats_path", "--asv-stats-path", default=None,
+                        help="path to a json dictionary for storing individual doctest timings for use with airspeed velocity speed regression testing")
 
     class GCAction(argparse.Action):
         def __call__(self, parser, namespace, values, option_string=None):
diff --git a/src/doc/en/developer/doctesting.rst b/src/doc/en/developer/doctesting.rst
index 9619460c30f..a8e79f98b9f 100644
--- a/src/doc/en/developer/doctesting.rst
+++ b/src/doc/en/developer/doctesting.rst
@@ -1338,3 +1338,7 @@ utilized most efficiently)::
     Sorting sources by runtime so that slower doctests are run first....
     Doctesting 2067 files using 2 threads.
     ...
+
+To record the timings of the individual doctests in a JSON file, use the
+``--asv_stats_path`` flag. These statistics can then be picked up by the
+airspeed velocity code to generate interactive speed regression webpages.
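Pieced together from the commands quoted above (the ``--asv_stats_path`` flag, the
``SAGE_ASV_STATS`` variable used by ``src/sage/benchmark/docstring.py`` below, and
the hint printed by ``build_command`` in ``asv.conf.json``), the intended workflow
is roughly the following; ``asv-stats.json`` is only a placeholder name::

    # collect per-doctest timings in a regular doctest run
    ./sage -t --all --asv_stats_path=asv-stats.json
    # report the collected timings to airspeed velocity without rebuilding Sage
    SAGE_ASV_STATS=asv-stats.json asv run --environment existing --quick \
        --set-commit-hash `git rev-parse HEAD`
    # render the interactive regression pages into .asv/html
    asv publish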
diff --git a/src/sage/benchmark/__init__.py b/src/sage/benchmark/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/src/sage/benchmark/docstring.py b/src/sage/benchmark/docstring.py
new file mode 100644
index 00000000000..5c873d9faa2
--- /dev/null
+++ b/src/sage/benchmark/docstring.py
@@ -0,0 +1,175 @@
+r"""
+Benchmarks for SageMath
+
+This module searches for doctest timings created with ``sage -t
+--asv_stats_path=$SAGE_ASV_STATS`` if the ``$SAGE_ASV_STATS`` environment
+variable is set. For each timing, it dynamically creates a method in a class in
+this module to report that timing to airspeed velocity as configured in
+``asv.conf.json``.
+
+EXAMPLES:
+
+Since this variable is usually not set, this module does nothing::
+
+    sage: import sage.benchmark
+    sage: dir(sage.benchmark)
+    ['__builtins__',
+     '__cached__',
+     '__doc__',
+     '__file__',
+     '__loader__',
+     '__name__',
+     '__package__',
+     '__path__',
+     '__spec__',
+     'create_benchmark_class',
+     'create_track_method',
+     'create_trackers_from_doctest_stats',
+     'json',
+     'os']
+
+"""
+# ****************************************************************************
+#       Copyright (C) 2023 Julian Rueth
+#
+#  Distributed under the terms of the GNU General Public License (GPL)
+#  as published by the Free Software Foundation; either version 2 of
+#  the License, or (at your option) any later version.
+#
+#                  https://www.gnu.org/licenses/
+# ****************************************************************************
+
+import json
+import os
+
+# TODO: Explain here what this is for and put the replacement logic in a central space
+# TODO: Can we load a plugin that undoes this replacement on the client?
+DOT = "․"
+
+
+def create_trackers_from_doctest_stats():
+    r"""
+    Create asv-compatible benchmark classes that contain a ``track_…`` method
+    for each doctest block reported in the log produced by ``sage -t
+    --asv_stats_path=$SAGE_ASV_STATS``.
+
+    We export these wrappers in this module. They get picked up by asv, which
+    then runs them as "benchmarks". Instead of running the doctests again to
+    benchmark them, we just report the timings that were collected in a
+    previous doctest run.
+
+    EXAMPLES:
+
+    When ``SAGE_ASV_STATS`` is not set, this creates an empty dict::
+
+        sage: from sage.benchmark import create_trackers_from_doctest_stats
+        sage: create_trackers_from_doctest_stats()
+        {}
+
+    """
+    stats_file = os.environ.get("SAGE_ASV_STATS", None)
+    if not stats_file:
+        return {}
+
+    runs_by_module = json.load(open(stats_file))
+
+    return {
+        module.replace(".", DOT): create_benchmark_class(module, runs)
+        for (module, runs) in runs_by_module.items()
+    }
+
+
+def create_benchmark_class(name, runs):
+    r"""
+    Return an asv-compatible benchmark class called ``name`` that pretends to
+    perform the ``runs`` representing all the doctests of a module.
+
+    EXAMPLES::
+
+        sage: from sage.benchmark import create_benchmark_class
+        sage: benchmark = create_benchmark_class("sage.rings.padics.padic_generic", [
+        ....:     ["", [], "pass", 1.337],
+        ....:     ["pAdicGeneric.some_elements", ["long", "meataxe"], "pass\npass", 13.37]
+        ....: ])
+        sage: dir(benchmark)
+        [...
+         'track_[sage.rings.padics.padic_generic]',
+         'track_pAdicGeneric.some_elements[long,meataxe]']
+
+    """
+    class Benchmark:
+        pass
+
+    # asv groups entries by top-level package name, so everything goes into "sage".
+    # To break this logic, we replace "." with a One Dot Leader character that
+    # looks similar.
+    Benchmark.__name__ = name.replace(".", DOT)
+
+    for identifier, flags, source, time in runs:
+        method = "track_" + (identifier or f"[{name}]").replace(".", DOT)
+        if flags:
+            method += "[" + ",".join(flags) + "]"
+
+        if hasattr(Benchmark, method):
+            # raise NotImplementedError(f"cannot merge duplicate results for {identifier} with {flags}")
+            print("duplicate results")
+            continue
+
+        setattr(Benchmark, method, create_track_method(name, identifier, flags, source, time))
+
+    return Benchmark
+
+
+def create_track_method(module, identifier, flags, source, time):
+    r"""
+    Return a function that can be added as a method of a benchmark class.
+
+    The method returns tracking results that will be picked up by asv as timing
+    benchmark results.
+
+    EXAMPLES::
+
+        sage: from sage.benchmark import create_track_method
+        sage: method = create_track_method("sage.rings.padics.padic_generic", "", [], "pass", 1.337)
+        sage: method.pretty_name
+        '[sage.rings.padics.padic_generic]'
+        sage: method.pretty_source
+        'pass'
+        sage: method(None)
+        1337.00000000000
+
+    ::
+
+        sage: method = create_track_method("sage.rings.padics.padic_generic", "pAdicGeneric.some_elements", ["long", "meataxe"], "pass\npass", 13.37)
+        sage: method.pretty_name
+        'pAdicGeneric.some_elements'
+        sage: print(method.pretty_source)
+        # with long, meataxe
+        pass
+        pass
+        sage: method(None)
+        13370.0000000000
+
+    """
+    def method(self):
+        return time * 1e3
+
+    method.unit = "ms"
+
+    # We do not need to run this "benchmark" more than once; we are just
+    # reporting the same static data every time.
+    method.repeat = 1
+    method.number = 1
+    method.min_run_count = 1
+
+    # The doctest of a module itself has no identifier, so we write the module name instead.
+    method.pretty_name = identifier or f"[{module}]"
+
+    method.pretty_source = source
+    if flags:
+        method.pretty_source = f"# with {', '.join(flags)}\n" + method.pretty_source
+
+    return method
+
+
+locals().update(create_trackers_from_doctest_stats())
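The file behind ``SAGE_ASV_STATS`` is the JSON dictionary written out via
``--asv_stats_path`` (see ``save_asv_stats`` and ``DocTestReporter.report``
further down). Its layout is only implied by the code, so the following is a
sketch with made-up numbers, matching the ``create_benchmark_class`` example
above::

    # What json.load() in create_trackers_from_doctest_stats() is expected to return:
    # module name -> list of [identifier, optional flags, doctest source, walltime in seconds].
    # An empty identifier stands for the doctests of the module itself.
    runs_by_module = {
        "sage.rings.padics.padic_generic": [
            ["", [], "pass", 1.337],
            ["pAdicGeneric.some_elements", ["long", "meataxe"], "pass\npass", 13.37],
        ],
    }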
diff --git a/src/sage/doctest/control.py b/src/sage/doctest/control.py
index aa1268a6a27..4fc6084a96f 100644
--- a/src/sage/doctest/control.py
+++ b/src/sage/doctest/control.py
@@ -130,6 +130,8 @@ def __init__(self, **kwds):
         self.show_skipped = False
         self.target_walltime = -1
         self.baseline_stats_path = None
+        self.asv_stats_path = None
+        self.use_asv = False
 
         # sage-runtests contains more optional tags. Technically, adding
         # auto_optional_tags here is redundant, since that is added
@@ -399,6 +401,7 @@ def __init__(self, options, args):
             options.nthreads = 1
         if options.verbose:
             options.show_skipped = True
+        options.use_asv = (options.asv_stats_path is not None)
 
         options.disabled_optional = set()
         if isinstance(options.optional, str):
@@ -478,6 +481,7 @@ def __init__(self, options, args):
         self.baseline_stats = {}
         if options.baseline_stats_path:
             self.load_baseline_stats(options.baseline_stats_path)
+        self.asv_stats = {}
         self._init_warn_long()
 
         if self.options.random_seed is None:
@@ -692,6 +696,30 @@ def save_stats(self, filename):
         with atomic_write(filename) as stats_file:
             json.dump(self.stats, stats_file)
 
+    def save_asv_stats(self, filename):
+        """
+        Save individual doctest stats from the most recent run as a JSON file,
+        for use in speed regression testing.
+
+        WARNING: This function overwrites the file.
+
+        EXAMPLES::
+
+            sage: from sage.doctest.control import DocTestDefaults, DocTestController
+            sage: DC = DocTestController(DocTestDefaults(), [])
+            sage: DC.asv_stats['sage.doctest.control'] = {'walltime':1.0r}
+            sage: filename = tmp_filename()
+            sage: DC.save_asv_stats(filename)
+            sage: import json
+            sage: with open(filename) as f:
+            ....:     D = json.load(f)
+            sage: D['sage.doctest.control']
+            {'walltime': 1.0}
+        """
+        from sage.misc.temporary_file import atomic_write
+        with atomic_write(filename) as stats_file:
+            json.dump(self.asv_stats, stats_file)
+
     def log(self, s, end="\n"):
         """
         Log the string ``s + end`` (where ``end`` is a newline by default)
@@ -1110,6 +1138,9 @@ def cleanup(self, final=True):
         """
         self.stats.update(self.reporter.stats)
         self.save_stats(self.options.stats_path)
+        if self.options.asv_stats_path:
+            self.asv_stats.update(self.reporter.asv_stats)
+            self.save_asv_stats(self.options.asv_stats_path)
         # Close the logfile
         if final and self.logfile is not None:
             self.logfile.close()
diff --git a/src/sage/doctest/forker.py b/src/sage/doctest/forker.py
index fe58e2bde3e..1252b97e7c1 100644
--- a/src/sage/doctest/forker.py
+++ b/src/sage/doctest/forker.py
@@ -505,6 +505,8 @@ def __init__(self, *args, **kwds):
         - ``verbose`` -- boolean, determines whether verbose printing
           is enabled.
 
+        - ``sage_options`` -- dictionary with Sage options
+
         - ``optionflags`` -- Controls the comparison with the expected
           output. See :mod:`testmod` for more information.
 
@@ -790,6 +792,13 @@ def compiler(example):
         # Restore the option flags (in case they were modified)
         self.optionflags = original_optionflags
 
+        if self.options.use_asv:
+            if failures == 0:
+                # We record the timing for this test for speed regression purposes
+                test.walltime = sum(example.walltime for example in test.examples)
+            else:
+                test.walltime = -1
+
         # Record and return the number of failures and tries.
         self._DocTestRunner__record_outcome(test, failures, tries)
         self.total_walltime_skips += walltime_skips
@@ -1533,14 +1542,16 @@ def report_unexpected_exception(self, out, test, example, exc_info):
             self._fakeout.start_spoofing()
             return returnval
 
-    def update_results(self, D):
+    def update_results(self, D, doctests):
         """
         When returning results we pick out the results of interest
         since many attributes are not pickleable.
 
         INPUT:
 
-        - ``D`` -- a dictionary to update with cputime and walltime
+        - ``D`` -- a dictionary to update with cputime and walltime (and ``asv_times`` if that option is set)
+
+        - ``doctests`` -- a list of doctests
 
         OUTPUT:
 
@@ -1564,7 +1575,7 @@ def update_results(self, D):
             TestResults(failed=0, attempted=4)
             sage: T.stop().annotate(DTR)
             sage: D = DictAsObject({'cputime':[],'walltime':[],'err':None})
-            sage: DTR.update_results(D)
+            sage: DTR.update_results(D, doctests)
             0
             sage: sorted(list(D.items()))
             [('cputime', [...]), ('err', None), ('failures', 0), ('tests', 4), ('walltime', [...]), ('walltime_skips', 0)]
@@ -1576,6 +1587,37 @@ def update_results(self, D):
                 D[key].append(self.__dict__[key])
         D['tests'] = self.total_performed_tests
         D['walltime_skips'] = self.total_walltime_skips
+        if self.options.use_asv:
+
+            def asv_label(test):
+                """
+                This information is hashed by asv to determine continuity for speed regressions.
+                """
+                name = test.name
+                code = "".join([example.sage_source for example in test.examples])
+                opts = set()
+                for example in test.examples:
+                    # The added sig_on_count() test has no tags
+                    if hasattr(example, "optional_tags"):
+                        opts = opts.union(example.optional_tags)
+                olist = []
+                if "long" in opts:
+                    olist = ["long"]
+                    opts.remove("long")
+                olist.extend(sorted(opts))
+
+                import tempfile
+                tmp, fname = tempfile.mkstemp()
+                import os
+                tmp = os.fdopen(tmp, 'w')
+                tmp.write(f"{name}|{','.join(olist)}|{code}")
+                tmp.close()
+                return fname
+
+            D['asv_times'] = {}
+            for test in doctests:
+                if test.walltime > 0:  # -1 indicates a failure, 0 means nothing was actually run
+                    D['asv_times'][asv_label(test)] = test.walltime
         if hasattr(self, 'failures'):
             D['failures'] = self.failures
         return self.failures
@@ -2389,8 +2431,9 @@ class DocTestTask():
         sage: sorted(results.keys())
         ['cputime', 'err', 'failures', 'optionals', 'tests', 'walltime', 'walltime_skips']
     """
 
-    extra_globals = {}
+
+    runner = SageDocTestRunner
     """
     Extra objects to place in the global namespace in which tests are run.
     Normally this should be empty but there are special cases where it may
@@ -2467,7 +2510,7 @@ def __call__(self, options, outtmpfile=None, msgfile=None, result_queue=None):
         """
         result = None
         try:
-            runner = SageDocTestRunner(
+            runner = DocTestTask.runner(
                 SageOutputChecker(),
                 verbose=options.verbose,
                 outtmpfile=outtmpfile,
@@ -2487,7 +2530,7 @@ def __call__(self, options, outtmpfile=None, msgfile=None, result_queue=None):
             for it in range(N):
                 doctests, extras = self._run(runner, options, results)
                 runner.summarize(options.verbose)
-                if runner.update_results(results):
+                if runner.update_results(results, doctests):
                     break
 
             if extras['tab']:
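The hand-off from ``update_results`` to the reporter goes through small
temporary files whose layout is only implied by ``asv_label`` above, so here is
a sketch with a made-up doctest block; splitting on the first two ``|`` in
``reporting.py`` keeps any ``|`` inside the doctest source intact::

    # What asv_label() writes for one doctest block: the qualified name, the sorted
    # optional flags ("long" first), and the doctest source, separated by "|".
    label = "sage.rings.padics.padic_generic.pAdicGeneric.some_elements|long,meataxe|pass\npass"

    # What DocTestReporter.report() later recovers from the file, before stripping
    # the module name off the front.
    name, opts, code = label.split("|", 2)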
diff --git a/src/sage/doctest/parsing.py b/src/sage/doctest/parsing.py
index 26d793b96bf..e4cc04f4b21 100644
--- a/src/sage/doctest/parsing.py
+++ b/src/sage/doctest/parsing.py
@@ -633,6 +633,7 @@ def parse(self, string, *args):
         for item in res:
             if isinstance(item, doctest.Example):
                 optional_tags = parse_optional_tags(item.source)
+                item.optional_tags = optional_tags  # for asv
                 if optional_tags:
                     for tag in optional_tags:
                         self.optionals[tag] += 1
diff --git a/src/sage/doctest/reporting.py b/src/sage/doctest/reporting.py
index 51c6ea55626..5f5a43036d1 100644
--- a/src/sage/doctest/reporting.py
+++ b/src/sage/doctest/reporting.py
@@ -113,6 +113,7 @@ def __init__(self, controller):
         self.postscript = dict(lines=[], cputime=0, walltime=0)
         self.sources_completed = 0
         self.stats = {}
+        self.asv_stats = {}
         self.error_status = 0
 
     def were_doctests_with_optional_tag_run(self, tag):
@@ -381,6 +382,7 @@ def report(self, source, timeout, return_code, results, output, pid=None):
         postscript = self.postscript
         stats = self.stats
         basename = source.basename
+        options = self.controller.options
         if self.controller.baseline_stats:
             the_baseline_stats = self.controller.baseline_stats.get(basename, {})
         else:
@@ -500,6 +502,21 @@ def report(self, source, timeout, return_code, results, output, pid=None):
                         stats[basename] = dict(failed=True, walltime=wall)
                     else:
                         stats[basename] = dict(walltime=wall)
+                    if options.use_asv:
+                        asv = []
+                        for fname, time in result_dict.asv_times.items():
+                            with open(fname) as label:
+                                name, opts, code = label.read().split("|", 2)
+                            if name.startswith(basename):
+                                name = name[len(basename):]
+                                if name.startswith("."):
+                                    name = name[1:]
+                            if opts:
+                                opts = opts.split(",")
+                            else:
+                                opts = []
+                            asv.append([name, opts, code, time])
+                        self.asv_stats[basename] = asv
                 postscript['cputime'] += cpu
                 postscript['walltime'] += wall
 
@@ -510,29 +527,29 @@ def report(self, source, timeout, return_code, results, output, pid=None):
             for tag in sorted(optionals):
                 nskipped = optionals[tag]
                 if tag == "long time":
-                    if not self.controller.options.long:
-                        if self.controller.options.show_skipped:
+                    if not options.long:
+                        if options.show_skipped:
                             log("    %s not run"%(count_noun(nskipped, "long test")))
                 elif tag == "not tested":
-                    if self.controller.options.show_skipped:
+                    if options.show_skipped:
                         log("    %s not run"%(count_noun(nskipped, "not tested test")))
                 elif tag == "not implemented":
-                    if self.controller.options.show_skipped:
+                    if options.show_skipped:
                         log("    %s for not implemented functionality not run"%(count_noun(nskipped, "test")))
                 else:
                     if not self.were_doctests_with_optional_tag_run(tag):
                         if tag == "bug":
-                            if self.controller.options.show_skipped:
+                            if options.show_skipped:
                                 log("    %s not run due to known bugs"%(count_noun(nskipped, "test")))
                         elif tag == "":
-                            if self.controller.options.show_skipped:
+                            if options.show_skipped:
                                 log("    %s not run"%(count_noun(nskipped, "unlabeled test")))
                         else:
-                            if self.controller.options.show_skipped:
+                            if options.show_skipped:
                                 log("    %s not run"%(count_noun(nskipped, tag + " test")))
 
             nskipped = result_dict.walltime_skips
-            if self.controller.options.show_skipped:
+            if options.show_skipped:
                 log("    %s not run because we ran out of time"%(count_noun(nskipped, "test")))
 
             if nskipped != 0:
@@ -547,7 +564,7 @@ def report(self, source, timeout, return_code, results, output, pid=None):
                 total = "%d%% of tests run"%(round(100*ntests_run/float(ntests_run + nskipped)))
             else:
                 total = count_noun(ntests, "test")
-            if not (self.controller.options.only_errors and not f):
+            if not (options.only_errors and not f):
                 log("    [%s, %s%.2f s]" % (total, "%s, "%(count_noun(f, "failure")) if f else "", wall))
 
             self.sources_completed += 1
diff --git a/src/sage/doctest/sources.py b/src/sage/doctest/sources.py
index 6321e73a5e8..573bb2a491e 100644
--- a/src/sage/doctest/sources.py
+++ b/src/sage/doctest/sources.py
@@ -35,7 +35,8 @@
 
 # Python file parsing
 triple_quotes = re.compile(r"\s*[rRuU]*((''')|(\"\"\"))")
-name_regex = re.compile(r".*\s(\w+)([(].*)?:")
+paren_name_regex = re.compile(r".*\s(\w+)\s*[(].*:")
+noparen_name_regex = re.compile(r".*\s(\w+)\s*:")
 
 # LaTeX file parsing
 begin_verb = re.compile(r"\s*\\begin{verbatim}")
@@ -1100,14 +1101,19 @@ def starting_docstring(self, line):
             # quotematch wasn't None, but then we mishandle classes
             # that don't have a docstring.
             if not self.code_wrapping and self.last_indent >= 0 and indent > self.last_indent:
-                name = name_regex.match(self.last_line)
+                if "(" in self.last_line:
+                    name = paren_name_regex.match(self.last_line.replace("\n", ""))
+                else:
+                    name = noparen_name_regex.match(self.last_line.replace("\n", ""))
                 if name:
                     name = name.groups()[0]
                     self.qualified_name[indent] = name
             elif quotematch:
                 self.qualified_name[indent] = '?'
         self._update_quotetype(line)
-        if line[indent] != '#' and not self.code_wrapping:
+        if self.code_wrapping:
+            self.last_line += line
+        elif line[indent] != '#':
             self.last_line, self.last_indent = line, indent
         self.code_wrapping = not (self.paren_count == self.bracket_count == self.curly_count == 0)
         return quotematch
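The split of the old ``name_regex`` into ``paren_name_regex`` and
``noparen_name_regex`` can be exercised on its own; the sample lines below are
made up, only the regular expressions are taken from the patch::

    import re

    paren_name_regex = re.compile(r".*\s(\w+)\s*[(].*:")
    noparen_name_regex = re.compile(r".*\s(\w+)\s*:")

    # A definition with an argument list is handled by the paren variant ...
    paren_name_regex.match("    def some_elements(self, n=3):").groups()[0]   # 'some_elements'
    # ... while, e.g., a class statement without parentheses needs the other one.
    noparen_name_regex.match("class pAdicGeneric:").groups()[0]               # 'pAdicGeneric'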