Wip

Luni-4 · Luni-4 · commit be0c6a27c7b7 · 2021-04-19T17:50:38.000+02:00
diff --git a/split-minimal-tests.py b/split-minimal-tests.py
@@ -0,0 +1,126 @@
+#!/usr/bin/env python3
+
+"""split-minimal-tests
+This script splits HTML minimal-tests, produced by a software called
+`json-minimal-tests`, into distinct directories depending on metric differences.
+
+Usage:
+
+./split-minimal-tests.py -i INPUT_DIR -o OUTPUT_DIR_NAME [-t MT_THRESHOLD]
+
+NOTE: OUTPUT_DIR_NAME is the name of the output directory.
+This directory will contain one subdirectory per metric that presents
+differences, each named after that metric, or be empty if no metric
+differences are found.
+MT_THRESHOLD determines the maximum number of minimal tests copied
+for each metric.
+"""
+
+import argparse
+import itertools
+import pathlib
+import re
+import shutil
+import typing as T
+
+# Names of the metrics searched for in the minimal tests.
+# TODO: Implement a command into rust-code-analysis-cli that returns all
+# computed metrics https://github.com/mozilla/rust-code-analysis/issues/478
+METRICS = [
+    "cognitive",
+    "sloc",
+    "ploc",
+    "lloc",
+    "cloc",
+    "blank",
+    "cyclomatic",
+    "halstead",
+    "nom",
+    "nexits",
+    "nargs",
+]
+
+
+def main() -> None:
+    """Group HTML minimal tests into one directory per differing metric.
+
+    Reads every ``*.html`` file of the input directory and copies it into
+    ``OUTPUT_DIR_NAME/<metric>`` for each metric of ``METRICS`` whose
+    ``.<metric>`` marker occurs before the first ``<pre>`` tag of the file.
+    """
+    parser = argparse.ArgumentParser(
+        prog="split-minimal-tests",
+        description="This tool splits HTML minimal-tests, produced by "
+        "a software called `json-minimal-tests`, into distinct directories "
+        "depending on metric differences.",
+        epilog="The source code of this program can be found on "
+        "GitHub at https://github.com/mozilla/rust-code-analysis",
+    )
+
+    # Arguments
+    parser.add_argument(
+        "--input",
+        "-i",
+        type=pathlib.Path,
+        required=True,
+        help="Input directory containing HTML minimal tests.",
+    )
+
+    parser.add_argument(
+        "--output-name",
+        "-o",
+        type=str,
+        required=True,
+        help="Name of the output directory.",
+    )
+
+    # Optional arguments
+    parser.add_argument(
+        "--threshold",
+        "-t",
+        type=int,
+        help="Maximum number of considered minimal tests for a metric.",
+    )
+
+    args = parser.parse_args()
+
+    # Create output directory
+    output_dir = pathlib.Path(args.output_name)
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    # Group 1 is set only when `.<metric>` is found before the first <pre>
+    # tag; otherwise the leftmost match is the <pre> alternative. Raw string:
+    # `\.` and `\s` are invalid escapes in a plain f-string.
+    patterns = {
+        metric_name: re.compile(rf"(\.{metric_name})|<pre>(?:(?!<pre\s?>).)*")
+        for metric_name in METRICS
+    }
+
+    # Save files associated to each metric
+    metrics_saver = {metric_name: [] for metric_name in METRICS}
+
+    # Sort the inputs so that the files kept by the threshold are reproducible
+    for path in sorted(args.input.glob("*.html")):
+        # Remove newlines once per file: `.` does not match them
+        file_str = path.read_text(encoding="utf-8").replace("\n", "")
+
+        for metric_name, metric_files in metrics_saver.items():
+            # No match at all (neither metric nor <pre>) returns None
+            match = patterns[metric_name].search(file_str)
+            if match and match.group(1):
+                metric_files.append(path)
+
+    for metric_name, metric_files in metrics_saver.items():
+        if not metric_files:
+            continue
+
+        metric_path = output_dir / metric_name
+        metric_path.mkdir(parents=True, exist_ok=True)
+
+        # A missing (or zero) threshold keeps every file
+        for path in metric_files[: args.threshold or None]:
+            shutil.copy(path, metric_path)
+
+
+if __name__ == "__main__":  # Run the tool only when executed as a script
+    main()