Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

AWS selection and scrambling #98

Merged
merged 9 commits into from
Jun 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 13 additions & 3 deletions smtcomp/defs.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ def __int__(self) -> int:
def of_int(cls, id: int) -> EnumAutoInt:
return cls.__ordered__[id]

@classmethod
def name_of_int(cls, id: int) -> str:
    """Return the declared name of the enum member whose ordinal is *id*."""
    member = cls.__ordered__[id]
    return member.name

def __hash__(self) -> int:
    # Members hash by their stable integer id.
    return self.id

Expand Down Expand Up @@ -1363,11 +1367,11 @@ class Config:
"""Prioritize triviality as much as possible for testing purpose.
Look for simple problems instead of hard one"""

nyse_seed = None
nyse_seed = 17817
"""The integer part of one hundred times the opening value of the New York Stock Exchange Composite Index on the first day the exchange is open on or after the date specified in nyse_date"""
nyse_date = date(year=2024, month=6, day=10)
nyse_date = date(year=2024, month=6, day=17)

aws_timelimit_hard = 600
aws_timelimit_hard = 180
"""
Time in seconds upon which benchmarks are considered hards
"""
Expand All @@ -1376,6 +1380,12 @@ class Config:
Number of selected benchmarks
"""

removed_benchmarks = [{
"logic": int(Logic.QF_LIA),
"family": "20210219-Dartagnan/ConcurrencySafety-Main",
"name": "39_rand_lock_p0_vs-O0.smt2",
}]

def __init__(self, data: Path | None) -> None:
if data is not None and data.name != "data":
raise ValueError("Consistency check, data directory must be named 'data'")
Expand Down
177 changes: 132 additions & 45 deletions smtcomp/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,10 @@
from rich.console import Console
import smtcomp.test_solver as test_solver
from concurrent.futures import ThreadPoolExecutor

from smtcomp.benchexec import get_suffix
from smtcomp.scramble_benchmarks import benchmark_files_dir
from smtcomp.utils import *
import re

app = typer.Typer()

Expand Down Expand Up @@ -333,52 +336,114 @@ def show_sq_selection_stats(
.sort(by="logic")
.collect()
)
table = Table(title="Statistics on the benchmark selection for single query")

table.add_column("Logic", justify="left", style="cyan", no_wrap=True)
table.add_column("trivial", justify="right", style="green")
table.add_column("not trivial", justify="right", style="orange_red1")
table.add_column("never compet.", justify="right", style="magenta")
table.add_column("new", justify="right", style="magenta1")
table.add_column("selected", justify="right", style="green3")
table.add_column("selected sat", justify="right", style="green4")
table.add_column("selected unsat", justify="right", style="green4")
table.add_column("selected already run", justify="right", style="green4")

used_logics = defs.logic_used_for_track(defs.Track.SingleQuery)
for d in b3.to_dicts():
logic = defs.Logic.of_int(d["logic"])

def print_logic(id: int) -> str:
logic = defs.Logic.of_int(id)
if logic in used_logics:
slogic = f"{str(logic)}"
return f"{str(logic)}"
else:
slogic = f"[bold red]{str(logic)}[/bold red]"

table.add_row(
slogic,
str(d["trivial"]),
str(d["not_trivial"]),
str(d["old_never_ran"]),
str(d["new"]),
str(d["selected"]),
str(d["selected_sat"]),
str(d["selected_unsat"]),
str(d["selected_already_run"]),
return f"[bold red]{str(logic)}[/bold red]"

rich_print_pl(
"Statistics on the benchmark selection for single query",
b3,
Col("logic", "Logic", footer="Total", justify="left", style="cyan", no_wrap=True, custom=print_logic),
Col("trivial", "trivial", justify="right", style="green"),
Col("not_trivial", "not trivial", justify="right", style="orange_red1"),
Col("old_never_ran", "never compet.", justify="right", style="magenta"),
Col("new", "new", justify="right", style="magenta1"),
Col("selected", "selected", justify="right", style="green3"),
Col("selected_sat", "selected sat", justify="right", style="green4"),
Col("selected_unsat", "selected unsat", justify="right", style="green4"),
Col("selected_already_run", "selected already run", justify="right", style="green4"),
)


@app.command(rich_help_panel=selection_panel)
def show_aws_stats(
data: Path,
) -> None:
"""
Show statistics on the benchmarks selected for aws tracks
"""
config = defs.Config(data)
benchmarks = smtcomp.selection.helper_aws_selection(config)
solvers = smtcomp.selection.solver_competing_logics(config).group_by(["track", "logic"]).agg("solver")
b3 = (
add_columns(
benchmarks.group_by(["track", "logic"]).agg(
n=pl.len(),
hard=(pl.col("hard") == True).sum(),
unsolved=(pl.col("unsolved") == True).sum(),
selected=(pl.col("selected") == True).sum(),
hard_selected=((pl.col("hard") == True) & (pl.col("selected") == True)).sum(),
unsolved_selected=((pl.col("unsolved") == True) & (pl.col("selected") == True)).sum(),
),
solvers,
on=["track", "logic"],
defaults={"solver": []},
)
.sort(by=["track", "logic"])
.collect()
)

table.add_section()
table.add_row(
"Total",
str(b3["trivial"].sum()),
str(b3["not_trivial"].sum()),
str(b3["old_never_ran"].sum()),
str(b3["new"].sum()),
str(b3["selected"].sum()),
str(b3["selected_sat"].sum()),
str(b3["selected_unsat"].sum()),
str(b3["selected_already_run"].sum()),
rich_print_pl(
"Statistics on the benchmark selection for AWS tracks",
b3,
Col(
"track",
"Track",
footer="Total",
justify="left",
style="cyan",
no_wrap=True,
custom=defs.Track.name_of_int,
),
Col(
"logic",
"Logic",
footer="",
justify="left",
style="cyan",
no_wrap=True,
custom=defs.Logic.name_of_int,
),
Col("n", "Benchmarks", justify="right", style="magenta"),
Col("hard", "Hard", justify="right", style="green"),
Col("unsolved", "Unsolved", justify="right", style="orange_red1"),
Col("hard_selected", "Sel. Hard", justify="right", style="green"),
Col("unsolved_selected", "Sel. Unsolved", justify="right", style="orange_red1"),
Col("selected", "Selected", justify="right", style="orange_red1"),
Col(
"solver",
"Solvers",
justify="left",
custom=lambda x: ", ".join(x),
footer=lambda df: str(
(df.select(n=pl.col("solver").list.len() * (pl.col("hard_selected") + pl.col("unsolved_selected"))))[
"n"
].sum()
),
),
)

print(table)

@app.command(rich_help_panel=selection_panel)
def scramble_aws(
    data: Path,
    srcdir: Path,
    dstdir: Path,
    scrambler: Path,
    max_workers: int = 8,
) -> None:
    """
    Select and scramble the benchmarks for the AWS tracks.

    Loads the competition configuration from *data* and delegates to
    ``smtcomp.scramble_benchmarks.select_and_scramble_aws`` with *srcdir*,
    *dstdir*, the *scrambler* executable, and *max_workers*.
    """
    config = defs.Config(data)

    smtcomp.scramble_benchmarks.select_and_scramble_aws(config, srcdir, dstdir, scrambler, max_workers)


def print_iterable(i: int, tree: Tree, a: Any) -> None:
Expand Down Expand Up @@ -551,13 +616,35 @@ def read_submission(file: Path) -> defs.Submission:


@app.command()
def check_model_locally(cachedir: Path, resultdirs: list[Path], max_workers: int = 8) -> None:
l: list[tuple[results.RunId, results.Run, model_validation.ValidationResult]] = []
def check_model_locally(
cachedir: Path, resultdirs: list[Path], max_workers: int = 8, outdir: Optional[Path] = None
) -> None:
l: list[tuple[results.RunId, results.Run, model_validation.ValidationError]] = []
with Progress() as progress:
with ThreadPoolExecutor(max_workers) as executor:
for resultdir in resultdirs:
l2 = model_validation.check_results_locally(cachedir, resultdir, executor, progress)
for rid, r, result in l2:
if result.status != defs.Status.Sat:
l.append((rid, r, result))
print(l)
l.extend(filter_map(map_none3(model_validation.is_error), l2))
keyfunc = lambda v: v[0].solver
l.sort(key=keyfunc)
d = itertools.groupby(l, key=keyfunc)
t = Tree("Unvalidated models")
for solver, rs in d:
t2 = t.add(solver)
for rid, r, result in rs:
stderr = result.stderr.strip().replace("\n", ", ")
t2.add(f"{r.basename}: {stderr}")
print(t)
if outdir is not None:
for solver, models in d:
dst = outdir / solver
dst.mkdir(parents=True, exist_ok=True)
for rid, r, result in models:
filedir = benchmark_files_dir(cachedir, rid.track)
logic = rid.includefile.removesuffix(get_suffix(rid.track))
basename = r.basename.removesuffix(".yml") + ".smt2"
basename_model = r.basename.removesuffix(".yml") + ".rsmt2"
smt2_file = filedir / logic / basename
(dst / basename).unlink(missing_ok=True)
(dst / basename).symlink_to(smt2_file)
(dst / basename_model).write_text(result.model)
42 changes: 32 additions & 10 deletions smtcomp/model_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,36 @@


@dataclass
class ValidationResult:
class ValidationOk:
stderr: str


@dataclass
class ValidationError:
    """Outcome of a model validation that did not confirm satisfiability."""

    # Status reported by the validation run (e.g. Unsat or Unknown).
    status: defs.Status
    # Captured stderr output of the validator.
    stderr: str
    # The model text that was checked, if one was available.
    model: str | None


@dataclass
class NoValidation:
    """No validation possible"""


# Shared sentinel instance: NoValidation carries no state, so one object suffices.
noValidation = NoValidation()

# Union of the possible outcomes of a model validation.
Validation = ValidationOk | ValidationError | NoValidation


def is_error(x: Validation) -> ValidationError | None:
    """Narrow a validation outcome to its error case.

    Returns *x* itself when it is a ValidationError; returns None for any
    other outcome (ValidationOk, NoValidation).
    """
    return x if isinstance(x, ValidationError) else None


def check_locally(smt2_file: Path, model: str) -> ValidationResult:
def check_locally(smt2_file: Path, model: str) -> Validation:
r = subprocess.run(
[
"dolmen",
Expand All @@ -54,39 +78,37 @@ def check_locally(smt2_file: Path, model: str) -> ValidationResult:
)
match r.returncode:
case 0:
status = defs.Status.Sat
return ValidationOk(r.stderr.decode())
case 5:
status = defs.Status.Unsat
case 2:
# LimitReached
status = defs.Status.Unknown
case _:
status = defs.Status.Unknown
return ValidationResult(status, r.stderr.decode())
return ValidationError(status, r.stderr.decode(), model)


def check_result_locally(
cachedir: Path, logfiles: results.LogFile, rid: results.RunId, r: results.Run
) -> ValidationResult:
def check_result_locally(cachedir: Path, logfiles: results.LogFile, rid: results.RunId, r: results.Run) -> Validation:
if r.status == "true":
filedir = benchmark_files_dir(cachedir, rid.track)
logic = rid.includefile.removesuffix(get_suffix(rid.track))
smt2_file = filedir / logic / (r.basename.removesuffix(".yml") + ".smt2")
model = logfiles.get_output(rid, r.basename)
return check_locally(smt2_file, model)
else:
return ValidationResult(defs.Status.Unknown, "")
return noValidation


def check_results_locally(
cachedir: Path, resultdir: Path, executor: ThreadPoolExecutor, progress: Progress
) -> list[tuple[results.RunId, results.Run, ValidationResult]]:
) -> list[tuple[results.RunId, results.Run, Validation]]:
with results.LogFile(resultdir) as logfiles:
l = [(r.runid, b) for r in results.parse_results(resultdir) for b in r.runs if b.status == "true"]
return list(
progress.track(
executor.map((lambda v: (v[0], v[1], check_result_locally(cachedir, logfiles, v[0], v[1]))), l),
total=len(l),
description="checking models",
description=f"checking models for {resultdir.name}",
)
)
Loading
Loading