Add selection logic for sq

- Add benchmarks file for 2024 - incremental second version - non-incremental first version - generate a cache to speed up operations - use polars
SMT-COMP · May 16, 2024 · 1f33595 · 1f33595
1 parent a9d60c2
commit 1f33595
Show file tree

Hide file tree

Showing 20 changed files with 1,159 additions and 281 deletions.
diff --git a/.gitignore b/.gitignore
@@ -170,3 +170,4 @@ cython_debug/
 /submission-schema.html
 schema_doc.css
 schema_doc.min.js
+*.feather
diff --git a/README.md b/README.md
@@ -17,18 +17,85 @@ Tools used for the organization of the SMT competition
 git clone git@github.com:smtcomp/smtcomp.github.io.git
 ```
 
-Finally, install the environment and the pre-commit hooks with
+Finally, install the environment with
 
 ```bash
 make install
 ```
 
-You are now ready to start development on your project!
-The CI/CD pipeline will be triggered when you open a pull request, merge to main, or when you create a new release.
+## For starting a new SMT-COMP year
 
-To finalize the set-up for publishing to PyPi or Artifactory, see [here](https://fpgmaas.github.io/cookiecutter-poetry/features/publishing/#set-up-for-pypi).
-For activating the automatic documentation with MkDocs, see [here](https://fpgmaas.github.io/cookiecutter-poetry/features/mkdocs/#enabling-the-documentation-on-github).
-To enable the code coverage reports, see [here](https://fpgmaas.github.io/cookiecutter-poetry/features/codecov/).
+Edit the file `smtcomp/defs.py`, in particular `Config.current_year`, `Logic` for adding new logics and `tracks` for new divisions.
+
+Download the new benchmarks from zenodo, unpack them, unpack the .tar.zst, you should get something like:
+
+```
+$DIR/zenodo
+├── incremental
+│   ├── ABVFPLRA
+│   ├── ALIA
+│   ...
+│   ├── UFNIA
+│   └── UFNRA
+└── non-incremental
+    ├── ABV
+    ├── ABVFP
+    ├── ABVFPLRA
+    ├── ALIA
+    ├── ANIA
+    ├── AUFBV
+    ...
+    ├── UFFPDTNIRA
+    ├── UFIDL
+    ├── UFLIA
+    ├── UFLRA
+    ├── UFNIA
+    └── UFNIRA
+```
+
+Then you can run (very io intensive):
+
+```
+smtcomp $DIR/zenodo ./data/
+```
+
+The directory `./data/` is the one present in this repository
+
+## Using the smtcomp tool for selecting the benchmarks
+
+The list of benchmarks and the previous results are in json which are human
+readable, but slow to parse (1min). So locally the tool use the feather format. The
+feather files are generated with:
+
+```
+smtcomp create-cache ./data/
+```
+
+Working with the feather files with [polars](https://docs.pola.rs/) is very fast,
+so no more intermediate files are needed.
+
+However statistics can be shown, for example for the selection of single track:
+
+```
+smtcomp show-sq-selection-stats ./data/ 0
+```
+
+Which outputs:
+
+```
+          Statistics on the benchmark selection for single query
+┏━━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━┳━━━━━━━━━━┓
+┃ Logic         ┃ trivial ┃ not trivial ┃ old never ran ┃  new ┃ selected ┃
+┡━━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━╇━━━━━━━━━━┩
+│ ABV           │       0 │        2573 │          2402 │    0 │     2487 │
+│ ABVFP         │       0 │          60 │             0 │    0 │       60 │
+│ ABVFPLRA      │       0 │          77 │             0 │    0 │       77 │
+│ ALIA          │      23 │        1545 │          1530 │    0 │     1537 │
+│ ANIA          │       0 │          56 │             0 │   22 │       78 │
+│ AUFBV         │       0 │        1333 │           190 │    0 │      761 │
+│ AUFBVDTLIA    │     115 │        1434 │           134 │    0 │      784 │
+...
+```
 
 ## Using the smtcomp tool for generating benchexec
 

diff --git a/data/benchmarks-2024.json.gz b/data/benchmarks-2024.json.gz
diff --git a/data/results-sq-2018.json.gz b/data/results-sq-2018.json.gz
diff --git a/data/results-sq-2019.json.gz b/data/results-sq-2019.json.gz
diff --git a/data/results-sq-2020.json.gz b/data/results-sq-2020.json.gz
diff --git a/data/results-sq-2021.json.gz b/data/results-sq-2021.json.gz
diff --git a/data/results-sq-2022.json.gz b/data/results-sq-2022.json.gz
diff --git a/data/results-sq-2023.json.gz b/data/results-sq-2023.json.gz
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -17,7 +17,7 @@ smtcomp = "smtcomp.main:app"
 python = ">=3.11,<4.0"
 typer = {extras = ["all"], version = "^0.9.0"}
 rich = "^13.7.0"
-pydantic = "^2.5.0"
+pydantic = "^2.7.0"
 email-validator = "^2.1.0"
 python-gitlab = "*"
 gitpython = "*"
@@ -27,7 +27,9 @@ option = "*"
 requests = "*"
 bs4 = "*"
 benchexec = "*"
-pystemd = "*"
+polars = "*"
+#pystemd is not set as dependency because it is not in the CI
+
 
 [tool.poetry.group.dev.dependencies]
 pytest = "^7.2.0"
@@ -36,6 +38,7 @@ deptry = "^0.12.0"
 mypy = "^1.5.1"
 pre-commit = "^3.4.0"
 tox = "^4.11.1"
+types-requests = "*"
 
 
 

diff --git a/smtcomp/benchexec.py b/smtcomp/benchexec.py
@@ -75,7 +75,7 @@ def cmdtask_for_submission(s: defs.Submission, cachedir: Path) -> List[CmdTask]:
                     continue
             tasks: list[str] = []
             for _, logics in divisions.items():
-                tasks.extend([logic + suffix for logic in logics])
+                tasks.extend([str(logic) + suffix for logic in logics])
             if tasks:
                 executable_path = find_command(command, archive, cachedir)
                 executable = str(relpath(executable_path, start=str(cachedir)))

diff --git a/smtcomp/convert_csv.py b/smtcomp/convert_csv.py
@@ -11,6 +11,7 @@
 from rich.progress import track
 
 import smtcomp.defs as defs
+from smtcomp.list_benchmarks import get_smt2_file
 
 
 class CsvColumn(str, Enum):
@@ -175,3 +176,38 @@ def convert_csv(file: Path, dstdir: Path) -> None:
                 submission = convert_row(row, dstdir)
                 with open(Path.joinpath(dstdir, submission.name + ".json"), "w") as f:
                     f.write(submission.model_dump_json())
+
+
+def convert_csv_result(file: Path, track: defs.Track) -> defs.Results:
+    # Some csv benchmark name start with track_.../QF..
+    with open(file) as dcsv:
+        results = csv.DictReader(dcsv)
+        compatibility = next(results)["benchmark"].startswith("track_")
+
+    with open(file) as dcsv:
+        results = csv.DictReader(dcsv)
+        l: list[defs.Result] = []
+        for row in results:
+            if row["result"] == "starexec-unknown":
+                row["result"] = "unknown"
+            elif row["result"] == "--":
+                row["result"] = "unknown"
+            row["benchmark"] = row["benchmark"].replace("UFFPDTLIRA", "UFFPDTNIRA")
+
+            smt2file = get_smt2_file(
+                src=None, file=Path(row["benchmark"].strip()), incremental=track is defs.Track.Incremental
+            )
+
+            l.append(
+                defs.Result(
+                    track=track,
+                    file=smt2file,
+                    solver="{}_{}".format(row["solver"].strip(), row["configuration"].strip()),
+                    cpu_time=float(row["cpu time"]),
+                    wallclock_time=float(row["wallclock time"]),
+                    memory_usage=float(row["memory usage"]),
+                    result=defs.Status(row["result"]),
+                )
+            )
+
+    return defs.Results(results=l)