[CI] Download, unpack solver and test solver on dumb test file #58

Merged (2 commits) on May 29, 2024
58 changes: 58 additions & 0 deletions .github/workflows/test-solver.yml
@@ -0,0 +1,58 @@
name: TestSolver

on:
  pull_request:
    paths:
      - submissions/*.json

jobs:
  generate-test-script:
    runs-on: ubuntu-latest
    steps:
      - name: Check out
        uses: actions/checkout@v3

      - uses: actions/cache@v3
        with:
          path: ~/.cache/pre-commit
          key: pre-commit-${{ hashFiles('.pre-commit-config.yaml') }}

      - name: Set up the environment
        uses: ./.github/actions/setup-poetry-env

      - name: Compute changed files
        id: changed-files
        uses: tj-actions/changed-files@v41
        with:
          files: |
            submissions/*.json

      - name: Generate script
        run: |
          poetry run smtcomp generate-test-script test_data/ ${{ steps.changed-files.outputs.all_changed_files }}
          rm -rf test_data/download

      - name: Archive directory
        run: tar -cf test_data.tar test_data

      - name: Upload generated test script
        uses: actions/upload-artifact@v4
        with:
          name: generated_script
          path: test_data.tar

  run-test-script:
    needs: generate-test-script
    runs-on: ubuntu-latest
    container: registry.gitlab.com/sosy-lab/benchmarking/competition-scripts/user:latest
    steps:
      - name: Download generated test script
        uses: actions/download-artifact@v4
        with:
          name: generated_script

      - name: Unarchive
        run: tar -xf test_data.tar

      - name: Run test script
        run: python3 test_data/test_script.py
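
For orientation, the test_data/test_script.py executed by the last step is emitted by the new generate-test-script command (smtcomp/main.py below). A hedged sketch of its content, with a made-up solver name, binary path, and logic:

from test_solver import *
init_test()

print("Testing provers")
print('ExampleSolver')
test('ExampleSolver/bin/solver',['--file'],'trivial_bench/files/QF_LIA.sat.smt2','sat')
test('ExampleSolver/bin/solver',['--file'],'trivial_bench/files/QF_LIA.unsat.smt2','unsat')
end()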
37 changes: 32 additions & 5 deletions smtcomp/defs.py
@@ -5,7 +5,7 @@
import re
from enum import Enum
from pathlib import Path, PurePath
from typing import Any, Dict, Optional
from typing import Any, Dict, cast, Optional

from pydantic import BaseModel, Field, RootModel, model_validator, ConfigDict
from pydantic.networks import HttpUrl, validate_email
@@ -1159,6 +1159,15 @@ def uniq_id(self, name: str, archive: Archive) -> str:
        return h.hexdigest()


class ParticipationCompleted(BaseModel, extra="forbid"):
    """Participation with the default values from the submission root filled in"""

    tracks: dict[Track, dict[Division, set[Logic]]]
    archive: Archive
    command: Command
    experimental: bool


class Participation(BaseModel, extra="forbid"):
    """
    tracks: select the participation tracks
@@ -1202,17 +1211,31 @@ def get(self, d: None | dict[Track, dict[Division, set[Logic]]] = None) -> dict[
                logics.add(logic)
        return d

    def complete(self, archive: Archive | None, command: Command | None) -> ParticipationCompleted:
        archive = cast(Archive, archive if self.archive is None else self.archive)
        command = cast(Command, command if self.command is None else self.command)
        return ParticipationCompleted(
            tracks=self.get(), archive=archive, command=command, experimental=self.experimental
        )


import functools
import itertools


class Participations(RootModel):
    root: list[Participation]

    def get_divisions(self, track: Track) -> list[Division]:
    def get_divisions(self, l: list[Track] = list(Track)) -> set[Division]:
        """Return the divisions in which the solver participates"""
        return []  # TODO
        tracks = self.get()
        divs = [set(tracks.get(track, {}).keys()) for track in l]
        return functools.reduce(lambda x, y: x | y, divs, set())

    def get_logics(self, track: Track) -> list[Logic]:
    def get_logics(self, l: list[Track] = list(Track)) -> set[Logic]:
        """Return the logics in which the solver participates"""
        return []  # TODO
        tracks = self.get()
        logics = itertools.chain.from_iterable(tracks.get(track, {}).values() for track in l)
        return functools.reduce(lambda x, y: x | y, logics, set())

    def get(self, d: None | dict[Track, dict[Division, set[Logic]]] = None) -> dict[Track, dict[Division, set[Logic]]]:
        if d is None:
@@ -1247,6 +1270,10 @@ def check_archive(self) -> Submission:
    def uniq_id(self) -> str:
        return hashlib.sha256(self.name.encode()).hexdigest()

    def complete_participations(self) -> list[ParticipationCompleted]:
        """Push defaults from the submission into participations"""
        return [p.complete(self.archive, self.command) for p in self.participations.root]


class Smt2File(BaseModel):
    incremental: bool
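Taken together, the new defs.py helpers compose as follows. A minimal sketch, assuming sub is a Submission already parsed from a JSON file and that Track is imported from this module:

# Minimal usage sketch (assumes: sub is a parsed Submission)
for part in sub.complete_participations():
    # archive and command are now guaranteed to be set: per-participation
    # values take precedence over the defaults at the submission root
    print(part.command.binary, part.command.arguments)

divisions = sub.participations.get_divisions([Track.SingleQuery])  # one track
logics = sub.participations.get_logics()  # over all tracks by default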
23 changes: 21 additions & 2 deletions smtcomp/generate_benchmarks.py
@@ -3,6 +3,25 @@
from smtcomp import defs


def path_trivial_benchmark(dst: Path, track: defs.Track, logic: defs.Logic, status: defs.Status) -> Path:
    match track:
        case defs.Track.Incremental:
            suffix = "_inc"
        case defs.Track.ModelValidation:
            suffix = "_model"
        case defs.Track.SingleQuery:
            suffix = ""
        case defs.Track.UnsatCore | defs.Track.ProofExhibition | defs.Track.Cloud | defs.Track.Parallel:
            raise ValueError(f"No trivial benchmarks yet for {track}")
    match status:
        case defs.Status.Sat:
            return dst.joinpath("files", str(logic) + suffix + ".sat.smt2")
        case defs.Status.Unsat:
            return dst.joinpath("files", str(logic) + suffix + ".unsat.smt2")
        case defs.Status.Unknown:
            raise ValueError("No trivial benchmarks yet for unknown")


def generate_trivial_benchmarks(dst: Path) -> None:
    dst.joinpath("files").mkdir(parents=True, exist_ok=True)
    for track, divisions in defs.tracks.items():
@@ -18,8 +37,8 @@ def generate_trivial_benchmarks(dst: Path) -> None:
        for _, theories in divisions.items():
            for theory in theories:
                file = dst.joinpath(str(theory) + suffix)
                file_sat = dst.joinpath("files", str(theory) + suffix + ".sat.smt2")
                file_unsat = dst.joinpath("files", str(theory) + suffix + ".unsat.smt2")
                file_sat = path_trivial_benchmark(dst, track, theory, defs.Status.Sat)
                file_unsat = path_trivial_benchmark(dst, track, theory, defs.Status.Unsat)

                file.write_text("\n".join([str(file_sat.relative_to(dst)), str(file_unsat.relative_to(dst))]))

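As an illustration of the naming scheme (QF_LIA is an arbitrary example; str(logic) is assumed to render as the bare logic name, which the joinpath calls above rely on):

# Hypothetical resulting paths
dst = Path("test_data/trivial_bench")
path_trivial_benchmark(dst, defs.Track.Incremental, defs.Logic.QF_LIA, defs.Status.Sat)
# -> test_data/trivial_bench/files/QF_LIA_inc.sat.smt2
path_trivial_benchmark(dst, defs.Track.SingleQuery, defs.Logic.QF_LIA, defs.Status.Unsat)
# -> test_data/trivial_bench/files/QF_LIA.unsat.smt2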
86 changes: 77 additions & 9 deletions smtcomp/main.py
@@ -1,7 +1,7 @@
import json
import itertools
from pathlib import Path
from typing import List, Optional, cast, Dict, Any, Annotated
from typing import List, Optional, cast, Dict, Any, Annotated, TextIO
import rich
from rich.progress import track
import rich.style
@@ -27,6 +27,8 @@
from smtcomp.unpack import write_cin, read_cin
import smtcomp.scramble_benchmarks
from rich.console import Console
import smtcomp.test_solver as test_solver


app = typer.Typer()

@@ -142,21 +144,29 @@ def generate_benchexec(
    )


# Should be moved somewhere else
def download_archive_aux(s: defs.Submission, dst: Path) -> None:
    """
    Download and unpack
    """
    dst.mkdir(parents=True, exist_ok=True)
    if s.archive:
        archive.download(s.archive, dst)
        archive.unpack(s.archive, dst)
    for p in s.participations.root:
        if p.archive:
            archive.download(p.archive, dst)
            archive.unpack(p.archive, dst)


@app.command(rich_help_panel=benchexec_panel)
def download_archive(files: List[Path], dst: Path) -> None:
    """
    Download and unpack
    """
    for file in track(files):
        dst.mkdir(parents=True, exist_ok=True)
        s = submission.read(str(file))
        if s.archive:
            archive.download(s.archive, dst)
            archive.unpack(s.archive, dst)
        for p in s.participations.root:
            if p.archive:
                archive.download(p.archive, dst)
                archive.unpack(p.archive, dst)
        download_archive_aux(s, dst)


@app.command()
@@ -501,3 +511,61 @@ def scramble_benchmarks(
    """

    smtcomp.scramble_benchmarks.scramble(competition_track, src, dstdir, scrambler, seed, max_workers)


@app.command()
def generate_test_script(outdir: Path, submissions: list[Path] = typer.Argument(None)) -> None:
    def read_submission(file: Path) -> defs.Submission:
        try:
            return submission.read(str(file))
        except Exception as e:
            rich.print(f"[red]Error during file parsing of {file}[/red]")
            print(e)
            exit(1)

    outdir.mkdir(parents=True, exist_ok=True)
    test_solver.copy_me(outdir)

    trivial_bench = outdir.joinpath("trivial_bench")
    smtcomp.generate_benchmarks.generate_trivial_benchmarks(trivial_bench)

    subs = list(map(read_submission, submissions))
    script_output = outdir.joinpath("test_script.py")
    with script_output.open("w") as out:
        out.write("from test_solver import *\n")
        out.write("init_test()\n")
        out.write("\n")
        out.write("""print("Testing provers")\n""")
        for sub in subs:
            out.write(f"print({sub.name!r})\n")
            download_archive_aux(sub, outdir)
            for part in sub.complete_participations():
                for track, divisions in part.tracks.items():
                    match track:
                        case defs.Track.Incremental:
                            statuses = [defs.Status.Sat, defs.Status.Unsat]
                        case defs.Track.ModelValidation:
                            statuses = [defs.Status.Sat]
                        case defs.Track.SingleQuery:
                            statuses = [defs.Status.Sat, defs.Status.Unsat]
                        case defs.Track.UnsatCore | defs.Track.ProofExhibition | defs.Track.Cloud | defs.Track.Parallel:
                            continue
                    for _, logics in divisions.items():
                        for logic in logics:
                            for status in statuses:
                                benchmark = smtcomp.generate_benchmarks.path_trivial_benchmark(
                                    trivial_bench, track, logic, status
                                ).relative_to(outdir)
                                cmd = (
                                    archive.archive_unpack_dir(part.archive, outdir)
                                    .joinpath(part.command.binary)
                                    .relative_to(outdir)
                                )
                                expected = "sat" if status == defs.Status.Sat else "unsat"
                                out.write(
                                    f"test({str(cmd)!r},{part.command.arguments!r},{str(benchmark)!r},{expected!r})\n"
                                )
        out.write("end()\n")
71 changes: 71 additions & 0 deletions smtcomp/test_solver.py
@@ -0,0 +1,71 @@
# This module is for the test script in the docker image.
# It must use only modules from the standard library.
from pathlib import Path
import shutil
import subprocess, os
import re


def copy_me(dstdir: Path) -> None:
    shutil.copyfile(src=__file__, dst=dstdir.joinpath("test_solver.py"), follow_symlinks=True)


# Copied from tools.py; should be factorized.
def parse_result(returnsignal: int | None, returncode: int, output: list[str]) -> str:
    if returnsignal is None:
        for line in output:
            line = line.strip()
            # Ignore empty lines, "success" answers and comment lines
            if re.compile(r"^\s*(success|;.*)?\s*$").match(line):
                continue
            if line == "unsat":
                return "unsat"
            elif line == "sat":
                return "sat"
            else:
                return "unknown"
        return "unknown"

    # Signals 9 (SIGKILL) and 15 (SIGTERM) are treated as timeouts, since
    # the benchmarking framework uses them to enforce the time limit.
    elif (returnsignal == 9) or (returnsignal == 15):
        status = "timeout"
    elif returnsignal == 6:
        status = "ABORTED"
    else:
        status = f"ERROR ({returncode})"

    return status


def init_test() -> None:
    # Run relative to the directory containing this script
    os.chdir(os.path.dirname(__file__))


exit_code = 0


# Try to simulate runexec from benchexec
def test(cmd: str, args: list[str], file: str, expected: str) -> None:
    global exit_code
    cmdline = [cmd] + args + [file]
    print(cmdline, flush=True)
    result = subprocess.run(cmdline, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    if result.returncode < 0:
        # A negative return code means the process was killed by a signal
        returnsignal = -result.returncode
    else:
        returnsignal = None
    answer = parse_result(returnsignal, result.returncode, result.stdout.decode().splitlines())
    if answer != expected:
        print(f"\x1b[1;31mError: expected {expected}, obtained {answer}\x1b[0m", flush=True)
        exit_code = 1
    else:
        print("\x1b[1;32mOK\x1b[0m", flush=True)


def end() -> None:
    exit(exit_code)
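
A few sanity examples of how parse_result classifies a run, derived directly from the branches above:

# Expected classifications (normal exits vs. signal-terminated runs)
assert parse_result(None, 0, ["success", "sat"]) == "sat"      # "success" lines are skipped
assert parse_result(None, 0, ["; comment", "unsat"]) == "unsat"
assert parse_result(None, 0, []) == "unknown"                  # no answer printed
assert parse_result(9, -9, []) == "timeout"                    # killed by SIGKILL
assert parse_result(6, -6, []) == "ABORTED"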