Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

AWS selection and scrambling #98

Merged
merged 9 commits into from
Jun 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 13 additions & 3 deletions smtcomp/defs.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ def __int__(self) -> int:
def of_int(cls, id: int) -> EnumAutoInt:
return cls.__ordered__[id]

@classmethod
def name_of_int(cls, id: int) -> str:
    """Return the declared name of the enum member whose ordinal is *id*."""
    member = cls.__ordered__[id]
    return member.name

def __hash__(self) -> int:
    # Members hash by their stable integer id.
    return self.id

Expand Down Expand Up @@ -1363,11 +1367,11 @@ class Config:
"""Prioritize triviality as much as possible for testing purpose.
Look for simple problems instead of hard one"""

nyse_seed = None
nyse_seed = 17817
"""The integer part of one hundred times the opening value of the New York Stock Exchange Composite Index on the first day the exchange is open on or after the date specified in nyse_date"""
nyse_date = date(year=2024, month=6, day=10)
nyse_date = date(year=2024, month=6, day=17)

aws_timelimit_hard = 600
aws_timelimit_hard = 180
"""
Time in seconds upon which benchmarks are considered hards
"""
Expand All @@ -1376,6 +1380,12 @@ class Config:
Number of selected benchmarks
"""

removed_benchmarks = [{
"logic": int(Logic.QF_LIA),
"family": "20210219-Dartagnan/ConcurrencySafety-Main",
"name": "39_rand_lock_p0_vs-O0.smt2",
}]

def __init__(self, data: Path | None) -> None:
if data is not None and data.name != "data":
raise ValueError("Consistency check, data directory must be named 'data'")
Expand Down
177 changes: 132 additions & 45 deletions smtcomp/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,10 @@
from rich.console import Console
import smtcomp.test_solver as test_solver
from concurrent.futures import ThreadPoolExecutor

from smtcomp.benchexec import get_suffix
from smtcomp.scramble_benchmarks import benchmark_files_dir
from smtcomp.utils import *
import re

app = typer.Typer()

Expand Down Expand Up @@ -333,52 +336,114 @@ def show_sq_selection_stats(
.sort(by="logic")
.collect()
)
table = Table(title="Statistics on the benchmark selection for single query")

table.add_column("Logic", justify="left", style="cyan", no_wrap=True)
table.add_column("trivial", justify="right", style="green")
table.add_column("not trivial", justify="right", style="orange_red1")
table.add_column("never compet.", justify="right", style="magenta")
table.add_column("new", justify="right", style="magenta1")
table.add_column("selected", justify="right", style="green3")
table.add_column("selected sat", justify="right", style="green4")
table.add_column("selected unsat", justify="right", style="green4")
table.add_column("selected already run", justify="right", style="green4")

used_logics = defs.logic_used_for_track(defs.Track.SingleQuery)
for d in b3.to_dicts():
logic = defs.Logic.of_int(d["logic"])

def print_logic(id: int) -> str:
logic = defs.Logic.of_int(id)
if logic in used_logics:
slogic = f"{str(logic)}"
return f"{str(logic)}"
else:
slogic = f"[bold red]{str(logic)}[/bold red]"

table.add_row(
slogic,
str(d["trivial"]),
str(d["not_trivial"]),
str(d["old_never_ran"]),
str(d["new"]),
str(d["selected"]),
str(d["selected_sat"]),
str(d["selected_unsat"]),
str(d["selected_already_run"]),
return f"[bold red]{str(logic)}[/bold red]"

rich_print_pl(
"Statistics on the benchmark selection for single query",
b3,
Col("logic", "Logic", footer="Total", justify="left", style="cyan", no_wrap=True, custom=print_logic),
Col("trivial", "trivial", justify="right", style="green"),
Col("not_trivial", "not trivial", justify="right", style="orange_red1"),
Col("old_never_ran", "never compet.", justify="right", style="magenta"),
Col("new", "new", justify="right", style="magenta1"),
Col("selected", "selected", justify="right", style="green3"),
Col("selected_sat", "selected sat", justify="right", style="green4"),
Col("selected_unsat", "selected unsat", justify="right", style="green4"),
Col("selected_already_run", "selected already run", justify="right", style="green4"),
)


@app.command(rich_help_panel=selection_panel)
def show_aws_stats(
data: Path,
) -> None:
"""
Show statistics on the benchmarks selected for aws tracks
"""
config = defs.Config(data)
benchmarks = smtcomp.selection.helper_aws_selection(config)
solvers = smtcomp.selection.solver_competing_logics(config).group_by(["track", "logic"]).agg("solver")
b3 = (
add_columns(
benchmarks.group_by(["track", "logic"]).agg(
n=pl.len(),
hard=(pl.col("hard") == True).sum(),
unsolved=(pl.col("unsolved") == True).sum(),
selected=(pl.col("selected") == True).sum(),
hard_selected=((pl.col("hard") == True) & (pl.col("selected") == True)).sum(),
unsolved_selected=((pl.col("unsolved") == True) & (pl.col("selected") == True)).sum(),
),
solvers,
on=["track", "logic"],
defaults={"solver": []},
)
.sort(by=["track", "logic"])
.collect()
)

table.add_section()
table.add_row(
"Total",
str(b3["trivial"].sum()),
str(b3["not_trivial"].sum()),
str(b3["old_never_ran"].sum()),
str(b3["new"].sum()),
str(b3["selected"].sum()),
str(b3["selected_sat"].sum()),
str(b3["selected_unsat"].sum()),
str(b3["selected_already_run"].sum()),
rich_print_pl(
"Statistics on the benchmark selection for AWS tracks",
b3,
Col(
"track",
"Track",
footer="Total",
justify="left",
style="cyan",
no_wrap=True,
custom=defs.Track.name_of_int,
),
Col(
"logic",
"Logic",
footer="",
justify="left",
style="cyan",
no_wrap=True,
custom=defs.Logic.name_of_int,
),
Col("n", "Benchmarks", justify="right", style="magenta"),
Col("hard", "Hard", justify="right", style="green"),
Col("unsolved", "Unsolved", justify="right", style="orange_red1"),
Col("hard_selected", "Sel. Hard", justify="right", style="green"),
Col("unsolved_selected", "Sel. Unsolved", justify="right", style="orange_red1"),
Col("selected", "Selected", justify="right", style="orange_red1"),
Col(
"solver",
"Solvers",
justify="left",
custom=lambda x: ", ".join(x),
footer=lambda df: str(
(df.select(n=pl.col("solver").list.len() * (pl.col("hard_selected") + pl.col("unsolved_selected"))))[
"n"
].sum()
),
),
)

print(table)

@app.command(rich_help_panel=selection_panel)
def scramble_aws(
    data: Path,
    srcdir: Path,
    dstdir: Path,
    scrambler: Path,
    max_workers: int = 8,
) -> None:
    """
    Select and scramble the benchmarks for the AWS tracks.

    Loads the competition configuration from *data* and delegates to
    ``smtcomp.scramble_benchmarks.select_and_scramble_aws`` with *srcdir*,
    *dstdir*, the *scrambler* executable, and *max_workers*.
    """
    config = defs.Config(data)

    smtcomp.scramble_benchmarks.select_and_scramble_aws(config, srcdir, dstdir, scrambler, max_workers)


def print_iterable(i: int, tree: Tree, a: Any) -> None:
Expand Down Expand Up @@ -551,13 +616,35 @@ def read_submission(file: Path) -> defs.Submission:


@app.command()
def check_model_locally(cachedir: Path, resultdirs: list[Path], max_workers: int = 8) -> None:
l: list[tuple[results.RunId, results.Run, model_validation.ValidationResult]] = []
def check_model_locally(
cachedir: Path, resultdirs: list[Path], max_workers: int = 8, outdir: Optional[Path] = None
) -> None:
l: list[tuple[results.RunId, results.Run, model_validation.ValidationError]] = []
with Progress() as progress:
with ThreadPoolExecutor(max_workers) as executor:
for resultdir in resultdirs:
l2 = model_validation.check_results_locally(cachedir, resultdir, executor, progress)
for rid, r, result in l2:
if result.status != defs.Status.Sat:
l.append((rid, r, result))
print(l)
l.extend(filter_map(map_none3(model_validation.is_error), l2))
keyfunc = lambda v: v[0].solver
l.sort(key=keyfunc)
d = itertools.groupby(l, key=keyfunc)
t = Tree("Unvalidated models")
for solver, rs in d:
t2 = t.add(solver)
for rid, r, result in rs:
stderr = result.stderr.strip().replace("\n", ", ")
t2.add(f"{r.basename}: {stderr}")
print(t)
if outdir is not None:
for solver, models in d:
dst = outdir / solver
dst.mkdir(parents=True, exist_ok=True)
for rid, r, result in models:
filedir = benchmark_files_dir(cachedir, rid.track)
logic = rid.includefile.removesuffix(get_suffix(rid.track))
basename = r.basename.removesuffix(".yml") + ".smt2"
basename_model = r.basename.removesuffix(".yml") + ".rsmt2"
smt2_file = filedir / logic / basename
(dst / basename).unlink(missing_ok=True)
(dst / basename).symlink_to(smt2_file)
(dst / basename_model).write_text(result.model)
42 changes: 32 additions & 10 deletions smtcomp/model_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,36 @@


@dataclass
class ValidationResult:
class ValidationOk:
stderr: str


@dataclass
class ValidationError:
    """Outcome of a model validation that did not confirm satisfiability."""

    # Status reported by the validation run (e.g. Unsat or Unknown).
    status: defs.Status
    # Captured stderr output of the validator.
    stderr: str
    # The model text that was checked, if one was available.
    model: str | None


@dataclass
class NoValidation:
    """No validation possible"""


# Shared sentinel instance: NoValidation carries no state, so one object suffices.
noValidation = NoValidation()

# Union of the possible outcomes of a model validation.
Validation = ValidationOk | ValidationError | NoValidation


def is_error(x: Validation) -> ValidationError | None:
    """Narrow a validation outcome to its error case.

    Returns *x* itself when it is a ValidationError; returns None for any
    other outcome (ValidationOk, NoValidation).
    """
    return x if isinstance(x, ValidationError) else None


def check_locally(smt2_file: Path, model: str) -> ValidationResult:
def check_locally(smt2_file: Path, model: str) -> Validation:
r = subprocess.run(
[
"dolmen",
Expand All @@ -54,39 +78,37 @@ def check_locally(smt2_file: Path, model: str) -> ValidationResult:
)
match r.returncode:
case 0:
status = defs.Status.Sat
return ValidationOk(r.stderr.decode())
case 5:
status = defs.Status.Unsat
case 2:
# LimitReached
status = defs.Status.Unknown
case _:
status = defs.Status.Unknown
return ValidationResult(status, r.stderr.decode())
return ValidationError(status, r.stderr.decode(), model)


def check_result_locally(
cachedir: Path, logfiles: results.LogFile, rid: results.RunId, r: results.Run
) -> ValidationResult:
def check_result_locally(cachedir: Path, logfiles: results.LogFile, rid: results.RunId, r: results.Run) -> Validation:
if r.status == "true":
filedir = benchmark_files_dir(cachedir, rid.track)
logic = rid.includefile.removesuffix(get_suffix(rid.track))
smt2_file = filedir / logic / (r.basename.removesuffix(".yml") + ".smt2")
model = logfiles.get_output(rid, r.basename)
return check_locally(smt2_file, model)
else:
return ValidationResult(defs.Status.Unknown, "")
return noValidation


def check_results_locally(
cachedir: Path, resultdir: Path, executor: ThreadPoolExecutor, progress: Progress
) -> list[tuple[results.RunId, results.Run, ValidationResult]]:
) -> list[tuple[results.RunId, results.Run, Validation]]:
with results.LogFile(resultdir) as logfiles:
l = [(r.runid, b) for r in results.parse_results(resultdir) for b in r.runs if b.status == "true"]
return list(
progress.track(
executor.map((lambda v: (v[0], v[1], check_result_locally(cachedir, logfiles, v[0], v[1]))), l),
total=len(l),
description="checking models",
description=f"checking models for {resultdir.name}",
)
)
Loading
Loading