Skip to content

Commit

Permalink
Fix AWS selection
Browse files Browse the repository at this point in the history
  • Loading branch information
bobot committed Jun 17, 2024
1 parent a6952ac commit 37555cf
Show file tree
Hide file tree
Showing 4 changed files with 166 additions and 51 deletions.
2 changes: 1 addition & 1 deletion smtcomp/defs.py
Original file line number Diff line number Diff line change
Expand Up @@ -1365,7 +1365,7 @@ class Config:
"""The integer part of one hundred times the opening value of the New York Stock Exchange Composite Index on the first day the exchange is open on or after the date specified in nyse_date"""
nyse_date = date(year=2024, month=6, day=10)

aws_timelimit_hard = 600
aws_timelimit_hard = 180
"""
Time limit in seconds above which benchmarks are considered hard
"""
Expand Down
106 changes: 67 additions & 39 deletions smtcomp/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,52 +336,80 @@ def show_sq_selection_stats(
.sort(by="logic")
.collect()
)
table = Table(title="Statistics on the benchmark selection for single query")

table.add_column("Logic", justify="left", style="cyan", no_wrap=True)
table.add_column("trivial", justify="right", style="green")
table.add_column("not trivial", justify="right", style="orange_red1")
table.add_column("never compet.", justify="right", style="magenta")
table.add_column("new", justify="right", style="magenta1")
table.add_column("selected", justify="right", style="green3")
table.add_column("selected sat", justify="right", style="green4")
table.add_column("selected unsat", justify="right", style="green4")
table.add_column("selected already run", justify="right", style="green4")

used_logics = defs.logic_used_for_track(defs.Track.SingleQuery)
for d in b3.to_dicts():
logic = defs.Logic.of_int(d["logic"])

def print_logic(id: int) -> str:
logic = defs.Logic.of_int(id)
if logic in used_logics:
slogic = f"{str(logic)}"
return f"{str(logic)}"
else:
slogic = f"[bold red]{str(logic)}[/bold red]"

table.add_row(
slogic,
str(d["trivial"]),
str(d["not_trivial"]),
str(d["old_never_ran"]),
str(d["new"]),
str(d["selected"]),
str(d["selected_sat"]),
str(d["selected_unsat"]),
str(d["selected_already_run"]),
)
return f"[bold red]{str(logic)}[/bold red]"

rich_print_pl(
"Statistics on the benchmark selection for single query",
b3,
Col("logic", "Logic", footer="Total", justify="left", style="cyan", no_wrap=True, custom=print_logic),
Col("trivial", "trivial", justify="right", style="green"),
Col("not_trivial", "not trivial", justify="right", style="orange_red1"),
Col("old_never_ran", "never compet.", justify="right", style="magenta"),
Col("new", "new", justify="right", style="magenta1"),
Col("selected", "selected", justify="right", style="green3"),
Col("selected_sat", "selected sat", justify="right", style="green4"),
Col("selected_unsat", "selected unsat", justify="right", style="green4"),
Col("selected_already_run", "selected already run", justify="right", style="green4"),
)


table.add_section()
table.add_row(
"Total",
str(b3["trivial"].sum()),
str(b3["not_trivial"].sum()),
str(b3["old_never_ran"].sum()),
str(b3["new"].sum()),
str(b3["selected"].sum()),
str(b3["selected_sat"].sum()),
str(b3["selected_unsat"].sum()),
str(b3["selected_already_run"].sum()),
@app.command(rich_help_panel=selection_panel)
def show_aws_stats(
    data: Path,
) -> None:
    """
    Show statistics on the benchmarks selected for the AWS tracks.

    Prints one row per (track, logic) pair: total number of benchmarks,
    how many are hard / unsolved, and how many of those were selected.

    :param data: path to the competition data directory (used to build the Config).
    """
    config = defs.Config(data)
    benchmarks = smtcomp.selection.helper_aws_selection(config)
    # Boolean selection columns. NOTE: the previous code wrote
    # `(pl.col("hard") == True) & pl.col("selected") == True`, which Python
    # parses as `(... & pl.col("selected")) == True` because `&` binds
    # tighter than `==` — subtly wrong in the presence of nulls. Combine
    # the boolean columns directly instead.
    hard = pl.col("hard")
    unsolved = pl.col("unsolved")
    selected = pl.col("selected")
    b3 = (
        benchmarks.group_by(["track", "logic"])
        .agg(
            n=pl.len(),
            hard=hard.sum(),
            unsolved=unsolved.sum(),
            hard_selected=(hard & selected).sum(),
            unsolved_selected=(unsolved & selected).sum(),
        )
        .sort(by=["track", "logic"])
        .collect()
    )

    rich_print_pl(
        "Statistics on the benchmark selection for AWS tracks",
        b3,
        Col(
            "track",
            "Track",
            footer="Total",
            justify="left",
            style="cyan",
            no_wrap=True,
            custom=lambda x: str(defs.Track.of_int(x)),
        ),
        Col(
            "logic",
            "Logic",
            footer="",
            justify="left",
            style="cyan",
            no_wrap=True,
            custom=lambda x: str(defs.Logic.of_int(x)),
        ),
        Col("n", "Benchmarks", justify="right", style="magenta"),
        Col("hard", "Hard", justify="right", style="green"),
        Col("unsolved", "Unsolved", justify="right", style="orange_red1"),
        # Distinct headers: previously these repeated "Hard"/"Unsolved",
        # making the two column pairs indistinguishable in the output.
        Col("hard_selected", "Hard selected", justify="right", style="green"),
        Col("unsolved_selected", "Unsolved selected", justify="right", style="orange_red1"),
    )


def print_iterable(i: int, tree: Tree, a: Any) -> None:
Expand Down
45 changes: 34 additions & 11 deletions smtcomp/selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from rich import print
from pydantic import BaseModel
import polars as pl
from smtcomp.utils import *

c_logic = pl.col("logic")
c_result = pl.col("result")
Expand Down Expand Up @@ -197,11 +198,14 @@ def aws_selection(benchmarks: pl.LazyFrame, previous_results: pl.LazyFrame, conf

# Add division information to competitive logic
logics = competitive_logics(config).filter(pl.col("track").is_in(list(map(int, aws_track))), competitive=True)
logics = logics.join(tracks(), on=["track", "logic"], how="inner")
df_logics = logics.collect()
if len(df_logics) == 0:
raise ValueError("No logics selected")
logics = df_logics.lazy().join(tracks(), on=["track", "logic"], how="left")

# Keep only competitive logic from this track
b = benchmarks.join(logics, on="logic", how="inner")
previous_results = previous_results.join(b, on="file", how="semi")
# Keep only competitive logic from the tracks
benchmarks = intersect(benchmarks, logics, on=["logic"])
previous_results = filter_with(previous_results, benchmarks, on=["file"])

# Keep only hard and unsolved benchmarks
solved = c_result.is_in({int(defs.Status.Sat), int(defs.Status.Unsat)})
Expand All @@ -212,22 +216,41 @@ def aws_selection(benchmarks: pl.LazyFrame, previous_results: pl.LazyFrame, conf
# Remove bench solved within the timelimit by any solver
.filter(solved_within_timelimit.not_().all().over("file"))
.group_by("file")
.agg(hard=solved.any(), unsolved=solved.not_().all())
.agg(hard=solved.any())
)

b = b.join(hard_unsolved, how="inner", on="file")
hard_unsolved = intersect(hard_unsolved, benchmarks, on=["file"])

def sample(lf: pl.LazyFrame) -> pl.LazyFrame:
n = pl.min_horizontal(pl.col("file").list.len(), config.aws_num_selected)
n = pl.min_horizontal(pl.col("file").list.len(), config.aws_num_selected / 2)
return lf.with_columns(file=pl.col("file").list.sample(n=n, seed=config.seed))

b = hard_unsolved

# Sample at the logic level
b = sample(b.group_by("track", "division", "logic").agg(file=c_file))
b = sample(b.group_by("track", "hard", "division", "logic").agg(file=c_file.sort()))

# Sample at the division level
b = sample(b.group_by("track", "division").agg(file=c_file.flatten()))
b = sample(b.group_by("track", "hard", "division").agg(file=c_file.flatten().sort()))

# Sample at the track level
b = sample(b.group_by("track").agg(file=c_file.flatten()))
b = sample(b.group_by("track", "hard").agg(file=c_file.flatten().sort()))

b = b.explode("file").with_columns(selected=True)

b = add_columns(
hard_unsolved.select("file", "hard", "track"), b, on=["track", "hard", "file"], defaults={"selected": False}
).with_columns(unsolved=pl.col("hard").not_())

return add_columns(
benchmarks, b, on=["track", "file"], defaults={"hard": False, "unsolved": False, "selected": False}
)


return b.explode("file").join(benchmarks, on="file", how="inner")
def helper_aws_selection(config: defs.Config) -> pl.LazyFrame:
    """
    Build the AWS-track benchmark selection from the cached data files.

    Returned columns: file (unique id), logic, family, name, status, asserts
    number, trivial, run (in previous year), new (benchmarks), selected.

    NOTE(review): this column list looks copied from another helper —
    `aws_selection` appears to add hard/unsolved/selected columns; verify
    against its actual output schema.
    """
    # Read the cached IPC files eagerly, then hand them to the selection
    # as lazy frames so the whole pipeline stays lazy.
    benchmarks = pl.read_ipc(config.cached_non_incremental_benchmarks)
    results = pl.read_ipc(config.cached_previous_results)
    return aws_selection(benchmarks.lazy(), results.lazy(), config)
64 changes: 64 additions & 0 deletions smtcomp/utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
from dataclasses import dataclass
import polars as pl
from rich.table import Table
import rich
from typing import *

U = TypeVar("U")
Expand All @@ -21,3 +25,63 @@ def g(x: Tuple[W1, W2, U]) -> Tuple[W1, W2, V] | None:
return (x[0], x[1], y)

return g


def add_columns(dst: pl.LazyFrame, from_: pl.LazyFrame, on: list[str], defaults: Dict[str, Any]) -> pl.LazyFrame:
    """Left-join *from_* onto *dst* by the *on* keys, filling the newly
    added columns with *defaults* wherever no match was found.

    The non-key columns of *from_* must be exactly the keys of *defaults*
    and must not already exist in *dst*.
    """
    keys = set(on)
    existing = set(dst.columns)
    incoming = set(from_.columns)
    added = incoming - keys
    # Sanity checks: keys present on both sides, no column name clashes,
    # and a default provided for every column being added.
    assert keys <= existing
    assert keys <= incoming
    assert existing.isdisjoint(added)
    assert added == defaults.keys()
    joined = dst.join(from_, how="left", on=on, coalesce=True)
    return joined.with_columns(*[pl.col(name).fill_null(value=dflt) for name, dflt in defaults.items()])


def intersect(dst: pl.LazyFrame, from_: pl.LazyFrame, on: list[str]) -> pl.LazyFrame:
    """
    All the possible matches in the two given tables (inner join on *on*).
    """
    keys = set(on)
    left_cols = set(dst.columns)
    right_cols = set(from_.columns)
    # The join keys must exist on both sides, and the right-hand side must
    # not bring in any column name already present on the left.
    assert keys <= left_cols
    assert keys <= right_cols
    assert left_cols.isdisjoint(right_cols - keys)
    return dst.join(from_, how="inner", on=on, coalesce=True)


def filter_with(a: pl.LazyFrame, b: pl.LazyFrame, on: list[str]) -> pl.LazyFrame:
    """Keep only the rows of *a* that have a match in *b* on the *on* keys.

    Semi-join: no columns from *b* are added to the result.
    """
    matched = a.join(b, how="semi", on=on)
    return matched


@dataclass
class Col:
    """Description of one displayed column for rich_print_pl."""

    # Name of the DataFrame column the values are read from.
    name: str
    # Header text shown at the top of the rendered table column.
    header: str
    # Footer text; None means the footer shows the sum of the column.
    footer: str | None = None
    # Cell justification, passed through to rich's Table.add_column.
    justify: Literal["default", "left", "center", "right", "full"] = "right"
    # Rich style string (e.g. "cyan", "bold red"); None for default.
    style: str | None = None
    no_wrap: bool = False
    # Renders one cell value to text; defaults to plain str().
    custom: Callable[[int], str] = str


def rich_print_pl(title: str, df: pl.DataFrame, *cols: Col) -> None:
    """Render *df* as a rich table titled *title*.

    Each entry of *cols* maps one DataFrame column to a table column. A
    footer row is appended: a column's explicit ``footer`` text when given,
    otherwise the sum of its values (so non-summable columns must provide
    a footer).

    :param title: title displayed above the table.
    :param df: the (integer-valued, where summed) data to display.
    :param cols: one Col per displayed column, in display order.
    """
    # BUG FIX: the title argument was previously ignored — every table was
    # rendered with a hard-coded "single query" title.
    table = Table(title=title)

    for col in cols:
        table.add_column(col.header, justify=col.justify, style=col.style, no_wrap=col.no_wrap)

    for row in df.to_dicts():
        table.add_row(*(col.custom(row[col.name]) for col in cols))

    # Footer: explicit text when provided, otherwise the column total.
    table.add_section()
    table.add_row(*(col.footer if col.footer is not None else str(df[col.name].sum()) for col in cols))

    rich.print(table)

0 comments on commit 37555cf

Please sign in to comment.