From c5983f91be18a93b2c88f314c260607deea7c026 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fran=C3=A7ois=20Bobot?=
Date: Mon, 8 Jul 2024 19:01:20 +0200
Subject: [PATCH 1/9] Generate results page for division

---
 .gitignore                                 |   1 +
 Makefile                                   |  12 +-
 smtcomp/defs.py                            |   4 +
 smtcomp/generate_website_page.py           | 176 ++++++++++++++++++
 smtcomp/main.py                            |  35 +++-
 smtcomp/results.py                         |  28 ++-
 smtcomp/scoring.py                         |  22 ++-
 .../smtcomp/layouts/_default/result.html   |  10 +-
 8 files changed, 269 insertions(+), 19 deletions(-)
 create mode 100644 smtcomp/generate_website_page.py

diff --git a/.gitignore b/.gitignore
index 6a24df05..01dd2984 100644
--- a/.gitignore
+++ b/.gitignore
@@ -172,3 +172,4 @@ schema_doc.min.js
 *.feather
 tmp/
 /web/public/
+/web/content/results/*.md
\ No newline at end of file
diff --git a/Makefile b/Makefile
index d5e0ef5e..9b62ef0f 100644
--- a/Makefile
+++ b/Makefile
@@ -24,7 +24,7 @@ test: generation ## Test the code with pytest
 	@echo "🚀 Testing code: Running pytest"
 	@poetry run pytest
 
-generation: submission-generation participant-data ## Files generation for the website
+generation: submission-generation participant-data results-generation ## Files generation for the website
 
 .PHONY: build
 build: clean-build ## Build wheel file using poetry
@@ -45,7 +45,7 @@ GENERATED_SCHEMA_FILE=web/content/solver_submission/schema.json
 GENERATED_SCHEMA_HTML=web/content/solver_submission/schema.html
 PARTICIPANT_DATA_FILE=web/data/participants.json
 
-.PHONY: submission-doc submission-generation participant-data
+.PHONY: submission-doc submission-generation participant-data results-generation cache
 submission-generation:
 	@echo "🚀 Generating schema to $(GENERATED_SCHEMA_FILE)"
 	@poetry run smtcomp dump-json-schema $(GENERATED_SCHEMA_FILE)
@@ -59,5 +59,13 @@ participant-data:
 	@echo "🚀 Generating participant data to $(PARTICIPANT_DATA_FILE)"
 	@poetry run smtcomp show-json submissions/*.json $(PARTICIPANT_DATA_FILE)
 
+results-generation:
+	@echo "🚀 Generating results to web/content/results"
+	@poetry run smtcomp export-results-pages data
+
+cache:
+	@echo "🚀 Generating cache"
+	@poetry run smtcomp create-cache data
+
 hugo-server:
 	(cd web; hugo server)
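
The new results-generation and cache targets only wrap single CLI calls. For local experimentation the same entry point can be driven from Python; the following is a sketch only, assuming the `app` typer object defined in smtcomp/main.py (the commands later in this patch are registered on it) and a data/ checkout in the working directory:

    # Sketch: invoke the same CLI as `make results-generation`, but programmatically.
    from typer.testing import CliRunner

    import smtcomp.main

    runner = CliRunner()
    # Equivalent to: poetry run smtcomp export-results-pages data
    result = runner.invoke(smtcomp.main.app, ["export-results-pages", "data"])
    print(result.exit_code, result.stdout)
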
diff --git a/smtcomp/defs.py b/smtcomp/defs.py
index 8d67f2c7..475a1d74 100644
--- a/smtcomp/defs.py
+++ b/smtcomp/defs.py
@@ -1478,6 +1478,10 @@ def submissions(self) -> list[Submission]:
             Submission.model_validate_json(Path(file).read_text()) for file in self.data.glob("../submissions/*.json")
         ]
 
+    @functools.cached_property
+    def web_results(self) -> Path:
+        return self.data / ".." / "web" / "content" / "results"
+
     @functools.cached_property
     def seed(self) -> int:
         unknown_seed = 0
diff --git a/smtcomp/generate_website_page.py b/smtcomp/generate_website_page.py
new file mode 100644
index 00000000..02fff0d4
--- /dev/null
+++ b/smtcomp/generate_website_page.py
@@ -0,0 +1,176 @@
+import functools, itertools
+from typing import Set, Dict, Optional, cast, List, DefaultDict
+from pathlib import Path, PurePath
+from smtcomp import defs
+from rich import progress
+from rich import print
+from pydantic import BaseModel
+import polars as pl
+import smtcomp.scoring
+from smtcomp.utils import *
+import smtcomp.results
+
+# Warning: Hugo lowercases all dict keys
+
+
+class PodiumStep(BaseModel):
+    name: str
+    competing: str  # yes or no
+    errorScore: int
+    correctScore: int
+    CPUScore: float
+    WallScore: float
+    solved: int
+    solved_sat: int
+    solved_unsat: int
+    unsolved: int
+    abstained: int
+    timeout: int
+    memout: int
+
+
+class PodiumDivision(BaseModel):
+    resultdate: str
+    year: int
+    divisions: str  # divisions_2023
+    participants: str  # participants_2023
+    disagreements: str  # disagreements_2023
+    division: str  # Arith
+    track: str  # track_single_query
+    n_benchmarks: int
+    time_limit: int
+    mem_limit: int
+    logics: dict[str, int]
+    winner_seq: str
+    winner_par: str
+    winner_sat: str
+    winner_unsat: str
+    winner_24s: str
+
+    sequential: list[PodiumStep]
+    parallel: list[PodiumStep]
+    sat: list[PodiumStep]
+    unsat: list[PodiumStep]
+    twentyfour: list[PodiumStep]
+
+    layout: str = "result"
+
+
+def podium_steps(podium: List[dict[str, Any]]) -> List[PodiumStep]:
+    return [
+        PodiumStep(
+            name=s["solver"],
+            competing="yes",  # TODO
+            errorScore=-s["error_score"],
+            correctScore=s["correctly_solved_score"],
+            CPUScore=s["cpu_time_score"],
+            WallScore=s["wallclock_time_score"],
+            solved=s["solved"],
+            solved_sat=s["solved_sat"],
+            solved_unsat=s["solved_unsat"],
+            unsolved=s["unsolved"],
+            abstained=s["abstained"],
+            timeout=s["timeout"],
+            memout=s["memout"],
+        )
+        for s in podium
+    ]
+
+
+def podium_division(config: defs.Config, d: dict[str, Any]) -> PodiumDivision:
+    return PodiumDivision(
+        resultdate="2024-07-08",
+        year=config.current_year,
+        divisions=f"divisions_{config.current_year}",
+        participants=f"participants_{config.current_year}",
+        disagreements=f"disagreements_{config.current_year}",
+        division=defs.Division.name_of_int(d["division"]),
+        track="track_single_query",
+        n_benchmarks=d["n"],
+        time_limit=config.timelimit_s,
+        mem_limit=config.memlimit_M,
+        logics=dict((defs.Logic.name_of_int(d2["logic"]), d2["n"]) for d2 in d["logics"]),
+        winner_seq=d[smtcomp.scoring.Kind.seq.name][0]["solver"],  # TODO select only participating
+        winner_par=d[smtcomp.scoring.Kind.par.name][0]["solver"],
+        winner_sat=d[smtcomp.scoring.Kind.sat.name][0]["solver"],
+        winner_unsat=d[smtcomp.scoring.Kind.unsat.name][0]["solver"],
+        winner_24s=d[smtcomp.scoring.Kind.twentyfour.name][0]["solver"],
+        sequential=podium_steps(d[smtcomp.scoring.Kind.seq.name]),
+        parallel=podium_steps(d[smtcomp.scoring.Kind.par.name]),
+        sat=podium_steps(d[smtcomp.scoring.Kind.sat.name]),
+        unsat=podium_steps(d[smtcomp.scoring.Kind.unsat.name]),
+        twentyfour=podium_steps(d[smtcomp.scoring.Kind.twentyfour.name]),
+    )
+
+
+def sq_generate_divisions(config: defs.Config, results: pl.LazyFrame) -> dict[defs.Division, PodiumDivision]:
+    """
+    Expects results that still contain the disagreements column.
+    """
+    assert "disagreements" in results.columns
+    results = results.filter(track=int(defs.Track.SingleQuery)).drop("track")
+
+    def info_for_podium_step(kind: smtcomp.scoring.Kind, config: defs.Config, results: pl.LazyFrame) -> pl.LazyFrame:
+        results = smtcomp.scoring.filter_for(kind, config, results)
+        return (
+            results.with_columns(total=pl.n_unique("file").over("division"))
+            .group_by("division", "solver")
+            .agg(
+                pl.sum("error_score"),
+                pl.sum("correctly_solved_score"),
+                pl.sum("cpu_time_score"),
+                pl.sum("wallclock_time_score"),
+                solved=(smtcomp.scoring.known_answer).sum(),
+                solved_sat=(smtcomp.scoring.sat_answer).sum(),
+                solved_unsat=(smtcomp.scoring.unsat_answer).sum(),
+                unsolved=(smtcomp.scoring.unknown_answer).sum(),
+                timeout=(smtcomp.scoring.timeout_answer).sum(),
+                memout=(smtcomp.scoring.memout_answer).sum(),
+                abstained=pl.col("total").first() - pl.len(),
+            )
+            .sort(["division"] + smtcomp.scoring.scores, descending=True)
+            .group_by("division", maintain_order=True)
+            .agg(
+                pl.struct(
+                    "solver",
+                    "error_score",
+                    "correctly_solved_score",
+                    "cpu_time_score",
+                    "wallclock_time_score",
+                    "solved",
+                    "solved_sat",
+                    "solved_unsat",
+                    "unsolved",
+                    "timeout",
+                    "memout",
+                    "abstained",
+                ).alias(kind.name)
+            )
+        )
+
+    lf_info = (
+        results.group_by("division", "logic")
+        .agg(n=pl.n_unique("file"), disagreements=(pl.col("disagreements") == True).sum())
+        .group_by("division")
+        .agg(pl.sum("n"), pl.sum("disagreements"), logics=pl.struct("logic", "n"))
+    )
+
+    results = results.filter(disagreements=False).drop("disagreements")
+
+    l = [lf_info] + [info_for_podium_step(kind, config, results) for kind in smtcomp.scoring.Kind]
+
+    r = functools.reduce(lambda x, y: x.join(y, validate="1:1", on=["division"]), l)
+
+    df = r.collect()
+
+    return dict((defs.Division.of_int(d["division"]), podium_division(config, d)) for d in df.to_dicts())
+
+
+def export_results(config: defs.Config, results: pl.LazyFrame) -> None:
+
+    datas = sq_generate_divisions(config, results)
+
+    dst = config.web_results
+
+    for div, data in datas.items():
+        (dst / f"{str(div).lower()}-single-query.md").write_text(data.model_dump_json(indent=1))
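
The heart of sq_generate_divisions is the shape it hands to podium_division: one row per division, with one list-of-structs column per scoring kind, built by a group_by/struct aggregation and then stitched together with functools.reduce over joins on "division". A toy, self-contained sketch of that pattern (invented values, a single score column, only two "kinds") may help readers new to polars:

    # Toy sketch of the pattern above: rank solvers per division, then collapse each
    # division's ranking into one list-of-structs column, one column per scoring kind.
    import functools

    import polars as pl

    results = pl.LazyFrame(
        {
            "division": ["Arith", "Arith", "Arith", "Bitvec"],
            "solver": ["A", "B", "A", "B"],
            "correctly_solved_score": [10, 7, 5, 3],
        }
    )

    def ranking(lf: pl.LazyFrame, name: str) -> pl.LazyFrame:
        return (
            lf.group_by("division", "solver")
            .agg(pl.sum("correctly_solved_score"))
            .sort(["division", "correctly_solved_score"], descending=True)
            .group_by("division", maintain_order=True)
            .agg(pl.struct("solver", "correctly_solved_score").alias(name))
        )

    # One column per kind, joined back on "division" as in the functools.reduce(...join...) above.
    parts = [ranking(results, name) for name in ("seq", "par")]
    table = functools.reduce(lambda x, y: x.join(y, validate="1:1", on=["division"]), parts)
    print(table.collect())
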
diff --git a/smtcomp/main.py b/smtcomp/main.py
index 1b6e7c28..d9df8d4f 100644
--- a/smtcomp/main.py
+++ b/smtcomp/main.py
@@ -30,6 +30,7 @@
 import smtcomp.generate_benchmarks
 import smtcomp.list_benchmarks
 import smtcomp.selection
+import smtcomp.generate_website_page
 from smtcomp.unpack import write_cin, read_cin
 import smtcomp.scramble_benchmarks
 from rich.console import Console
@@ -244,7 +245,10 @@ def store_results(
 
 @app.command(rich_help_panel=benchexec_panel)
 def stats_of_benchexec_results(
-    data: Path, results: Path, only_started: bool = False, track: defs.Track = defs.Track.SingleQuery
+    data: Path,
+    results: List[Path] = typer.Argument(None),
+    only_started: bool = False,
+    track: defs.Track = defs.Track.SingleQuery,
 ) -> None:
     """
     Load benchexec results and print some results about them
@@ -323,7 +327,9 @@ def print_missing(d: Dict[str, Any]) -> str:
 
 @app.command(rich_help_panel=benchexec_panel)
 def find_disagreement_results(
-    data: Path, results: Path, use_previous_year_results: bool = defs.Config.use_previous_results_for_status
+    data: Path,
+    results: List[Path] = typer.Argument(None),
+    use_previous_year_results: bool = defs.Config.use_previous_results_for_status,
 ) -> None:
     """
     Load benchexec results and print some results about them
@@ -380,7 +386,9 @@ def print_answers(d: List[Dict[str, Any]]) -> str:
 
 @app.command(rich_help_panel=scoring_panel)
 def scoring_removed_benchmarks(
-    data: Path, src: Path, use_previous_year_results: bool = defs.Config.use_previous_results_for_status
+    data: Path,
+    src: List[Path] = typer.Argument(None),
+    use_previous_year_results: bool = defs.Config.use_previous_results_for_status,
 ) -> None:
     config = defs.Config(data)
     config.use_previous_results_for_status = use_previous_year_results
@@ -406,7 +414,14 @@ def scoring_removed_benchmarks(
 
 
 @app.command(rich_help_panel=scoring_panel)
-def show_scores(data: Path, src: Path, kind: smtcomp.scoring.Kind = typer.Argument(default="par")) -> None:
+def show_scores(
+    data: Path,
+    src: List[Path] = typer.Argument(None),
+    kind: smtcomp.scoring.Kind = typer.Argument(default="par"),
+) -> None:
+    """
+    If src is empty, use the results cached in the data directory
+    """
     config = defs.Config(data)
 
     results = smtcomp.results.helper_get_results(config, src)
@@ -978,3 +993,15 @@ def check_model_locally(
     (dst / basename).unlink(missing_ok=True)
     (dst / basename).symlink_to(smt2_file)
     (dst / basename_model).write_text(result.model)
+
+
+@app.command()
+def export_results_pages(data: Path, results: list[Path] = typer.Argument(None)) -> None:
+    """
+    Generate the results pages in the web directory
+    """
+    config = defs.Config(data)
+    lf = smtcomp.results.helper_get_results(config, results)
+    lf = smtcomp.scoring.add_disagreements_info(lf)
+    lf = smtcomp.scoring.benchmark_scoring(lf)
+    smtcomp.generate_website_page.export_results(config, lf)
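
export_results_pages is deliberately a thin composition, so the same pipeline can be replayed step by step from a Python shell when the intermediate LazyFrame is of interest. A sketch, assuming a data/ checkout whose results cache has already been built (the cache Makefile target above):

    # Sketch: what `smtcomp export-results-pages data` does, spelled out.
    from pathlib import Path

    from smtcomp import defs
    import smtcomp.generate_website_page
    import smtcomp.results
    import smtcomp.scoring

    config = defs.Config(Path("data"))
    lf = smtcomp.results.helper_get_results(config, [])  # empty list: fall back to the cached current results
    lf = smtcomp.scoring.add_disagreements_info(lf)       # adds the "disagreements" and "sound_status" columns
    lf = smtcomp.scoring.benchmark_scoring(lf)            # adds the error/correctness/CPU/wallclock score columns
    smtcomp.generate_website_page.export_results(config, lf)  # writes web/content/results/<division>-single-query.md
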
diff --git a/smtcomp/results.py b/smtcomp/results.py
index 3d582724..90526872 100644
--- a/smtcomp/results.py
+++ b/smtcomp/results.py
@@ -254,15 +254,36 @@ def get_output(self: "LogFile", r: RunId, basename: str) -> str:
         return s[index:]
 
 
-def helper_get_results(config: defs.Config, results: Path, track: defs.Track = defs.Track.SingleQuery) -> pl.LazyFrame:
+def helper_get_results(
+    config: defs.Config, results: List[Path], track: defs.Track = defs.Track.SingleQuery
+) -> pl.LazyFrame:
     """
+    If results is empty, use the results cached in the data directory.
+
     Return on all the selected benchmarks for each solver that should run it
-    "track", "file", "scrambled_id", "logic", "division", "status", "solver", "answer", "cputime_s", "memory_B", "walltime_s".
+    "track", "file", "logic", "division", "status", "solver", "answer", "cputime_s", "memory_B", "walltime_s".
     -1 is used when no answer is available.
     """
-    lf = pl.read_ipc(results / "parsed.feather").lazy().filter(track=int(track))
+    if len(results) == 0:
+        lf = (
+            pl.read_ipc(config.cached_current_results[track])
+            .lazy()
+            .with_columns(track=int(track))
+            .rename(
+                {
+                    "result": "answer",
+                    "memory_usage": "memory_B",
+                    "cpu_time": "cputime_s",
+                    "wallclock_time": "walltime_s",
+                }
+            )
+            .drop("year")
+        )
+    else:
+        lf = pl.concat(pl.read_ipc(p / "parsed.feather").lazy() for p in results)
+    lf = lf.filter(track=int(track)).drop("scrambled_id")
 
     selected = smtcomp.selection.helper(config, track).filter(selected=True).with_columns(track=int(track))
     selected = intersect(selected, smtcomp.selection.solver_competing_logics(config), on=["logic", "track"])
@@ -273,7 +294,6 @@ def helper_get_results(config: defs.Config, results: Path, track: defs.Track = defs.Track.SingleQuery) -> pl.LazyFrame:
         on=["file", "solver", "track"],
         defaults={
             "answer": -1,
-            "scramble_id": 1,
            "cputime_s": -1,
             "memory_B": -1,
             "walltime_s": -1,
diff --git a/smtcomp/scoring.py b/smtcomp/scoring.py
index 240e33fd..9c665663 100644
--- a/smtcomp/scoring.py
+++ b/smtcomp/scoring.py
@@ -14,6 +14,11 @@
 sat_status = c_status == int(defs.Status.Sat)
 unsat_status = c_status == int(defs.Status.Unsat)
 
+c_sound_status = pl.col("sound_status")
+sat_sound_status = c_sound_status == int(defs.Status.Sat)
+unsat_sound_status = c_sound_status == int(defs.Status.Unsat)
+
+
 c_walltime_s = pl.col("walltime_s")
 c_cputime_s = pl.col("cputime_s")
 twentyfour = c_walltime_s <= 24
@@ -49,15 +54,24 @@ def add_disagreements_info(results: pl.LazyFrame) -> pl.LazyFrame:
     disagreements = (pl.col("sound_solvers") & sat_answer).any().over("track", "file") & (
         pl.col("sound_solvers") & unsat_answer
     ).any().over("track", "file")
-    return results.with_columns(disagreements=disagreements).drop("sound_solvers")
+    sound_status = (
+        pl.when((pl.col("sound_solvers") & sat_answer).any().over("track", "file"))
+        .then(int(defs.Answer.Sat))
+        .when((pl.col("sound_solvers") & unsat_answer).any().over("track", "file"))
+        .then(int(defs.Answer.Unsat))
+        .otherwise(c_status)
+    )
+
+    return results.with_columns(disagreements=disagreements, sound_status=sound_status).drop("sound_solvers")
 
 
 def benchmark_scoring(results: pl.LazyFrame) -> pl.LazyFrame:
     """
+    Requires the columns added by add_disagreements_info
     Add "error_score", "correctly_solved_score", "wallclock_time_score","cpu_time_score"
     """
-    error_score = pl.when((sat_status & unsat_answer) | (unsat_status & sat_answer)).then(-1).otherwise(0)
+    error_score = pl.when((sat_sound_status & unsat_answer) | (unsat_sound_status & sat_answer)).then(-1).otherwise(0)
     """
     Use -1 instead of 1 for error so that we can use lexicographic comparison
     """
@@ -111,8 +125,8 @@ def filter_for(kind: Kind, config: defs.Config, results: pl.LazyFrame) -> pl.LazyFrame:
         case Kind.seq:
             return virtual_sequential_score(config, results)
         case Kind.sat:
-            return results.filter(sat_answer)
+            return results.filter(sat_sound_status)
         case Kind.unsat:
-            return results.filter(unsat_answer)
+            return results.filter(unsat_sound_status)
         case Kind.twentyfour:
             return results.filter(twentyfour)
diff --git a/web/themes/smtcomp/layouts/_default/result.html b/web/themes/smtcomp/layouts/_default/result.html
index 165fcb09..67e2c0a2 100644
--- a/web/themes/smtcomp/layouts/_default/result.html
+++ b/web/themes/smtcomp/layouts/_default/result.html
@@ -4,7 +4,7 @@

{{ .Title }}

{{ $dateMachine := .Date | time.Format "2006-01-02T15:04:05-07:00" }} {{ $dateHuman := .Date | time.Format ":date_long" }}
-{{ $trueDivision := and (isset .Params "logics") (index .Params.logics 0 | len | ne 0) }}
+{{ $trueDivision := and (isset .Params "logics") (.Params.logics | len | ne 0) }}

{{ .Params.division }} ({{ .Params.track }})

@@ -23,9 +23,9 @@

{{ .Params.division }} ({{ .Params.track }})

{{ if $trueDivision }}
-Logics:
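
For context on the template change above: the pages written by export_results contain nothing but the PodiumDivision model serialized as JSON front matter, which Hugo exposes to result.html as .Params (with keys lowercased, hence the warning comment in generate_website_page.py). A sketch of producing one such page by hand, with invented values:

    # Sketch: a generated results page is just a PodiumDivision dumped as JSON front matter.
    from smtcomp.generate_website_page import PodiumDivision

    page = PodiumDivision(
        resultdate="2024-07-08",
        year=2024,  # invented values throughout, for illustration only
        divisions="divisions_2024",
        participants="participants_2024",
        disagreements="disagreements_2024",
        division="Arith",
        track="track_single_query",
        n_benchmarks=1234,
        time_limit=1200,
        mem_limit=61440,
        logics={"LIA": 300, "LRA": 934},
        winner_seq="solverA",
        winner_par="solverA",
        winner_sat="solverA",
        winner_unsat="solverB",
        winner_24s="solverA",
        sequential=[],
        parallel=[],
        sat=[],
        unsat=[],
        twentyfour=[],
    )
    # In the template, e.g. PodiumStep.CPUScore is read back as .cpuscore because of the lowercasing.
    print(page.model_dump_json(indent=1))
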