Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Judge: Implement opendata-v2 #269

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions fixtures/sum_kasiopea/judge_v2.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#include <assert.h>
#include <iostream>
#include <stdio.h>
#include <stdlib.h>

using namespace std;

// Reports the judging outcome and terminates the process.
//
// Writes `msg` followed by a newline to standard output, then exits with
// status 42 when `correct` is true and with status 43 otherwise.
// This function never returns.
void verdict(bool correct, string msg) {
    int exit_status = 43;
    if (correct) {
        exit_status = 42;
    }

    cout << msg << endl;
    exit(exit_status);
}

// Reports an internal judge problem (bad environment or malformed test data)
// on standard error and aborts, so the process ends with neither of the
// verdict exit codes (42 / 43).
static void judge_error(const char *what) {
    fprintf(stderr, "judge error: %s\n", what);
    abort();
}

// Judge entry point.
//
// Inputs:
//   $TEST_INPUT   path of the test input: a count `t` followed by `t` pairs
//                 of integers.
//   $TEST_OUTPUT  path of the reference output: `t` integers.
//   stdin         the contestant's output: `t` integers.
//
// For every test case the reference answer must equal the sum of the pair;
// otherwise the test data itself is broken and the judge aborts.
// The contestant's answers are compared with the reference answers one by
// one; the first mismatch, or a missing / non-numeric answer, ends the run
// through verdict(false, ...). If all answers match, verdict(true, ...) is
// called.
//
// Every read is checked explicitly instead of through assert(), so the checks
// are still active in NDEBUG builds and an unreadable contestant answer is
// never compared as an uninitialized value.
int main(int argc, char **argv) {
    (void)argc;
    (void)argv;

    const char *input_path = getenv("TEST_INPUT");
    const char *correct_path = getenv("TEST_OUTPUT");
    if (input_path == NULL || correct_path == NULL) {
        judge_error("TEST_INPUT and TEST_OUTPUT must both be set");
    }

    FILE *fin = fopen(input_path, "r");
    FILE *fcorrect = fopen(correct_path, "r");
    if (fin == NULL || fcorrect == NULL) {
        judge_error("cannot open the test input or the reference output");
    }

    int t;
    if (fscanf(fin, "%d", &t) != 1) {
        judge_error("cannot read the number of test cases");
    }

    for (int i = 0; i < t; i++) {
        long long a, b, c, contestant;

        if (fscanf(fin, "%lld%lld", &a, &b) != 2) {
            judge_error("test input is truncated or malformed");
        }
        if (fscanf(fcorrect, "%lld", &c) != 1) {
            judge_error("reference output is truncated or malformed");
        }
        if (a + b != c) {
            judge_error("reference output does not match the test input");
        }

        // A missing or non-numeric answer is a wrong answer, not a judge
        // failure: the contestant controls this stream.
        if (scanf("%lld", &contestant) != 1 || c != contestant) {
            verdict(false, "No, that wasn't the correct answer.");
        }
    }

    verdict(true, "Yes, that was the correct answer");
}
47 changes: 47 additions & 0 deletions fixtures/sum_kasiopea/judge_v2_partial.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#include <array>
#include <assert.h>
#include <iostream>
#include <stdio.h>
#include <stdlib.h>

using namespace std;

// Reports the judging outcome together with the awarded points and
// terminates the process.
//
// Standard output receives two lines: `msg`, then "POINTS=<points>".
// The exit status is 42 when `points` is positive and 43 otherwise.
// This function never returns.
void verdict(float points, string msg) {
    cout << msg << endl
         << "POINTS=" << points << endl;

    if (points > 0) {
        exit(42);
    }
    exit(43);
}

// Points reported through verdict() when every answer is correct, indexed by
// the subtask number that main() parses from argv[1].
std::array<double, 3> max_points = {1, 4, 6};

// Reports an internal judge problem (bad invocation, bad environment or
// malformed test data) on standard error and aborts, so the process ends with
// neither of the verdict exit codes (42 / 43).
static void judge_error(const char *what) {
    fprintf(stderr, "judge error: %s\n", what);
    abort();
}

// Judge entry point with per-subtask points.
//
// Inputs:
//   argv[1]       subtask number, used as an index into `max_points`.
//   argv[2]       required to be present (argc must be 3); not read here.
//   $TEST_INPUT   path of the test input: a count `t` followed by `t` pairs
//                 of integers.
//   $TEST_OUTPUT  path of the reference output: `t` integers.
//   stdin         the contestant's output: `t` integers.
//
// For every test case the reference answer must equal the sum of the pair;
// otherwise the test data itself is broken and the judge aborts.
// The first mismatching, missing or non-numeric contestant answer ends the
// run through verdict(0.0, ...). If all answers match, the subtask's entry of
// `max_points` is awarded.
//
// All checks are explicit rather than assert()-based: the original parsed the
// subtask inside assert(), which would skip the parse entirely in an NDEBUG
// build. The subtask index is validated up front so that an invalid
// invocation is reported clearly instead of as an uncaught std::out_of_range.
int main(int argc, char **argv) {
    if (argc != 3) {
        judge_error("expected exactly two command-line arguments");
    }

    int subtask;
    if (sscanf(argv[1], "%d", &subtask) != 1) {
        judge_error("the subtask argument is not an integer");
    }
    if (subtask < 0 || subtask >= static_cast<int>(max_points.size())) {
        judge_error("the subtask argument is out of range");
    }

    const char *input_path = getenv("TEST_INPUT");
    const char *correct_path = getenv("TEST_OUTPUT");
    if (input_path == NULL || correct_path == NULL) {
        judge_error("TEST_INPUT and TEST_OUTPUT must both be set");
    }

    FILE *fin = fopen(input_path, "r");
    FILE *fcorrect = fopen(correct_path, "r");
    if (fin == NULL || fcorrect == NULL) {
        judge_error("cannot open the test input or the reference output");
    }

    int t;
    if (fscanf(fin, "%d", &t) != 1) {
        judge_error("cannot read the number of test cases");
    }

    for (int i = 0; i < t; i++) {
        long long a, b, c, contestant;

        if (fscanf(fin, "%lld%lld", &a, &b) != 2) {
            judge_error("test input is truncated or malformed");
        }
        if (fscanf(fcorrect, "%lld", &c) != 1) {
            judge_error("reference output is truncated or malformed");
        }
        if (a + b != c) {
            judge_error("reference output does not match the test input");
        }

        // A missing or non-numeric answer is a wrong answer, not a judge
        // failure: the contestant controls this stream.
        if (scanf("%lld", &contestant) != 1 || c != contestant) {
            verdict(0.0, "No, that wasn't the correct answer.");
        }
    }

    verdict(max_points.at(subtask), "Yes, that was the correct answer");
}
1 change: 1 addition & 0 deletions pisek/config/config_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ class JudgeType(StrEnum):
cms_batch = "cms-batch"
cms_communication = "cms-communication"
opendata_v1 = "opendata-v1"
opendata_v2 = "opendata-v2"


class ShuffleMode(StrEnum):
Expand Down
7 changes: 6 additions & 1 deletion pisek/config/task_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,12 @@ def validate_model(self):
)

JUDGE_TYPES = {
TaskType.batch: [None, JudgeType.opendata_v1, JudgeType.cms_batch],
TaskType.batch: [
None,
JudgeType.opendata_v1,
JudgeType.opendata_v2,
JudgeType.cms_batch,
],
TaskType.communication: [JudgeType.cms_communication],
}

Expand Down
133 changes: 112 additions & 21 deletions pisek/task_jobs/judge.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
# along with this program. If not, see <https://www.gnu.org/licenses/>.

from abc import abstractmethod
from decimal import Decimal
from decimal import Decimal, InvalidOperation
from functools import cache
import os
import random
Expand Down Expand Up @@ -119,7 +119,7 @@ def _compute_result(self) -> dict[str, Any]:
result["judge_outs"] = set()
for job in self.jobs:
if isinstance(job, RunJudge):
if isinstance(job, RunCMSJudge):
if isinstance(job, RunCMSJudge) or isinstance(job, RunOpendataV2Judge):
result["judge_outs"].add(job.points_file)
result["judge_outs"].add(job.judge_log_file)

Expand Down Expand Up @@ -545,14 +545,8 @@ def _get_flags(self) -> list[str]:
class RunOpendataJudge(RunBatchJudge):
"""Judges solution output using judge with the opendata interface. (Abstract class)"""

@property
@abstractmethod
def return_code_ok(self) -> int:
pass

@property
@abstractmethod
def return_code_wa(self) -> int:
def get_result(self, result: RunResult) -> SolutionResult:
pass

def __init__(
Expand All @@ -579,6 +573,7 @@ def __init__(
)
self.judge = judge
self.seed = seed
self.points_file = TaskPath.points_file(self._env, self.judge_log_file.name)

def _judge(self) -> SolutionResult:
envs = {}
Expand All @@ -597,14 +592,23 @@ def _judge(self) -> SolutionResult:
f"{self.seed:x}" if self.seed is not None else OPENDATA_NO_SEED,
],
stdin=self.output,
stdout=self.points_file,
stderr=self.judge_log_file,
env=envs,
)
if result.returncode == self.return_code_ok:

return self.get_result(result)


class RunOpendataV1Judge(RunOpendataJudge):
"""Judges solution output using judge with the opendataV1 interface."""

def get_result(self, result: RunResult) -> SolutionResult:
if result.returncode == 0:
return RelativeSolutionResult(
Verdict.ok, None, self._solution_run_res, result, Decimal(1)
)
elif result.returncode == self.return_code_wa:
elif result.returncode == 1:
return RelativeSolutionResult(
Verdict.wrong_answer, None, self._solution_run_res, result, Decimal(0)
)
Expand All @@ -614,16 +618,85 @@ def _judge(self) -> SolutionResult:
)


class RunOpendataV1Judge(RunOpendataJudge):
"""Judges solution output using judge with the opendataV1 interface."""
class RunOpendataV2Judge(RunOpendataJudge):
"""Judges solution output using judge with the opendataV2 interface."""

@property
def return_code_ok(self) -> int:
return 0
def get_result(self, result: RunResult) -> SolutionResult:
if result.returncode not in (42, 43):
raise self._create_program_failure(
f"Judge failed on output {self.output:n}:", result
)

@property
def return_code_wa(self) -> int:
return 1
assert isinstance(result.stdout_file, TaskPath)
with open(result.stdout_file.path) as file:
message = file.readline().removesuffix("\n") or None

if message is not None and len(message.encode()) > 255:
raise self._create_program_failure(
f"The judge message for output {self.output:n} was too long", result
)

points: Decimal | None = None

for line in file:
line = line.removesuffix("\n")

if "=" not in line:
raise self._create_program_failure(
f'The judge wrote a line that wasn\'t a key-value pair: "{line}"',
result,
)

key, value = line.split("=", maxsplit=1)

if key == "POINTS":
try:
points = Decimal(value)
except InvalidOperation:
raise self._create_program_failure(
"The value of the POINTS key was not a valid decimal",
result,
)

if points is None:
if result.returncode == 42:
verdict = Verdict.ok
points = Decimal(1)
else:
verdict = Verdict.wrong_answer
points = Decimal(0)

return RelativeSolutionResult(
verdict,
message,
self._solution_run_res,
result,
points,
)
else:
max_points = self._env.config.subtasks[self.subtask].points
are_points_fake = max_points == 0

if are_points_fake:
max_points = 1

if points <= 0:
verdict = Verdict.wrong_answer
elif points < max_points:
verdict = Verdict.partial_ok
else:
verdict = Verdict.ok

if are_points_fake:
points = Decimal(0)

return AbsoluteSolutionResult(
verdict,
message,
self._solution_run_res,
result,
points,
)


class RunCMSBatchJudge(RunCMSJudge, RunBatchJudge):
Expand Down Expand Up @@ -695,7 +768,12 @@ def judge_job(
expected_verdict: Optional[Verdict],
env: Env,
) -> Union[
RunDiffJudge, RunTokenJudge, RunShuffleJudge, RunOpendataV1Judge, RunCMSBatchJudge
RunDiffJudge,
RunTokenJudge,
RunShuffleJudge,
RunOpendataV1Judge,
RunOpendataV2Judge,
RunCMSBatchJudge,
]:
"""Returns JudgeJob according to contest type."""
if env.config.out_check == OutCheck.diff:
Expand Down Expand Up @@ -725,7 +803,7 @@ def judge_job(
correct_output,
expected_verdict,
)
else:
elif env.config.judge_type == JudgeType.opendata_v1:
return RunOpendataV1Judge(
env,
env.config.out_judge,
Expand All @@ -736,3 +814,16 @@ def judge_job(
seed,
expected_verdict,
)
elif env.config.judge_type == JudgeType.opendata_v2:
return RunOpendataV2Judge(
env,
env.config.out_judge,
subtask,
input_,
output,
correct_output,
seed,
expected_verdict,
)
else:
raise RuntimeError("the specified judge type was not found.")
2 changes: 1 addition & 1 deletion pisek/task_jobs/solution/solution_result.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def abs_sol_result_representer(dumper, sol_result: AbsoluteSolutionResult):


def abs_sol_result_constructor(loader, value) -> AbsoluteSolutionResult:
    """Rebuild an AbsoluteSolutionResult from a serialized sequence node.

    The sequence is unpacked in the order
    (verdict name, message, solution run result, judge run result, points).
    The verdict name is looked up in ``Verdict`` and the points are converted
    to ``Decimal``.
    """
    # Unpack exactly once, in the order above; a second unpack with points in
    # third position was redundant and was immediately overwritten.
    verdict, message, sol_rr, judge_rr, points = loader.construct_sequence(value)
    return AbsoluteSolutionResult(
        Verdict[verdict], message, sol_rr, judge_rr, Decimal(points)
    )
Expand Down