diff --git a/tested/dodona.py b/tested/dodona.py index 8c6fa120..a55bbcc7 100644 --- a/tested/dodona.py +++ b/tested/dodona.py @@ -11,7 +11,7 @@ """ import json from enum import StrEnum, auto, unique -from typing import IO, Literal, Optional, Union +from typing import IO, Literal, Union from attrs import define from cattrs.preconf.json import make_converter @@ -30,10 +30,10 @@ class Permission(StrEnum): class ExtendedMessage: description: str format: str = "text" - permission: Optional[Permission] = None + permission: Permission | None = None -Message = Union[ExtendedMessage, str] +Message = ExtendedMessage | str BadgeCount = int @@ -66,7 +66,7 @@ class StatusMessage: """Describes the outcome of the judgement.""" enum: Status - human: Optional[str] = None + human: str | None = None @define @@ -84,16 +84,16 @@ class StartTab: """ title: str - hidden: Optional[bool] = None + hidden: bool | None = None command: Literal["start-tab"] = "start-tab" - permission: Optional[Permission] = None + permission: Permission | None = None @define class StartContext: """Start on a new context.""" - description: Optional[Message] = None + description: Message | None = None command: Literal["start-context"] = "start-context" @@ -110,8 +110,8 @@ class StartTest: """Start on a new test with a given channel answer.""" expected: str - channel: Optional[str] = None - description: Optional[Message] = None + channel: str | None = None + description: Message | None = None command: Literal["start-test"] = "start-test" @@ -137,11 +137,11 @@ class AnnotateCode: row: Index text: str - externalUrl: Optional[str] = None - column: Optional[Index] = None - type: Optional[Severity] = None - rows: Optional[Index] = None - columns: Optional[Index] = None + externalUrl: str | None = None + column: Index | None = None + type: Severity | None = None + rows: Index | None = None + columns: Index | None = None command: Literal["annotate-code"] = "annotate-code" @@ -154,7 +154,7 @@ class CloseTest: generated: 
str status: StatusMessage - accepted: Optional[bool] = None + accepted: bool | None = None command: Literal["close-test"] = "close-test" @@ -165,7 +165,7 @@ class CloseTestcase: overwrite this. """ - accepted: Optional[bool] = None + accepted: bool | None = None command: Literal["close-testcase"] = "close-testcase" @@ -176,7 +176,7 @@ class CloseContext: overwrite this. """ - accepted: Optional[bool] = None + accepted: bool | None = None command: Literal["close-context"] = "close-context" @@ -184,7 +184,7 @@ class CloseContext: class CloseTab: """Close the current tab.""" - badge_count: Optional[BadgeCount] = None + badge_count: BadgeCount | None = None command: Literal["close-tab"] = "close-tab" @@ -196,8 +196,8 @@ class CloseJudgement: tests, but you can overwrite this. """ - accepted: Optional[bool] = None - status: Optional[StatusMessage] = None + accepted: bool | None = None + status: StatusMessage | None = None command: Literal["close-judgement"] = "close-judgement" diff --git a/tested/judge/collector.py b/tested/judge/collector.py index bdaccab4..dd03e402 100644 --- a/tested/judge/collector.py +++ b/tested/judge/collector.py @@ -36,19 +36,19 @@ class OutputManager: __slots__ = [ "finalized", "open_stack", - "closed", + "currently_open", "out", ] finalized: bool open_stack: list[str] - closed: Tuple[int, int, int] + currently_open: Tuple[int, int, int] out: IO def __init__(self, out: IO): self.finalized = False self.open_stack = [] - self.closed = (0, 0, 0) + self.currently_open = (0, 0, 0) self.out = out def add_all(self, commands: Iterable[Update]): @@ -73,18 +73,18 @@ def add(self, command: Update, index: Optional[int] = None): self.open_stack.append(type_) elif action == "close": previous = self.open_stack.pop() - assert previous == type_, "Closing a different update type" + assert previous == type_, f"Closing {type_}, but expected {previous}" # If the output should be counted or not. 
if index is not None: if isinstance(command, CloseTab): - self.closed = (index + 1, 0, 0) + self.currently_open = (index + 1, 0, 0) elif isinstance(command, CloseContext): - tabs, _, _ = self.closed - self.closed = (tabs, index + 1, 0) + tabs, _, _ = self.currently_open + self.currently_open = (tabs, index + 1, 0) elif isinstance(command, CloseTestcase): - tabs, contexts, _ = self.closed - self.closed = (tabs, contexts, index + 1) + tabs, contexts, _ = self.currently_open + self.currently_open = (tabs, contexts, index + 1) _logger.debug(f"After adding, stack is {self.open_stack}") report_update(self.out, command) diff --git a/tested/judge/compilation.py b/tested/judge/compilation.py index 9cc44816..aeceed10 100644 --- a/tested/judge/compilation.py +++ b/tested/judge/compilation.py @@ -75,7 +75,7 @@ def process_compile_results( # There was no compilation if results is None: - return [], Status.CORRECT, [] + return CompilationResult(status=Status.CORRECT) show_stdout = False _logger.debug("Received stderr from compiler: " + results.stderr) @@ -106,14 +106,16 @@ def process_compile_results( # Report errors if needed. 
if results.timeout: - return messages, Status.TIME_LIMIT_EXCEEDED, annotations + status = Status.TIME_LIMIT_EXCEEDED - if results.memory: + elif results.memory: - return messages, Status.MEMORY_LIMIT_EXCEEDED, annotations + status = Status.MEMORY_LIMIT_EXCEEDED - if results.exit != 0: + elif results.exit != 0: if not shown_messages: messages.append( get_i18n_string("judge.compilation.exitcode", exitcode=results.exit) ) - return messages, Status.COMPILATION_ERROR, annotations + status = Status.COMPILATION_ERROR else: - return messages, Status.CORRECT, annotations + status = Status.CORRECT + + return CompilationResult(messages=messages, status=status, annotations=annotations) diff --git a/tested/judge/core.py b/tested/judge/core.py index 35337088..11d72cd8 100644 --- a/tested/judge/core.py +++ b/tested/judge/core.py @@ -8,7 +8,6 @@ CloseContext, CloseJudgement, CloseTab, - Message, StartContext, StartJudgement, StartTab, @@ -23,6 +22,7 @@ from tested.judge.evaluation import evaluate_context_results, terminate from tested.judge.execution import ( ExecutionResult, + PlanStrategy, compile_unit, execute_unit, filter_files, @@ -43,6 +43,29 @@ _logger = logging.getLogger(__name__) +def _is_fatal_compilation_error(compilation_results: CompilationResult) -> bool: + return compilation_results.status in ( + Status.TIME_LIMIT_EXCEEDED, + Status.MEMORY_LIMIT_EXCEEDED, + ) + + +def _handle_time_or_memory_compilation( + bundle: Bundle, collector: OutputManager, results: CompilationResult +): + assert _is_fatal_compilation_error(results) + collector.add_messages(results.messages) + collector.add_all(results.annotations) + terminate( + bundle, + collector, + StatusMessage( + enum=results.status, + human=get_i18n_string("judge.core.invalid.source-code"), + ), + ) + + def judge(bundle: Bundle): """ Evaluate a solution for an exercise. 
Execute the tests present in the @@ -81,19 +104,9 @@ def judge(bundle: Bundle): terminate(bundle, collector, Status.TIME_LIMIT_EXCEEDED) return - _logger.debug("Planning execution") - planned_units = plan_test_suite(bundle) - _judge_planned_units(bundle, collector, planned_units, start, max_time) + planned_units = plan_test_suite(bundle, strategy=PlanStrategy.OPTIMAL) - -def _judge_planned_units( - bundle: Bundle, - collector: OutputManager, - planned_units: list[PlannedExecutionUnit], - start: float, - max_time: float, -): - _logger.debug("Generating files") + # Attempt to precompile everything. common_dir, dependencies, selector = _generate_files(bundle, planned_units) # Create an execution plan. @@ -106,82 +119,55 @@ def _judge_planned_units( start_time=start, ) - messages, status, annotations = precompile(bundle, plan) + _logger.debug("Attempting precompilation") + compilation_results = precompile(bundle, plan) # If something went horribly wrong, and the compilation itself caused a timeout or memory issue, bail now. - if status in (Status.TIME_LIMIT_EXCEEDED, Status.MEMORY_LIMIT_EXCEEDED): - _logger.info(f"Compilation resulted in {status}. Bailing now.") - collector.add_messages(messages) - collector.add_all(annotations) - terminate(bundle, collector, status) + if _is_fatal_compilation_error(compilation_results): + _handle_time_or_memory_compilation(bundle, collector, compilation_results) return - # If an individual execution unit should be compiled or not. - should_unit_compile = False + # If the compilation failed, but we can fall back, do that. + if ( + compilation_results.status != Status.CORRECT + and bundle.config.options.allow_fallback + ): + _logger.warning("Precompilation failed. Falling back to unit compilation.") + planned_units = plan_test_suite(bundle, strategy=PlanStrategy.TAB) + plan.units = planned_units + compilation_results = None - # If the compilation failed, but we are allowed to use a fallback, do that. 
- if status != Status.CORRECT and bundle.config.options.allow_fallback: - _logger.info( - "Compilation error, falling back to compiling each unit individually." - ) - should_unit_compile = True - # Remove the selector file from the dependencies. - # Otherwise, it will keep being compiled, which we want to avoid. - if bundle.language.needs_selector(): - # The last element in the list is the "selector". - plan.files.pop() - # When compilation succeeded, only add annotations - elif status == Status.CORRECT: - collector.add_messages(messages) - collector.add_all(annotations) - else: - collector.add_messages(messages) - collector.add_all(annotations) - terminate( - bundle, - collector, - StatusMessage( - enum=status, - human=get_i18n_string("judge.core.invalid.source-code"), - ), - ) - _logger.info("Compilation error without fallback") - return # Compilation error occurred, useless to continue. + _judge_planned_units(bundle, collector, plan, compilation_results) + +def _judge_planned_units( + bundle: Bundle, + collector: OutputManager, + plan: ExecutionPlan, + compilation_results: CompilationResult | None, +): _logger.info("Starting execution.") + currently_open_tab = -1 # Create a list of runs we want to execute. for i, planned_unit in enumerate(plan.units): # Prepare the unit. execution_dir, dependencies = set_up_unit(bundle, plan, i) - should_attempt_execution = True # If compilation is necessary, do it. - if should_unit_compile: - (messages, status, annotations), dependencies = compile_unit( + if compilation_results is None: + local_compilation_results, dependencies = compile_unit( bundle, plan, i, execution_dir, dependencies ) - if status == Status.TIME_LIMIT_EXCEEDED: - # There is no more, so stop now. 
- collector.add_messages(messages) - collector.add_all(annotations) - terminate( - bundle, - collector, - StatusMessage( - enum=status, - human=get_i18n_string("judge.core.invalid.source-code"), - ), + if _is_fatal_compilation_error(local_compilation_results): + _handle_time_or_memory_compilation( + bundle, collector, local_compilation_results ) return - elif status != Status.CORRECT: - # TODO: go back and start again with tabs? - should_attempt_execution = False - else: - collector.add_messages(messages) - collector.add_all(annotations) + else: + local_compilation_results = compilation_results # Execute the unit. - if should_attempt_execution: + if local_compilation_results.status == Status.CORRECT: remaining_time = plan.remaining_time() execution_result, status = execute_unit( bundle, planned_unit, execution_dir, dependencies, remaining_time @@ -189,14 +175,14 @@ def _judge_planned_units( else: execution_result = None - result_status = _process_results( + result_status, currently_open_tab = _process_results( bundle=bundle, unit=planned_unit, execution_result=execution_result, - compiler_messages=messages, - status=status, execution_dir=execution_dir, + compilation_results=local_compilation_results, collector=collector, + currently_open_tab=currently_open_tab, ) if result_status in ( @@ -207,8 +193,8 @@ def _judge_planned_units( terminate(bundle, collector, result_status) return - # Depending on the result, we might want to do the next execution anyway. - + # Close the last tab. + collector.add(CloseTab(), currently_open_tab) collector.add(CloseJudgement()) @@ -235,7 +221,7 @@ def precompile(bundle: Bundle, plan: ExecutionPlan) -> CompilationResult: # Update the files if the compilation succeeded. 
processed_results = process_compile_results(bundle.language, result) - if processed_results[1] == Status.CORRECT: + if processed_results.status == Status.CORRECT: plan.files = compilation_files return processed_results @@ -300,26 +286,24 @@ def _process_results( bundle: Bundle, collector: OutputManager, unit: PlannedExecutionUnit, + compilation_results: CompilationResult, execution_result: ExecutionResult | None, - compiler_messages: list[Message], - status: Status, execution_dir: Path, -) -> Status | None: + currently_open_tab: int, +) -> tuple[Status | None, int]: if execution_result: context_results = execution_result.to_context_results() else: context_results = [None] * len(unit.contexts) - current_tab_index = -1 - for planned, context_result in zip(unit.contexts, context_results): planned: PlannedContext - if current_tab_index < planned.tab_index: + if currently_open_tab < planned.tab_index: # Close the previous tab if necessary. - if current_tab_index >= 0: - collector.add(CloseTab(), current_tab_index) - current_tab_index = current_tab_index + 1 - tab = bundle.suite.tabs[current_tab_index] + if collector.open_stack[-1] == "tab": + collector.add(CloseTab(), currently_open_tab) + currently_open_tab = currently_open_tab + 1 + tab = bundle.suite.tabs[currently_open_tab] collector.add(StartTab(title=tab.name, hidden=tab.hidden)) # Handle the contexts. @@ -329,21 +313,16 @@ def _process_results( bundle, context=planned.context, exec_results=context_result, - compiler_results=(compiler_messages, status), context_dir=execution_dir, collector=collector, + compilation_results=compilation_results, ) - # We handled the compiler messages above, so remove them. - compiler_messages = [] - collector.add(CloseContext(), planned.context_index) if continue_ in (Status.TIME_LIMIT_EXCEEDED, Status.MEMORY_LIMIT_EXCEEDED): - return continue_ + return continue_, currently_open_tab - # Finish the final tab. 
- collector.add(CloseTab(), current_tab_index) - return None + return None, currently_open_tab def _copy_workdir_source_files(bundle: Bundle, common_dir: Path) -> list[str]: diff --git a/tested/judge/evaluation.py b/tested/judge/evaluation.py index 42171e3d..6a2ba863 100644 --- a/tested/judge/evaluation.py +++ b/tested/judge/evaluation.py @@ -3,7 +3,7 @@ from collections.abc import Collection from enum import StrEnum, unique from pathlib import Path -from typing import List, Literal, Optional, Tuple, Union +from typing import List, Literal, Optional, Union from tested.configs import Bundle from tested.dodona import ( @@ -15,7 +15,6 @@ CloseTestcase, EscalateStatus, ExtendedMessage, - Message, StartContext, StartTab, StartTest, @@ -27,6 +26,7 @@ from tested.internationalization import get_i18n_string from tested.judge.collector import OutputManager, TestcaseCollector from tested.judge.execution import ContextResult +from tested.judge.planning import CompilationResult from tested.languages.generation import ( attempt_readable_input, generate_statement, @@ -147,11 +147,11 @@ def _evaluate_channel( def evaluate_context_results( bundle: Bundle, context: Context, - exec_results: Optional[ContextResult], - compiler_results: Tuple[List[Message], Status], + exec_results: ContextResult | None, + compilation_results: CompilationResult, context_dir: Path, collector: OutputManager, -) -> Optional[Status]: +) -> Status | None: """ Evaluate the results for a single context. @@ -161,7 +161,7 @@ def evaluate_context_results( :param bundle: The configuration bundle. :param context: The context to evaluate. :param exec_results: The results of evaluating the context. - :param compiler_results: The compiler results. + :param compilation_results: The compiler results. :param context_dir: The directory where the execution happened. :param collector: Where to put the output :return: A status if of interest to the caller. 
@@ -169,18 +169,20 @@ def evaluate_context_results( # If the compiler results are not successful, there is no point in doing more, # so stop early. - if compiler_results[1] != Status.CORRECT: + if compilation_results.status != Status.CORRECT: readable_input = attempt_readable_input(bundle, context) collector.add(StartTestcase(description=readable_input)) # Report all compiler messages. - for message in compiler_results[0]: - collector.add(AppendMessage(message=message)) - # Escalate the compiler status to every testcase. - collector.add(EscalateStatus(status=StatusMessage(enum=compiler_results[1]))) + if not compilation_results.reported: + collector.add_messages(compilation_results.messages) + collector.add_all(compilation_results.annotations) + collector.add( + EscalateStatus(status=StatusMessage(enum=compilation_results.status)) + ) - # Finish evaluation, since there is nothing we can do. + # Finish the evaluation, since there is nothing we can do. collector.add(CloseTestcase(accepted=False), 0) - return compiler_results[1] + return compilation_results.status # There must be execution if compilation succeeded. assert exec_results is not None @@ -460,7 +462,7 @@ def complete_evaluation(bundle: Bundle, collector: OutputManager): "judgement" in collector.open_stack ), "A non-finalized output manager without open judgement is not possible." - tab_start, context_start, testcase_start = collector.closed + tab_start, context_start, testcase_start = collector.currently_open for tab in bundle.suite.tabs[tab_start:]: if context_start == 0 and testcase_start == 0: @@ -517,7 +519,7 @@ def terminate( status_if_unclosed: Union[Status, StatusMessage], ): # Determine the level we need to close. 
- tab, context, testcase = collector.closed + tab, context, testcase = collector.currently_open max_tab = len(bundle.suite.tabs) until: Literal["testcase", "context", "tab", "judgement"] diff --git a/tested/judge/execution.py b/tested/judge/execution.py index 0844132c..908d7433 100644 --- a/tested/judge/execution.py +++ b/tested/judge/execution.py @@ -1,6 +1,8 @@ +import enum import itertools import logging import shutil +from enum import Enum from pathlib import Path from typing import List, Optional, Tuple, Union, cast @@ -314,22 +316,15 @@ def execute_unit( return result, status -def plan_test_suite(bundle: Bundle) -> list[PlannedExecutionUnit]: - """ - Transform a test suite into a list of execution units. +class PlanStrategy(Enum): + OPTIMAL = enum.auto() + TAB = enum.auto() + CONTEXT = enum.auto() - :param bundle: The configuration - :return: A list of planned execution units. - """ - - # First, flatten all contexts into a single list. - flattened_contexts = [] - for t, tab in enumerate(bundle.suite.tabs): - for c, context in enumerate(tab.contexts): - flattened_contexts.append( - PlannedContext(context=context, tab_index=t, context_index=c) - ) +def _flattened_contexts_to_units( + bundle: Bundle, flattened_contexts: list[PlannedContext] +) -> list[PlannedExecutionUnit]: units = [] current_unit = [] @@ -372,3 +367,49 @@ def plan_test_suite(bundle: Bundle) -> list[PlannedExecutionUnit]: ) return units + + +def plan_test_suite( + bundle: Bundle, strategy: PlanStrategy +) -> list[PlannedExecutionUnit]: + """ + Transform a test suite into a list of execution units. + + :param strategy: Which strategy to follow when planning the units. + :param bundle: The configuration + :return: A list of planned execution units. + """ + + # First, flatten all contexts into a single list. 
+ if strategy == PlanStrategy.OPTIMAL: + flattened_contexts = [] + for t, tab in enumerate(bundle.suite.tabs): + for c, context in enumerate(tab.contexts): + flattened_contexts.append( + PlannedContext(context=context, tab_index=t, context_index=c) + ) + flattened_contexts_list = [flattened_contexts] + elif strategy == PlanStrategy.TAB: + flattened_contexts_list = [] + for t, tab in enumerate(bundle.suite.tabs): + flattened_contexts = [] + for c, context in enumerate(tab.contexts): + flattened_contexts.append( + PlannedContext(context=context, tab_index=t, context_index=c) + ) + flattened_contexts_list.append(flattened_contexts) + else: + assert strategy == PlanStrategy.CONTEXT + flattened_contexts_list = [] + for t, tab in enumerate(bundle.suite.tabs): + for c, context in enumerate(tab.contexts): + flattened_contexts = [ + PlannedContext(context=context, tab_index=t, context_index=c) + ] + flattened_contexts_list.append(flattened_contexts) + + nested_units = [] + for flattened_contexts in flattened_contexts_list: + nested_units.extend(_flattened_contexts_to_units(bundle, flattened_contexts)) + + return nested_units diff --git a/tested/judge/planning.py b/tested/judge/planning.py index e89071c8..5c39dba1 100644 --- a/tested/judge/planning.py +++ b/tested/judge/planning.py @@ -5,13 +5,19 @@ from pathlib import Path from typing import Optional -from attrs import define +from attrs import define, field from tested.dodona import AnnotateCode, Message, Status from tested.languages.config import FileFilter from tested.testsuite import Context -CompilationResult = tuple[list[Message], Status, list[AnnotateCode]] + +@define +class CompilationResult: + status: Status + messages: list[Message] = field(factory=list) + annotations: list[AnnotateCode] = field(factory=list) + reported: bool = False @define diff --git a/tested/manual.py b/tested/manual.py index d7a84953..735eda4e 100644 --- a/tested/manual.py +++ b/tested/manual.py @@ -13,7 +13,7 @@ from tested.main import run 
from tested.testsuite import ExecutionMode, SupportedLanguage -exercise_dir = "/home/niko/Ontwikkeling/CG-Dodona/reeks09/rebussen oplossen/" +exercise_dir = "/home/niko/Ontwikkeling/universal-judge/tests/exercises/global" def read_config() -> DodonaConfig: @@ -21,13 +21,13 @@ def read_config() -> DodonaConfig: return DodonaConfig( memory_limit=536870912, time_limit=60, - programming_language=SupportedLanguage("bash"), + programming_language=SupportedLanguage("python"), natural_language="nl", resources=Path(exercise_dir, "evaluation"), - source=Path(exercise_dir, "solution/solution.sh"), + source=Path(exercise_dir, "solution/correct.py"), judge=Path("."), workdir=Path("workdir"), - test_suite="tests.yaml", + test_suite="one.tson", options=Options( linter=False, ), diff --git a/tests/test_functionality.py b/tests/test_functionality.py index b99c9d02..ba16466c 100644 --- a/tests/test_functionality.py +++ b/tests/test_functionality.py @@ -564,10 +564,11 @@ def test_batch_compilation(language: str, tmp_path: Path, pytestconfig, mocker): def test_batch_compilation_fallback( language: str, tmp_path: Path, pytestconfig, mocker ): + config_ = {"options": {"allow_fallback": True}} lang_class = LANGUAGES[language] spy = mocker.spy(lang_class, "compilation") conf = configuration( - pytestconfig, "echo", language, tmp_path, "two.tson", "comp-error" + pytestconfig, "echo", language, tmp_path, "two.tson", "comp-error", config_ ) result = execute_config(conf) updates = assert_valid_output(result, pytestconfig) @@ -589,7 +590,7 @@ def test_batch_compilation_no_fallback( result = execute_config(conf) updates = assert_valid_output(result, pytestconfig) assert len(updates.find_all("start-tab")) == 1 - assert updates.find_status_enum() == ["compilation error", "wrong", "wrong"] + assert updates.find_status_enum() == ["compilation error"] * 2 assert spy.call_count == 1