Add system time statistics #252

Merged: 11 commits, May 8, 2021
Changes from all commits
1 change: 1 addition & 0 deletions tested/configs.py
@@ -72,6 +72,7 @@ class DodonaConfig(BaseModel):
testplan: str = "plan.json" # Name of the testplan file.
options: Options = Options()
output_limit: int = 10240 * 1024 # Default value for backwards compatibility.
timing_statistics: bool = False

def config_for(self) -> Dict[str, Any]:
return self.options.language.get(self.programming_language, dict())
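The new timing_statistics flag defaults to False, so existing configurations keep their current behaviour. A minimal sketch of how the flag could be switched through to the timing collector; the surrounding dict and wiring are illustrative, only the timing_statistics key and the collect_timings/is_collecting helpers come from this PR:

# Illustrative only: forward the new configuration flag to the timing collector.
from tested.internal_timings import collect_timings, is_collecting

raw_config = {
    "testplan": "plan.json",
    "output_limit": 10240 * 1024,
    "timing_statistics": True,  # new flag added in this PR (defaults to False)
}

collect_timings(raw_config.get("timing_statistics", False))
assert is_collecting()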
12 changes: 11 additions & 1 deletion tested/dsl/translate_parser.py
@@ -13,6 +13,7 @@
from tested.datatypes import BasicBooleanTypes, BasicNumericTypes, \
BasicObjectTypes, BasicSequenceTypes, BasicStringTypes
from tested.dsl.statement import Parser
from tested.internal_timings import new_stage, end_stage
from tested.serialisation import ExceptionValue, Value, NothingType, StringType, \
BooleanType, NumberType, SequenceType, ObjectType, ObjectKeyValuePair
from tested.testplan import BaseOutput, Context, EmptyChannel, \
@@ -468,22 +469,31 @@ def _translate_testcase(self,
self._get_str_safe(testcase, "expression"))
output = Output()
self._translate_base_output(testcase, output, stack_frame)

if "return" in testcase:
hide_expected = self._get_bool_safe(stack_frame.options_return,
"hideExpected") or False

new_stage("parse.return", True)
output.result = ValueOutputChannel(
value=self._translate_value(
self._get_any_safe(testcase, "return")),
show_expected=not hide_expected
)
end_stage("parse.return", True)
elif "return-raw" in testcase:
new_stage("parse.return-raw", True)
output.result = self._translate_raw_return(
stack_frame,
self._get_str_dict_safe(testcase, "return-raw")
)
end_stage("parse.return-raw", True)

self._heuristic_check_multiline_value(output.result)

new_stage("parse.expression", True)
testcase_value = Testcase(input=parser.parse_statement(code), output=output)
end_stage("parse.expression", True)

if "files" in testcase:
files = [
@@ -506,7 +516,7 @@ def _translate_raw_return(self, stack_frame: StackFrame,
show_expected=not hide_expected
)

def _translate_value(self, value: YAML_DICT) -> Value:
def _translate_value(self, value: YAML_OBJECT) -> Value:
if value is None:
return NothingType()
elif isinstance(value, str):
3 changes: 3 additions & 0 deletions tested/evaluators/exception.py
@@ -8,6 +8,7 @@

from . import EvaluationResult, EvaluatorConfig
from ..dodona import StatusMessage, Status, ExtendedMessage, Permission
from ..internal_timings import new_stage, end_stage
from ..internationalization import get_i18n_string
from ..serialisation import ExceptionValue
from ..testplan import ExceptionOutputChannel
@@ -90,6 +91,7 @@ def evaluate(config: EvaluatorConfig,
messages=[staff_message, student_message]
)

new_stage("evaluate.builtin.exception", True)
message = config.bundle.lang_config.clean_stacktrace_to_message(
actual.stacktrace)
if message:
@@ -104,6 +106,7 @@
else:
status = Status.CORRECT if expected.message == actual.message else \
Status.WRONG
end_stage("evaluate.builtin.exception", True)

return EvaluationResult(
result=StatusMessage(enum=status),
4 changes: 3 additions & 1 deletion tested/evaluators/programmed.py
@@ -10,6 +10,7 @@
from .value import get_values
from ..datatypes import StringTypes
from ..dodona import ExtendedMessage, StatusMessage, Status, Permission
from ..internal_timings import new_stage, end_stage
from ..internationalization import get_i18n_string
from ..judge import evaluate_programmed
from ..judge.utils import BaseExecutionResult
@@ -101,13 +102,14 @@ def evaluate(config: EvaluatorConfig,
_logger.debug(f"Calling programmed evaluation with params:\n"
f"expected: {expected}\n"
f"actual: {actual}")

new_stage("evaluate.programmed", True)
result = evaluate_programmed(
config.bundle,
evaluator=channel.evaluator,
expected=expected,
actual=actual
)
end_stage("evaluate.programmed", True)

if isinstance(result, BaseExecutionResult):
if result.timeout:
9 changes: 8 additions & 1 deletion tested/evaluators/text.py
@@ -7,6 +7,7 @@

from . import EvaluationResult, EvaluatorConfig
from ..dodona import StatusMessage, Status
from ..internal_timings import new_stage, end_stage
from ..internationalization import get_i18n_string
from ..testplan import TextOutputChannel, FileOutputChannel, OutputChannel

@@ -94,8 +95,12 @@ def evaluate_text(
"""
assert isinstance(channel, TextOutputChannel)
options = _text_options(config)

new_stage("evaluate.builtin.text", True)
expected = channel.get_data_as_string(config.bundle.config.resources)
return compare_text(options, expected, actual)
result = compare_text(options, expected, actual)
end_stage("evaluate.builtin.text", True)
return result


def evaluate_file(config: EvaluatorConfig,
@@ -159,13 +164,15 @@ def evaluate_file(config: EvaluatorConfig,
return compare_text(options, expected, actual)
else:
assert options["mode"] == "line"
new_stage("evaluate.builtin.file.line", True)
strip_newlines = options.get("stripNewlines", False)
expected_lines = expected.splitlines(keepends=not strip_newlines)
actual_lines = actual.splitlines(keepends=not strip_newlines)
correct = len(actual_lines) == len(expected_lines)
for (expected_line, actual_line) in zip(expected_lines, actual_lines):
r = compare_text(options, expected_line, actual_line)
correct = correct and r.result.enum == Status.CORRECT
end_stage("evaluate.builtin.file.line", True)
return EvaluationResult(
result=StatusMessage(enum=Status.CORRECT if correct else Status.WRONG),
readable_expected=expected,
3 changes: 3 additions & 0 deletions tested/evaluators/value.py
@@ -9,6 +9,7 @@
from ..datatypes import AdvancedTypes, BasicTypes, BasicStringTypes, SimpleTypes, \
BasicSequenceTypes
from ..dodona import ExtendedMessage, Permission, StatusMessage, Status
from ..internal_timings import new_stage, end_stage
from ..internationalization import get_i18n_string
from ..languages.config import TypeSupport
from ..languages.generator import convert_statement
@@ -216,6 +217,7 @@ def evaluate(config: EvaluatorConfig, channel: OutputChannel,
readable_actual=readable_actual
)

new_stage("evaluate.builtin.value", True)
type_check, expected = _check_type(config.bundle, expected, actual)
messages = []
type_status = None
@@ -224,6 +226,7 @@ def evaluate(config: EvaluatorConfig, channel: OutputChannel,
py_actual = to_python_comparable(actual)

content_check = py_expected == py_actual
end_stage("evaluate.builtin.value", True)

# Only add the message about the type if the content is the same.
if content_check and not type_check:
3 changes: 3 additions & 0 deletions tested/features.py
@@ -8,6 +8,7 @@
from typing import Iterable, Set, NamedTuple, TYPE_CHECKING

from .datatypes import AllTypes, BasicSequenceTypes, BasicObjectTypes, NestedTypes
from tested.internal_timings import new_stage, end_stage

if TYPE_CHECKING:
from .configs import Bundle
@@ -96,7 +97,9 @@ def is_supported(bundle: 'Bundle') -> bool:
"""
from .languages.config import TypeSupport

new_stage("analyse.features", sub_stage=True)
required = bundle.plan.get_used_features()
end_stage("analyse.features", sub_stage=True)

# Check constructs
available_constructs = bundle.lang_config.supported_constructs()
151 changes: 151 additions & 0 deletions tested/internal_timings.py
@@ -0,0 +1,151 @@
from dataclasses import dataclass, field
from threading import current_thread
from time import monotonic
from typing import NamedTuple, List, Dict, Optional, Tuple

from tested.internationalization import get_i18n_string


class StageEntry(NamedTuple):
thread_name: str
stage_name: str
timestamp: float
is_start: bool = True
sub_stage: bool = False


@dataclass
class SubStageTiming:
sub_stage_name: str
duration: float = -1


@dataclass
class StageTiming:
stage_name: str
duration: float = -1
sub_stages: List[SubStageTiming] = field(default_factory=list)


collect_measures: bool = False
timings: List[StageEntry] = []

start_time = monotonic()


def _get_thread_name():
return current_thread().getName()


def collect_timings(collect: bool = False):
global collect_measures
collect_measures = collect


def is_collecting():
return collect_measures


def end_stage(stage: str, sub_stage: bool = False):
global collect_measures
global timings
if collect_measures:
timings.append(StageEntry(thread_name=_get_thread_name(), stage_name=stage,
is_start=False, timestamp=monotonic(),
sub_stage=sub_stage))


def group_by_thread() -> Dict[str, List[StageEntry]]:
global timings
timings_by_thread = {}
for entry in timings:
try:
timings_by_thread[entry.thread_name].append(entry)
except KeyError:
timings_by_thread[entry.thread_name] = [entry]
return timings_by_thread


def new_stage(stage: str, sub_stage: bool = False):
global collect_measures
global timings
if collect_measures:
timings.append(StageEntry(thread_name=_get_thread_name(), stage_name=stage,
is_start=True, timestamp=monotonic(),
sub_stage=sub_stage))


def calculate_stages():
groups = group_by_thread()
stages: List[StageTiming] = []
# Calculate interval durations
for group in groups.values():
stage: Optional[StageTiming] = None
stage_start: float = 0.0
sub_stage_start: float = 0.0
for row in group:
if row.sub_stage:
if row.is_start:
if stage.sub_stages and stage.sub_stages[-1].duration < 0:
stage.sub_stages[
-1].duration = row.timestamp - sub_stage_start
sub_stage_start = row.timestamp
stage.sub_stages.append(SubStageTiming(row.stage_name))
else:
stage.sub_stages[-1].duration = row.timestamp - sub_stage_start
else:
if row.is_start:
if stage is not None:
if stage.sub_stages and stage.sub_stages[-1].duration < 0:
stage.sub_stages[
-1].duration = row.timestamp - sub_stage_start
stage.duration = row.timestamp - stage_start
stages.append(stage)
stage_start = row.timestamp
stage = StageTiming(row.stage_name)
else:
stage.duration = row.timestamp - stage_start
if stage.sub_stages and stage.sub_stages[-1].duration < 0:
stage.sub_stages[
-1].duration = row.timestamp - sub_stage_start
stages.append(stage)
stage = None
# Combine timings from the same stages
combined: Dict[str, Tuple[StageTiming, Dict[str, SubStageTiming]]] = {}
for stage in stages:
try:
combined[stage.stage_name][0].duration += stage.duration
except KeyError:
combined[stage.stage_name] = StageTiming(stage.stage_name,
stage.duration), {}
finally:
sub_stages = combined[stage.stage_name][1]
for sub_stage in stage.sub_stages:
try:
sub_stages[
sub_stage.sub_stage_name].duration += sub_stage.duration
except KeyError:
sub_stages[sub_stage.sub_stage_name] = SubStageTiming(
sub_stage.sub_stage_name, sub_stage.duration)
# Include substages
for stage, sub_stages in combined.values():
stage.sub_stages = list(sub_stages.values())
return (stage for stage, _ in combined.values())


def pretty_print_timings(is_parallel: bool = False):
global start_time
lines = []
if is_parallel:
lines.append(f'_{get_i18n_string("timings.parallel")}_')
for stage in calculate_stages():
stage_name = get_i18n_string(f"timings.{stage.stage_name}")
lines.append("{:s}: **{:0.03f} s**".format(stage_name, stage.duration))
for sub_stage in stage.sub_stages:
sub_stage_name = get_i18n_string(f"timings.{sub_stage.sub_stage_name}")
lines.append("- {:s}: **{:0.03f} s**".format(sub_stage_name,
sub_stage.duration))
end_time = monotonic()
lines.extend(["", "{:s}: {:0.03f} s".format(get_i18n_string(f"timings.total"),
end_time - start_time)])
return '\n\n'.join(lines)
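A minimal usage sketch of these new helpers, assuming the i18n catalogue is initialised the way the judge normally does. The stage names reuse keys that this PR adds to en.yaml, but the sleeps and the script itself are purely illustrative:

from time import sleep

from tested.internal_timings import (collect_timings, end_stage, new_stage,
                                     pretty_print_timings)

collect_timings(True)  # collection is off by default

new_stage("compilation.pre")                    # top-level stage
sleep(0.10)
new_stage("analyse.features", sub_stage=True)   # nested sub-stage
sleep(0.05)
end_stage("analyse.features", sub_stage=True)
end_stage("compilation.pre")

print(pretty_print_timings())  # Markdown summary with per-(sub)stage durations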
44 changes: 44 additions & 0 deletions tested/internationalization/en.yaml
@@ -136,3 +136,47 @@ en:
timeout: "ShellCheck exceeded time limit"
memory: "ShellCheck exceeded memory limit"
output: "ShellCheck produced bad output."
timings:
parallel: |
In parallel execution, the cumulative execution times can be greater than the total execution time.
title: "Statistics timings"
testplan: "Reading testplan"
dsl: "Parse DSL testplan"
json: "Parse JSON testplan"
bundle: "Create bundle information"
analyse:
supported: "Check if testplan is supported by the programming language"
features: "Analyse features"
prepare:
output: "Preparing feedback table"
results: "Preparing results"
linter: "Running linter"
generation: "Generate testcode"
submission:
modify: "Modify submission"
generate:
templates: "Generate testcode from templates"
compilation:
pre: "Batch compilation"
individual: "Individual compilation"
batch:
done: "Adding feedback from batch compilation"
run:
execution: "Running a run"
testcode: "Running test executables"
dependencies:
copy: "Copy dependencies"
evaluate:
results: "Evaluating results"
builtin:
text: "Evaluating text results/full file with built-in evaluator"
file: "Evaluating file line-by-line with built-in evaluator"
value: "Evaluating return values with built-in evaluator"
exception: "Evaluating exceptions with built-in evaluator"
programmed: "Evaluating results with programmed evaluator"
total: "Total execution time"
parse:
expression: "Parse expressions and statements"
return: "Parse YAML return values"
return-raw: "Parse return-raw values"
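
Together with pretty_print_timings, these strings end up as a Markdown fragment in the feedback table. An illustrative rendering for an English run (the durations and the stage/sub-stage combination are made up) could look like:

Batch compilation: **1.234 s**

Evaluating results: **0.321 s**

- Evaluating text results/full file with built-in evaluator: **0.045 s**

Total execution time: 2.468 s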
