jupyter · brichet · Sep 1, 2023 · Aug 21, 2023 · Aug 21, 2023 · Aug 21, 2023
diff --git a/.gitignore b/.gitignore
@@ -92,6 +92,8 @@ nbgrader/docs/source/user_guide/release/ps1/problem2.html
 nbgrader/docs/source/user_guide/source/header.html
 nbgrader/docs/source/user_guide/source/ps1/problem1.html
 nbgrader/docs/source/user_guide/source/ps1/problem2.html
+nbgrader/docs/source/user_guide/source/ps1_autotest/problem1.html
+nbgrader/docs/source/user_guide/source/ps1_autotest/problem2.html
 
 # components stuff
 node_modules

diff --git a/nbgrader/apps/generateassignmentapp.py b/nbgrader/apps/generateassignmentapp.py
@@ -2,10 +2,11 @@
 
 import sys
 
-from traitlets import default
+from traitlets import default, Bool
+from textwrap import dedent
 
 from .baseapp import NbGrader, nbgrader_aliases, nbgrader_flags
-from ..converters import BaseConverter, GenerateAssignment, NbGraderException
+from ..converters import BaseConverter, GenerateAssignment, NbGraderException, GenerateSourceWithTests
 from traitlets.traitlets import MetaHasTraits
 from typing import List, Any
 from traitlets.config.loader import Config
@@ -51,6 +52,12 @@
  {'BaseConverter': {'force': True}},
  "Overwrite an assignment/submission if it already exists."
  ),
+ 'source_with_tests': (
+ {'GenerateAssignmentApp': {'source_with_tests': True}},
+ "Generate intermediate notebooks that contain both the autogenerated test code and the solutions. "
+ "Results will be saved in the source_with_tests/ folder. "
+ "This is useful for instructors to debug problematic autogenerated test code."
+ ),
 })
 
 
@@ -62,6 +69,17 @@ class GenerateAssignmentApp(NbGrader):
  aliases = aliases
  flags = flags
 
+ source_with_tests = Bool(
+ False,
+ help=dedent(
+ """
+ Generate intermediate notebooks that contain both the autogenerated test code and the solutions.
+ Results will be saved in the source_with_tests/ folder.
+ This is useful for instructors to debug issues in autogenerated test code.
+ """
+ )
+ ).tag(config=True)
+
  examples = """
  Produce the version of the assignment that is intended to be released to
  students. This performs several modifications to the original assignment:
@@ -112,7 +130,7 @@ class GenerateAssignmentApp(NbGrader):
  @default("classes")
  def _classes_default(self) -> List[MetaHasTraits]:
  classes = super(GenerateAssignmentApp, self)._classes_default()
- classes.extend([BaseConverter, GenerateAssignment])
+ classes.extend([BaseConverter, GenerateAssignment, GenerateSourceWithTests])
  return classes
 
  def _load_config(self, cfg: Config, **kwargs: Any) -> None:
@@ -141,6 +159,14 @@ def start(self) -> None:
  elif len(self.extra_args) == 1:
  self.coursedir.assignment_id = self.extra_args[0]
 
+
+ if self.source_with_tests:
+ converter = GenerateSourceWithTests(coursedir=self.coursedir, parent=self)
+ try:
+ converter.start()
+ except NbGraderException:
+ sys.exit(1)
+
  converter = GenerateAssignment(coursedir=self.coursedir, parent=self)
  try:
  converter.start()

diff --git a/nbgrader/apps/quickstartapp.py b/nbgrader/apps/quickstartapp.py
@@ -40,6 +40,15 @@
  """
  )
  ),
+ 'autotest': (
+ {'QuickStartApp': {'autotest': True}},
+ dedent(
+ """
+ Create notebook assignments that have examples of automatic test generation via
+ ### AUTOTEST and ### HASHED AUTOTEST statements.
+ """
+ )
+ ),
 }
 
 class QuickStartApp(NbGrader):
@@ -73,6 +82,8 @@ class QuickStartApp(NbGrader):
 
  force = Bool(False, help="Whether to overwrite existing files").tag(config=True)
 
+ autotest = Bool(False, help="Whether to use automatic test generation in example files").tag(config=True)
+
  @default("classes")
  def _classes_default(self):
  classes = super(QuickStartApp, self)._classes_default()
@@ -115,12 +126,20 @@ def start(self):
  if not os.path.isdir(course_path):
  os.mkdir(course_path)
 
- # populating it with an example
+ # populate it with an example
  self.log.info("Copying example from the user guide...")
  example = os.path.abspath(os.path.join(
  os.path.dirname(__file__), '..', 'docs', 'source', 'user_guide', 'source'))
- ignore_html = shutil.ignore_patterns("*.html")
- shutil.copytree(example, os.path.join(course_path, "source"), ignore=ignore_html)
+ if self.autotest:
+ tests_file_path = os.path.abspath(os.path.join(
+ os.path.dirname(__file__), '..', 'docs', 'source', 'user_guide', 'autotests.yml'))
+ shutil.copyfile(tests_file_path, os.path.join(course_path, 'autotests.yml'))
+ ignored_files = shutil.ignore_patterns("*.html", "ps1")
+ shutil.copytree(example, os.path.join(course_path, "source"), ignore=ignored_files)
+ os.rename(os.path.join(course_path, "source", "ps1_autotest"), os.path.join(course_path, "source", "ps1"))
+ else:
+ ignored_files = shutil.ignore_patterns("*.html", "autotests.yml", "ps1_autotest")
+ shutil.copytree(example, os.path.join(course_path, "source"), ignore=ignored_files)
 
  # create the config file
  self.log.info("Generating example config file...")

diff --git a/nbgrader/converters/__init__.py b/nbgrader/converters/__init__.py
@@ -5,6 +5,7 @@
 from .feedback import Feedback
 from .generate_feedback import GenerateFeedback
 from .generate_solution import GenerateSolution
+from .generate_source_with_tests import GenerateSourceWithTests
 
 __all__ = [
  "BaseConverter",
@@ -14,5 +15,6 @@
  "Autograde",
  "Feedback",
  "GenerateFeedback",
- "GenerateSolution"
+ "GenerateSolution",
+ "GenerateSourceWithTests"
 ]
diff --git a/nbgrader/converters/generate_assignment.py b/nbgrader/converters/generate_assignment.py
@@ -8,6 +8,7 @@
 from .base import BaseConverter, NbGraderException
 from ..preprocessors import (
  IncludeHeaderFooter,
+ InstantiateTests,
  ClearSolutions,
  LockCells,
  ComputeChecksums,
@@ -57,6 +58,7 @@ def _output_directory(self) -> str:
 
  preprocessors = List([
  IncludeHeaderFooter,
+ InstantiateTests,
  LockCells,
  ClearSolutions,
  ClearOutput,

diff --git a/nbgrader/converters/generate_source_with_tests.py b/nbgrader/converters/generate_source_with_tests.py
@@ -0,0 +1,49 @@
+import os
+import re
+
+from traitlets import List, default
+
+from .base import BaseConverter
+from ..preprocessors import (
+ InstantiateTests,
+ ClearOutput,
+ CheckCellMetadata
+)
+from traitlets.config.loader import Config
+from typing import Any
+from ..coursedir import CourseDirectory
+
+
+class GenerateSourceWithTests(BaseConverter):  # converter that reads from coursedir.source_directory and writes to coursedir.source_with_tests_directory
+
+ @default("permissions")
+ def _permissions_default(self) -> int:
+ return 664 if self.coursedir.groupshared else 644  # NOTE(review): decimal literals that read like octal file modes; presumably reinterpreted as octal by BaseConverter - confirm
+
+ @property
+ def _input_directory(self) -> str:
+ return self.coursedir.source_directory  # input side: the instructor's source notebooks
+
+ @property
+ def _output_directory(self) -> str:
+ return self.coursedir.source_with_tests_directory  # output side: the configurable source_with_tests directory
+
+ preprocessors = List([  # listed in this order; ClearSolutions is absent, so solutions are not cleared by this list
+ InstantiateTests,
+ ClearOutput,
+ CheckCellMetadata
+ ]).tag(config=True)  # tagged config=True, so the list can be overridden through traitlets configuration
+
+ def _load_config(self, cfg: Config, **kwargs: Any) -> None:
+ super(GenerateSourceWithTests, self)._load_config(cfg, **kwargs)  # pure pass-through to the parent class
+
+ def __init__(self, coursedir: CourseDirectory = None, **kwargs: Any) -> None:
+ super(GenerateSourceWithTests, self).__init__(coursedir=coursedir, **kwargs)  # pure pass-through; coursedir is forwarded by keyword
+
+ def start(self) -> None:
+ old_student_id = self.coursedir.student_id  # remember the caller's value so it can be restored afterwards
+ self.coursedir.student_id = '.'  # temporary override for the duration of this run
+ try:
+ super(GenerateSourceWithTests, self).start()
+ finally:
+ self.coursedir.student_id = old_student_id  # always restore, even if the parent start() raises
diff --git a/nbgrader/coursedir.py b/nbgrader/coursedir.py
@@ -142,6 +142,18 @@ def _validate_notebook_id(self, proposal: Bunch) -> str:
  )
  ).tag(config=True)
 
+ source_with_tests_directory = Unicode(
+ 'source_with_tests',
+ help=dedent(
+ """
+ The name of the directory that contains notebooks with both solutions
+ and instantiated test code (i.e., all AUTOTEST directives are removed
+ and replaced by actual test code). This corresponds to the
+ `nbgrader_step` variable in the `directory_structure` config option.
+ """
+ )
+ ).tag(config=True)
+
  submitted_directory = Unicode(
  'submitted',
  help=dedent(

diff --git a/nbgrader/docs/source/user_guide/advanced.rst b/nbgrader/docs/source/user_guide/advanced.rst
@@ -194,3 +194,160 @@ containerization system. For details on using ``envkernel`` with
 singularity, see the `README
 <https://github.com/NordicHPC/envkernel/blob/master/README.md>`_ of
 ``envkernel``.
+
+.. _customizing-autotests:
+
+Automatic test code generation
+---------------------------------------
+
+.. versionadded:: 0.9.0
+
+.. seealso::
+
+ :ref:`autograder-tests-cell-automatic-test-code`
+ General introduction to automatic test code generation.
+
+
+nbgrader now supports generating test code automatically
+using ``### AUTOTEST`` and ``### HASHED AUTOTEST`` statements.
+In this section, you can find more detail on how this works and 
+how to customize the test generation process. 
+Suppose you ask students to create a ``foo`` function that adds 5 to
+an integer. In the source copy of the notebook, you might write something like
+
+.. code:: python
+
+ ### BEGIN SOLUTION
+ def foo(x):
+ return x + 5
+ ### END SOLUTION
+
+In a test cell, you would normally then write test code manually to probe various aspects of the solution.
+For example, you might check that the function increments 3 to 8 properly, and that the type
+of the output is an integer.
+
+.. code:: python
+
+ assert isinstance(foo(3), int), "incrementing an int by 5 should return an int"
+ assert foo(3) == 8, "3+5 should be 8"
+
+nbgrader now provides functionality to automate this process. Instead of writing tests explicitly,
+you can specify *what you want to test*, and let nbgrader decide *how to test it* automatically.
+
+.. code:: python
+
+ ### AUTOTEST foo(3)
+
+This directive indicates that you want to check ``foo(3)`` in the student's notebook, and make sure it 
+aligns with the value of ``foo(3)`` in the current source copy. You can write any valid expression (in the 
+language of your notebook) after the ``### AUTOTEST`` directive. For example, you could write
+
+.. code:: python
+
+ ### AUTOTEST (foo(3) - 5 == 3)
+
+to generate test code for the expression ``foo(3)-5==3`` (i.e., a boolean value), and make sure that evaluating
+the student's copy of this expression has a result that aligns with the source version (i.e., ``True``). You can write multiple
+``### AUTOTEST`` directives in one cell. You can also separate multiple expressions on one line with semicolons:
+
+.. code:: python
+
+ ### AUTOTEST foo(3); foo(4); foo(5) != 8
+
+These directives will insert test code into student notebooks in which the expected answers appear in plaintext. If you want to
+obfuscate the answers in the student copy, you should instead use a ``### HASHED AUTOTEST`` directive, which will produce
+a student notebook where the answers are hashed and not viewable by students.
+
+When you generate an assignment containing ``### AUTOTEST`` (or ``### HASHED AUTOTEST``) statements, nbgrader looks for a file
+named ``autotests.yml`` that contains instructions on how to generate test code. It first looks 
+in the assignment directory itself (in case you want to specify special tests for just that assignment), and if it is 
+not found there, nbgrader searches in the course root directory.
+The ``autotests.yml`` file is a `YAML <https://yaml.org/>`__ file that looks something like this:
+
+.. code:: yaml
+
+ python3:
+ setup: "from hashlib import sha1"
+ hash: 'sha1({{snippet}}.encode("utf-8")+b"{{salt}}").hexdigest()'
+ dispatch: "type({{snippet}})"
+ normalize: "str({{snippet}})"
+ check: 'assert {{snippet}} == """{{value}}""", """{{message}}"""'
+ success: "print('Success!')"
+
+ templates:
+ default:
+ - test: "type({{snippet}})"
+ fail: "type of {{snippet}} is not correct"
+
+ - test: "{{snippet}}"
+ fail: "value of {{snippet}} is not correct"
+
+ int:
+ - test: "type({{snippet}})"
+ fail: "type of {{snippet}} is not int. Please make sure it is int and not np.int64, etc. You can cast your value into an int using int()"
+
+ - test: "{{snippet}}"
+ fail: "value of {{snippet}} is not correct"
+
+The outermost level in the YAML file (the example shows an entry for ``python3``) specifies which kernel the configuration applies to. ``autotests.yml`` can 
+have separate sections for multiple kernels / languages. The ``autotests.yml`` file uses `Jinja templates <https://jinja.palletsprojects.com/en/3.1.x/>`__ to 
+specify snippets of code that will be executed/inserted into Jupyter notebooks in the process of generating the assignment. You should familiarize yourself 
+with the basics of Jinja templates before proceeding. For each kernel, there are a few configuration settings possible:
+
+- **dispatch:** When you write ``### AUTOTEST foo(3)``, nbgrader needs to know how to test ``foo(3)``. It does so by executing ``foo(3)``, then checking its *type*,
+ and then running tests corresponding to that type in the ``autotests.yml`` file. Specifically, when generating an assignment, nbgrader substitutes the ``{{snippet}}`` template
+ variable with the expression ``foo(3)``, and then evaluates the dispatch code based on that. In this case, nbgrader runs ``type(foo(3))``, which will 
+ return ``int``, so nbgrader will know to test ``foo(3)`` using tests for integer variables.
+- **templates:** Once nbgrader determines the type of the expression ``foo(3)``, it will look for that type in the list of templates for the kernel. In this case,
+ it will find the ``int`` type in the list (it will use the **default** if the type is not found). Each type will have associated with it a 
+ list of **test**/**fail** template pairs, which tell nbgrader what tests to run 
+ and what messages to print in the event of a failure. Once again, ``{{snippet}}`` will be replaced by the ``foo(3)`` expression. In ``autotests.yml`` above, the 
+ ``int`` type has two tests: one that checks the type of the expression, and one that checks its value. In this case, the student notebook will have
+ two tests: one that checks the value of ``type(foo(3))``, and one that checks the value of ``foo(3)``.
+- **normalize:** For each test code expression (for example, ``type(foo(3))`` as mentioned previously), nbgrader will execute code using the corresponding 
+ Jupyter kernel, which will respond with a result in the form of a *string*. So nbgrader now knows that if it runs ``type(foo(3))`` at this 
+ point in the notebook, and converts the output to a string (i.e., *normalizes it*), it should obtain ``"int"``. However, nbgrader does not know how to convert output to a string; that
+ depends on the kernel! So the normalize code template tells nbgrader how to convert an expression to a string. In the ``autotests.yml`` example above, the 
+ normalize template suggests that nbgrader should try to compare ``str(type(foo(3)))`` to ``"int"``. 
+- **check:** This is the code template that will be inserted into the student notebook to run each test. The template has three variables. ``{{snippet}}`` is the normalized
+ test code. The ``{{value}}`` is the evaluated version of that test code, based on the source notebook. The ``{{message}}`` is
+ text that will be printed in the event of a test failure. In the example above, the check code template tells nbgrader to insert an ``assert`` statement to run the test.
+- **hash (optional):** This is a code template that is responsible for hashing (i.e., obfuscating) the answers in the student notebook. The template has two variables.
+ ``{{snippet}}`` represents the expression that will be hashed, and ``{{salt}}`` is where nbgrader inserts a `salt <https://en.wikipedia.org/wiki/Salt_(cryptography)>`__
+ prior to hashing. The salt helps prevent students from identifying the hashes of answers to common question types. For example, a true/false question has only two possible answers;
+ without a salt, students would be able to recognize the hashes of ``True`` and ``False`` in their notebooks. By adding a salt, nbgrader makes the hashed version of the answer 
+ different for each question, preventing identifying answers based on their hashes.
+- **setup (optional):** This is a code template that will be run at the beginning of all test cells containing ``### AUTOTEST`` or ``### HASHED AUTOTEST`` directives. It is often used to import
+ special packages that only the test code requires. In the example above, the setup code is used to import the ``sha1`` function from ``hashlib``, which is necessary
+ for hashed test generation.
+- **success (optional):** This is a code template that will be added to the end of all test cells containing ``### AUTOTEST`` or ``### HASHED AUTOTEST`` directives. In the 
+ generated student version of the notebook,
+ this code will run if all the tests pass. In the example ``autotests.yml`` file above, the success code is used to run ``print('Success!')``, i.e., simply print a message to
+ indicate that all tests in the cell passed.
+
+.. note::
+
+ For assignments with ``### AUTOTEST`` and ``### HASHED AUTOTEST`` directives, it is often handy
+ to have an editable copy of the assignment with solutions *and* test code inserted. You can
+ use ``nbgrader generate_assignment --source_with_tests`` to generate this version of an assignment,
+ which will appear in the ``source_with_tests/`` folder in the course repository.
+
+.. warning::
+
+ The default ``autotests.yml`` test templates file included with the repository has tests for many
+ common data types (``int``, ``dict``, ``list``, ``float``, etc.). It also has a ``default`` test template
+ that it will try to apply to any types that do not have specified tests. If you want to automatically
+ generate your own tests for custom types, you will need to implement those test templates in ``autotests.yml``. That being said, custom
+ object types often have standard Python types as class attributes. Sometimes an easier option is to use nbgrader to test these
+ attributes automatically instead. For example, if ``obj`` is a complicated type with no specific test template available,
+ but ``obj`` has an ``int`` attribute ``x``, you could consider testing that attribute directly, e.g., ``### AUTOTEST obj.x``.
+
+.. warning::
+
+ The InstantiateTests preprocessor in nbgrader is responsible for generating test code from ``### AUTOTEST`` 
+ directives and the ``autotests.yml`` file. It has some configuration parameters not yet mentioned here.
+ The most important of these is the ``InstantiateTests.sanitizers`` dictionary, which tells nbgrader how to 
+ clean up the string output from each kind of Jupyter kernel before using it in the process of generating tests. We have 
+ implemented sanitizers for popular kernels in nbgrader already, but you might need to add your own.
+
+