[Doctests] Refactor doctests + add CI (huggingface#22987)

* intiial commit * new styling * update * just run doctest in CI * remove more test for fast dev * update * update refs * update path and fetch upstream * update documentatyion trests * typo * parse pwd * don't check for files that are in hidden folders * just give paths relative to transformers * update * update * update * major refactoring * make sure options is ok * lest test that mdx is tested * doctest glob * nits * update doctest nightly * some cleaning * run correct test on diff * debug * run on a single worker * skip_cuda_test tampkate * updates * add rA and continue on failure * test options * parse `py` codeblock? * we don't need to replace ignore results, don't remember whyu I put it * cleanup * more cleaning * fix arg * more cleaning * clean an todo * more pre-processing * doctest-module has none so extra `- ` is needed * remove logs * nits * doctest-modules .... * oups * let's use sugar * make dataset go quiet * add proper timeout * nites * spleling timeout * update * properly skip tests that have CUDSA * proper skipping * cleaning main and get tests to run * remove make report? * remove tee * some updates * tee was removed but is the full output still available? * [all-test] * only our tests * don't touch tee in this PR * no atee-sys * proper sub * monkey * only replace call * fix sub * nits * nits * fix invalid syntax * add skip cuda doctest env variable * make sure all packages are installed * move file * update check repo * revert changes * nit * finish cleanup * fix re * findall * update don't test init files * ignore pycache * `-ignore-pycache` when running pytests * try to fix the import missmatch error * install dec * pytest is required as doctest_utils imports things from it * the only log issues were dataset, ignore results should work * more cleaning * Update .circleci/create_circleci_config.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> * Apply suggestions from code review Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com> * [ydshieh] empty string if cuda is found * [ydshieh] fix condition * style * [ydshieh] fix * Add comment * style * style * show failure * trigger CI --------- Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com> Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com> Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
novice03 · May 9, 2023 · 627f447 · 627f447
1 parent 650a71e
commit 627f447
Show file tree

Hide file tree

Showing 10 changed files with 287 additions and 178 deletions.
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -166,6 +166,7 @@ jobs:
                       - v0.6-repository_consistency
             - run: pip install --upgrade pip
             - run: pip install .[all,quality]
+            - run: pip install pytest
             - save_cache:
                   key: v0.5-repository_consistency-{{ checksum "setup.py" }}
                   paths:

diff --git a/.circleci/create_circleci_config.py b/.circleci/create_circleci_config.py
@@ -51,6 +51,8 @@ class CircleCIJob:
     resource_class: Optional[str] = "xlarge"
     tests_to_run: Optional[List[str]] = None
     working_directory: str = "~/transformers"
+    # This should be only used for doctest job!
+    command_timeout: Optional[int] = None
 
     def __post_init__(self):
         # Deal with defaults for mutable attributes.
@@ -107,11 +109,15 @@ def to_dict(self):
         steps.append({"store_artifacts": {"path": "~/transformers/installed.txt"}})
 
         all_options = {**COMMON_PYTEST_OPTIONS, **self.pytest_options}
-        pytest_flags = [f"--{key}={value}" if value is not None else f"-{key}" for key, value in all_options.items()]
+        pytest_flags = [f"--{key}={value}" if (value is not None or key in ["doctest-modules"]) else f"-{key}" for key, value in all_options.items()]
         pytest_flags.append(
             f"--make-reports={self.name}" if "examples" in self.name else f"--make-reports=tests_{self.name}"
         )
-        test_command = f"python -m pytest -n {self.pytest_num_workers} " + " ".join(pytest_flags)
+        test_command = ""
+        if self.command_timeout:
+            test_command = f"timeout {self.command_timeout} "
+        test_command += f"python -m pytest -n {self.pytest_num_workers} " + " ".join(pytest_flags)
+
         if self.parallelism == 1:
             if self.tests_to_run is None:
                 test_command += " << pipeline.parameters.tests_to_run >>"
@@ -161,12 +167,37 @@ def to_dict(self):
             steps.append({"store_artifacts": {"path": "~/transformers/tests.txt"}})
             steps.append({"store_artifacts": {"path": "~/transformers/splitted_tests.txt"}})
 
-            test_command = f"python -m pytest -n {self.pytest_num_workers} " + " ".join(pytest_flags)
+            test_command = ""
+            if self.timeout:
+                test_command = f"timeout {self.timeout} "
+            test_command += f"python -m pytest -n {self.pytest_num_workers} " + " ".join(pytest_flags)
             test_command += " $(cat splitted_tests.txt)"
         if self.marker is not None:
             test_command += f" -m {self.marker}"
-        test_command += " | tee tests_output.txt"
+
+        if self.name == "pr_documentation_tests":
+            # can't use ` | tee tee tests_output.txt` as usual
+            test_command += " > tests_output.txt"
+            # Save the return code, so we can check if it is timeout in the next step.
+            test_command += '; touch "$?".txt'
+            # Never fail the test step for the doctest job. We will check the results in the next step, and fail that
+            # step instead if the actual test failures are found. This is to avoid the timeout being reported as test
+            # failure.
+            test_command = f"({test_command}) || true"
+        else:
+            test_command += " | tee tests_output.txt"
         steps.append({"run": {"name": "Run tests", "command": test_command}})
+
+        # return code `124` means the previous (pytest run) step is timeout
+        if self.name == "pr_documentation_tests":
+            checkout_doctest_command = 'if [ -s reports/tests_pr_documentation_tests/failures_short.txt ]; '
+            checkout_doctest_command += 'then echo "some test failed"; '
+            checkout_doctest_command += 'cat reports/tests_pr_documentation_tests/failures_short.txt; '
+            checkout_doctest_command += 'cat reports/tests_pr_documentation_tests/summary_short.txt; exit -1; '
+            checkout_doctest_command += 'elif [ -s reports/tests_pr_documentation_tests/stats.txt ]; then echo "All tests pass!"; '
+            checkout_doctest_command += 'elif [ -f 124.txt ]; then echo "doctest timeout!"; else echo "other fatal error)"; exit -1; fi;'
+            steps.append({"run": {"name": "Check doctest results", "command": checkout_doctest_command}})
+
         steps.append({"store_artifacts": {"path": "~/transformers/tests_output.txt"}})
         steps.append({"store_artifacts": {"path": "~/transformers/reports"}})
         job["steps"] = steps
@@ -401,6 +432,51 @@ def job_name(self):
     tests_to_run="tests/repo_utils",
 )
 
+# At this moment, only the files that are in `utils/documentation_tests.txt` will be kept (together with a dummy file).
+py_command = 'import os; import json; fp = open("pr_documentation_tests.txt"); data_1 = fp.read().strip().split("\\n"); fp = open("utils/documentation_tests.txt"); data_2 = fp.read().strip().split("\\n"); to_test = [x for x in data_1 if x in set(data_2)] + ["dummy.py"]; to_test = " ".join(to_test); print(to_test)'
+py_command = f"$(python3 -c '{py_command}')"
+command = f'echo "{py_command}" > pr_documentation_tests_filtered.txt'
+doc_test_job = CircleCIJob(
+    "pr_documentation_tests",
+    additional_env={"TRANSFORMERS_VERBOSITY": "error", "DATASETS_VERBOSITY": "error", "SKIP_CUDA_DOCTEST": "1"},
+    install_steps=[
+        "sudo apt-get -y update && sudo apt-get install -y libsndfile1-dev espeak-ng time",
+        "pip install --upgrade pip",
+        "pip install -e .[dev]",
+        "pip install git+https://github.com/huggingface/accelerate",
+        "pip install --upgrade pytest pytest-sugar",
+        "find -name __pycache__ -delete",
+        "find . -name \*.pyc -delete",
+        # Add an empty file to keep the test step running correctly even no file is selected to be tested.
+        "touch dummy.py",
+        {
+            "name": "Get files to test",
+            "command":
+                "git remote add upstream https://github.com/huggingface/transformers.git && git fetch upstream \n"
+                "git diff --name-only --relative --diff-filter=AMR refs/remotes/upstream/main...HEAD | grep -E '\.(py|mdx)$' | grep -Ev '^\..*|/\.' | grep -Ev '__' > pr_documentation_tests.txt"
+        },
+        {
+            "name": "List files beings changed: pr_documentation_tests.txt",
+            "command":
+                "cat pr_documentation_tests.txt"
+        },
+        {
+            "name": "Filter pr_documentation_tests.txt",
+            "command":
+                command
+        },
+        {
+            "name": "List files beings tested: pr_documentation_tests_filtered.txt",
+            "command":
+                "cat pr_documentation_tests_filtered.txt"
+        },
+    ],
+    tests_to_run="$(cat pr_documentation_tests_filtered.txt)",  # noqa
+    pytest_options={"-doctest-modules": None, "doctest-glob": "*.mdx", "dist": "loadfile", "rvsA": None},
+    command_timeout=1200,  # test cannot run longer than 1200 seconds
+    pytest_num_workers=1,
+)
+
 REGULAR_TESTS = [
     torch_and_tf_job,
     torch_and_flax_job,
@@ -411,6 +487,7 @@ def job_name(self):
     hub_job,
     onnx_job,
     exotic_models_job,
+    doc_test_job
 ]
 EXAMPLES_TESTS = [
     examples_torch_job,

diff --git a/.github/workflows/doctests.yml b/.github/workflows/doctests.yml
@@ -37,18 +37,10 @@ jobs:
       - name: Show installed libraries and their versions
         run: pip freeze
 
-      - name: Prepare files for doctests
-        run: |
-          python3 utils/prepare_for_doc_test.py src docs
-
       - name: Run doctests
         run: |
           python3 -m pytest -v --make-reports doc_tests_gpu --doctest-modules $(cat utils/documentation_tests.txt) -sv --doctest-continue-on-failure --doctest-glob="*.mdx"
 
-      - name: Clean files after doctests
-        run: |
-          python3 utils/prepare_for_doc_test.py src docs --remove_new_line
-
       - name: Failure short reports
         if: ${{ failure() }}
         continue-on-error: true

diff --git a/Makefile b/Makefile
@@ -47,10 +47,10 @@ repo-consistency:
 # this target runs checks on all files
 
 quality:
-	black --check $(check_dirs) setup.py
+	black --check $(check_dirs) setup.py conftest.py
 	python utils/custom_init_isort.py --check_only
 	python utils/sort_auto_mappings.py --check_only
-	ruff $(check_dirs) setup.py
+	ruff $(check_dirs) setup.py conftest.py
 	doc-builder style src/transformers docs/source --max_len 119 --check_only --path_to_docs docs/source
 	python utils/check_doc_toc.py
 
@@ -65,8 +65,8 @@ extra_style_checks:
 # this target runs checks on all files and potentially modifies some of them
 
 style:
-	black $(check_dirs) setup.py
-	ruff $(check_dirs) setup.py --fix
+	black $(check_dirs) setup.py conftest.py
+	ruff $(check_dirs) setup.py conftest.py --fix
 	${MAKE} autogenerate_code
 	${MAKE} extra_style_checks
 

diff --git a/conftest.py b/conftest.py
@@ -20,6 +20,10 @@
 import warnings
 from os.path import abspath, dirname, join
 
+import _pytest
+
+from transformers.utils.doctest_utils import HfDoctestModule, HfDocTestParser
+
 
 # allow having multiple repository checkouts and not needing to remember to rerun
 # 'pip install -e .[dev]' when switching between checkouts and running tests.
@@ -38,9 +42,7 @@ def pytest_configure(config):
     config.addinivalue_line(
         "markers", "is_pt_flax_cross_test: mark test to run only when PT and FLAX interactions are tested"
     )
-    config.addinivalue_line(
-        "markers", "is_pipeline_test: mark test to run only when pipelines are tested"
-    )
+    config.addinivalue_line("markers", "is_pipeline_test: mark test to run only when pipelines are tested")
     config.addinivalue_line("markers", "is_staging_test: mark test to run only in the staging environment")
     config.addinivalue_line("markers", "accelerate_tests: mark test that require accelerate")
 
@@ -66,7 +68,7 @@ def pytest_sessionfinish(session, exitstatus):
 
 
 # Doctest custom flag to ignore output.
-IGNORE_RESULT = doctest.register_optionflag('IGNORE_RESULT')
+IGNORE_RESULT = doctest.register_optionflag("IGNORE_RESULT")
 
 OutputChecker = doctest.OutputChecker
 
@@ -79,3 +81,5 @@ def check_output(self, want, got, optionflags):
 
 
 doctest.OutputChecker = CustomOutputChecker
+_pytest.doctest.DoctestModule = HfDoctestModule
+doctest.DocTestParser = HfDocTestParser
diff --git a/docs/source/en/testing.mdx b/docs/source/en/testing.mdx
@@ -212,20 +212,12 @@ Example:
     ```"""
 
 ```
-3 steps are required to debug the docstring examples: 
-1. In order to properly run the test, **an extra line has to be added** at the end of the docstring. This can be automatically done on any file using: 
-```bash 
-python utils/prepare_for_doc_test.py <path_to_file_or_dir>
-```
 
-2. Then, you can use the following line to automatically test every docstring example in the desired file: 
+Just run the following line to automatically test every docstring example in the desired file: 
 ```bash 
 pytest --doctest-modules <path_to_file_or_dir>
 ```
-3. Once you are done debugging, you need to remove the extra line added in step **1.** by running the following: 
-```bash 
-python utils/prepare_for_doc_test.py <path_to_file_or_dir> --remove_new_line
-```
+If the file has a markdown extention, you should add the `--doctest-glob="*.mdx"` argument.
 
 ### Run only modified tests
 

diff --git a/setup.cfg b/setup.cfg
@@ -1,2 +1,3 @@
 [tool:pytest]
 doctest_optionflags=NUMBER NORMALIZE_WHITESPACE ELLIPSIS
+doctest_glob=**/*.mdx
diff --git a/src/transformers/utils/__init__.py b/src/transformers/utils/__init__.py
@@ -27,6 +27,7 @@
     copy_func,
     replace_return_docstrings,
 )
+from .doctest_utils import HfDocTestParser
 from .generic import (
     ContextManagers,
     ExplicitEnum,