diff --git a/.circleci/config.yml b/.circleci/config.yml
index 261333f826b8ed..325195c2aaaa8e 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -110,10 +110,12 @@ jobs:
                   key: v0.3-torch-{{ checksum "setup.py" }}
                   paths:
                       - '~/.cache/pip'
-            - run: python -m pytest -n 8 --dist=loadfile -rA -s ./tests/ | tee output.txt
+            - run: python -m pytest -n 8 --dist=loadfile -s --make_reports=tests ./tests/ | tee tests_output.txt
             - store_artifacts:
-                  path: ~/transformers/output.txt
-                  destination: test_output.txt
+                  path: ~/transformers/tests_output.txt
+            - store_artifacts:
+                  path: ~/transformers/reports
+
     run_tests_tf:
         working_directory: ~/transformers
         docker:
@@ -258,10 +260,11 @@ jobs:
                   key: v0.3-torch_examples-{{ checksum "setup.py" }}
                   paths:
                       - '~/.cache/pip'
-            - run: python -m pytest -n 8 --dist=loadfile -rA -s ./examples/ | tee output.txt
+            - run: python -m pytest -n 8 --dist=loadfile -s --make_reports=examples ./examples/ | tee examples_output.txt
             - store_artifacts:
-                  path: ~/transformers/output.txt
-                  destination: test_output.txt
+                  path: ~/transformers/examples_output.txt
+            - store_artifacts:
+                  path: ~/transformers/reports
     build_doc:
         working_directory: ~/transformers
         docker:
diff --git a/.github/workflows/self-push.yml b/.github/workflows/self-push.yml
index 166e496708f643..8a9a25bc0aa155 100644
--- a/.github/workflows/self-push.yml
+++ b/.github/workflows/self-push.yml
@@ -22,6 +22,7 @@ jobs:
         which python
         python --version
         pip --version
+
     - name: Current dir
       run: pwd
     - run: nvidia-smi
@@ -40,6 +41,7 @@ jobs:
         which python
         python --version
         pip --version
+
     - name: Install dependencies
       run: |
         source .env/bin/activate
@@ -61,7 +63,8 @@ jobs:
         OMP_NUM_THREADS: 1
       run: |
         source .env/bin/activate
-        python -m pytest -n 2 --dist=loadfile -s ./tests/
+        python -m pytest -n 2 --dist=loadfile -s tests
+
 
   run_tests_torch_and_tf_multiple_gpu:
     runs-on: [self-hosted, multi-gpu]
diff --git a/.github/workflows/self-scheduled.yml b/.github/workflows/self-scheduled.yml
index 369fb6ddf6c72c..451a5c0032a4a5 100644
--- a/.github/workflows/self-scheduled.yml
+++ b/.github/workflows/self-scheduled.yml
@@ -26,9 +26,11 @@ jobs:
         which python
         python --version
         pip --version
+
     - name: Current dir
       run: pwd
     - run: nvidia-smi
+
     - name: Create new python env (on self-hosted runners we have to handle isolation ourselves)
       if: steps.cache.outputs.cache-hit != 'true'
       run: |
@@ -37,6 +39,7 @@ jobs:
         which python
         python --version
         pip --version
+
     - name: Install dependencies
       run: |
         source .env/bin/activate
@@ -51,7 +54,6 @@ jobs:
         python -c "import torch; print('Cuda available:', torch.cuda.is_available())"
         python -c "import torch; print('Number of GPUs available:', torch.cuda.device_count())"
 
-
     - name: Run all tests on GPU
       env:
         TF_FORCE_GPU_ALLOW_GROWTH: "true"
@@ -59,7 +61,7 @@ jobs:
         RUN_SLOW: yes
       run: |
         source .env/bin/activate
-        python -m pytest -n 1 --dist=loadfile -s ./tests/ --durations=50
+        python -m pytest -n 1 --dist=loadfile -s --make_reports=tests tests
 
     - name: Run examples tests on GPU
       env:
@@ -69,7 +71,7 @@ jobs:
       run: |
         source .env/bin/activate
         pip install -r examples/requirements.txt
-        python -m pytest -n 1 --dist=loadfile -s examples --durations=50
+        python -m pytest -n 1 --dist=loadfile -s --make_reports=examples examples
 
     - name: Run all pipeline tests on GPU
       env:
@@ -79,7 +81,13 @@ jobs:
         RUN_PIPELINE_TESTS: yes
       run: |
         source .env/bin/activate
-        python -m pytest -n 1 --dist=loadfile -s ./tests/ -m is_pipeline_test --durations=50
+        python -m pytest -n 1 --dist=loadfile -s -m is_pipeline_test --make_reports=tests_pipeline tests
+
+    - name: test suite reports artifacts
+      uses: actions/upload-artifact@v2
+      with:
+        name: test_reports
+        path: reports
 
 
   run_all_tests_torch_and_tf_multiple_gpu:
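All three CI configs above converge on one scheme: each pytest invocation passes a distinct `--make_reports=<id>` so that multiple runs within a single job cannot overwrite each other's files, and the resulting `reports/` directory is uploaded as a build artifact. A minimal sketch of the layout such a run should leave behind, mirroring the `report_<id>_<name>.txt` naming used by the helper later in this patch (`REPORT_NAMES` and `expected_report_files` are illustrative names, not part of the patch):

```python
# Illustrative sketch only: mirrors the f"{dir}/report_{id}_{v}.txt" naming
# used by pytest_terminal_summary_main() further down in this patch.
REPORT_NAMES = ["durations", "short_summary", "errors", "failures", "warnings", "passes", "stats"]

def expected_report_files(id: str) -> list:
    """Report paths a `pytest --make_reports=<id>` run should leave behind."""
    return [f"reports/report_{id}_{name}.txt" for name in REPORT_NAMES]

print(expected_report_files("tests_pipeline")[0])
# reports/report_tests_pipeline_durations.txt
```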
diff --git a/examples/conftest.py b/examples/conftest.py
index 0cd00bcdc9f8e2..cc6c4acfff5797 100644
--- a/examples/conftest.py
+++ b/examples/conftest.py
@@ -14,3 +14,20 @@
 # silence FutureWarning warnings in tests since often we can't act on them until
 # they become normal warnings - i.e. the tests still need to test the current functionality
 warnings.simplefilter(action="ignore", category=FutureWarning)
+
+
+def pytest_addoption(parser):
+    parser.addoption(
+        "--make_reports",
+        action="store",
+        default=False,
+        help="generate report files - the value will be used as a `report_`+val+`reportname.txt`",
+    )
+
+
+def pytest_terminal_summary(terminalreporter):
+    from transformers.testing_utils import pytest_terminal_summary_main
+
+    make_reports = terminalreporter.config.getoption("--make_reports")
+    if make_reports:
+        pytest_terminal_summary_main(terminalreporter, id=make_reports)
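pytest only discovers `pytest_addoption` and `pytest_terminal_summary` hooks in `conftest.py` files (or registered plugins), which is presumably why each test root gets this thin wrapper while the shared logic lives in `testing_utils`; doing the import inside the hook also keeps the conftest importable before `transformers` is installed. A hypothetical smoke run of the option via `pytest.main()` (`run_with_reports` and the `demo` id are made up for illustration; this assumes a `tests/` directory next to the conftest above):

```python
import os
import pytest

def run_with_reports():
    # equivalent to the CI invocation: python -m pytest -s --make_reports=demo tests
    rc = pytest.main(["-s", "--make_reports=demo", "tests"])
    print("pytest exit code:", rc)
    # the pytest_terminal_summary hook should have left one file per report type
    if os.path.isdir("reports"):
        print("reports written:", sorted(os.listdir("reports")))

if __name__ == "__main__":
    run_with_reports()
```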
diff --git a/src/transformers/testing_utils.py b/src/transformers/testing_utils.py
index 06e096f55044bd..3d8cb693cdb8dd 100644
--- a/src/transformers/testing_utils.py
+++ b/src/transformers/testing_utils.py
@@ -577,3 +577,103 @@ def mockenv(**kwargs):
     os.getenv("USE_TF", False)
     """
     return unittest.mock.patch.dict(os.environ, kwargs)
+
+
+def pytest_terminal_summary_main(tr, id):
+    """
+    Generate multiple reports at the end of the test suite run - each report goes into a dedicated
+    file in the current directory. The report files are prefixed with the test suite name.
+
+    This function emulates the --durations and -rA pytest arguments.
+
+    This function is to be called from `conftest.py` via a `pytest_terminal_summary` wrapper that
+    has to be defined there.
+
+    Args:
+    - tr: `terminalreporter` passed from `conftest.py`
+    - id: unique id like `tests` or `examples` that will be incorporated into the final reports
+      filenames - this is needed as some jobs have multiple runs of pytest, so we can't have them
+      overwrite each other.
+
+    NB: this function taps into a private _pytest API and while unlikely, it could break should
+    pytest do internal changes - also it calls default internal methods of terminalreporter which
+    can be hijacked by various `pytest-` plugins and interfere.
+
+    """
+    from _pytest.config import create_terminal_writer
+
+    if not len(id):
+        id = "tests"
+
+    config = tr.config
+    orig_writer = config.get_terminal_writer()
+    orig_tbstyle = config.option.tbstyle
+    orig_reportchars = tr.reportchars
+
+    report_files = dict(
+        durations="durations",
+        short_summary="short_summary",
+        summary_errors="errors",
+        summary_failures="failures",
+        summary_warnings="warnings",
+        summary_passes="passes",
+        summary_stats="stats",
+    )
+    dir = "reports"
+    Path(dir).mkdir(parents=True, exist_ok=True)
+    report_files.update((k, f"{dir}/report_{id}_{v}.txt") for k, v in report_files.items())
+
+    # custom durations report
+    # note: there is no need to call pytest --durations=XX to get this separate report
+    # adapted from https://github.com/pytest-dev/pytest/blob/897f151e/src/_pytest/runner.py#L66
+    dlist = []
+    for replist in tr.stats.values():
+        for rep in replist:
+            if hasattr(rep, "duration"):
+                dlist.append(rep)
+    if dlist:
+        dlist.sort(key=lambda x: x.duration, reverse=True)
+        with open(report_files["durations"], "w") as f:
+            durations_min = 0.05  # sec
+            f.write("slowest durations\n")
+            for i, rep in enumerate(dlist):
+                if rep.duration < durations_min:
+                    f.write(f"{len(dlist)-i} durations < {durations_min} secs were omitted")
+                    break
+                f.write(f"{rep.duration:02.2f}s {rep.when:<8} {rep.nodeid}\n")
+
+    # use ready-made report funcs, we are just hijacking the filehandle to log to a dedicated file each
+    # adapted from https://github.com/pytest-dev/pytest/blob/897f151e/src/_pytest/terminal.py#L814
+    # note: some pytest plugins may interfere by hijacking the default `terminalreporter` (e.g.
+    # pytest-instafail does that)
+    tr.reportchars = "wPpsxXEf"  # emulate -rA (used in summary_passes() and short_test_summary())
+    config.option.tbstyle = "auto"
+    with open(report_files["summary_failures"], "w") as f:
+        tr._tw = create_terminal_writer(config, f)
+        tr.summary_failures()
+
+    with open(report_files["summary_errors"], "w") as f:
+        tr._tw = create_terminal_writer(config, f)
+        tr.summary_errors()
+
+    with open(report_files["summary_warnings"], "w") as f:
+        tr._tw = create_terminal_writer(config, f)
+        tr.summary_warnings()  # normal warnings
+        tr.summary_warnings()  # final warnings
+
+    with open(report_files["summary_passes"], "w") as f:
+        tr._tw = create_terminal_writer(config, f)
+        tr.summary_passes()
+
+    with open(report_files["short_summary"], "w") as f:
+        tr._tw = create_terminal_writer(config, f)
+        tr.short_test_summary()
+
+    with open(report_files["summary_stats"], "w") as f:
+        tr._tw = create_terminal_writer(config, f)
+        tr.summary_stats()
+
+    # restore:
+    tr._tw = orig_writer
+    tr.reportchars = orig_reportchars
+    config.option.tbstyle = orig_tbstyle
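The core maneuver in `pytest_terminal_summary_main` is the writer swap: point the reporter's private `_tw` at a `create_terminal_writer` bound to an ordinary file, call one of the stock `summary_*` methods, then restore the original writer. Isolated as a sketch (`write_summary_to_file` is an illustrative helper, not part of the patch; as the docstring warns, `_tw` is private pytest API):

```python
from _pytest.config import create_terminal_writer

def write_summary_to_file(tr, method_name, path):
    """Run one of terminalreporter's summary_* methods, sending output to `path`."""
    config = tr.config
    orig_writer = config.get_terminal_writer()
    try:
        with open(path, "w") as f:
            tr._tw = create_terminal_writer(config, f)  # hijack the writer
            getattr(tr, method_name)()                  # e.g. "summary_failures"
    finally:
        tr._tw = orig_writer  # always restore the real terminal writer

# usage, from inside a pytest_terminal_summary hook:
#   write_summary_to_file(terminalreporter, "summary_failures", "reports/failures.txt")
```

Wrapping the restore in `try`/`finally` is slightly more defensive than the patch itself, which restores the writer only after all reports have been written.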
diff --git a/tests/conftest.py b/tests/conftest.py
index d2b9b456716a67..a7c1f96f53da64 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -11,7 +11,6 @@
 git_repo_path = abspath(join(dirname(dirname(__file__)), "src"))
 sys.path.insert(1, git_repo_path)
 
-
 # silence FutureWarning warnings in tests since often we can't act on them until
 # they become normal warnings - i.e. the tests still need to test the current functionality
 warnings.simplefilter(action="ignore", category=FutureWarning)
@@ -22,3 +21,20 @@ def pytest_configure(config):
     config.addinivalue_line(
         "markers", "is_pt_tf_cross_test: mark test to run only when PT and TF interactions are tested"
     )
+
+
+def pytest_addoption(parser):
+    parser.addoption(
+        "--make_reports",
+        action="store",
+        default=False,
+        help="generate report files - the value will be used as a `report_`+val+`reportname.txt`",
+    )
+
+
+def pytest_terminal_summary(terminalreporter):
+    from transformers.testing_utils import pytest_terminal_summary_main
+
+    make_reports = terminalreporter.config.getoption("--make_reports")
+    if make_reports:
+        pytest_terminal_summary_main(terminalreporter, id=make_reports)
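For reference, the durations report produced by the helper is sorted slowest-first, with everything under the 0.05s cutoff collapsed into a single summary line. A toy rendering of that format (the entries are fabricated stand-ins for the `_pytest` report objects collected from `tr.stats`):

```python
from types import SimpleNamespace

# fabricated reports, stand-ins for the objects collected from tr.stats
reps = [
    SimpleNamespace(duration=2.31, when="call", nodeid="tests/test_a.py::test_slow"),
    SimpleNamespace(duration=0.40, when="setup", nodeid="tests/test_b.py::test_mid"),
    SimpleNamespace(duration=0.01, when="call", nodeid="tests/test_c.py::test_fast"),
]
reps.sort(key=lambda r: r.duration, reverse=True)

durations_min = 0.05  # same cutoff as the patch
print("slowest durations")
for i, rep in enumerate(reps):
    if rep.duration < durations_min:
        print(f"{len(reps) - i} durations < {durations_min} secs were omitted")
        break
    print(f"{rep.duration:02.2f}s {rep.when:<8} {rep.nodeid}")
```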