
Commit ac108c6

run formatting
Titus-von-Koeller committed Sep 11, 2024
1 parent e607b7c commit ac108c6
Showing 4 changed files with 136 additions and 85 deletions.
184 changes: 118 additions & 66 deletions .circleci/create_circleci_config.py
@@ -16,10 +16,9 @@
import argparse
import copy
import os
import random
from dataclasses import dataclass
from typing import Any, Dict, List, Optional
import glob

import yaml


@@ -32,7 +31,7 @@
"RUN_PT_FLAX_CROSS_TESTS": False,
}
# Disable the use of {"s": None} as the output is way too long, causing the navigation on CircleCI impractical
COMMON_PYTEST_OPTIONS = {"max-worker-restart": 0, "dist": "loadfile", "vvv": None, "rsf":None}
COMMON_PYTEST_OPTIONS = {"max-worker-restart": 0, "dist": "loadfile", "vvv": None, "rsf": None}
DEFAULT_DOCKER_IMAGE = [{"image": "cimg/python:3.8.12"}]


@@ -42,7 +41,7 @@ class EmptyJob:
def to_dict(self):
return {
"docker": copy.deepcopy(DEFAULT_DOCKER_IMAGE),
"steps":["checkout"],
"steps": ["checkout"],
}


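The __post_init__ hunk just below wraps the dev-image switch across several lines; its effect, as a standalone sketch (the image name is one of the CI images defined later in this file, and GIT_COMMIT_MESSAGE being set by the CI trigger is an assumption taken from the code's own "BIG HACK" comment):

import os

# Sketch: switch to the ":dev" tag of the CI image when the commit message asks for it.
docker_image = [{"image": "huggingface/transformers-torch-light"}]
msg = os.environ.get("GIT_COMMIT_MESSAGE", "")
if "[build-ci-image]" in msg or msg == "dev-ci":
    docker_image[0]["image"] = f"{docker_image[0]['image']}:dev"
print(f"Using {docker_image} docker image")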
@@ -72,7 +71,10 @@ def __post_init__(self):
else:
# BIG HACK WILL REMOVE ONCE FETCHER IS UPDATED
print(os.environ.get("GIT_COMMIT_MESSAGE"))
if "[build-ci-image]" in os.environ.get("GIT_COMMIT_MESSAGE", "") or os.environ.get("GIT_COMMIT_MESSAGE", "") == "dev-ci":
if (
"[build-ci-image]" in os.environ.get("GIT_COMMIT_MESSAGE", "")
or os.environ.get("GIT_COMMIT_MESSAGE", "") == "dev-ci"
):
self.docker_image[0]["image"] = f"{self.docker_image[0]['image']}:dev"
print(f"Using {self.docker_image} docker image")
if self.install_steps is None:
@@ -82,7 +84,7 @@ def __post_init__(self):
if isinstance(self.tests_to_run, str):
self.tests_to_run = [self.tests_to_run]
else:
test_file = os.path.join("test_preparation" , f"{self.job_name}_test_list.txt")
test_file = os.path.join("test_preparation", f"{self.job_name}_test_list.txt")
print("Looking for ", test_file)
if os.path.exists(test_file):
with open(test_file) as f:
@@ -105,46 +107,93 @@ def to_dict(self):
job["resource_class"] = self.resource_class

all_options = {**COMMON_PYTEST_OPTIONS, **self.pytest_options}
pytest_flags = [f"--{key}={value}" if (value is not None or key in ["doctest-modules"]) else f"-{key}" for key, value in all_options.items()]
pytest_flags = [
f"--{key}={value}" if (value is not None or key in ["doctest-modules"]) else f"-{key}"
for key, value in all_options.items()
]
pytest_flags.append(
f"--make-reports={self.name}" if "examples" in self.name else f"--make-reports=tests_{self.name}"
)
# Examples special case: we need to download NLTK files in advance to avoid cuncurrency issues
timeout_cmd = f"timeout {self.command_timeout} " if self.command_timeout else ""
marker_cmd = f"-m '{self.marker}'" if self.marker is not None else ""
additional_flags = f" -p no:warning -o junit_family=xunit1 --junitxml=test-results/junit.xml"
parallel = f' << pipeline.parameters.{self.job_name}_parallelism >> '
additional_flags = " -p no:warning -o junit_family=xunit1 --junitxml=test-results/junit.xml"
parallel = f" << pipeline.parameters.{self.job_name}_parallelism >> "
steps = [
"checkout",
{"attach_workspace": {"at": "test_preparation"}},
{"run": "apt-get update && apt-get install -y curl"},
{"run": " && ".join(self.install_steps)},
{"run": {"name": "Download NLTK files", "command": """python -c "import nltk; nltk.download('punkt', quiet=True)" """} if "example" in self.name else "echo Skipping"},
{"run": {
{
"run": {
"name": "Download NLTK files",
"command": """python -c "import nltk; nltk.download('punkt', quiet=True)" """,
}
if "example" in self.name
else "echo Skipping"
},
{
"run": {
"name": "Show installed libraries and their size",
"command": """du -h -d 1 "$(pip -V | cut -d ' ' -f 4 | sed 's/pip//g')" | grep -vE "dist-info|_distutils_hack|__pycache__" | sort -h | tee installed.txt || true"""}
"command": """du -h -d 1 "$(pip -V | cut -d ' ' -f 4 | sed 's/pip//g')" | grep -vE "dist-info|_distutils_hack|__pycache__" | sort -h | tee installed.txt || true""",
}
},
{"run": {
"name": "Show installed libraries and their versions",
"command": """pip list --format=freeze | tee installed.txt || true"""}
{
"run": {
"name": "Show installed libraries and their versions",
"command": """pip list --format=freeze | tee installed.txt || true""",
}
},
{"run": {
"name": "Show biggest libraries",
"command": """dpkg-query --show --showformat='${Installed-Size}\t${Package}\n' | sort -rh | head -25 | sort -h | awk '{ package=$2; sub(".*/", "", package); printf("%.5f GB %s\n", $1/1024/1024, package)}' || true"""}
{
"run": {
"name": "Show biggest libraries",
"command": """dpkg-query --show --showformat='${Installed-Size}\t${Package}\n' | sort -rh | head -25 | sort -h | awk '{ package=$2; sub(".*/", "", package); printf("%.5f GB %s\n", $1/1024/1024, package)}' || true""",
}
},
{"run": {"name": "Create `test-results` directory", "command": "mkdir test-results"}},
{"run": {"name": "Get files to test", "command":f'curl -L -o {self.job_name}_test_list.txt <<pipeline.parameters.{self.job_name}_test_list>>' if self.name != "pr_documentation_tests" else 'echo "Skipped"'}},
{"run": {"name": "Split tests across parallel nodes: show current parallel tests",
"command": f"TESTS=$(circleci tests split --split-by=timings {self.job_name}_test_list.txt) && echo $TESTS > splitted_tests.txt && echo $TESTS | tr ' ' '\n'" if self.parallelism else f"awk '{{printf \"%s \", $0}}' {self.job_name}_test_list.txt > splitted_tests.txt"
}
{
"run": {
"name": "Get files to test",
"command": f"curl -L -o {self.job_name}_test_list.txt <<pipeline.parameters.{self.job_name}_test_list>>"
if self.name != "pr_documentation_tests"
else 'echo "Skipped"',
}
},
{
"run": {
"name": "Split tests across parallel nodes: show current parallel tests",
"command": f"TESTS=$(circleci tests split --split-by=timings {self.job_name}_test_list.txt) && echo $TESTS > splitted_tests.txt && echo $TESTS | tr ' ' '\n'"
if self.parallelism
else f"awk '{{printf \"%s \", $0}}' {self.job_name}_test_list.txt > splitted_tests.txt",
}
},
{
"run": {
"name": "Run tests",
"command": f"({timeout_cmd} python3 -m pytest {marker_cmd} -n {self.pytest_num_workers} {additional_flags} {' '.join(pytest_flags)} $(cat splitted_tests.txt) | tee tests_output.txt)",
}
},
{"run": {
"name": "Run tests",
"command": f"({timeout_cmd} python3 -m pytest {marker_cmd} -n {self.pytest_num_workers} {additional_flags} {' '.join(pytest_flags)} $(cat splitted_tests.txt) | tee tests_output.txt)"}
{
"run": {
"name": "Expand to show skipped tests",
"when": "always",
"command": "python3 .circleci/parse_test_outputs.py --file tests_output.txt --skip",
}
},
{
"run": {
"name": "Failed tests: show reasons",
"when": "always",
"command": "python3 .circleci/parse_test_outputs.py --file tests_output.txt --fail",
}
},
{
"run": {
"name": "Errors",
"when": "always",
"command": "python3 .circleci/parse_test_outputs.py --file tests_output.txt --errors",
}
},
{"run": {"name": "Expand to show skipped tests", "when": "always", "command": f"python3 .circleci/parse_test_outputs.py --file tests_output.txt --skip"}},
{"run": {"name": "Failed tests: show reasons", "when": "always", "command": f"python3 .circleci/parse_test_outputs.py --file tests_output.txt --fail"}},
{"run": {"name": "Errors", "when": "always", "command": f"python3 .circleci/parse_test_outputs.py --file tests_output.txt --errors"}},
{"store_test_results": {"path": "test-results"}},
{"store_artifacts": {"path": "test-results/junit.xml"}},
{"store_artifacts": {"path": "reports"}},
@@ -159,13 +208,17 @@ def to_dict(self):

@property
def job_name(self):
return self.name if ("examples" in self.name or "pipeline" in self.name or "pr_documentation" in self.name) else f"tests_{self.name}"
return (
self.name
if ("examples" in self.name or "pipeline" in self.name or "pr_documentation" in self.name)
else f"tests_{self.name}"
)


# JOBS
torch_and_tf_job = CircleCIJob(
"torch_and_tf",
docker_image=[{"image":"huggingface/transformers-torch-tf-light"}],
docker_image=[{"image": "huggingface/transformers-torch-tf-light"}],
additional_env={"RUN_PT_TF_CROSS_TESTS": True},
marker="is_pt_tf_cross_test",
pytest_options={"rA": None, "durations": 0},
Expand All @@ -175,7 +228,7 @@ def job_name(self):
torch_and_flax_job = CircleCIJob(
"torch_and_flax",
additional_env={"RUN_PT_FLAX_CROSS_TESTS": True},
docker_image=[{"image":"huggingface/transformers-torch-jax-light"}],
docker_image=[{"image": "huggingface/transformers-torch-jax-light"}],
marker="is_pt_flax_cross_test",
pytest_options={"rA": None, "durations": 0},
)
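For reference, the wrapped job_name property above encodes this rule (a minimal standalone sketch, logic copied from the property):

# Sketch: example/pipeline/documentation jobs keep their name; all others get a "tests_" prefix.
def job_name(name: str) -> str:
    if "examples" in name or "pipeline" in name or "pr_documentation" in name:
        return name
    return f"tests_{name}"

assert job_name("torch") == "tests_torch"
assert job_name("examples_torch") == "examples_torch"
assert job_name("pipelines_tf") == "pipelines_tf"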
@@ -185,62 +238,56 @@ def job_name(self):
docker_image=[{"image": "huggingface/transformers-torch-light"}],
marker="not generate",
parallelism=6,
pytest_num_workers=8
pytest_num_workers=8,
)

generate_job = CircleCIJob(
"generate",
docker_image=[{"image": "huggingface/transformers-torch-light"}],
marker="generate",
parallelism=6,
pytest_num_workers=8
pytest_num_workers=8,
)

tokenization_job = CircleCIJob(
"tokenization",
docker_image=[{"image": "huggingface/transformers-torch-light"}],
parallelism=8,
pytest_num_workers=16
pytest_num_workers=16,
)

processor_job = CircleCIJob(
"processors",
docker_image=[{"image": "huggingface/transformers-torch-light"}],
parallelism=8,
pytest_num_workers=6
"processors", docker_image=[{"image": "huggingface/transformers-torch-light"}], parallelism=8, pytest_num_workers=6
)

tf_job = CircleCIJob(
"tf",
docker_image=[{"image":"huggingface/transformers-tf-light"}],
docker_image=[{"image": "huggingface/transformers-tf-light"}],
parallelism=6,
pytest_num_workers=16,
)


flax_job = CircleCIJob(
"flax",
docker_image=[{"image":"huggingface/transformers-jax-light"}],
parallelism=6,
pytest_num_workers=16
"flax", docker_image=[{"image": "huggingface/transformers-jax-light"}], parallelism=6, pytest_num_workers=16
)


pipelines_torch_job = CircleCIJob(
"pipelines_torch",
additional_env={"RUN_PIPELINE_TESTS": True},
docker_image=[{"image":"huggingface/transformers-torch-light"}],
docker_image=[{"image": "huggingface/transformers-torch-light"}],
marker="is_pipeline_test",
parallelism=4
parallelism=4,
)


pipelines_tf_job = CircleCIJob(
"pipelines_tf",
additional_env={"RUN_PIPELINE_TESTS": True},
docker_image=[{"image":"huggingface/transformers-tf-light"}],
docker_image=[{"image": "huggingface/transformers-tf-light"}],
marker="is_pipeline_test",
parallelism=4
parallelism=4,
)


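A note on the two fan-out knobs used throughout the job definitions (values below taken from torch_job; the interpretation follows from the steps generated in to_dict() above — this is a sketch, not code from the file):

# parallelism: number of CircleCI executors; test files are sharded across them via
# `circleci tests split --split-by=timings` in the generated "Split tests" step.
parallelism = 6
# pytest_num_workers: pytest-xdist processes inside each executor — the `-n` flag
# of the generated "Run tests" step.
pytest_num_workers = 8
run_tests = f"python3 -m pytest -n {pytest_num_workers} $(cat splitted_tests.txt)"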
@@ -254,7 +301,7 @@ def job_name(self):
examples_torch_job = CircleCIJob(
"examples_torch",
additional_env={"OMP_NUM_THREADS": 8},
docker_image=[{"image":"huggingface/transformers-examples-torch"}],
docker_image=[{"image": "huggingface/transformers-examples-torch"}],
# TODO @ArthurZucker remove this once docker is easier to build
install_steps=["uv venv && uv pip install . && uv pip install -r examples/pytorch/_tests_requirements.txt"],
pytest_num_workers=8,
@@ -264,17 +311,17 @@ def job_name(self):
examples_tensorflow_job = CircleCIJob(
"examples_tensorflow",
additional_env={"OMP_NUM_THREADS": 8},
docker_image=[{"image":"huggingface/transformers-examples-tf"}],
docker_image=[{"image": "huggingface/transformers-examples-tf"}],
pytest_num_workers=16,
)


hub_job = CircleCIJob(
"hub",
additional_env={"HUGGINGFACE_CO_STAGING": True},
docker_image=[{"image":"huggingface/transformers-torch-light"}],
docker_image=[{"image": "huggingface/transformers-torch-light"}],
install_steps=[
'uv venv && uv pip install .',
"uv venv && uv pip install .",
'git config --global user.email "ci@dummy.com"',
'git config --global user.name "ci"',
],
@@ -285,7 +332,7 @@ def job_name(self):

onnx_job = CircleCIJob(
"onnx",
docker_image=[{"image":"huggingface/transformers-torch-tf-light"}],
docker_image=[{"image": "huggingface/transformers-torch-tf-light"}],
install_steps=[
"uv venv",
"uv pip install .[torch,tf,testing,sentencepiece,onnxruntime,vision,rjieba]",
@@ -297,7 +344,7 @@ def job_name(self):

exotic_models_job = CircleCIJob(
"exotic_models",
docker_image=[{"image":"huggingface/transformers-exotic-models"}],
docker_image=[{"image": "huggingface/transformers-exotic-models"}],
pytest_num_workers=12,
parallelism=4,
pytest_options={"durations": 100},
@@ -306,7 +353,7 @@ def job_name(self):

repo_utils_job = CircleCIJob(
"repo_utils",
docker_image=[{"image":"huggingface/transformers-consistency"}],
docker_image=[{"image": "huggingface/transformers-consistency"}],
pytest_num_workers=4,
resource_class="large",
)
@@ -320,53 +367,58 @@ def job_name(self):
command = f'echo """{py_command}""" > pr_documentation_tests_temp.txt'
doc_test_job = CircleCIJob(
"pr_documentation_tests",
docker_image=[{"image":"huggingface/transformers-consistency"}],
docker_image=[{"image": "huggingface/transformers-consistency"}],
additional_env={"TRANSFORMERS_VERBOSITY": "error", "DATASETS_VERBOSITY": "error", "SKIP_CUDA_DOCTEST": "1"},
install_steps=[
# Add an empty file to keep the test step running correctly even no file is selected to be tested.
"uv venv && pip install .",
"touch dummy.py",
command,
"cat pr_documentation_tests_temp.txt",
"tail -n1 pr_documentation_tests_temp.txt | tee pr_documentation_tests_test_list.txt"
"tail -n1 pr_documentation_tests_temp.txt | tee pr_documentation_tests_test_list.txt",
],
tests_to_run="$(cat pr_documentation_tests.txt)", # noqa
pytest_options={"-doctest-modules": None, "doctest-glob": "*.md", "dist": "loadfile", "rvsA": None},
command_timeout=1200, # test cannot run longer than 1200 seconds
pytest_num_workers=1,
)

REGULAR_TESTS = [torch_and_tf_job, torch_and_flax_job, torch_job, tf_job, flax_job, hub_job, onnx_job, tokenization_job, processor_job, generate_job] # fmt: skip
REGULAR_TESTS = [torch_and_tf_job, torch_and_flax_job, torch_job, tf_job, flax_job, hub_job, onnx_job, tokenization_job, processor_job, generate_job] # fmt: skip
EXAMPLES_TESTS = [examples_torch_job, examples_tensorflow_job]
PIPELINE_TESTS = [pipelines_torch_job, pipelines_tf_job]
REPO_UTIL_TESTS = [repo_utils_job]
DOC_TESTS = [doc_test_job]
ALL_TESTS = REGULAR_TESTS + EXAMPLES_TESTS + PIPELINE_TESTS + REPO_UTIL_TESTS + DOC_TESTS + [custom_tokenizers_job] + [exotic_models_job] # fmt: skip


def create_circleci_config(folder=None):
if folder is None:
folder = os.getcwd()
os.environ["test_preparation_dir"] = folder
jobs = [k for k in ALL_TESTS if os.path.isfile(os.path.join("test_preparation" , f"{k.job_name}_test_list.txt") )]
jobs = [k for k in ALL_TESTS if os.path.isfile(os.path.join("test_preparation", f"{k.job_name}_test_list.txt"))]
print("The following jobs will be run ", jobs)

if len(jobs) == 0:
jobs = [EmptyJob()]
print("Full list of job name inputs", {j.job_name + "_test_list":{"type":"string", "default":''} for j in jobs})
print("Full list of job name inputs", {j.job_name + "_test_list": {"type": "string", "default": ""} for j in jobs})
config = {
"version": "2.1",
"parameters": {
# Only used to accept the parameters from the trigger
"nightly": {"type": "boolean", "default": False},
"tests_to_run": {"type": "string", "default": ''},
**{j.job_name + "_test_list":{"type":"string", "default":''} for j in jobs},
**{j.job_name + "_parallelism":{"type":"integer", "default":1} for j in jobs},
"tests_to_run": {"type": "string", "default": ""},
**{j.job_name + "_test_list": {"type": "string", "default": ""} for j in jobs},
**{j.job_name + "_parallelism": {"type": "integer", "default": 1} for j in jobs},
},
"jobs" : {j.job_name: j.to_dict() for j in jobs},
"workflows": {"version": 2, "run_tests": {"jobs": [j.job_name for j in jobs]}}
"jobs": {j.job_name: j.to_dict() for j in jobs},
"workflows": {"version": 2, "run_tests": {"jobs": [j.job_name for j in jobs]}},
}
with open(os.path.join(folder, "generated_config.yml"), "w") as f:
f.write(yaml.dump(config, sort_keys=False, default_flow_style=False).replace("' << pipeline", " << pipeline").replace(">> '", " >>"))
f.write(
yaml.dump(config, sort_keys=False, default_flow_style=False)
.replace("' << pipeline", " << pipeline")
.replace(">> '", " >>")
)


if __name__ == "__main__":
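Why the chained .replace() calls in create_circleci_config above are needed (a sketch; it relies only on PyYAML's standard quoting behavior): yaml.dump single-quotes any string with leading or trailing spaces, but CircleCI must see the << pipeline.parameters... >> placeholder unquoted so it can substitute the parameter at config-processing time.

import yaml

# yaml.dump quotes the placeholder because of its leading/trailing spaces ...
doc = yaml.dump({"parallelism": " << pipeline.parameters.tests_torch_parallelism >> "})
print(doc)
# parallelism: ' << pipeline.parameters.tests_torch_parallelism >> '

# ... so the generator strips the quotes back out, exactly as in the f.write() call above.
print(doc.replace("' << pipeline", " << pipeline").replace(">> '", " >>"))
# parallelism:  << pipeline.parameters.tests_torch_parallelism  >>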
