diff --git a/.circleci/config.yml b/.circleci/config.yml index 9d04cfb941cee2..9932156aa969db 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -34,90 +34,97 @@ jobs: - run: echo 'export "GIT_COMMIT_MESSAGE=$(git show -s --format=%s)"' >> "$BASH_ENV" && source "$BASH_ENV" - run: mkdir -p test_preparation - run: python utils/tests_fetcher.py | tee tests_fetched_summary.txt - - store_artifacts: - path: ~/transformers/tests_fetched_summary.txt - - run: | - if [ -f test_list.txt ]; then - cp test_list.txt test_preparation/test_list.txt - else - touch test_preparation/test_list.txt - fi - - run: | - if [ -f examples_test_list.txt ]; then - mv examples_test_list.txt test_preparation/examples_test_list.txt - else - touch test_preparation/examples_test_list.txt - fi - - run: | - if [ -f filtered_test_list_cross_tests.txt ]; then - mv filtered_test_list_cross_tests.txt test_preparation/filtered_test_list_cross_tests.txt - else - touch test_preparation/filtered_test_list_cross_tests.txt - fi - - run: | - if [ -f doctest_list.txt ]; then - cp doctest_list.txt test_preparation/doctest_list.txt - else - touch test_preparation/doctest_list.txt - fi - - run: | - if [ -f test_repo_utils.txt ]; then - mv test_repo_utils.txt test_preparation/test_repo_utils.txt - else - touch test_preparation/test_repo_utils.txt - fi - run: python utils/tests_fetcher.py --filter_tests + - run: export "GIT_COMMIT_MESSAGE=$(git show -s --format=%s)" && echo $GIT_COMMIT_MESSAGE && python .circleci/create_circleci_config.py --fetcher_folder test_preparation - run: | - if [ -f test_list.txt ]; then - mv test_list.txt test_preparation/filtered_test_list.txt - else - touch test_preparation/filtered_test_list.txt + if [ ! -s test_preparation/generated_config.yml ]; then + echo "No tests to run, exiting early!" + circleci-agent step halt fi + - store_artifacts: - path: test_preparation/test_list.txt - - store_artifacts: - path: test_preparation/doctest_list.txt - - store_artifacts: - path: ~/transformers/test_preparation/filtered_test_list.txt - - store_artifacts: - path: test_preparation/examples_test_list.txt - - run: export "GIT_COMMIT_MESSAGE=$(git show -s --format=%s)" && echo $GIT_COMMIT_MESSAGE && python .circleci/create_circleci_config.py --fetcher_folder test_preparation - - run: | - if [ ! -s test_preparation/generated_config.yml ]; then - echo "No tests to run, exiting early!" - circleci-agent step halt - fi + path: test_preparation + + - run: + name: "Retrieve Artifact Paths" + # [reference] https://circleci.com/docs/api/v2/index.html#operation/getJobArtifacts + # `CIRCLE_TOKEN` is defined as an environment variables set within a context, see `https://circleci.com/docs/contexts/` + command: | + project_slug="gh/${CIRCLE_PROJECT_USERNAME}/${CIRCLE_PROJECT_REPONAME}" + job_number=${CIRCLE_BUILD_NUM} + url="https://circleci.com/api/v2/project/${project_slug}/${job_number}/artifacts" + curl -o test_preparation/artifacts.json ${url} --header "Circle-Token: $CIRCLE_TOKEN" + - run: + name: "Prepare pipeline parameters" + command: | + python utils/process_test_artifacts.py + + # To avoid too long generated_config.yaml on the continuation orb, we pass the links to the artifacts as parameters. + # Otherwise the list of tests was just too big. Explicit is good but for that it was a limitation. + # We used: + + # https://circleci.com/docs/api/v2/index.html#operation/getJobArtifacts : to get the job artifacts + # We could not pass a nested dict, which is why we create the test_file_... 
parameters for every single job + - store_artifacts: - path: test_preparation/generated_config.yml + path: test_preparation/transformed_artifacts.json - store_artifacts: - path: test_preparation/filtered_test_list_cross_tests.txt + path: test_preparation/artifacts.json - continuation/continue: + parameters: test_preparation/transformed_artifacts.json configuration_path: test_preparation/generated_config.yml # To run all tests for the nightly build fetch_all_tests: working_directory: ~/transformers docker: - - image: huggingface/transformers-consistency + - image: huggingface/transformers-quality parallelism: 1 steps: - checkout - - run: uv pip install -e . - - run: | - mkdir test_preparation - echo -n "tests" > test_preparation/test_list.txt - echo -n "all" > test_preparation/examples_test_list.txt - echo -n "tests/repo_utils" > test_preparation/test_repo_utils.txt + - run: uv pip install -U -e . + - run: echo 'export "GIT_COMMIT_MESSAGE=$(git show -s --format=%s)"' >> "$BASH_ENV" && source "$BASH_ENV" + - run: mkdir -p test_preparation + - run: python utils/tests_fetcher.py --fetch_all | tee tests_fetched_summary.txt + - run: python utils/tests_fetcher.py --filter_tests + - run: export "GIT_COMMIT_MESSAGE=$(git show -s --format=%s)" && echo $GIT_COMMIT_MESSAGE && python .circleci/create_circleci_config.py --fetcher_folder test_preparation - run: | - echo -n "tests" > test_list.txt - python utils/tests_fetcher.py --filter_tests - mv test_list.txt test_preparation/filtered_test_list.txt - - run: python .circleci/create_circleci_config.py --fetcher_folder test_preparation - - run: cp test_preparation/generated_config.yml test_preparation/generated_config.txt + if [ ! -s test_preparation/generated_config.yml ]; then + echo "No tests to run, exiting early!" + circleci-agent step halt + fi + - store_artifacts: - path: test_preparation/generated_config.txt + path: test_preparation + + - run: + name: "Retrieve Artifact Paths" + env: + CIRCLE_TOKEN: ${{ secrets.CI_ARTIFACT_TOKEN }} + command: | + project_slug="gh/${CIRCLE_PROJECT_USERNAME}/${CIRCLE_PROJECT_REPONAME}" + job_number=${CIRCLE_BUILD_NUM} + url="https://circleci.com/api/v2/project/${project_slug}/${job_number}/artifacts" + curl -o test_preparation/artifacts.json ${url} + - run: + name: "Prepare pipeline parameters" + command: | + python utils/process_test_artifacts.py + + # To avoid too long generated_config.yaml on the continuation orb, we pass the links to the artifacts as parameters. + # Otherwise the list of tests was just too big. Explicit is good but for that it was a limitation. + # We used: + + # https://circleci.com/docs/api/v2/index.html#operation/getJobArtifacts : to get the job artifacts + # We could not pass a nested dict, which is why we create the test_file_... 
parameters for every single job + + - store_artifacts: + path: test_preparation/transformed_artifacts.json + - store_artifacts: + path: test_preparation/artifacts.json - continuation/continue: - configuration_path: test_preparation/generated_config.yml + parameters: test_preparation/transformed_artifacts.json + configuration_path: test_preparation/generated_config.yml check_code_quality: working_directory: ~/transformers @@ -142,6 +149,7 @@ jobs: - run: python utils/custom_init_isort.py --check_only - run: python utils/sort_auto_mappings.py --check_only - run: python utils/check_doc_toc.py + - run: python utils/check_docstrings.py --check_all check_repository_consistency: working_directory: ~/transformers @@ -182,7 +190,10 @@ workflows: - check_circleci_user - check_code_quality - check_repository_consistency - - fetch_tests + - fetch_tests: + # [reference] https://circleci.com/docs/contexts/ + context: + - TRANSFORMERS_CONTEXT nightly: when: <> @@ -190,4 +201,4 @@ workflows: - check_circleci_user - check_code_quality - check_repository_consistency - - fetch_all_tests \ No newline at end of file + - fetch_all_tests diff --git a/.circleci/create_circleci_config.py b/.circleci/create_circleci_config.py index 3f2c6df394e8eb..7ccf5ec96cec4f 100644 --- a/.circleci/create_circleci_config.py +++ b/.circleci/create_circleci_config.py @@ -32,7 +32,7 @@ "RUN_PT_FLAX_CROSS_TESTS": False, } # Disable the use of {"s": None} as the output is way too long, causing the navigation on CircleCI impractical -COMMON_PYTEST_OPTIONS = {"max-worker-restart": 0, "dist": "loadfile", "v": None} +COMMON_PYTEST_OPTIONS = {"max-worker-restart": 0, "dist": "loadfile", "vvv": None, "rsf":None} DEFAULT_DOCKER_IMAGE = [{"image": "cimg/python:3.8.12"}] @@ -50,16 +50,15 @@ def to_dict(self): class CircleCIJob: name: str additional_env: Dict[str, Any] = None - cache_name: str = None - cache_version: str = "0.8.2" docker_image: List[Dict[str, str]] = None install_steps: List[str] = None marker: Optional[str] = None - parallelism: Optional[int] = 1 + parallelism: Optional[int] = 0 pytest_num_workers: int = 12 pytest_options: Dict[str, Any] = None resource_class: Optional[str] = "2xlarge" tests_to_run: Optional[List[str]] = None + num_test_files_per_worker: Optional[int] = 10 # This should be only used for doctest job! command_timeout: Optional[int] = None @@ -67,8 +66,6 @@ def __post_init__(self): # Deal with defaults for mutable attributes. if self.additional_env is None: self.additional_env = {} - if self.cache_name is None: - self.cache_name = self.name if self.docker_image is None: # Let's avoid changing the default list and make a copy. 
self.docker_image = copy.deepcopy(DEFAULT_DOCKER_IMAGE) @@ -79,155 +76,96 @@ def __post_init__(self): self.docker_image[0]["image"] = f"{self.docker_image[0]['image']}:dev" print(f"Using {self.docker_image} docker image") if self.install_steps is None: - self.install_steps = [] + self.install_steps = ["uv venv && uv pip install ."] if self.pytest_options is None: self.pytest_options = {} if isinstance(self.tests_to_run, str): self.tests_to_run = [self.tests_to_run] - if self.parallelism is None: - self.parallelism = 1 + else: + test_file = os.path.join("test_preparation" , f"{self.job_name}_test_list.txt") + print("Looking for ", test_file) + if os.path.exists(test_file): + with open(test_file) as f: + expanded_tests = f.read().strip().split("\n") + self.tests_to_run = expanded_tests + print("Found:", expanded_tests) + else: + self.tests_to_run = [] + print("not Found") def to_dict(self): env = COMMON_ENV_VARIABLES.copy() env.update(self.additional_env) - cache_branch_prefix = os.environ.get("CIRCLE_BRANCH", "pull") - if cache_branch_prefix != "main": - cache_branch_prefix = "pull" - job = { "docker": self.docker_image, "environment": env, } if self.resource_class is not None: job["resource_class"] = self.resource_class - if self.parallelism is not None: - job["parallelism"] = self.parallelism - steps = [ - "checkout", - {"attach_workspace": {"at": "test_preparation"}}, - ] - steps.extend([{"run": l} for l in self.install_steps]) - steps.append({"run": {"name": "Show installed libraries and their size", "command": """du -h -d 1 "$(pip -V | cut -d ' ' -f 4 | sed 's/pip//g')" | grep -vE "dist-info|_distutils_hack|__pycache__" | sort -h | tee installed.txt || true"""}}) - steps.append({"run": {"name": "Show installed libraries and their versions", "command": """pip list --format=freeze | tee installed.txt || true"""}}) - - steps.append({"run":{"name":"Show biggest libraries","command":"""dpkg-query --show --showformat='${Installed-Size}\t${Package}\n' | sort -rh | head -25 | sort -h | awk '{ package=$2; sub(".*/", "", package); printf("%.5f GB %s\n", $1/1024/1024, package)}' || true"""}}) - steps.append({"store_artifacts": {"path": "installed.txt"}}) all_options = {**COMMON_PYTEST_OPTIONS, **self.pytest_options} pytest_flags = [f"--{key}={value}" if (value is not None or key in ["doctest-modules"]) else f"-{key}" for key, value in all_options.items()] pytest_flags.append( f"--make-reports={self.name}" if "examples" in self.name else f"--make-reports=tests_{self.name}" ) - - steps.append({"run": {"name": "Create `test-results` directory", "command": "mkdir test-results"}}) - test_command = "" - if self.command_timeout: - test_command = f"timeout {self.command_timeout} " - # junit familiy xunit1 is necessary to support splitting on test name or class name with circleci split - test_command += f"python3 -m pytest -rsfE -p no:warnings -o junit_family=xunit1 --tb=short --junitxml=test-results/junit.xml -n {self.pytest_num_workers} " + " ".join(pytest_flags) - - if self.parallelism == 1: - if self.tests_to_run is None: - test_command += " << pipeline.parameters.tests_to_run >>" - else: - test_command += " " + " ".join(self.tests_to_run) - else: - # We need explicit list instead of `pipeline.parameters.tests_to_run` (only available at job runtime) - tests = self.tests_to_run - if tests is None: - folder = os.environ["test_preparation_dir"] - test_file = os.path.join(folder, "filtered_test_list.txt") - if os.path.exists(test_file): # We take this job's tests from the filtered test_list.txt - with 
open(test_file) as f: - tests = f.read().split(" ") - - # expand the test list - if tests == ["tests"]: - tests = [os.path.join("tests", x) for x in os.listdir("tests")] - expanded_tests = [] - for test in tests: - if test.endswith(".py"): - expanded_tests.append(test) - elif test == "tests/models": - if "tokenization" in self.name: - expanded_tests.extend(glob.glob("tests/models/**/test_tokenization*.py", recursive=True)) - elif self.name in ["flax","torch","tf"]: - name = self.name if self.name != "torch" else "" - if self.name == "torch": - all_tests = glob.glob(f"tests/models/**/test_modeling_{name}*.py", recursive=True) - filtered = [k for k in all_tests if ("_tf_") not in k and "_flax_" not in k] - expanded_tests.extend(filtered) - else: - expanded_tests.extend(glob.glob(f"tests/models/**/test_modeling_{name}*.py", recursive=True)) - else: - expanded_tests.extend(glob.glob("tests/models/**/test_modeling*.py", recursive=True)) - elif test == "tests/pipelines": - expanded_tests.extend(glob.glob("tests/models/**/test_modeling*.py", recursive=True)) - else: - expanded_tests.append(test) - tests = " ".join(expanded_tests) - - # Each executor to run ~10 tests - n_executors = max(len(expanded_tests) // 10, 1) - # Avoid empty test list on some executor(s) or launching too many executors - if n_executors > self.parallelism: - n_executors = self.parallelism - job["parallelism"] = n_executors - - # Need to be newline separated for the command `circleci tests split` below - command = f'echo {tests} | tr " " "\\n" >> tests.txt' - steps.append({"run": {"name": "Get tests", "command": command}}) - - command = 'TESTS=$(circleci tests split tests.txt) && echo $TESTS > splitted_tests.txt' - steps.append({"run": {"name": "Split tests", "command": command}}) - - steps.append({"store_artifacts": {"path": "tests.txt"}}) - steps.append({"store_artifacts": {"path": "splitted_tests.txt"}}) - - test_command = "" - if self.command_timeout: - test_command = f"timeout {self.command_timeout} " - test_command += f"python3 -m pytest -rsfE -p no:warnings --tb=short -o junit_family=xunit1 --junitxml=test-results/junit.xml -n {self.pytest_num_workers} " + " ".join(pytest_flags) - test_command += " $(cat splitted_tests.txt)" - if self.marker is not None: - test_command += f" -m {self.marker}" - - if self.name == "pr_documentation_tests": - # can't use ` | tee tee tests_output.txt` as usual - test_command += " > tests_output.txt" - # Save the return code, so we can check if it is timeout in the next step. - test_command += '; touch "$?".txt' - # Never fail the test step for the doctest job. We will check the results in the next step, and fail that - # step instead if the actual test failures are found. This is to avoid the timeout being reported as test - # failure. 
- test_command = f"({test_command}) || true" - else: - test_command = f"({test_command} | tee tests_output.txt)" - steps.append({"run": {"name": "Run tests", "command": test_command}}) - - steps.append({"run": {"name": "Skipped tests", "when": "always", "command": f"python3 .circleci/parse_test_outputs.py --file tests_output.txt --skip"}}) - steps.append({"run": {"name": "Failed tests", "when": "always", "command": f"python3 .circleci/parse_test_outputs.py --file tests_output.txt --fail"}}) - steps.append({"run": {"name": "Errors", "when": "always", "command": f"python3 .circleci/parse_test_outputs.py --file tests_output.txt --errors"}}) - - steps.append({"store_test_results": {"path": "test-results"}}) - steps.append({"store_artifacts": {"path": "tests_output.txt"}}) - steps.append({"store_artifacts": {"path": "test-results/junit.xml"}}) - steps.append({"store_artifacts": {"path": "reports"}}) - + # Examples special case: we need to download NLTK files in advance to avoid cuncurrency issues + timeout_cmd = f"timeout {self.command_timeout} " if self.command_timeout else "" + marker_cmd = f"-m '{self.marker}'" if self.marker is not None else "" + additional_flags = f" -p no:warning -o junit_family=xunit1 --junitxml=test-results/junit.xml" + parallel = f' << pipeline.parameters.{self.job_name}_parallelism >> ' + steps = [ + "checkout", + {"attach_workspace": {"at": "test_preparation"}}, + {"run": "apt-get update && apt-get install -y curl"}, + {"run": " && ".join(self.install_steps)}, + {"run": {"name": "Download NLTK files", "command": """python -c "import nltk; nltk.download('punkt', quiet=True)" """} if "example" in self.name else "echo Skipping"}, + {"run": { + "name": "Show installed libraries and their size", + "command": """du -h -d 1 "$(pip -V | cut -d ' ' -f 4 | sed 's/pip//g')" | grep -vE "dist-info|_distutils_hack|__pycache__" | sort -h | tee installed.txt || true"""} + }, + {"run": { + "name": "Show installed libraries and their versions", + "command": """pip list --format=freeze | tee installed.txt || true"""} + }, + {"run": { + "name": "Show biggest libraries", + "command": """dpkg-query --show --showformat='${Installed-Size}\t${Package}\n' | sort -rh | head -25 | sort -h | awk '{ package=$2; sub(".*/", "", package); printf("%.5f GB %s\n", $1/1024/1024, package)}' || true"""} + }, + {"run": {"name": "Create `test-results` directory", "command": "mkdir test-results"}}, + {"run": {"name": "Get files to test", "command":f'curl -L -o {self.job_name}_test_list.txt <>' if self.name != "pr_documentation_tests" else 'echo "Skipped"'}}, + {"run": {"name": "Split tests across parallel nodes: show current parallel tests", + "command": f"TESTS=$(circleci tests split --split-by=timings {self.job_name}_test_list.txt) && echo $TESTS > splitted_tests.txt && echo $TESTS | tr ' ' '\n'" if self.parallelism else f"awk '{{printf \"%s \", $0}}' {self.job_name}_test_list.txt > splitted_tests.txt" + } + }, + {"run": { + "name": "Run tests", + "command": f"({timeout_cmd} python3 -m pytest {marker_cmd} -n {self.pytest_num_workers} {additional_flags} {' '.join(pytest_flags)} $(cat splitted_tests.txt) | tee tests_output.txt)"} + }, + {"run": {"name": "Expand to show skipped tests", "when": "always", "command": f"python3 .circleci/parse_test_outputs.py --file tests_output.txt --skip"}}, + {"run": {"name": "Failed tests: show reasons", "when": "always", "command": f"python3 .circleci/parse_test_outputs.py --file tests_output.txt --fail"}}, + {"run": {"name": "Errors", "when": "always", "command": f"python3 
.circleci/parse_test_outputs.py --file tests_output.txt --errors"}}, + {"store_test_results": {"path": "test-results"}}, + {"store_artifacts": {"path": "test-results/junit.xml"}}, + {"store_artifacts": {"path": "reports"}}, + {"store_artifacts": {"path": "tests.txt"}}, + {"store_artifacts": {"path": "splitted_tests.txt"}}, + {"store_artifacts": {"path": "installed.txt"}}, + ] + if self.parallelism: + job["parallelism"] = parallel job["steps"] = steps return job @property def job_name(self): - return self.name if "examples" in self.name else f"tests_{self.name}" + return self.name if ("examples" in self.name or "pipeline" in self.name or "pr_documentation" in self.name) else f"tests_{self.name}" # JOBS torch_and_tf_job = CircleCIJob( "torch_and_tf", docker_image=[{"image":"huggingface/transformers-torch-tf-light"}], - install_steps=["uv venv && uv pip install ."], additional_env={"RUN_PT_TF_CROSS_TESTS": True}, marker="is_pt_tf_cross_test", pytest_options={"rA": None, "durations": 0}, @@ -238,7 +176,6 @@ def job_name(self): "torch_and_flax", additional_env={"RUN_PT_FLAX_CROSS_TESTS": True}, docker_image=[{"image":"huggingface/transformers-torch-jax-light"}], - install_steps=["uv venv && uv pip install ."], marker="is_pt_flax_cross_test", pytest_options={"rA": None, "durations": 0}, ) @@ -246,24 +183,36 @@ def job_name(self): torch_job = CircleCIJob( "torch", docker_image=[{"image": "huggingface/transformers-torch-light"}], - install_steps=["uv venv && uv pip install ."], + marker="not generate", parallelism=6, - pytest_num_workers=16 + pytest_num_workers=8 +) + +generate_job = CircleCIJob( + "generate", + docker_image=[{"image": "huggingface/transformers-torch-light"}], + marker="generate", + parallelism=6, + pytest_num_workers=8 ) tokenization_job = CircleCIJob( "tokenization", docker_image=[{"image": "huggingface/transformers-torch-light"}], - install_steps=["uv venv && uv pip install ."], - parallelism=6, + parallelism=8, pytest_num_workers=16 ) +processor_job = CircleCIJob( + "processors", + docker_image=[{"image": "huggingface/transformers-torch-light"}], + parallelism=8, + pytest_num_workers=6 +) tf_job = CircleCIJob( "tf", docker_image=[{"image":"huggingface/transformers-tf-light"}], - install_steps=["uv venv", "uv pip install -e."], parallelism=6, pytest_num_workers=16, ) @@ -272,7 +221,6 @@ def job_name(self): flax_job = CircleCIJob( "flax", docker_image=[{"image":"huggingface/transformers-jax-light"}], - install_steps=["uv venv && uv pip install ."], parallelism=6, pytest_num_workers=16 ) @@ -282,8 +230,8 @@ def job_name(self): "pipelines_torch", additional_env={"RUN_PIPELINE_TESTS": True}, docker_image=[{"image":"huggingface/transformers-torch-light"}], - install_steps=["uv venv && uv pip install ."], marker="is_pipeline_test", + parallelism=4 ) @@ -291,8 +239,8 @@ def job_name(self): "pipelines_tf", additional_env={"RUN_PIPELINE_TESTS": True}, docker_image=[{"image":"huggingface/transformers-tf-light"}], - install_steps=["uv venv && uv pip install ."], marker="is_pipeline_test", + parallelism=4 ) @@ -300,34 +248,24 @@ def job_name(self): "custom_tokenizers", additional_env={"RUN_CUSTOM_TOKENIZERS": True}, docker_image=[{"image": "huggingface/transformers-custom-tokenizers"}], - install_steps=["uv venv","uv pip install -e ."], - parallelism=None, - resource_class=None, - tests_to_run=[ - "./tests/models/bert_japanese/test_tokenization_bert_japanese.py", - "./tests/models/openai/test_tokenization_openai.py", - "./tests/models/clip/test_tokenization_clip.py", - ], ) 
examples_torch_job = CircleCIJob( "examples_torch", additional_env={"OMP_NUM_THREADS": 8}, - cache_name="torch_examples", docker_image=[{"image":"huggingface/transformers-examples-torch"}], # TODO @ArthurZucker remove this once docker is easier to build install_steps=["uv venv && uv pip install . && uv pip install -r examples/pytorch/_tests_requirements.txt"], - pytest_num_workers=1, + pytest_num_workers=8, ) examples_tensorflow_job = CircleCIJob( "examples_tensorflow", - cache_name="tensorflow_examples", + additional_env={"OMP_NUM_THREADS": 8}, docker_image=[{"image":"huggingface/transformers-examples-tf"}], - install_steps=["uv venv && uv pip install ."], - parallelism=8 + pytest_num_workers=16, ) @@ -336,12 +274,12 @@ def job_name(self): additional_env={"HUGGINGFACE_CO_STAGING": True}, docker_image=[{"image":"huggingface/transformers-torch-light"}], install_steps=[ - "uv venv && uv pip install .", + 'uv venv && uv pip install .', 'git config --global user.email "ci@dummy.com"', 'git config --global user.name "ci"', ], marker="is_staging_test", - pytest_num_workers=1, + pytest_num_workers=2, ) @@ -349,8 +287,7 @@ def job_name(self): "onnx", docker_image=[{"image":"huggingface/transformers-torch-tf-light"}], install_steps=[ - "uv venv && uv pip install .", - "uv pip install --upgrade eager pip", + "uv venv", "uv pip install .[torch,tf,testing,sentencepiece,onnxruntime,vision,rjieba]", ], pytest_options={"k onnx": None}, @@ -360,15 +297,7 @@ def job_name(self): exotic_models_job = CircleCIJob( "exotic_models", - install_steps=["uv venv && uv pip install ."], docker_image=[{"image":"huggingface/transformers-exotic-models"}], - tests_to_run=[ - "tests/models/*layoutlmv*", - "tests/models/*nat", - "tests/models/deta", - "tests/models/udop", - "tests/models/nougat", - ], pytest_num_workers=12, parallelism=4, pytest_options={"durations": 100}, @@ -378,11 +307,17 @@ def job_name(self): repo_utils_job = CircleCIJob( "repo_utils", docker_image=[{"image":"huggingface/transformers-consistency"}], - install_steps=["uv venv && uv pip install ."], - parallelism=None, - pytest_num_workers=1, + pytest_num_workers=4, resource_class="large", - tests_to_run="tests/repo_utils", +) + + +non_model_job = CircleCIJob( + "non_model", + docker_image=[{"image": "huggingface/transformers-torch-light"}], + marker="not generate", + parallelism=6, + pytest_num_workers=8, ) @@ -391,28 +326,18 @@ def job_name(self): # the bash output redirection.) py_command = 'from utils.tests_fetcher import get_doctest_files; to_test = get_doctest_files() + ["dummy.py"]; to_test = " ".join(to_test); print(to_test)' py_command = f"$(python3 -c '{py_command}')" -command = f'echo "{py_command}" > pr_documentation_tests_temp.txt' +command = f'echo """{py_command}""" > pr_documentation_tests_temp.txt' doc_test_job = CircleCIJob( "pr_documentation_tests", docker_image=[{"image":"huggingface/transformers-consistency"}], additional_env={"TRANSFORMERS_VERBOSITY": "error", "DATASETS_VERBOSITY": "error", "SKIP_CUDA_DOCTEST": "1"}, install_steps=[ # Add an empty file to keep the test step running correctly even no file is selected to be tested. 
+ "uv venv && pip install .", "touch dummy.py", - { - "name": "Get files to test", - "command": command, - }, - { - "name": "Show information in `Get files to test`", - "command": - "cat pr_documentation_tests_temp.txt" - }, - { - "name": "Get the last line in `pr_documentation_tests.txt`", - "command": - "tail -n1 pr_documentation_tests_temp.txt | tee pr_documentation_tests.txt" - }, + command, + "cat pr_documentation_tests_temp.txt", + "tail -n1 pr_documentation_tests_temp.txt | tee pr_documentation_tests_test_list.txt" ], tests_to_run="$(cat pr_documentation_tests.txt)", # noqa pytest_options={"-doctest-modules": None, "doctest-glob": "*.md", "dist": "loadfile", "rvsA": None}, @@ -420,121 +345,37 @@ def job_name(self): pytest_num_workers=1, ) -REGULAR_TESTS = [ - torch_and_tf_job, - torch_and_flax_job, - torch_job, - tf_job, - flax_job, - custom_tokenizers_job, - hub_job, - onnx_job, - exotic_models_job, - tokenization_job -] -EXAMPLES_TESTS = [ - examples_torch_job, - examples_tensorflow_job, -] -PIPELINE_TESTS = [ - pipelines_torch_job, - pipelines_tf_job, -] +REGULAR_TESTS = [torch_and_tf_job, torch_and_flax_job, torch_job, tf_job, flax_job, hub_job, onnx_job, tokenization_job, processor_job, generate_job, non_model_job] # fmt: skip +EXAMPLES_TESTS = [examples_torch_job, examples_tensorflow_job] +PIPELINE_TESTS = [pipelines_torch_job, pipelines_tf_job] REPO_UTIL_TESTS = [repo_utils_job] DOC_TESTS = [doc_test_job] - +ALL_TESTS = REGULAR_TESTS + EXAMPLES_TESTS + PIPELINE_TESTS + REPO_UTIL_TESTS + DOC_TESTS + [custom_tokenizers_job] + [exotic_models_job] # fmt: skip def create_circleci_config(folder=None): if folder is None: folder = os.getcwd() - # Used in CircleCIJob.to_dict() to expand the test list (for using parallelism) os.environ["test_preparation_dir"] = folder - jobs = [] - all_test_file = os.path.join(folder, "test_list.txt") - if os.path.exists(all_test_file): - with open(all_test_file) as f: - all_test_list = f.read() - else: - all_test_list = [] - if len(all_test_list) > 0: - jobs.extend(PIPELINE_TESTS) - - test_file = os.path.join(folder, "filtered_test_list.txt") - if os.path.exists(test_file): - with open(test_file) as f: - test_list = f.read() - else: - test_list = [] - if len(test_list) > 0: - jobs.extend(REGULAR_TESTS) - - extended_tests_to_run = set(test_list.split()) - # Extend the test files for cross test jobs - for job in jobs: - if job.job_name in ["tests_torch_and_tf", "tests_torch_and_flax"]: - for test_path in copy.copy(extended_tests_to_run): - dir_path, fn = os.path.split(test_path) - if fn.startswith("test_modeling_tf_"): - fn = fn.replace("test_modeling_tf_", "test_modeling_") - elif fn.startswith("test_modeling_flax_"): - fn = fn.replace("test_modeling_flax_", "test_modeling_") - else: - if job.job_name == "test_torch_and_tf": - fn = fn.replace("test_modeling_", "test_modeling_tf_") - elif job.job_name == "test_torch_and_flax": - fn = fn.replace("test_modeling_", "test_modeling_flax_") - new_test_file = str(os.path.join(dir_path, fn)) - if os.path.isfile(new_test_file): - if new_test_file not in extended_tests_to_run: - extended_tests_to_run.add(new_test_file) - extended_tests_to_run = sorted(extended_tests_to_run) - for job in jobs: - if job.job_name in ["tests_torch_and_tf", "tests_torch_and_flax"]: - job.tests_to_run = extended_tests_to_run - fn = "filtered_test_list_cross_tests.txt" - f_path = os.path.join(folder, fn) - with open(f_path, "w") as fp: - fp.write(" ".join(extended_tests_to_run)) - - example_file = os.path.join(folder, 
"examples_test_list.txt") - if os.path.exists(example_file) and os.path.getsize(example_file) > 0: - with open(example_file, "r", encoding="utf-8") as f: - example_tests = f.read() - for job in EXAMPLES_TESTS: - framework = job.name.replace("examples_", "").replace("torch", "pytorch") - if example_tests == "all": - job.tests_to_run = [f"examples/{framework}"] - else: - job.tests_to_run = [f for f in example_tests.split(" ") if f.startswith(f"examples/{framework}")] - - if len(job.tests_to_run) > 0: - jobs.append(job) - - doctest_file = os.path.join(folder, "doctest_list.txt") - if os.path.exists(doctest_file): - with open(doctest_file) as f: - doctest_list = f.read() - else: - doctest_list = [] - if len(doctest_list) > 0: - jobs.extend(DOC_TESTS) - - repo_util_file = os.path.join(folder, "test_repo_utils.txt") - if os.path.exists(repo_util_file) and os.path.getsize(repo_util_file) > 0: - jobs.extend(REPO_UTIL_TESTS) + jobs = [k for k in ALL_TESTS if os.path.isfile(os.path.join("test_preparation" , f"{k.job_name}_test_list.txt") )] + print("The following jobs will be run ", jobs) if len(jobs) == 0: jobs = [EmptyJob()] - config = {"version": "2.1"} - config["parameters"] = { - # Only used to accept the parameters from the trigger - "nightly": {"type": "boolean", "default": False}, - "tests_to_run": {"type": "string", "default": test_list}, + print("Full list of job name inputs", {j.job_name + "_test_list":{"type":"string", "default":''} for j in jobs}) + config = { + "version": "2.1", + "parameters": { + # Only used to accept the parameters from the trigger + "nightly": {"type": "boolean", "default": False}, + "tests_to_run": {"type": "string", "default": ''}, + **{j.job_name + "_test_list":{"type":"string", "default":''} for j in jobs}, + **{j.job_name + "_parallelism":{"type":"integer", "default":1} for j in jobs}, + }, + "jobs" : {j.job_name: j.to_dict() for j in jobs}, + "workflows": {"version": 2, "run_tests": {"jobs": [j.job_name for j in jobs]}} } - config["jobs"] = {j.job_name: j.to_dict() for j in jobs} - config["workflows"] = {"version": 2, "run_tests": {"jobs": [j.job_name for j in jobs]}} with open(os.path.join(folder, "generated_config.yml"), "w") as f: - f.write(yaml.dump(config, indent=2, width=1000000, sort_keys=False)) + f.write(yaml.dump(config, sort_keys=False, default_flow_style=False).replace("' << pipeline", " << pipeline").replace(">> '", " >>")) if __name__ == "__main__": diff --git a/.circleci/parse_test_outputs.py b/.circleci/parse_test_outputs.py index b80ce8513a1f91..a69da1a3eafb27 100644 --- a/.circleci/parse_test_outputs.py +++ b/.circleci/parse_test_outputs.py @@ -67,4 +67,4 @@ def main(): if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/.coveragerc b/.coveragerc deleted file mode 100644 index 9a1103b8af3d01..00000000000000 --- a/.coveragerc +++ /dev/null @@ -1,12 +0,0 @@ -[run] -source=transformers -omit = - # skip convertion scripts from testing for now - */convert_* - */__main__.py -[report] -exclude_lines = - pragma: no cover - raise - except - register_parameter \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml index ff471096907ab8..ea7d6a02252cf5 100644 --- a/.github/ISSUE_TEMPLATE/bug-report.yml +++ b/.github/ISSUE_TEMPLATE/bug-report.yml @@ -1,6 +1,17 @@ name: "\U0001F41B Bug Report" description: Submit a bug report to help us improve transformers +labels: [ "bug" ] body: + - type: markdown + attributes: + value: | + Thanks for taking the time to 
fill out this bug report! 🤗 + + Before you submit your bug report: + + - If it is your first time submitting, be sure to check our [bug report guidelines](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#did-you-find-a-bug) + - Try our [docs bot](https://huggingface.co/spaces/huggingchat/hf-docs-chat) -- it might be able to help you with your issue + - type: textarea id: system-info attributes: @@ -17,50 +28,50 @@ body: description: | Your issue will be replied to more quickly if you can figure out the right person to tag with @ If you know how to use git blame, that is the easiest way, otherwise, here is a rough guide of **who to tag**. - + All issues are read by one of the core maintainers, so if you don't know who to tag, just leave this blank and a core maintainer will ping the right person. - + Please tag fewer than 3 people. - + Models: - - text models: @ArthurZucker and @younesbelkada - - vision models: @amyeroberts - - speech models: @sanchit-gandhi + - text models: @ArthurZucker + - vision models: @amyeroberts, @qubvel + - speech models: @ylacombe, @eustlb - graph models: @clefourrier - + Library: - + - flax: @sanchit-gandhi - - generate: @gante - - pipelines: @Narsil + - generate: @zucchini-nlp (visual-language models) or @gante (all others) + - pipelines: @Rocketknight1 - tensorflow: @gante and @Rocketknight1 - - tokenizers: @ArthurZucker - - trainer: @muellerzr and @pacman100 - + - tokenizers: @ArthurZucker and @itazap + - trainer: @muellerzr @SunMarc + Integrations: - - - deepspeed: HF Trainer/Accelerate: @pacman100 + + - deepspeed: HF Trainer/Accelerate: @muellerzr - ray/raytune: @richardliaw, @amogkam - Big Model Inference: @SunMarc - - quantization (bitsandbytes, autogpt): @SunMarc and @younesbelkada - + - quantization (bitsandbytes, autogpt): @SunMarc + Documentation: @stevhliu - + Model hub: - for issues with a model, report at https://discuss.huggingface.co/ and tag the model's creator. - + HF projects: - + - accelerate: [different repo](https://github.com/huggingface/accelerate) - datasets: [different repo](https://github.com/huggingface/datasets) - diffusers: [different repo](https://github.com/huggingface/diffusers) - rust tokenizers: [different repo](https://github.com/huggingface/tokenizers) - + Maintained examples (not research project or legacy): - + - Flax: @sanchit-gandhi - PyTorch: See Models above and tag the person corresponding to the modality of the example. - TensorFlow: @Rocketknight1 @@ -101,11 +112,11 @@ body: placeholder: | Steps to reproduce the behavior: - + 1. 2. 3. 
- + - type: textarea id: expected-behavior diff --git a/.github/ISSUE_TEMPLATE/i18n.md b/.github/ISSUE_TEMPLATE/i18n.md index 52667f930508a6..5b91427d55b73c 100644 --- a/.github/ISSUE_TEMPLATE/i18n.md +++ b/.github/ISSUE_TEMPLATE/i18n.md @@ -34,7 +34,7 @@ Some notes: ## Tutorial section - [ ] [pipeline_tutorial.md](https://github.com/huggingface/transformers/blob/main/docs/source/en/pipeline_tutorial.md) -- [ ] [autoclass_tutorial.md](https://github.com/huggingface/transformers/blob/master/docs/source/autoclass_tutorial.md) +- [ ] [autoclass_tutorial.md](https://github.com/huggingface/transformers/blob/main/docs/source/en/autoclass_tutorial.md) - [ ] [preprocessing.md](https://github.com/huggingface/transformers/blob/main/docs/source/en/preprocessing.md) - [ ] [training.md](https://github.com/huggingface/transformers/blob/main/docs/source/en/training.md) - [ ] [accelerate.md](https://github.com/huggingface/transformers/blob/main/docs/source/en/accelerate.md) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index c0f70fe8159f09..417f5a2e45b58c 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -39,28 +39,29 @@ members/contributors who may be interested in your PR. Models: -- text models: @ArthurZucker and @younesbelkada -- vision models: @amyeroberts -- speech models: @sanchit-gandhi +- text models: @ArthurZucker +- vision models: @amyeroberts, @qubvel +- speech models: @ylacombe, @eustlb - graph models: @clefourrier Library: - flax: @sanchit-gandhi -- generate: @gante -- pipelines: @Narsil +- generate: @zucchini-nlp (visual-language models) or @gante (all others) +- pipelines: @Rocketknight1 - tensorflow: @gante and @Rocketknight1 - tokenizers: @ArthurZucker -- trainer: @muellerzr and @pacman100 +- trainer: @muellerzr and @SunMarc +- chat templates: @Rocketknight1 Integrations: -- deepspeed: HF Trainer/Accelerate: @pacman100 +- deepspeed: HF Trainer/Accelerate: @muellerzr - ray/raytune: @richardliaw, @amogkam - Big Model Inference: @SunMarc -- quantization (bitsandbytes, autogpt): @SunMarc and @younesbelkada +- quantization (bitsandbytes, autogpt): @SunMarc -Documentation: @stevhliu and @MKhalusova +Documentation: @stevhliu HF projects: diff --git a/.github/workflows/add-model-like.yml b/.github/workflows/add-model-like.yml index 5a1b953ef6cb08..cd676831784406 100644 --- a/.github/workflows/add-model-like.yml +++ b/.github/workflows/add-model-like.yml @@ -23,7 +23,7 @@ jobs: sudo apt -y update && sudo apt install -y libsndfile1-dev - name: Load cached virtual environment - uses: actions/cache@v2 + uses: actions/cache@v4 id: cache with: path: ~/venv/ diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml new file mode 100644 index 00000000000000..cb9a3d7b7974aa --- /dev/null +++ b/.github/workflows/benchmark.yml @@ -0,0 +1,42 @@ +name: Self-hosted runner (benchmark) + +on: + schedule: + - cron: "17 2 * * *" + workflow_call: + +env: + HF_HOME: /mnt/cache + TF_FORCE_GPU_ALLOW_GROWTH: true + + +jobs: + benchmark: + name: Benchmark + runs-on: [single-gpu, nvidia-gpu, a10, ci] + container: + image: huggingface/transformers-all-latest-gpu + options: --gpus all --privileged --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ + steps: + - name: Update clone + working-directory: /transformers + run: | + git fetch && git checkout ${{ github.sha }} + + - name: Reinstall transformers in edit mode (remove the one installed during docker image build) + working-directory: /transformers + run: python3 -m pip 
uninstall -y transformers && python3 -m pip install -e . + + - name: Benchmark (daily) + if: github.event_name == 'schedule' + working-directory: /transformers + run: | + python3 -m pip install optimum-benchmark>=0.3.0 + HF_TOKEN=${{ secrets.TRANSFORMERS_BENCHMARK_TOKEN }} python3 benchmark/benchmark.py --repo_id hf-internal-testing/benchmark_results --path_in_repo $(date +'%Y-%m-%d') --config-dir benchmark/config --config-name generation --commit=${{ github.sha }} backend.model=google/gemma-2b backend.cache_implementation=null,static backend.torch_compile=false,true --multirun + + - name: Benchmark (merged to main event) + if: github.event_name == 'push' && github.ref_name == 'main' + working-directory: /transformers + run: | + python3 -m pip install optimum-benchmark>=0.3.0 + HF_TOKEN=${{ secrets.TRANSFORMERS_BENCHMARK_TOKEN }} python3 benchmark/benchmark.py --repo_id hf-internal-testing/benchmark_results_merge_event --path_in_repo $(date +'%Y-%m-%d') --config-dir benchmark/config --config-name generation --commit=${{ github.sha }} backend.model=google/gemma-2b backend.cache_implementation=null,static backend.torch_compile=false,true --multirun diff --git a/.github/workflows/build-ci-docker-images.yml b/.github/workflows/build-ci-docker-images.yml index 6f29df82769d82..9d947684ee867e 100644 --- a/.github/workflows/build-ci-docker-images.yml +++ b/.github/workflows/build-ci-docker-images.yml @@ -27,10 +27,10 @@ jobs: strategy: matrix: file: ["quality", "consistency", "custom-tokenizers", "torch-light", "tf-light", "exotic-models", "torch-tf-light", "torch-jax-light", "jax-light", "examples-torch", "examples-tf"] - continue-on-error: true + continue-on-error: true steps: - - + - name: Set tag run: | if ${{contains(github.event.head_commit.message, '[build-ci-image]')}}; then @@ -61,4 +61,17 @@ jobs: REF=${{ github.sha }} file: "./docker/${{ matrix.file }}.dockerfile" push: ${{ contains(github.event.head_commit.message, 'ci-image]') || github.event_name == 'schedule' }} - tags: ${{ env.TAG }} \ No newline at end of file + tags: ${{ env.TAG }} + + notify: + runs-on: ubuntu-22.04 + if: ${{ contains(github.event.head_commit.message, '[build-ci-image]') || contains(github.event.head_commit.message, '[push-ci-image]') && '!cancelled()' || github.event_name == 'schedule' }} + steps: + - name: Post to Slack + if: ${{ contains(github.event.head_commit.message, '[push-ci-image]') && github.event_name != 'schedule' }} + uses: huggingface/hf-workflows/.github/actions/post-slack@main + with: + slack_channel: "#transformers-ci-circleci-images" + title: 🤗 New docker images for CircleCI are pushed. 
+ status: ${{ job.status }} + slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }} diff --git a/.github/workflows/build-docker-images.yml b/.github/workflows/build-docker-images.yml index f113579691ea5c..c21faf2d747942 100644 --- a/.github/workflows/build-docker-images.yml +++ b/.github/workflows/build-docker-images.yml @@ -20,7 +20,8 @@ concurrency: jobs: latest-docker: name: "Latest PyTorch + TensorFlow [dev]" - runs-on: [intel-cpu, 8-cpu, ci] + runs-on: + group: aws-general-8-plus steps: - name: Set up Docker Buildx @@ -68,18 +69,9 @@ jobs: latest-torch-deepspeed-docker: name: "Latest PyTorch + DeepSpeed" - runs-on: [intel-cpu, 8-cpu, ci] + runs-on: + group: aws-general-8-plus steps: - - name: Cleanup disk - run: | - sudo ls -l /usr/local/lib/ - sudo ls -l /usr/share/ - sudo du -sh /usr/local/lib/ - sudo du -sh /usr/share/ - sudo rm -rf /usr/local/lib/android - sudo rm -rf /usr/share/dotnet - sudo du -sh /usr/local/lib/ - sudo du -sh /usr/share/ - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 @@ -114,18 +106,9 @@ jobs: # Can't build 2 images in a single job `latest-torch-deepspeed-docker` (for `nvcr.io/nvidia`) latest-torch-deepspeed-docker-for-push-ci-daily-build: name: "Latest PyTorch + DeepSpeed (Push CI - Daily Build)" - runs-on: [intel-cpu, 8-cpu, ci] + runs-on: + group: aws-general-8-plus steps: - - name: Cleanup disk - run: | - sudo ls -l /usr/local/lib/ - sudo ls -l /usr/share/ - sudo du -sh /usr/local/lib/ - sudo du -sh /usr/share/ - sudo rm -rf /usr/local/lib/android - sudo rm -rf /usr/share/dotnet - sudo du -sh /usr/local/lib/ - sudo du -sh /usr/share/ - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 @@ -165,7 +148,8 @@ jobs: name: "Doc builder" # Push CI doesn't need this image if: inputs.image_postfix != '-push-ci' - runs-on: [intel-cpu, 8-cpu, ci] + runs-on: + group: aws-general-8-plus steps: - name: Set up Docker Buildx @@ -200,18 +184,9 @@ jobs: name: "Latest PyTorch [dev]" # Push CI doesn't need this image if: inputs.image_postfix != '-push-ci' - runs-on: [intel-cpu, 8-cpu, ci] + runs-on: + group: aws-general-8-plus steps: - - name: Cleanup disk - run: | - sudo ls -l /usr/local/lib/ - sudo ls -l /usr/share/ - sudo du -sh /usr/local/lib/ - sudo du -sh /usr/share/ - sudo rm -rf /usr/local/lib/android - sudo rm -rf /usr/share/dotnet - sudo du -sh /usr/local/lib/ - sudo du -sh /usr/share/ - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 @@ -245,7 +220,8 @@ jobs: latest-pytorch-amd: name: "Latest PyTorch (AMD) [dev]" - runs-on: [intel-cpu, 8-cpu, ci] + runs-on: + group: aws-general-8-plus steps: - name: Set up Docker Buildx @@ -295,7 +271,8 @@ jobs: name: "Latest TensorFlow [dev]" # Push CI doesn't need this image if: inputs.image_postfix != '-push-ci' - runs-on: [intel-cpu, 8-cpu, ci] + runs-on: + group: aws-general-8-plus steps: - name: Set up Docker Buildx @@ -330,7 +307,8 @@ jobs: latest-pytorch-deepspeed-amd: name: "PyTorch + DeepSpeed (AMD) [dev]" - runs-on: [intel-cpu, 8-cpu, ci] + runs-on: + group: aws-general-8-plus steps: - name: Set up Docker Buildx @@ -380,7 +358,8 @@ jobs: name: "Latest Pytorch + Quantization [dev]" # Push CI doesn't need this image if: inputs.image_postfix != '-push-ci' - runs-on: [intel-cpu, 8-cpu, ci] + runs-on: + group: aws-general-8-plus steps: - name: Set up Docker Buildx diff --git a/.github/workflows/build-nightly-ci-docker-images.yml b/.github/workflows/build-nightly-ci-docker-images.yml index d7c18775a86e41..4b00a6d3fae366 100644 --- 
a/.github/workflows/build-nightly-ci-docker-images.yml +++ b/.github/workflows/build-nightly-ci-docker-images.yml @@ -13,18 +13,9 @@ concurrency: jobs: latest-with-torch-nightly-docker: name: "Nightly PyTorch + Stable TensorFlow" - runs-on: ubuntu-22.04 + runs-on: + group: aws-general-8-plus steps: - - name: Cleanup disk - run: | - sudo ls -l /usr/local/lib/ - sudo ls -l /usr/share/ - sudo du -sh /usr/local/lib/ - sudo du -sh /usr/share/ - sudo rm -rf /usr/local/lib/android - sudo rm -rf /usr/share/dotnet - sudo du -sh /usr/local/lib/ - sudo du -sh /usr/share/ - name: Set up Docker Buildx uses: docker/setup-buildx-action@v2 @@ -50,18 +41,9 @@ jobs: nightly-torch-deepspeed-docker: name: "Nightly PyTorch + DeepSpeed" - runs-on: ubuntu-22.04 + runs-on: + group: aws-general-8-plus steps: - - name: Cleanup disk - run: | - sudo ls -l /usr/local/lib/ - sudo ls -l /usr/share/ - sudo du -sh /usr/local/lib/ - sudo du -sh /usr/share/ - sudo rm -rf /usr/local/lib/android - sudo rm -rf /usr/share/dotnet - sudo du -sh /usr/local/lib/ - sudo du -sh /usr/share/ - name: Set up Docker Buildx uses: docker/setup-buildx-action@v2 @@ -82,4 +64,4 @@ jobs: build-args: | REF=main push: true - tags: huggingface/transformers-pytorch-deepspeed-nightly-gpu \ No newline at end of file + tags: huggingface/transformers-pytorch-deepspeed-nightly-gpu diff --git a/.github/workflows/build-past-ci-docker-images.yml b/.github/workflows/build-past-ci-docker-images.yml index 5ef7c7e7de9e94..c4f0b78986caea 100644 --- a/.github/workflows/build-past-ci-docker-images.yml +++ b/.github/workflows/build-past-ci-docker-images.yml @@ -16,7 +16,8 @@ jobs: fail-fast: false matrix: version: ["1.13", "1.12", "1.11"] - runs-on: ubuntu-22.04 + runs-on: + group: aws-general-8-plus steps: - name: Set up Docker Buildx @@ -60,7 +61,8 @@ jobs: fail-fast: false matrix: version: ["2.11", "2.10", "2.9", "2.8", "2.7", "2.6", "2.5"] - runs-on: ubuntu-22.04 + runs-on: + group: aws-general-8-plus steps: - name: Set up Docker Buildx diff --git a/.github/workflows/build_documentation.yml b/.github/workflows/build_documentation.yml index e3e3b5f2df37f1..c55638ded1497c 100644 --- a/.github/workflows/build_documentation.yml +++ b/.github/workflows/build_documentation.yml @@ -1,6 +1,7 @@ name: Build documentation on: + workflow_dispatch: push: branches: - main @@ -15,7 +16,7 @@ jobs: commit_sha: ${{ github.sha }} package: transformers notebook_folder: transformers_doc - languages: de en es fr hi it ko pt tr zh ja te + languages: ar de en es fr hi it ko pt tr zh ja te custom_container: huggingface/transformers-doc-builder secrets: token: ${{ secrets.HUGGINGFACE_PUSH }} diff --git a/.github/workflows/build_pr_documentation.yml b/.github/workflows/build_pr_documentation.yml index c8d073ea34688f..f698f860b2f93c 100644 --- a/.github/workflows/build_pr_documentation.yml +++ b/.github/workflows/build_pr_documentation.yml @@ -14,5 +14,5 @@ jobs: commit_sha: ${{ github.event.pull_request.head.sha }} pr_number: ${{ github.event.number }} package: transformers - languages: de en es fr hi it ko pt tr zh ja te + languages: ar de en es fr hi it ko pt tr zh ja te custom_container: huggingface/transformers-doc-builder diff --git a/.github/workflows/check_tiny_models.yml b/.github/workflows/check_tiny_models.yml index 56a84f776bf0af..a2b4846051a054 100644 --- a/.github/workflows/check_tiny_models.yml +++ b/.github/workflows/check_tiny_models.yml @@ -23,7 +23,7 @@ jobs: - uses: actions/checkout@v4 - name: Set up Python 3.8 - uses: actions/setup-python@v4 + uses: 
actions/setup-python@v5 with: # Semantic version range syntax or exact version of a Python version python-version: '3.8' diff --git a/.github/workflows/model_jobs.yml b/.github/workflows/model_jobs.yml index f88af8e39af27d..001e2c531d9bc8 100644 --- a/.github/workflows/model_jobs.yml +++ b/.github/workflows/model_jobs.yml @@ -12,6 +12,12 @@ on: slice_id: required: true type: number + runner: + required: true + type: string + docker: + required: true + type: string env: HF_HOME: /mnt/cache @@ -31,12 +37,14 @@ jobs: run_models_gpu: name: " " strategy: + max-parallel: 8 fail-fast: false matrix: folders: ${{ fromJson(inputs.folder_slices)[inputs.slice_id] }} - runs-on: ['${{ inputs.machine_type }}', nvidia-gpu, t4, daily-ci] + runs-on: + group: '${{ inputs.machine_type }}' container: - image: huggingface/transformers-all-latest-gpu + image: ${{ inputs.docker }} options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ steps: - name: Echo input and matrix info @@ -65,6 +73,18 @@ jobs: working-directory: /transformers run: python3 -m pip uninstall -y transformers && python3 -m pip install -e . + - name: Update / Install some packages (for Past CI) + if: ${{ contains(inputs.docker, '-past-') }} + working-directory: /transformers + run: | + python3 -m pip install -U datasets + + - name: Update / Install some packages (for Past CI) + if: ${{ contains(inputs.docker, '-past-') && contains(inputs.docker, '-pytorch-') }} + working-directory: /transformers + run: | + python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate + - name: NVIDIA-SMI run: | nvidia-smi @@ -78,25 +98,42 @@ jobs: working-directory: /transformers run: pip freeze + - name: Set `machine_type` for report and artifact names + working-directory: /transformers + shell: bash + run: | + echo "${{ inputs.machine_type }}" + + if [ "${{ inputs.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then + machine_type=single-gpu + elif [ "${{ inputs.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then + machine_type=multi-gpu + else + machine_type=${{ inputs.machine_type }} + fi + + echo "$machine_type" + echo "machine_type=$machine_type" >> $GITHUB_ENV + - name: Run all tests on GPU working-directory: /transformers - run: python3 -m pytest -rs -v --make-reports=${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }} + run: python3 -m pytest -rsfE -v --make-reports=${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }} - name: Failure short reports if: ${{ failure() }} continue-on-error: true - run: cat /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt + run: cat /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt - name: Run test shell: bash run: | - mkdir -p /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports - echo "hello" > /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/hello.txt - echo "${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports" + mkdir -p /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports + echo "hello" > /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/hello.txt + echo "${{ env.machine_type }}_run_models_gpu_${{ 
matrix.folders }}_test_reports" - - name: "Test suite reports artifacts: ${{ inputs.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports" + - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports" if: ${{ always() }} uses: actions/upload-artifact@v4 with: - name: ${{ inputs.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports - path: /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports + name: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports + path: /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports diff --git a/.github/workflows/model_jobs_amd.yml b/.github/workflows/model_jobs_amd.yml new file mode 100644 index 00000000000000..a7e6c7b1ccd576 --- /dev/null +++ b/.github/workflows/model_jobs_amd.yml @@ -0,0 +1,129 @@ +name: model jobs + +on: + workflow_call: + inputs: + folder_slices: + required: true + type: string + machine_type: + required: true + type: string + slice_id: + required: true + type: number + runner: + required: true + type: string + docker: + required: true + type: string + +env: + HF_HOME: /mnt/cache + TRANSFORMERS_IS_CI: yes + OMP_NUM_THREADS: 8 + MKL_NUM_THREADS: 8 + RUN_SLOW: yes + # For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access. + # This token is created under the bot `hf-transformers-bot`. + HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }} + SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }} + TF_FORCE_GPU_ALLOW_GROWTH: true + RUN_PT_TF_CROSS_TESTS: 1 + CUDA_VISIBLE_DEVICES: 0,1 + +jobs: + run_models_gpu: + name: " " + strategy: + max-parallel: 1 # For now, not to parallelize. Can change later if it works well. + fail-fast: false + matrix: + folders: ${{ fromJson(inputs.folder_slices)[inputs.slice_id] }} + runs-on: ['${{ inputs.machine_type }}', self-hosted, amd-gpu, '${{ inputs.runner }}'] + container: + image: ${{ inputs.docker }} + options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ + steps: + - name: Echo input and matrix info + shell: bash + run: | + echo "${{ inputs.folder_slices }}" + echo "${{ matrix.folders }}" + echo "${{ toJson(fromJson(inputs.folder_slices)[inputs.slice_id]) }}" + + - name: Echo folder ${{ matrix.folders }} + shell: bash + # For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to + # set the artifact folder names (because the character `/` is not allowed). + run: | + echo "${{ matrix.folders }}" + matrix_folders=${{ matrix.folders }} + matrix_folders=${matrix_folders/'models/'/'models_'} + echo "$matrix_folders" + echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV + + - name: Update clone + working-directory: /transformers + run: git fetch && git checkout ${{ github.sha }} + + - name: Reinstall transformers in edit mode (remove the one installed during docker image build) + working-directory: /transformers + run: python3 -m pip uninstall -y transformers && python3 -m pip install -e . 
+ + - name: Update / Install some packages (for Past CI) + if: ${{ contains(inputs.docker, '-past-') }} + working-directory: /transformers + run: | + python3 -m pip install -U datasets + + - name: Update / Install some packages (for Past CI) + if: ${{ contains(inputs.docker, '-past-') && contains(inputs.docker, '-pytorch-') }} + working-directory: /transformers + run: | + python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate + + - name: ROCM-SMI + run: | + rocm-smi + + - name: ROCM-INFO + run: | + rocminfo | grep "Agent" -A 14 + + - name: Show ROCR environment + run: | + echo "ROCR: $ROCR_VISIBLE_DEVICES" + + - name: Environment + working-directory: /transformers + run: | + python3 utils/print_env.py + + - name: Show installed libraries and their versions + working-directory: /transformers + run: pip freeze + + - name: Run all tests on GPU + working-directory: /transformers + run: python3 -m pytest -rsfE -v --make-reports=${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }} -m "not not_device_test" + + - name: Failure short reports + if: ${{ failure() }} + continue-on-error: true + run: cat /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt + + - name: Run test + shell: bash + run: | + mkdir -p /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports + echo "hello" > /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/hello.txt + echo "${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports" + + - name: "Test suite reports artifacts: ${{ inputs.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports" + if: ${{ always() }} + uses: actions/upload-artifact@v4 + with: + name: ${{ inputs.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports + path: /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports diff --git a/.github/workflows/push-important-models.yml b/.github/workflows/push-important-models.yml index ef965396361116..41bcd43fcc6fc2 100644 --- a/.github/workflows/push-important-models.yml +++ b/.github/workflows/push-important-models.yml @@ -5,7 +5,6 @@ on: branches: [ main ] env: - IS_GITHUB_CI: "1" OUTPUT_SLACK_CHANNEL_ID: "C06L2SGMEEA" HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }} HF_HOME: /mnt/cache @@ -86,7 +85,7 @@ jobs: - name: Run FA2 tests id: run_fa2_tests run: - pytest -rs -m "flash_attn_test" --make-reports=${{ matrix.model-name }}_fa2_tests/ tests/${{ matrix.model-name }}/test_modeling_* + pytest -rsfE -m "flash_attn_test" --make-reports=${{ matrix.model-name }}_fa2_tests/ tests/${{ matrix.model-name }}/test_modeling_* - name: "Test suite reports artifacts: ${{ matrix.model-name }}_fa2_tests" if: ${{ always() }} @@ -108,7 +107,7 @@ jobs: id: run_integration_tests if: always() run: - pytest -rs -k "IntegrationTest" --make-reports=tests_integration_${{ matrix.model-name }} tests/${{ matrix.model-name }}/test_modeling_* + pytest -rsfE -k "IntegrationTest" --make-reports=tests_integration_${{ matrix.model-name }} tests/${{ matrix.model-name }}/test_modeling_* - name: "Test suite reports artifacts: tests_integration_${{ matrix.model-name }}" if: ${{ always() }} @@ -134,3 +133,10 @@ jobs: slackChannel: ${{ secrets.SLACK_CIFEEDBACK_CHANNEL }} slackToken: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }} waitForSSH: true + + benchmark: + 
name: Benchmark workflow + needs: get_modified_models + if: ${{ needs.get_modified_models.outputs.matrix != '[]' && needs.get_modified_models.outputs.matrix != '' && fromJson(needs.get_modified_models.outputs.matrix)[0] != null }} + uses: ./.github/workflows/benchmark.yml + secrets: inherit diff --git a/.github/workflows/release-conda.yml b/.github/workflows/release-conda.yml index 7a1990eec6b3d7..c0e28d7a510d7f 100644 --- a/.github/workflows/release-conda.yml +++ b/.github/workflows/release-conda.yml @@ -19,7 +19,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v1 + uses: actions/checkout@v4 - name: Install miniconda uses: conda-incubator/setup-miniconda@v2 diff --git a/.github/workflows/self-nightly-caller.yml b/.github/workflows/self-nightly-caller.yml new file mode 100644 index 00000000000000..5538e2d56e7490 --- /dev/null +++ b/.github/workflows/self-nightly-caller.yml @@ -0,0 +1,43 @@ +name: Self-hosted runner (nightly-ci) + + +on: + repository_dispatch: + schedule: + - cron: "17 2 * * *" + push: + branches: + - run_nightly_ci* + +jobs: + build_nightly_ci_images: + name: Build Nightly CI Docker Images + if: (github.event_name == 'schedule') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_nightly_ci')) + uses: ./.github/workflows/build-nightly-ci-docker-images.yml + secrets: inherit + + model-ci: + name: Model CI + needs: [build_nightly_ci_images] + uses: ./.github/workflows/self-scheduled.yml + with: + job: run_models_gpu + slack_report_channel: "#transformers-ci-past-future" + runner: ci + docker: huggingface/transformers-all-latest-torch-nightly-gpu + ci_event: Nightly CI + secrets: inherit + + deepspeed-ci: + name: DeepSpeed CI + needs: [build_nightly_ci_images] + uses: ./.github/workflows/self-scheduled.yml + with: + job: run_torch_cuda_extensions_gpu + slack_report_channel: "#transformers-ci-past-future" + runner: ci + # test deepspeed nightly build with the latest release torch + docker: huggingface/transformers-pytorch-deepspeed-latest-gpu + ci_event: Nightly CI + working-directory-prefix: /workspace + secrets: inherit diff --git a/.github/workflows/self-nightly-past-ci-caller.yml b/.github/workflows/self-nightly-past-ci-caller.yml index 67840355960c8c..142399a6366ce6 100644 --- a/.github/workflows/self-nightly-past-ci-caller.yml +++ b/.github/workflows/self-nightly-past-ci-caller.yml @@ -2,32 +2,30 @@ name: Self-hosted runner (nightly-past-ci-caller) on: schedule: - # 2:17 am on each Sunday and Thursday - - - cron: "17 2 * * 0,4" + - cron: "17 2,14 * * *" push: branches: - - run_nightly_ci* - run_past_ci* jobs: - build_nightly_ci_images: - name: Build Nightly CI Docker Images - if: (github.event_name == 'schedule') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_nightly_ci')) - uses: ./.github/workflows/build-nightly-ci-docker-images.yml - secrets: inherit - - run_nightly_ci: - name: Nightly CI - needs: [build_nightly_ci_images] - uses: ./.github/workflows/self-nightly-scheduled.yml - secrets: inherit + get_number: + name: Get number + runs-on: ubuntu-22.04 + outputs: + run_number: ${{ steps.get_number.outputs.run_number }} + steps: + - name: Get number + id: get_number + run: | + echo "${{ github.run_number }}" + echo "$(python3 -c 'print(int(${{ github.run_number }}) % 10)')" + echo "run_number=$(python3 -c 'print(int(${{ github.run_number }}) % 10)')" >> $GITHUB_OUTPUT run_past_ci_pytorch_1-13: name: PyTorch 1.13 - if: (cancelled() != true) && ((github.event_name == 'schedule') || ((github.event_name == 
'push') && startsWith(github.ref_name, 'run_past_ci'))) - needs: [run_nightly_ci] - uses: ./.github/workflows/self-past.yml + needs: get_number + if: needs.get_number.outputs.run_number == 0 && (cancelled() != true) && ((github.event_name == 'schedule') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci'))) + uses: ./.github/workflows/self-past-caller.yml with: framework: pytorch version: "1.13" @@ -36,9 +34,9 @@ jobs: run_past_ci_pytorch_1-12: name: PyTorch 1.12 - if: (cancelled() != true) && ((github.event_name == 'schedule') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci'))) - needs: [run_past_ci_pytorch_1-13] - uses: ./.github/workflows/self-past.yml + needs: get_number + if: needs.get_number.outputs.run_number == 1 && (cancelled() != true) && ((github.event_name == 'schedule') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci'))) + uses: ./.github/workflows/self-past-caller.yml with: framework: pytorch version: "1.12" @@ -47,9 +45,9 @@ jobs: run_past_ci_pytorch_1-11: name: PyTorch 1.11 - if: (cancelled() != true) && ((github.event_name == 'schedule') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci'))) - needs: [run_past_ci_pytorch_1-12] - uses: ./.github/workflows/self-past.yml + needs: get_number + if: needs.get_number.outputs.run_number == 2 && (cancelled() != true) && ((github.event_name == 'schedule') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci'))) + uses: ./.github/workflows/self-past-caller.yml with: framework: pytorch version: "1.11" @@ -58,9 +56,9 @@ jobs: run_past_ci_tensorflow_2-11: name: TensorFlow 2.11 - if: (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci')) - needs: [run_past_ci_pytorch_1-11] - uses: ./.github/workflows/self-past.yml + needs: get_number + if: needs.get_number.outputs.run_number == 3 && (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci')) + uses: ./.github/workflows/self-past-caller.yml with: framework: tensorflow version: "2.11" @@ -69,9 +67,9 @@ jobs: run_past_ci_tensorflow_2-10: name: TensorFlow 2.10 - if: (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci')) - needs: [run_past_ci_tensorflow_2-11] - uses: ./.github/workflows/self-past.yml + needs: get_number + if: needs.get_number.outputs.run_number == 4 && (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci')) + uses: ./.github/workflows/self-past-caller.yml with: framework: tensorflow version: "2.10" @@ -80,9 +78,9 @@ jobs: run_past_ci_tensorflow_2-9: name: TensorFlow 2.9 - if: (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci')) - needs: [run_past_ci_tensorflow_2-10] - uses: ./.github/workflows/self-past.yml + needs: get_number + if: needs.get_number.outputs.run_number == 5 && (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci')) + uses: ./.github/workflows/self-past-caller.yml with: framework: tensorflow version: "2.9" @@ -91,9 +89,9 @@ jobs: run_past_ci_tensorflow_2-8: name: TensorFlow 2.8 - if: (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci')) - needs: [run_past_ci_tensorflow_2-9] - uses: ./.github/workflows/self-past.yml + needs: get_number + if: needs.get_number.outputs.run_number == 6 && (cancelled() != 
true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci')) + uses: ./.github/workflows/self-past-caller.yml with: framework: tensorflow version: "2.8" @@ -102,9 +100,9 @@ jobs: run_past_ci_tensorflow_2-7: name: TensorFlow 2.7 - if: (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci')) - needs: [run_past_ci_tensorflow_2-8] - uses: ./.github/workflows/self-past.yml + needs: get_number + if: needs.get_number.outputs.run_number == 7 && (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci')) + uses: ./.github/workflows/self-past-caller.yml with: framework: tensorflow version: "2.7" @@ -113,9 +111,9 @@ jobs: run_past_ci_tensorflow_2-6: name: TensorFlow 2.6 - if: (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci')) - needs: [run_past_ci_tensorflow_2-7] - uses: ./.github/workflows/self-past.yml + needs: get_number + if: needs.get_number.outputs.run_number == 8 && (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci')) + uses: ./.github/workflows/self-past-caller.yml with: framework: tensorflow version: "2.6" @@ -124,9 +122,9 @@ jobs: run_past_ci_tensorflow_2-5: name: TensorFlow 2.5 - if: (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci')) - needs: [run_past_ci_tensorflow_2-6] - uses: ./.github/workflows/self-past.yml + needs: get_number + if: needs.get_number.outputs.run_number == 9 && (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci')) + uses: ./.github/workflows/self-past-caller.yml with: framework: tensorflow version: "2.5" diff --git a/.github/workflows/self-nightly-scheduled.yml b/.github/workflows/self-nightly-scheduled.yml deleted file mode 100644 index 875e715b068b6c..00000000000000 --- a/.github/workflows/self-nightly-scheduled.yml +++ /dev/null @@ -1,290 +0,0 @@ -name: Self-hosted runner (nightly-ci) - -# Note that each job's dependencies go into a corresponding docker file. 
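
The rewritten past-CI caller above replaces the old chained `needs:` ordering with a rotation keyed on `github.run_number % 10`: the `get_number` job publishes the index, and each framework/version job runs only when the index matches its `if:` condition. A minimal sketch of that selection, mirroring the conditions in the diff (the helper itself is illustrative, not part of the workflow):

    # Illustrative only: index -> (framework, version), as encoded by the if: conditions above.
    PAST_CI_JOBS = {
        0: ("pytorch", "1.13"),
        1: ("pytorch", "1.12"),
        2: ("pytorch", "1.11"),
        3: ("tensorflow", "2.11"),
        4: ("tensorflow", "2.10"),
        5: ("tensorflow", "2.9"),
        6: ("tensorflow", "2.8"),
        7: ("tensorflow", "2.7"),
        8: ("tensorflow", "2.6"),
        9: ("tensorflow", "2.5"),
    }

    def job_for_run(run_number: int) -> tuple:
        """Return the (framework, version) combination selected for a given github.run_number."""
        return PAST_CI_JOBS[run_number % 10]

    # e.g. run 1234 -> index 4 -> ("tensorflow", "2.10")
    assert job_for_run(1234) == ("tensorflow", "2.10")

With the twice-daily `17 2,14 * * *` cron, the ten combinations cycle through in roughly five days (pushes to `run_past_ci*` branches also bump the run number, so the cadence is approximate).
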
-# -# For example for `run_torch_cuda_extensions_gpu` the docker image is -# `huggingface/transformers-pytorch-deepspeed-latest-gpu`, which can be found at -# `docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile` - -on: - repository_dispatch: - workflow_call: - -env: - HF_HOME: /mnt/cache - TRANSFORMERS_IS_CI: yes - OMP_NUM_THREADS: 8 - MKL_NUM_THREADS: 8 - RUN_SLOW: yes - HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }} - SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }} - TF_FORCE_GPU_ALLOW_GROWTH: true - RUN_PT_TF_CROSS_TESTS: 1 - CUDA_VISIBLE_DEVICES: 0,1 - -jobs: - setup: - name: Setup - strategy: - matrix: - machine_type: [single-gpu, multi-gpu] - runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, past-ci] - container: - image: huggingface/transformers-all-latest-torch-nightly-gpu - options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ - outputs: - matrix: ${{ steps.set-matrix.outputs.matrix }} - steps: - - name: Update clone - working-directory: /transformers - run: | - git fetch && git checkout ${{ github.sha }} - - - name: Cleanup - working-directory: /transformers - run: | - rm -rf tests/__pycache__ - rm -rf tests/models/__pycache__ - rm -rf reports - - - name: Show installed libraries and their versions - working-directory: /transformers - run: pip freeze - - - id: set-matrix - name: Identify models to test - working-directory: /transformers/tests - run: | - echo "matrix=$(python3 -c 'import os; tests = os.getcwd(); model_tests = os.listdir(os.path.join(tests, "models")); d1 = sorted(list(filter(os.path.isdir, os.listdir(tests)))); d2 = sorted(list(filter(os.path.isdir, [f"models/{x}" for x in model_tests]))); d1.remove("models"); d = d2 + d1; print(d)')" >> $GITHUB_OUTPUT - - - name: NVIDIA-SMI - run: | - nvidia-smi - - run_tests_single_gpu: - name: Model tests - strategy: - fail-fast: false - matrix: - folders: ${{ fromJson(needs.setup.outputs.matrix) }} - machine_type: [single-gpu] - runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, past-ci] - container: - image: huggingface/transformers-all-latest-torch-nightly-gpu - options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ - needs: setup - steps: - - name: Echo folder ${{ matrix.folders }} - shell: bash - # For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to - # set the artifact folder names (because the character `/` is not allowed). - run: | - echo "${{ matrix.folders }}" - matrix_folders=${{ matrix.folders }} - matrix_folders=${matrix_folders/'models/'/'models_'} - echo "$matrix_folders" - echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV - - - name: Update clone - working-directory: /transformers - run: git fetch && git checkout ${{ github.sha }} - - - name: Reinstall transformers in edit mode (remove the one installed during docker image build) - working-directory: /transformers - run: python3 -m pip uninstall -y transformers && python3 -m pip install -e . 
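
The `set-matrix` one-liner in the setup job above is dense; unrolled, it does roughly the following (a readable sketch of the same logic, executed from the repository's `tests/` directory as the workflow does, not a separate utility):

    import os

    tests = os.getcwd()  # the workflow runs this from /transformers/tests
    model_tests = os.listdir(os.path.join(tests, "models"))

    # Top-level test folders (relative names resolve because the cwd is tests/).
    d1 = sorted(filter(os.path.isdir, os.listdir(tests)))
    # Per-model folders, expressed as "models/<name>".
    d2 = sorted(filter(os.path.isdir, [f"models/{x}" for x in model_tests]))

    d1.remove("models")  # drop the umbrella folder; its children are already in d2
    # Model folders first, then the remaining top-level folders; this list is the
    # job matrix written to $GITHUB_OUTPUT.
    print(d2 + d1)
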
- - - name: NVIDIA-SMI - run: | - nvidia-smi - - - name: Environment - working-directory: /transformers - run: | - python3 utils/print_env.py - - - name: Show installed libraries and their versions - working-directory: /transformers - run: pip freeze - - - name: Run all tests on GPU - working-directory: /transformers - run: python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }} tests/${{ matrix.folders }} - - - name: Failure short reports - if: ${{ failure() }} - continue-on-error: true - run: cat /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt - - - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports_postfix_nightly" - if: ${{ always() }} - uses: actions/upload-artifact@v4 - with: - name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports_postfix_nightly - path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }} - - run_tests_multi_gpu: - name: Model tests - strategy: - fail-fast: false - matrix: - folders: ${{ fromJson(needs.setup.outputs.matrix) }} - machine_type: [multi-gpu] - runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, past-ci] - container: - image: huggingface/transformers-all-latest-torch-nightly-gpu - options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ - needs: setup - steps: - - name: Echo folder ${{ matrix.folders }} - shell: bash - # For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to - # set the artifact folder names (because the character `/` is not allowed). - run: | - echo "${{ matrix.folders }}" - matrix_folders=${{ matrix.folders }} - matrix_folders=${matrix_folders/'models/'/'models_'} - echo "$matrix_folders" - echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV - - - name: Update clone - working-directory: /transformers - run: git fetch && git checkout ${{ github.sha }} - - - name: Reinstall transformers in edit mode (remove the one installed during docker image build) - working-directory: /transformers - run: python3 -m pip uninstall -y transformers && python3 -m pip install -e . 
- - - name: NVIDIA-SMI - run: | - nvidia-smi - - - name: Environment - working-directory: /transformers - run: | - python3 utils/print_env.py - - - name: Show installed libraries and their versions - working-directory: /transformers - run: pip freeze - - - name: Run all tests on GPU - working-directory: /transformers - run: python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }} tests/${{ matrix.folders }} - - - name: Failure short reports - if: ${{ failure() }} - continue-on-error: true - run: cat /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt - - - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports_postfix_nightly" - if: ${{ always() }} - uses: actions/upload-artifact@v4 - with: - name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports_postfix_nightly - path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }} - - run_torch_cuda_extensions_gpu: - name: Torch CUDA extension tests - strategy: - fail-fast: false - matrix: - machine_type: [single-gpu, multi-gpu] - runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, past-ci] - needs: setup - container: - image: huggingface/transformers-pytorch-deepspeed-nightly-gpu - options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ - steps: - - name: Update clone - working-directory: /workspace/transformers - run: git fetch && git checkout ${{ github.sha }} - - - name: Reinstall transformers in edit mode (remove the one installed during docker image build) - working-directory: /workspace/transformers - run: python3 -m pip uninstall -y transformers && python3 -m pip install -e . - - - name: Remove cached torch extensions - run: rm -rf /github/home/.cache/torch_extensions/ - - # To avoid unknown test failures - - name: Pre build DeepSpeed *again* - working-directory: /workspace - run: | - python3 -m pip uninstall -y deepspeed - rm -rf DeepSpeed - git clone https://github.com/microsoft/DeepSpeed && cd DeepSpeed && rm -rf build - DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install . 
--global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check - - - name: NVIDIA-SMI - run: | - nvidia-smi - - - name: Environment - working-directory: /workspace/transformers - run: | - python utils/print_env.py - - - name: Show installed libraries and their versions - working-directory: /workspace/transformers - run: pip freeze - - - name: Run all tests on GPU - working-directory: /workspace/transformers - run: | - python -m pytest -v --make-reports=${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended - - - name: Failure short reports - if: ${{ failure() }} - continue-on-error: true - run: cat /workspace/transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt - - - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports_postfix_nightly" - if: ${{ always() }} - uses: actions/upload-artifact@v4 - with: - name: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports_postfix_nightly - path: /workspace/transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports - - send_results: - name: Send results to webhook - runs-on: ubuntu-22.04 - if: always() - needs: [ - setup, - run_tests_single_gpu, - run_tests_multi_gpu, - run_torch_cuda_extensions_gpu - ] - steps: - - name: Preliminary job status - shell: bash - # For the meaning of these environment variables, see the job `Setup` - run: | - echo "Setup status: ${{ needs.setup.result }}" - - - uses: actions/checkout@v4 - - uses: actions/download-artifact@v4 - - name: Send message to Slack - env: - CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }} - CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }} - CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }} - CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }} - CI_SLACK_REPORT_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID_PAST_FUTURE }} - ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }} - CI_EVENT: Nightly CI - SETUP_STATUS: ${{ needs.setup.result }} - # We pass `needs.setup.outputs.matrix` as the argument. A processing in `notification_service.py` to change - # `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`. 
- run: | - pip install slack_sdk - pip show slack_sdk - python utils/notification_service.py "${{ needs.setup.outputs.matrix }}" - - - # delete-artifact - - uses: geekyeggo/delete-artifact@v2 - with: - name: | - single-* - multi-* diff --git a/.github/workflows/self-past-caller.yml b/.github/workflows/self-past-caller.yml new file mode 100644 index 00000000000000..1929a01c34d947 --- /dev/null +++ b/.github/workflows/self-past-caller.yml @@ -0,0 +1,40 @@ +name: Self-hosted runner (past-ci) + + +on: + workflow_call: + inputs: + framework: + required: true + type: string + version: + required: true + type: string + # Use this to control the commit to test against + sha: + default: 'main' + required: false + type: string + +jobs: + model-ci: + name: Model CI + uses: ./.github/workflows/self-scheduled.yml + with: + job: run_models_gpu + slack_report_channel: "#transformers-ci-past-future" + runner: past-ci + docker: huggingface/transformers-${{ inputs.framework }}-past-${{ inputs.version }}-gpu + ci_event: Past CI - ${{ inputs.framework }}-${{ inputs.version }} + secrets: inherit + + deepspeed-ci: + name: DeepSpeed CI + uses: ./.github/workflows/self-scheduled.yml + with: + job: run_torch_cuda_extensions_gpu + slack_report_channel: "#transformers-ci-past-future" + runner: past-ci + docker: huggingface/transformers-${{ inputs.framework }}-past-${{ inputs.version }}-gpu + ci_event: Past CI - ${{ inputs.framework }}-${{ inputs.version }} + secrets: inherit diff --git a/.github/workflows/self-past.yml b/.github/workflows/self-past.yml deleted file mode 100644 index ca47c454f6894a..00000000000000 --- a/.github/workflows/self-past.yml +++ /dev/null @@ -1,357 +0,0 @@ -name: Self-hosted runner (past-ci) - -# Note that each job's dependencies go into a corresponding docker file. 
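
The new `self-past-caller.yml` above derives the CI image directly from its `framework` and `version` inputs, following the `huggingface/transformers-<framework>-past-<version>-gpu` naming convention these workflows rely on. A one-line sketch of that naming (the helper is illustrative, not an existing utility in the repo):

    def past_ci_image(framework: str, version: str) -> str:
        # e.g. ("pytorch", "1.13") -> "huggingface/transformers-pytorch-past-1.13-gpu"
        return f"huggingface/transformers-{framework}-past-{version}-gpu"
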
-# -# For example for `run_torch_cuda_extensions_gpu` the docker image is -# `huggingface/transformers-pytorch-deepspeed-latest-gpu`, which can be found at -# `docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile` - -on: - workflow_call: - inputs: - framework: - required: true - type: string - version: - required: true - type: string - # Use this to control the commit to test against - sha: - default: 'main' - required: false - type: string - -env: - HF_HOME: /mnt/cache - TRANSFORMERS_IS_CI: yes - OMP_NUM_THREADS: 8 - MKL_NUM_THREADS: 8 - RUN_SLOW: yes - HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }} - SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }} - TF_FORCE_GPU_ALLOW_GROWTH: true - RUN_PT_TF_CROSS_TESTS: 1 - CUDA_VISIBLE_DEVICES: 0,1 - -jobs: - setup: - name: Setup - strategy: - matrix: - machine_type: [single-gpu, multi-gpu] - runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, past-ci] - container: - image: huggingface/transformers-${{ inputs.framework }}-past-${{ inputs.version }}-gpu - options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ - outputs: - matrix: ${{ steps.set-matrix.outputs.matrix }} - steps: - - name: Update clone - working-directory: /transformers - run: git fetch && git checkout ${{ inputs.sha }} - - - name: Cleanup - working-directory: /transformers - run: | - rm -rf tests/__pycache__ - rm -rf tests/models/__pycache__ - rm -rf reports - - - name: Show installed libraries and their versions - working-directory: /transformers - run: pip freeze - - - id: set-matrix - working-directory: /transformers - name: Identify models to test - run: | - cd tests - echo "matrix=$(python3 -c 'import os; tests = os.getcwd(); model_tests = os.listdir(os.path.join(tests, "models")); d1 = sorted(list(filter(os.path.isdir, os.listdir(tests)))); d2 = sorted(list(filter(os.path.isdir, [f"models/{x}" for x in model_tests]))); d1.remove("models"); d = d2 + d1; print(d)')" >> $GITHUB_OUTPUT - - run_tests_single_gpu: - name: Model tests - strategy: - fail-fast: false - matrix: - folders: ${{ fromJson(needs.setup.outputs.matrix) }} - machine_type: [single-gpu] - runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, past-ci] - container: - image: huggingface/transformers-${{ inputs.framework }}-past-${{ inputs.version }}-gpu - options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ - needs: setup - steps: - - name: Update clone - working-directory: /transformers - run: git fetch && git checkout ${{ inputs.sha }} - - - name: Reinstall transformers in edit mode (remove the one installed during docker image build) - working-directory: /transformers - run: python3 -m pip uninstall -y transformers && python3 -m pip install -e . - - - name: Update some packages - working-directory: /transformers - run: python3 -m pip install -U datasets - - - name: Echo folder ${{ matrix.folders }} - shell: bash - # For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to - # set the artifact folder names (because the character `/` is not allowed). 
- run: | - echo "${{ matrix.folders }}" - matrix_folders=${{ matrix.folders }} - matrix_folders=${matrix_folders/'models/'/'models_'} - echo "$matrix_folders" - echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV - - - name: NVIDIA-SMI - run: | - nvidia-smi - - - name: Install - if: inputs.framework == 'pytorch' - working-directory: /transformers - run: | - python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate - - - name: Environment - working-directory: /transformers - run: | - python3 utils/print_env.py - - - name: Show installed libraries and their versions - working-directory: /transformers - run: pip freeze - - - name: Run all tests on GPU - working-directory: /transformers - run: python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }} tests/${{ matrix.folders }} - - - name: Failure short reports - if: ${{ failure() }} - continue-on-error: true - run: cat /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt - - - name: Save job name - if: ${{ always() }} - shell: bash - run: | - matrix_folders=${matrix_folders/'models_'/'models/'} - job_name="Model tests ($matrix_folders, ${{ matrix.machine_type }})" - echo "$job_name" - echo "$job_name" > /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/job_name.txt - - - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports_postfix_${{ inputs.framework }}-${{ inputs.version }}" - if: ${{ always() }} - uses: actions/upload-artifact@v4 - with: - name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports_postfix_${{ inputs.framework }}-${{ inputs.version }} - path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }} - - run_tests_multi_gpu: - name: Model tests - strategy: - fail-fast: false - matrix: - folders: ${{ fromJson(needs.setup.outputs.matrix) }} - machine_type: [multi-gpu] - runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, past-ci] - container: - image: huggingface/transformers-${{ inputs.framework }}-past-${{ inputs.version }}-gpu - options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ - needs: setup - steps: - - name: Update clone - working-directory: /transformers - run: git fetch && git checkout ${{ inputs.sha }} - - - name: Reinstall transformers in edit mode (remove the one installed during docker image build) - working-directory: /transformers - run: python3 -m pip uninstall -y transformers && python3 -m pip install -e . - - - name: Update some packages - working-directory: /transformers - run: python3 -m pip install -U datasets - - - name: Echo folder ${{ matrix.folders }} - shell: bash - # For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to - # set the artifact folder names (because the character `/` is not allowed). 
- run: | - echo "${{ matrix.folders }}" - matrix_folders=${{ matrix.folders }} - matrix_folders=${matrix_folders/'models/'/'models_'} - echo "$matrix_folders" - echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV - - - name: NVIDIA-SMI - run: | - nvidia-smi - - - name: Install - if: inputs.framework == 'pytorch' - working-directory: /transformers - run: | - python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate - - - name: Environment - working-directory: /transformers - run: | - python3 utils/print_env.py - - - name: Show installed libraries and their versions - working-directory: /transformers - run: pip freeze - - - name: Run all tests on GPU - working-directory: /transformers - run: python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }} tests/${{ matrix.folders }} - - - name: Failure short reports - if: ${{ failure() }} - continue-on-error: true - run: cat /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt - - - name: Save job name - if: ${{ always() }} - shell: bash - run: | - matrix_folders=${matrix_folders/'models_'/'models/'} - job_name="Model tests ($matrix_folders, ${{ matrix.machine_type }})" - echo "$job_name" - echo "$job_name" > /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/job_name.txt - - - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports_postfix_${{ inputs.framework }}-${{ inputs.version }}" - if: ${{ always() }} - uses: actions/upload-artifact@v4 - with: - name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports_postfix_${{ inputs.framework }}-${{ inputs.version }} - path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }} - - run_torch_cuda_extensions_gpu: - name: Torch CUDA extension tests - if: inputs.framework == 'pytorch' - strategy: - fail-fast: false - matrix: - machine_type: [single-gpu, multi-gpu] - runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, past-ci] - needs: setup - container: - image: huggingface/transformers-${{ inputs.framework }}-past-${{ inputs.version }}-gpu - options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ - steps: - - name: Update clone - working-directory: /transformers - run: git fetch && git checkout ${{ github.sha }} - - - name: Reinstall transformers in edit mode (remove the one installed during docker image build) - working-directory: /transformers - run: python3 -m pip uninstall -y transformers && python3 -m pip install -e . - - - name: Update some packages - working-directory: /transformers - run: python3 -m pip install -U datasets - - - name: Install - working-directory: /transformers - run: | - python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate - - - name: Remove cached torch extensions - run: rm -rf /github/home/.cache/torch_extensions/ - - # To avoid unknown test failures - - name: Pre build DeepSpeed *again* - working-directory: / - run: | - python3 -m pip uninstall -y deepspeed - rm -rf DeepSpeed - git clone https://github.com/microsoft/DeepSpeed && cd DeepSpeed && rm -rf build - DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install . 
--global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check - - - name: NVIDIA-SMI - run: | - nvidia-smi - - - name: Environment - working-directory: /transformers - run: | - python3 utils/print_env.py - - - name: Show installed libraries and their versions - working-directory: /transformers - run: pip freeze - - - name: Run all tests on GPU - working-directory: /transformers - run: | - python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended - - - name: Failure short reports - if: ${{ failure() }} - continue-on-error: true - run: cat /transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt - - - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports_postfix_${{ inputs.framework }}-${{ inputs.version }}" - if: ${{ always() }} - uses: actions/upload-artifact@v4 - with: - name: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports_postfix_${{ inputs.framework }}-${{ inputs.version }} - path: /transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports - - send_results: - name: Send results to webhook - runs-on: ubuntu-22.04 - if: always() - needs: [ - setup, - run_tests_single_gpu, - run_tests_multi_gpu, - run_torch_cuda_extensions_gpu - ] - steps: - - name: Preliminary job status - shell: bash - # For the meaning of these environment variables, see the job `Setup` - run: | - echo "Setup status: ${{ needs.setup.result }}" - - - uses: actions/checkout@v4 - - uses: actions/download-artifact@v4 - - # Create a directory to store test failure tables in the next step - - name: Create directory - run: mkdir test_failure_tables - - - name: Send message to Slack - env: - CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }} - CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }} - CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }} - CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }} - CI_SLACK_REPORT_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID_PAST_FUTURE }} - ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }} - CI_EVENT: Past CI - ${{ inputs.framework }}-${{ inputs.version }} - SETUP_STATUS: ${{ needs.setup.result }} - # We pass `needs.setup.outputs.matrix` as the argument. A processing in `notification_service.py` to change - # `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`. - run: | - pip install slack_sdk - pip show slack_sdk - python utils/notification_service.py "${{ needs.setup.outputs.matrix }}" - - # Upload complete failure tables, as they might be big and only truncated versions could be sent to Slack. 
- - name: Failure table artifacts - if: ${{ always() }} - uses: actions/upload-artifact@v4 - with: - name: test_failure_tables_${{ inputs.framework }}-${{ inputs.version }} - path: test_failure_tables - - # delete-artifact - - uses: geekyeggo/delete-artifact@v2 - with: - name: | - single-* - multi-* diff --git a/.github/workflows/self-pr-slow-ci.yml b/.github/workflows/self-pr-slow-ci.yml index 10a2156f210fbc..2287b5e3f31587 100644 --- a/.github/workflows/self-pr-slow-ci.yml +++ b/.github/workflows/self-pr-slow-ci.yml @@ -4,7 +4,7 @@ on: pull_request: paths: - "src/transformers/models/*/modeling_*.py" - - "tests/models/*/test_*.py" + - "tests/**/test_*.py" concurrency: group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} @@ -110,7 +110,10 @@ jobs: - name: Run all tests on GPU working-directory: /transformers - run: python3 -m pytest -v -rs --make-reports=${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }} + run: | + export CUDA_VISIBLE_DEVICES="$(python3 utils/set_cuda_devices_for_ci.py --test_folder ${{ matrix.folders }})" + echo $CUDA_VISIBLE_DEVICES + python3 -m pytest -v -rsfE --make-reports=${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }} - name: Failure short reports if: ${{ failure() }} diff --git a/.github/workflows/self-push-amd.yml b/.github/workflows/self-push-amd.yml index 8d68002e329418..6931c2f3eadcad 100644 --- a/.github/workflows/self-push-amd.yml +++ b/.github/workflows/self-push-amd.yml @@ -64,23 +64,24 @@ jobs: outputs: matrix: ${{ steps.set-matrix.outputs.matrix }} test_map: ${{ steps.set-matrix.outputs.test_map }} + env: + # `CI_BRANCH_PUSH`: The branch name from the push event + # `CI_BRANCH_WORKFLOW_RUN`: The name of the branch on which this workflow is triggered by `workflow_run` event + # `CI_SHA_PUSH`: The commit SHA from the push event + # `CI_SHA_WORKFLOW_RUN`: The commit SHA that triggers this workflow by `workflow_run` event + CI_BRANCH_PUSH: ${{ github.event.ref }} + CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }} + CI_SHA_PUSH: ${{ github.event.head_commit.id }} + CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }} steps: # Necessary to get the correct branch name and commit SHA for `workflow_run` event # We also take into account the `push` event (we might want to test some changes in a branch) - name: Prepare custom environment variables shell: bash - # `CI_BRANCH_PUSH`: The branch name from the push event - # `CI_BRANCH_WORKFLOW_RUN`: The name of the branch on which this workflow is triggered by `workflow_run` event # `CI_BRANCH`: The non-empty branch name from the above two (one and only one of them is empty) - # `CI_SHA_PUSH`: The commit SHA from the push event - # `CI_SHA_WORKFLOW_RUN`: The commit SHA that triggers this workflow by `workflow_run` event # `CI_SHA`: The non-empty commit SHA from the above two (one and only one of them is empty) run: | - CI_BRANCH_PUSH=${{ github.event.ref }} CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''} - CI_BRANCH_WORKFLOW_RUN=${{ github.event.workflow_run.head_branch }} - CI_SHA_PUSH=${{ github.event.head_commit.id }} - CI_SHA_WORKFLOW_RUN=${{ github.event.workflow_run.head_sha }} echo $CI_BRANCH_PUSH echo $CI_BRANCH_WORKFLOW_RUN echo $CI_SHA_PUSH @@ -159,6 +160,12 @@ jobs: container: image: huggingface/transformers-pytorch-amd-gpu-push-ci # <--- We test only for PyTorch for now options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size 
"16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ + env: + # For the meaning of these environment variables, see the job `Setup` + CI_BRANCH_PUSH: ${{ github.event.ref }} + CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }} + CI_SHA_PUSH: ${{ github.event.head_commit.id }} + CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }} steps: # Necessary to get the correct branch name and commit SHA for `workflow_run` event # We also take into account the `push` event (we might want to test some changes in a branch) @@ -166,11 +173,7 @@ jobs: shell: bash # For the meaning of these environment variables, see the job `Setup` run: | - CI_BRANCH_PUSH=${{ github.event.ref }} CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''} - CI_BRANCH_WORKFLOW_RUN=${{ github.event.workflow_run.head_branch }} - CI_SHA_PUSH=${{ github.event.head_commit.id }} - CI_SHA_WORKFLOW_RUN=${{ github.event.workflow_run.head_sha }} echo $CI_BRANCH_PUSH echo $CI_BRANCH_WORKFLOW_RUN echo $CI_SHA_PUSH @@ -256,6 +259,12 @@ jobs: # run_tests_torch_cuda_extensions_single_gpu, # run_tests_torch_cuda_extensions_multi_gpu ] + env: + # For the meaning of these environment variables, see the job `Setup` + CI_BRANCH_PUSH: ${{ github.event.ref }} + CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }} + CI_SHA_PUSH: ${{ github.event.head_commit.id }} + CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }} steps: - name: Preliminary job status shell: bash @@ -271,11 +280,7 @@ jobs: shell: bash # For the meaning of these environment variables, see the job `Setup` run: | - CI_BRANCH_PUSH=${{ github.event.ref }} CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''} - CI_BRANCH_WORKFLOW_RUN=${{ github.event.workflow_run.head_branch }} - CI_SHA_PUSH=${{ github.event.head_commit.id }} - CI_SHA_WORKFLOW_RUN=${{ github.event.workflow_run.head_sha }} echo $CI_BRANCH_PUSH echo $CI_BRANCH_WORKFLOW_RUN echo $CI_SHA_PUSH @@ -324,6 +329,7 @@ jobs: # We pass `needs.setup_gpu.outputs.matrix` as the argument. A processing in `notification_service.py` to change # `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`. 
run: | + pip install huggingface_hub pip install slack_sdk pip show slack_sdk python utils/notification_service.py "${{ needs.setup_gpu.outputs.matrix }}" diff --git a/.github/workflows/self-push.yml b/.github/workflows/self-push.yml index 1bc02ccd826eb0..b328f65d34a5fe 100644 --- a/.github/workflows/self-push.yml +++ b/.github/workflows/self-push.yml @@ -40,23 +40,24 @@ jobs: outputs: matrix: ${{ steps.set-matrix.outputs.matrix }} test_map: ${{ steps.set-matrix.outputs.test_map }} + env: + # `CI_BRANCH_PUSH`: The branch name from the push event + # `CI_BRANCH_WORKFLOW_RUN`: The name of the branch on which this workflow is triggered by `workflow_run` event + # `CI_SHA_PUSH`: The commit SHA from the push event + # `CI_SHA_WORKFLOW_RUN`: The commit SHA that triggers this workflow by `workflow_run` event + CI_BRANCH_PUSH: ${{ github.event.ref }} + CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }} + CI_SHA_PUSH: ${{ github.event.head_commit.id }} + CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }} steps: # Necessary to get the correct branch name and commit SHA for `workflow_run` event # We also take into account the `push` event (we might want to test some changes in a branch) - name: Prepare custom environment variables shell: bash - # `CI_BRANCH_PUSH`: The branch name from the push event - # `CI_BRANCH_WORKFLOW_RUN`: The name of the branch on which this workflow is triggered by `workflow_run` event # `CI_BRANCH`: The non-empty branch name from the above two (one and only one of them is empty) - # `CI_SHA_PUSH`: The commit SHA from the push event - # `CI_SHA_WORKFLOW_RUN`: The commit SHA that triggers this workflow by `workflow_run` event # `CI_SHA`: The non-empty commit SHA from the above two (one and only one of them is empty) run: | - CI_BRANCH_PUSH=${{ github.event.ref }} CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''} - CI_BRANCH_WORKFLOW_RUN=${{ github.event.workflow_run.head_branch }} - CI_SHA_PUSH=${{ github.event.head_commit.id }} - CI_SHA_WORKFLOW_RUN=${{ github.event.workflow_run.head_sha }} echo $CI_BRANCH_PUSH echo $CI_BRANCH_WORKFLOW_RUN echo $CI_SHA_PUSH @@ -135,6 +136,12 @@ jobs: container: image: huggingface/transformers-all-latest-gpu-push-ci options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ + env: + # For the meaning of these environment variables, see the job `Setup` + CI_BRANCH_PUSH: ${{ github.event.ref }} + CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }} + CI_SHA_PUSH: ${{ github.event.head_commit.id }} + CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }} steps: # Necessary to get the correct branch name and commit SHA for `workflow_run` event # We also take into account the `push` event (we might want to test some changes in a branch) @@ -142,11 +149,7 @@ jobs: shell: bash # For the meaning of these environment variables, see the job `Setup` run: | - CI_BRANCH_PUSH=${{ github.event.ref }} CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''} - CI_BRANCH_WORKFLOW_RUN=${{ github.event.workflow_run.head_branch }} - CI_SHA_PUSH=${{ github.event.head_commit.id }} - CI_SHA_WORKFLOW_RUN=${{ github.event.workflow_run.head_sha }} echo $CI_BRANCH_PUSH echo $CI_BRANCH_WORKFLOW_RUN echo $CI_SHA_PUSH @@ -228,6 +231,12 @@ jobs: container: image: huggingface/transformers-all-latest-gpu-push-ci options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ + env: + # For the meaning of these environment variables, see the job `Setup` + 
CI_BRANCH_PUSH: ${{ github.event.ref }} + CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }} + CI_SHA_PUSH: ${{ github.event.head_commit.id }} + CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }} steps: # Necessary to get the correct branch name and commit SHA for `workflow_run` event # We also take into account the `push` event (we might want to test some changes in a branch) @@ -235,11 +244,7 @@ jobs: shell: bash # For the meaning of these environment variables, see the job `Setup` run: | - CI_BRANCH_PUSH=${{ github.event.ref }} CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''} - CI_BRANCH_WORKFLOW_RUN=${{ github.event.workflow_run.head_branch }} - CI_SHA_PUSH=${{ github.event.head_commit.id }} - CI_SHA_WORKFLOW_RUN=${{ github.event.workflow_run.head_sha }} echo $CI_BRANCH_PUSH echo $CI_BRANCH_WORKFLOW_RUN echo $CI_SHA_PUSH @@ -321,6 +326,12 @@ jobs: container: image: huggingface/transformers-pytorch-deepspeed-latest-gpu-push-ci options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ + env: + # For the meaning of these environment variables, see the job `Setup` + CI_BRANCH_PUSH: ${{ github.event.ref }} + CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }} + CI_SHA_PUSH: ${{ github.event.head_commit.id }} + CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }} steps: # Necessary to get the correct branch name and commit SHA for `workflow_run` event # We also take into account the `push` event (we might want to test some changes in a branch) @@ -328,11 +339,7 @@ jobs: shell: bash # For the meaning of these environment variables, see the job `Setup` run: | - CI_BRANCH_PUSH=${{ github.event.ref }} CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''} - CI_BRANCH_WORKFLOW_RUN=${{ github.event.workflow_run.head_branch }} - CI_SHA_PUSH=${{ github.event.head_commit.id }} - CI_SHA_WORKFLOW_RUN=${{ github.event.workflow_run.head_sha }} echo $CI_BRANCH_PUSH echo $CI_BRANCH_WORKFLOW_RUN echo $CI_SHA_PUSH @@ -411,6 +418,12 @@ jobs: container: image: huggingface/transformers-pytorch-deepspeed-latest-gpu-push-ci options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ + env: + # For the meaning of these environment variables, see the job `Setup` + CI_BRANCH_PUSH: ${{ github.event.ref }} + CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }} + CI_SHA_PUSH: ${{ github.event.head_commit.id }} + CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }} steps: # Necessary to get the correct branch name and commit SHA for `workflow_run` event # We also take into account the `push` event (we might want to test some changes in a branch) @@ -418,11 +431,7 @@ jobs: shell: bash # For the meaning of these environment variables, see the job `Setup` run: | - CI_BRANCH_PUSH=${{ github.event.ref }} CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''} - CI_BRANCH_WORKFLOW_RUN=${{ github.event.workflow_run.head_branch }} - CI_SHA_PUSH=${{ github.event.head_commit.id }} - CI_SHA_WORKFLOW_RUN=${{ github.event.workflow_run.head_sha }} echo $CI_BRANCH_PUSH echo $CI_BRANCH_WORKFLOW_RUN echo $CI_SHA_PUSH @@ -500,6 +509,12 @@ jobs: run_tests_torch_cuda_extensions_single_gpu, run_tests_torch_cuda_extensions_multi_gpu ] + env: + # For the meaning of these environment variables, see the job `Setup` + CI_BRANCH_PUSH: ${{ github.event.ref }} + CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }} + CI_SHA_PUSH: ${{ github.event.head_commit.id }} + CI_SHA_WORKFLOW_RUN: ${{ 
github.event.workflow_run.head_sha }} steps: - name: Preliminary job status shell: bash @@ -513,11 +528,7 @@ jobs: shell: bash # For the meaning of these environment variables, see the job `Setup` run: | - CI_BRANCH_PUSH=${{ github.event.ref }} CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''} - CI_BRANCH_WORKFLOW_RUN=${{ github.event.workflow_run.head_branch }} - CI_SHA_PUSH=${{ github.event.head_commit.id }} - CI_SHA_WORKFLOW_RUN=${{ github.event.workflow_run.head_sha }} echo $CI_BRANCH_PUSH echo $CI_BRANCH_WORKFLOW_RUN echo $CI_SHA_PUSH @@ -563,6 +574,7 @@ jobs: # We pass `needs.setup.outputs.matrix` as the argument. A processing in `notification_service.py` to change # `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`. run: | - pip install slack_sdk + pip install huggingface_hub + pip install slack_sdk pip show slack_sdk python utils/notification_service.py "${{ needs.setup.outputs.matrix }}" diff --git a/.github/workflows/self-scheduled-amd-mi210-caller.yml b/.github/workflows/self-scheduled-amd-mi210-caller.yml index 6abba6894aaffa..1c79b38a314e0b 100644 --- a/.github/workflows/self-scheduled-amd-mi210-caller.yml +++ b/.github/workflows/self-scheduled-amd-mi210-caller.yml @@ -10,11 +10,46 @@ on: - run_amd_scheduled_ci_caller* jobs: - run_amd_ci: - name: AMD mi210 - if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_scheduled_ci_caller'))) + model-ci: + name: Model CI uses: ./.github/workflows/self-scheduled-amd.yml with: - gpu_flavor: mi210 + job: run_models_gpu slack_report_channel: "#transformers-ci-daily-amd" + runner: mi210 + docker: huggingface/transformers-pytorch-amd-gpu + ci_event: Scheduled CI (AMD) - mi210 + secrets: inherit + + torch-pipeline: + name: Torch pipeline CI + uses: ./.github/workflows/self-scheduled-amd.yml + with: + job: run_pipelines_torch_gpu + slack_report_channel: "#transformers-ci-daily-amd" + runner: mi210 + docker: huggingface/transformers-pytorch-amd-gpu + ci_event: Scheduled CI (AMD) - mi210 + secrets: inherit + + example-ci: + name: Example CI + uses: ./.github/workflows/self-scheduled-amd.yml + with: + job: run_examples_gpu + slack_report_channel: "#transformers-ci-daily-amd" + runner: mi210 + docker: huggingface/transformers-pytorch-amd-gpu + ci_event: Scheduled CI (AMD) - mi210 + secrets: inherit + + deepspeed-ci: + name: DeepSpeed CI + uses: ./.github/workflows/self-scheduled-amd.yml + with: + job: run_torch_cuda_extensions_gpu + slack_report_channel: "#transformers-ci-daily-amd" + runner: mi210 + docker: huggingface/transformers-pytorch-deepspeed-amd-gpu + ci_event: Scheduled CI (AMD) - mi210 secrets: inherit diff --git a/.github/workflows/self-scheduled-amd-mi250-caller.yml b/.github/workflows/self-scheduled-amd-mi250-caller.yml index 36365d4a67f1e2..fd151305716396 100644 --- a/.github/workflows/self-scheduled-amd-mi250-caller.yml +++ b/.github/workflows/self-scheduled-amd-mi250-caller.yml @@ -10,11 +10,46 @@ on: - run_amd_scheduled_ci_caller* jobs: - run_amd_ci: - name: AMD mi250 - if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_scheduled_ci_caller'))) + model-ci: + name: Model CI uses: ./.github/workflows/self-scheduled-amd.yml with: - gpu_flavor: mi250 + job: run_models_gpu slack_report_channel: "#transformers-ci-daily-amd" + runner: mi250 + docker: huggingface/transformers-pytorch-amd-gpu + ci_event: Scheduled CI (AMD) - 
mi250 + secrets: inherit + + torch-pipeline: + name: Torch pipeline CI + uses: ./.github/workflows/self-scheduled-amd.yml + with: + job: run_pipelines_torch_gpu + slack_report_channel: "#transformers-ci-daily-amd" + runner: mi250 + docker: huggingface/transformers-pytorch-amd-gpu + ci_event: Scheduled CI (AMD) - mi250 + secrets: inherit + + example-ci: + name: Example CI + uses: ./.github/workflows/self-scheduled-amd.yml + with: + job: run_examples_gpu + slack_report_channel: "#transformers-ci-daily-amd" + runner: mi250 + docker: huggingface/transformers-pytorch-amd-gpu + ci_event: Scheduled CI (AMD) - mi250 + secrets: inherit + + deepspeed-ci: + name: DeepSpeed CI + uses: ./.github/workflows/self-scheduled-amd.yml + with: + job: run_torch_cuda_extensions_gpu + slack_report_channel: "#transformers-ci-daily-amd" + runner: mi250 + docker: huggingface/transformers-pytorch-deepspeed-amd-gpu + ci_event: Scheduled CI (AMD) - mi250 secrets: inherit diff --git a/.github/workflows/self-scheduled-amd-mi300-caller.yml b/.github/workflows/self-scheduled-amd-mi300-caller.yml deleted file mode 100644 index a9e7b934c34b77..00000000000000 --- a/.github/workflows/self-scheduled-amd-mi300-caller.yml +++ /dev/null @@ -1,21 +0,0 @@ -name: Self-hosted runner (AMD mi300 scheduled CI caller) - -on: - workflow_run: - workflows: ["Self-hosted runner (AMD scheduled CI caller)"] - branches: ["main"] - types: [completed] - push: - branches: - - run_amd_scheduled_ci_caller* - -jobs: - run_amd_ci: - name: AMD mi300 - needs: build-docker-containers - if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && (startsWith(github.ref_name, 'run_amd_push_ci_caller') || startsWith(github.ref_name, 'mi300-ci')))) - uses: ./.github/workflows/self-scheduled-amd.yml - with: - gpu_flavor: mi300 - slack_report_channel: "#transformers-ci-daily-amd" - secrets: inherit diff --git a/.github/workflows/self-scheduled-amd.yml b/.github/workflows/self-scheduled-amd.yml index e9f280f51ab43d..47f92cd6a2b086 100644 --- a/.github/workflows/self-scheduled-amd.yml +++ b/.github/workflows/self-scheduled-amd.yml @@ -3,10 +3,23 @@ name: Self-hosted runner (scheduled-amd) # Note: For the AMD CI, we rely on a caller workflow and on the workflow_call event to trigger the # CI in order to run it on both MI210 and MI250, without having to use matrix here which pushes # us towards the limit of allowed jobs on GitHub Actions. + on: workflow_call: inputs: - gpu_flavor: + job: + required: true + type: string + slack_report_channel: + required: true + type: string + runner: + required: true + type: string + docker: + required: true + type: string + ci_event: required: true type: string @@ -18,7 +31,7 @@ env: RUN_SLOW: yes HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }} SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }} - + NUM_SLICES: 2 # Important note: each job (run_tests_single_gpu, run_tests_multi_gpu, run_examples_gpu, run_pipelines_torch_gpu) requires all the previous jobs before running. 
# This is done so that we avoid parallelizing the scheduled tests, to leave available @@ -42,7 +55,7 @@ jobs: strategy: matrix: machine_type: [single-gpu, multi-gpu] - runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}'] + runs-on: ['${{ matrix.machine_type }}', self-hosted, amd-gpu, '${{ inputs.runner }}'] container: image: huggingface/transformers-pytorch-amd-gpu options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ @@ -50,25 +63,29 @@ jobs: - name: ROCM-SMI run: | rocm-smi + - name: ROCM-INFO run: | rocminfo | grep "Agent" -A 14 + - name: Show ROCR environment run: | echo "ROCR: $ROCR_VISIBLE_DEVICES" setup: + if: contains(fromJSON('["run_models_gpu"]'), inputs.job) name: Setup needs: check_runners strategy: matrix: machine_type: [single-gpu, multi-gpu] - runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}'] + runs-on: ['${{ matrix.machine_type }}', self-hosted, amd-gpu, '${{ inputs.runner }}'] container: image: huggingface/transformers-pytorch-amd-gpu options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ outputs: - matrix: ${{ steps.set-matrix.outputs.matrix }} + folder_slices: ${{ steps.set-matrix.outputs.folder_slices }} + slice_ids: ${{ steps.set-matrix.outputs.slice_ids }} steps: - name: Update clone working-directory: /transformers @@ -90,7 +107,8 @@ jobs: name: Identify models to test working-directory: /transformers/tests run: | - echo "matrix=$(python3 -c 'import os; tests = os.getcwd(); model_tests = os.listdir(os.path.join(tests, "models")); d1 = sorted(list(filter(os.path.isdir, os.listdir(tests)))); d2 = sorted(list(filter(os.path.isdir, [f"models/{x}" for x in model_tests]))); d1.remove("models"); d = d2 + d1; print(d)')" >> $GITHUB_OUTPUT + echo "folder_slices=$(python3 ../utils/split_model_tests.py --num_splits ${{ env.NUM_SLICES }})" >> $GITHUB_OUTPUT + echo "slice_ids=$(python3 -c 'd = list(range(${{ env.NUM_SLICES }})); print(d)')" >> $GITHUB_OUTPUT - name: ROCM-SMI run: | @@ -99,6 +117,7 @@ jobs: - name: ROCM-INFO run: | rocminfo | grep "Agent" -A 14 + - name: Show ROCR environment run: | echo "ROCR: $ROCR_VISIBLE_DEVICES" @@ -108,99 +127,38 @@ jobs: run: | python3 utils/print_env.py - run_models_gpu_single_gpu: + run_models_gpu: + if: ${{ inputs.job == 'run_models_gpu' }} name: Single GPU tests + needs: setup strategy: max-parallel: 1 # For now, not to parallelize. Can change later if it works well. fail-fast: false matrix: - folders: ${{ fromJson(needs.setup.outputs.matrix) }} - machine_type: [single-gpu] - runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}'] - container: - image: huggingface/transformers-pytorch-amd-gpu - options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ - needs: setup - steps: - - name: Echo folder ${{ matrix.folders }} - shell: bash - # For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to - # set the artifact folder names (because the character `/` is not allowed). 
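
Earlier in this hunk, the AMD setup job stops emitting one flat model matrix and instead calls `utils/split_model_tests.py --num_splits ${{ env.NUM_SLICES }}` to produce `folder_slices`, plus a matching `slice_ids` list, so each `model_jobs_amd.yml` invocation handles one slice. The script's implementation is not shown in this diff; a minimal sketch of what such a splitter could look like (illustrative only, the real logic may differ):

    # Split a list of test folders into num_splits roughly equal slices.
    def split_folders(folders: list, num_splits: int) -> list:
        chunk = -(-len(folders) // num_splits)  # ceiling division
        return [folders[i : i + chunk] for i in range(0, len(folders), chunk)]

    folders = ["models/bert", "models/gpt2", "models/t5", "models/vit"]
    slices = split_folders(folders, 2)    # [["models/bert", "models/gpt2"], ["models/t5", "models/vit"]]
    slice_ids = list(range(len(slices)))  # [0, 1], matching the slice_ids output of the setup job
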
- run: | - echo "${{ matrix.folders }}" - matrix_folders=${{ matrix.folders }} - matrix_folders=${matrix_folders/'models/'/'models_'} - echo "$matrix_folders" - echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV - - - name: Update clone - working-directory: /transformers - run: git fetch && git checkout ${{ github.sha }} - - - name: Reinstall transformers in edit mode (remove the one installed during docker image build) - working-directory: /transformers - run: python3 -m pip uninstall -y transformers && python3 -m pip install -e . - - - name: ROCM-SMI - run: | - rocm-smi - - name: ROCM-INFO - run: | - rocminfo | grep "Agent" -A 14 - - name: Show ROCR environment - run: | - echo "ROCR: $ROCR_VISIBLE_DEVICES" - - - name: Environment - working-directory: /transformers - run: | - python3 utils/print_env.py - - - name: Show installed libraries and their versions - working-directory: /transformers - run: pip freeze - - - name: Run all tests on GPU - working-directory: /transformers - run: python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }} -m "not not_device_test" - - - name: Failure short reports - if: ${{ failure() }} - continue-on-error: true - run: cat /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt - - - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports" - if: ${{ always() }} - uses: actions/upload-artifact@v4 - with: - name: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports - path: /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports + machine_type: [single-gpu, multi-gpu] + slice_id: ${{ fromJSON(needs.setup.outputs.slice_ids) }} + uses: ./.github/workflows/model_jobs_amd.yml + with: + folder_slices: ${{ needs.setup.outputs.folder_slices }} + machine_type: ${{ matrix.machine_type }} + slice_id: ${{ matrix.slice_id }} + runner: ${{ inputs.runner }} + docker: ${{ inputs.docker }} + secrets: inherit - run_models_gpu_multi_gpu: - name: Multi GPU tests + run_pipelines_torch_gpu: + if: ${{ inputs.job == 'run_pipelines_torch_gpu' }} + name: PyTorch pipelines + needs: check_runners strategy: - max-parallel: 1 fail-fast: false matrix: - folders: ${{ fromJson(needs.setup.outputs.matrix) }} - machine_type: [multi-gpu] - runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}'] + machine_type: [single-gpu, multi-gpu] + runs-on: ['${{ matrix.machine_type }}', self-hosted, amd-gpu, '${{ inputs.runner }}'] container: - image: huggingface/transformers-pytorch-amd-gpu + image: ${{ inputs.docker }} options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ - needs: setup steps: - - name: Echo folder ${{ matrix.folders }} - shell: bash - # For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to - # set the artifact folder names (because the character `/` is not allowed). 
- run: | - echo "${{ matrix.folders }}" - matrix_folders=${{ matrix.folders }} - matrix_folders=${matrix_folders/'models/'/'models_'} - echo "$matrix_folders" - echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV - - name: Update clone working-directory: /transformers run: git fetch && git checkout ${{ github.sha }} @@ -212,9 +170,11 @@ jobs: - name: ROCM-SMI run: | rocm-smi + - name: ROCM-INFO run: | rocminfo | grep "Agent" -A 14 + - name: Show ROCR environment run: | echo "ROCR: $ROCR_VISIBLE_DEVICES" @@ -228,33 +188,35 @@ jobs: working-directory: /transformers run: pip freeze - - name: Run all tests on GPU + - name: Run all pipeline tests on GPU working-directory: /transformers - run: python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }} -m "not not_device_test" + run: | + python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports tests/pipelines -m "not not_device_test" - name: Failure short reports if: ${{ failure() }} continue-on-error: true - run: cat /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt + run: cat /transformers/reports/${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports/failures_short.txt - - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports" + - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports" if: ${{ always() }} uses: actions/upload-artifact@v4 with: - name: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports - path: /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports + name: ${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports + path: /transformers/reports/${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports run_examples_gpu: - name: Examples tests + if: ${{ inputs.job == 'run_examples_gpu' }} + name: Examples directory + needs: check_runners strategy: fail-fast: false matrix: machine_type: [single-gpu] - runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}'] + runs-on: ['${{ matrix.machine_type }}', self-hosted, amd-gpu, '${{ inputs.runner }}'] container: - image: huggingface/transformers-pytorch-amd-gpu + image: ${{ inputs.docker }} options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ - needs: setup steps: - name: Update clone working-directory: /transformers @@ -267,9 +229,11 @@ jobs: - name: ROCM-SMI run: | rocm-smi + - name: ROCM-INFO run: | rocminfo | grep "Agent" -A 14 + - name: Show ROCR environment run: | echo "ROCR: $ROCR_VISIBLE_DEVICES" @@ -301,73 +265,17 @@ jobs: name: ${{ matrix.machine_type }}_run_examples_gpu_test_reports path: /transformers/reports/${{ matrix.machine_type }}_run_examples_gpu_test_reports - run_pipelines_torch_gpu: - name: PyTorch pipelines tests - strategy: - fail-fast: false - matrix: - machine_type: [single-gpu, multi-gpu] - runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}'] - container: - image: huggingface/transformers-pytorch-amd-gpu - options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ - needs: setup - steps: - - name: Update 
clone - working-directory: /transformers - run: git fetch && git checkout ${{ github.sha }} - - - name: Reinstall transformers in edit mode (remove the one installed during docker image build) - working-directory: /transformers - run: python3 -m pip uninstall -y transformers && python3 -m pip install -e . - - - name: ROCM-SMI - run: | - rocm-smi - - name: ROCM-INFO - run: | - rocminfo | grep "Agent" -A 14 - - name: Show ROCR environment - run: | - echo "ROCR: $ROCR_VISIBLE_DEVICES" - - - name: Environment - working-directory: /transformers - run: | - python3 utils/print_env.py - - - name: Show installed libraries and their versions - working-directory: /transformers - run: pip freeze - - - name: Run all pipeline tests on GPU - working-directory: /transformers - run: | - python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports tests/pipelines -m "not not_device_test" - - - name: Failure short reports - if: ${{ failure() }} - continue-on-error: true - run: cat /transformers/reports/${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports/failures_short.txt - - - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports" - if: ${{ always() }} - uses: actions/upload-artifact@v4 - with: - name: ${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports - path: /transformers/reports/${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports - run_torch_cuda_extensions_gpu: + if: ${{ inputs.job == 'run_torch_cuda_extensions_gpu' }} name: Torch ROCm deepspeed tests + needs: check_runners strategy: fail-fast: false matrix: machine_type: [single-gpu, multi-gpu] - - runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}'] - needs: setup + runs-on: ['${{ matrix.machine_type }}', self-hosted, amd-gpu, '${{ inputs.runner }}'] container: - image: huggingface/transformers-pytorch-deepspeed-amd-gpu + image: ${{ inputs.docker }} options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ steps: - name: Update clone @@ -381,6 +289,7 @@ jobs: - name: ROCM-SMI run: | rocm-smi + - name: ROCM-INFO run: | rocminfo | grep "Agent" -A 14 @@ -414,106 +323,27 @@ jobs: name: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports path: /transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports - run_extract_warnings: - name: Extract warnings in CI artifacts - runs-on: ubuntu-22.04 - if: always() - needs: [ - check_runner_status, - check_runners, - setup, - run_models_gpu_single_gpu, - run_models_gpu_multi_gpu, - run_examples_gpu, - run_pipelines_torch_gpu, - run_torch_cuda_extensions_gpu - ] - steps: - - name: Checkout transformers - uses: actions/checkout@v4 - with: - fetch-depth: 2 - - - name: Install transformers - run: pip install transformers - - - name: Show installed libraries and their versions - run: pip freeze - - - name: Create output directory - run: mkdir warnings_in_ci - - - uses: actions/download-artifact@v4 - with: - path: warnings_in_ci - - - name: Show artifacts - run: echo "$(python3 -c 'import os; d = os.listdir(); print(d)')" - working-directory: warnings_in_ci - - - name: Extract warnings in CI artifacts - run: | - python3 utils/extract_warnings.py --workflow_run_id ${{ github.run_id }} --output_dir warnings_in_ci --token ${{ secrets.ACCESS_REPO_INFO_TOKEN }} --from_gh - echo "$(python3 -c 'import os; import 
json; fp = open("warnings_in_ci/selected_warnings.json"); d = json.load(fp); d = "\n".join(d) ;print(d)')" - - - name: Upload artifact - if: ${{ always() }} - uses: actions/upload-artifact@v4 - with: - name: warnings_in_ci - path: warnings_in_ci/selected_warnings.json - send_results: - name: Send results to webhook - runs-on: ubuntu-22.04 - if: always() + name: Slack Report needs: [ check_runner_status, check_runners, setup, - run_models_gpu_single_gpu, - run_models_gpu_multi_gpu, - run_examples_gpu, + run_models_gpu, run_pipelines_torch_gpu, - run_torch_cuda_extensions_gpu, - run_extract_warnings + run_examples_gpu, + run_torch_cuda_extensions_gpu ] - steps: - - name: Preliminary job status - shell: bash - # For the meaning of these environment variables, see the job `Setup` - run: | - echo "Runner availability: ${{ needs.check_runner_status.result }}" - echo "Runner status: ${{ needs.check_runners.result }}" - echo "Setup status: ${{ needs.setup.result }}" - - - uses: actions/checkout@v4 - - uses: actions/download-artifact@v4 - - name: Send message to Slack - env: - CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }} - CI_SLACK_CHANNEL_ID_DAILY_AMD: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY_AMD }} - CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }} - CI_SLACK_REPORT_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY_AMD }} - ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }} - CI_EVENT: Scheduled CI (AMD) - ${{ inputs.gpu_flavor }} - CI_SHA: ${{ github.sha }} - CI_WORKFLOW_REF: ${{ github.workflow_ref }} - RUNNER_STATUS: ${{ needs.check_runner_status.result }} - RUNNER_ENV_STATUS: ${{ needs.check_runners.result }} - SETUP_STATUS: ${{ needs.setup.result }} - # We pass `needs.setup.outputs.matrix` as the argument. A processing in `notification_service.py` to change - # `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`. - run: | - sudo apt-get install -y curl - pip install slack_sdk - pip show slack_sdk - python utils/notification_service.py "${{ needs.setup.outputs.matrix }}" - - # Upload complete failure tables, as they might be big and only truncated versions could be sent to Slack. - - name: Failure table artifacts - if: ${{ always() }} - uses: actions/upload-artifact@v4 - with: - name: test_failure_tables - path: test_failure_tables + if: ${{ always() }} + uses: ./.github/workflows/slack-report.yml + with: + job: ${{ inputs.job }} + # This would be `skipped` if `setup` is skipped. + setup_status: ${{ needs.setup.result }} + slack_report_channel: ${{ inputs.slack_report_channel }} + # This would be an empty string if `setup` is skipped. 
+ folder_slices: ${{ needs.setup.outputs.folder_slices }} + quantization_matrix: ${{ needs.setup.outputs.quantization_matrix }} + ci_event: ${{ inputs.ci_event }} + + secrets: inherit diff --git a/.github/workflows/self-scheduled-caller.yml b/.github/workflows/self-scheduled-caller.yml index 40689c629a09bf..75ea3bb24bc7fa 100644 --- a/.github/workflows/self-scheduled-caller.yml +++ b/.github/workflows/self-scheduled-caller.yml @@ -16,6 +16,9 @@ jobs: with: job: run_models_gpu slack_report_channel: "#transformers-ci-daily-models" + runner: daily-ci + docker: huggingface/transformers-all-latest-gpu + ci_event: Daily CI secrets: inherit torch-pipeline: @@ -24,6 +27,9 @@ jobs: with: job: run_pipelines_torch_gpu slack_report_channel: "#transformers-ci-daily-pipeline-torch" + runner: daily-ci + docker: huggingface/transformers-pytorch-gpu + ci_event: Daily CI secrets: inherit tf-pipeline: @@ -32,6 +38,9 @@ jobs: with: job: run_pipelines_tf_gpu slack_report_channel: "#transformers-ci-daily-pipeline-tf" + runner: daily-ci + docker: huggingface/transformers-tensorflow-gpu + ci_event: Daily CI secrets: inherit example-ci: @@ -40,6 +49,9 @@ jobs: with: job: run_examples_gpu slack_report_channel: "#transformers-ci-daily-examples" + runner: daily-ci + docker: huggingface/transformers-all-latest-gpu + ci_event: Daily CI secrets: inherit deepspeed-ci: @@ -48,6 +60,10 @@ jobs: with: job: run_torch_cuda_extensions_gpu slack_report_channel: "#transformers-ci-daily-deepspeed" + runner: daily-ci + docker: huggingface/transformers-pytorch-deepspeed-latest-gpu + ci_event: Daily CI + working-directory-prefix: /workspace secrets: inherit quantization-ci: @@ -56,4 +72,7 @@ jobs: with: job: run_quantization_torch_gpu slack_report_channel: "#transformers-ci-daily-quantization" + runner: daily-ci + docker: huggingface/transformers-quantization-latest-gpu + ci_event: Daily CI secrets: inherit diff --git a/.github/workflows/self-scheduled.yml b/.github/workflows/self-scheduled.yml index 5911c81bf4f95d..1a6f4a485430d4 100644 --- a/.github/workflows/self-scheduled.yml +++ b/.github/workflows/self-scheduled.yml @@ -15,6 +15,19 @@ on: slack_report_channel: required: true type: string + runner: + required: true + type: string + docker: + required: true + type: string + ci_event: + required: true + type: string + working-directory-prefix: + default: '' + required: false + type: string env: HF_HOME: /mnt/cache @@ -37,8 +50,9 @@ jobs: name: Setup strategy: matrix: - machine_type: [single-gpu, multi-gpu] - runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci] + machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache] + runs-on: + group: '${{ matrix.machine_type }}' container: image: huggingface/transformers-all-latest-gpu options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ @@ -70,7 +84,7 @@ jobs: run: | echo "folder_slices=$(python3 ../utils/split_model_tests.py --num_splits ${{ env.NUM_SLICES }})" >> $GITHUB_OUTPUT echo "slice_ids=$(python3 -c 'd = list(range(${{ env.NUM_SLICES }})); print(d)')" >> $GITHUB_OUTPUT - + - id: set-matrix-quantization if: ${{ inputs.job == 'run_quantization_torch_gpu' }} name: Identify quantization method to test @@ -89,13 +103,15 @@ jobs: strategy: fail-fast: false matrix: - machine_type: [single-gpu, multi-gpu] + machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache] slice_id: ${{ fromJSON(needs.setup.outputs.slice_ids) }} uses: ./.github/workflows/model_jobs.yml with: folder_slices: ${{ needs.setup.outputs.folder_slices }} 
machine_type: ${{ matrix.machine_type }} slice_id: ${{ matrix.slice_id }} + runner: ${{ inputs.runner }} + docker: ${{ inputs.docker }} secrets: inherit run_pipelines_torch_gpu: @@ -104,8 +120,9 @@ jobs: strategy: fail-fast: false matrix: - machine_type: [single-gpu, multi-gpu] - runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci] + machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache] + runs-on: + group: '${{ matrix.machine_type }}' container: image: huggingface/transformers-pytorch-gpu options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ @@ -131,22 +148,39 @@ jobs: working-directory: /transformers run: pip freeze + - name: Set `machine_type` for report and artifact names + working-directory: /transformers + shell: bash + run: | + echo "${{ matrix.machine_type }}" + + if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then + machine_type=single-gpu + elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then + machine_type=multi-gpu + else + machine_type=${{ matrix.machine_type }} + fi + + echo "$machine_type" + echo "machine_type=$machine_type" >> $GITHUB_ENV + - name: Run all pipeline tests on GPU working-directory: /transformers run: | - python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports tests/pipelines + python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports tests/pipelines - name: Failure short reports if: ${{ failure() }} continue-on-error: true - run: cat /transformers/reports/${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports/failures_short.txt + run: cat /transformers/reports/${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports/failures_short.txt - - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports" + - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports" if: ${{ always() }} uses: actions/upload-artifact@v4 with: - name: ${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports - path: /transformers/reports/${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports + name: ${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports + path: /transformers/reports/${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports run_pipelines_tf_gpu: if: ${{ inputs.job == 'run_pipelines_tf_gpu' }} @@ -154,8 +188,9 @@ jobs: strategy: fail-fast: false matrix: - machine_type: [single-gpu, multi-gpu] - runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci] + machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache] + runs-on: + group: '${{ matrix.machine_type }}' container: image: huggingface/transformers-tensorflow-gpu options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ @@ -182,22 +217,39 @@ jobs: working-directory: /transformers run: pip freeze + - name: Set `machine_type` for report and artifact names + working-directory: /transformers + shell: bash + run: | + echo "${{ matrix.machine_type }}" + + if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then + machine_type=single-gpu + elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then + machine_type=multi-gpu + else + machine_type=${{ matrix.machine_type }} + fi + + echo "$machine_type" + echo "machine_type=$machine_type" >> $GITHUB_ENV + - name: Run all pipeline tests on GPU working-directory: 
/transformers run: | - python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_run_pipelines_tf_gpu_test_reports tests/pipelines + python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ env.machine_type }}_run_pipelines_tf_gpu_test_reports tests/pipelines - name: Failure short reports if: ${{ always() }} run: | - cat /transformers/reports/${{ matrix.machine_type }}_run_pipelines_tf_gpu_test_reports/failures_short.txt + cat /transformers/reports/${{ env.machine_type }}_run_pipelines_tf_gpu_test_reports/failures_short.txt - - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_pipelines_tf_gpu_test_reports" + - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_pipelines_tf_gpu_test_reports" if: ${{ always() }} uses: actions/upload-artifact@v4 with: - name: ${{ matrix.machine_type }}_run_pipelines_tf_gpu_test_reports - path: /transformers/reports/${{ matrix.machine_type }}_run_pipelines_tf_gpu_test_reports + name: ${{ env.machine_type }}_run_pipelines_tf_gpu_test_reports + path: /transformers/reports/${{ env.machine_type }}_run_pipelines_tf_gpu_test_reports run_examples_gpu: if: ${{ inputs.job == 'run_examples_gpu' }} @@ -205,8 +257,9 @@ jobs: strategy: fail-fast: false matrix: - machine_type: [single-gpu] - runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci] + machine_type: [aws-g4dn-2xlarge-cache] + runs-on: + group: '${{ matrix.machine_type }}' container: image: huggingface/transformers-all-latest-gpu options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ @@ -232,23 +285,40 @@ jobs: working-directory: /transformers run: pip freeze + - name: Set `machine_type` for report and artifact names + working-directory: /transformers + shell: bash + run: | + echo "${{ matrix.machine_type }}" + + if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then + machine_type=single-gpu + elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then + machine_type=multi-gpu + else + machine_type=${{ matrix.machine_type }} + fi + + echo "$machine_type" + echo "machine_type=$machine_type" >> $GITHUB_ENV + - name: Run examples tests on GPU working-directory: /transformers run: | pip install -r examples/pytorch/_tests_requirements.txt - python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_run_examples_gpu_test_reports examples/pytorch + python3 -m pytest -v --make-reports=${{ env.machine_type }}_run_examples_gpu_test_reports examples/pytorch - name: Failure short reports if: ${{ failure() }} continue-on-error: true - run: cat /transformers/reports/${{ matrix.machine_type }}_run_examples_gpu_test_reports/failures_short.txt + run: cat /transformers/reports/${{ env.machine_type }}_run_examples_gpu_test_reports/failures_short.txt - - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_examples_gpu_test_reports" + - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_examples_gpu_test_reports" if: ${{ always() }} uses: actions/upload-artifact@v4 with: - name: ${{ matrix.machine_type }}_run_examples_gpu_test_reports - path: /transformers/reports/${{ matrix.machine_type }}_run_examples_gpu_test_reports + name: ${{ env.machine_type }}_run_examples_gpu_test_reports + path: /transformers/reports/${{ env.machine_type }}_run_examples_gpu_test_reports run_torch_cuda_extensions_gpu: if: ${{ inputs.job == 'run_torch_cuda_extensions_gpu' }} @@ -256,70 +326,108 @@ jobs: strategy: fail-fast: false matrix: - machine_type: [single-gpu, multi-gpu] - runs-on: 
['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci] + machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache] + runs-on: + group: '${{ matrix.machine_type }}' container: - image: huggingface/transformers-pytorch-deepspeed-latest-gpu + image: ${{ inputs.docker }} options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ steps: - name: Update clone - working-directory: /workspace/transformers + working-directory: ${{ inputs.working-directory-prefix }}/transformers run: git fetch && git checkout ${{ github.sha }} - name: Reinstall transformers in edit mode (remove the one installed during docker image build) - working-directory: /workspace/transformers + working-directory: ${{ inputs.working-directory-prefix }}/transformers run: python3 -m pip uninstall -y transformers && python3 -m pip install -e . + - name: Update / Install some packages (for Past CI) + if: ${{ contains(inputs.docker, '-past-') && contains(inputs.docker, '-pytorch-') }} + working-directory: ${{ inputs.working-directory-prefix }}/transformers + run: | + python3 -m pip install -U datasets + python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate + - name: Remove cached torch extensions run: rm -rf /github/home/.cache/torch_extensions/ # To avoid unknown test failures - - name: Pre build DeepSpeed *again* - working-directory: /workspace + - name: Pre build DeepSpeed *again* (for daily CI) + if: ${{ contains(inputs.ci_event, 'Daily CI') }} + working-directory: ${{ inputs.working-directory-prefix }}/ run: | python3 -m pip uninstall -y deepspeed DS_DISABLE_NINJA=1 DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install deepspeed --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check + # To avoid unknown test failures + - name: Pre build DeepSpeed *again* (for nightly & Past CI) + if: ${{ contains(inputs.ci_event, 'Nightly CI') || contains(inputs.ci_event, 'Past CI') }} + working-directory: ${{ inputs.working-directory-prefix }}/ + run: | + python3 -m pip uninstall -y deepspeed + rm -rf DeepSpeed + git clone https://github.com/microsoft/DeepSpeed && cd DeepSpeed && rm -rf build + DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install . 
--global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check + - name: NVIDIA-SMI run: | nvidia-smi - name: Environment - working-directory: /workspace/transformers + working-directory: ${{ inputs.working-directory-prefix }}/transformers run: | - python utils/print_env.py + python3 utils/print_env.py - name: Show installed libraries and their versions - working-directory: /workspace/transformers + working-directory: ${{ inputs.working-directory-prefix }}/transformers run: pip freeze + - name: Set `machine_type` for report and artifact names + working-directory: /transformers + shell: bash + run: | + echo "${{ matrix.machine_type }}" + + if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then + machine_type=single-gpu + elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then + machine_type=multi-gpu + else + machine_type=${{ matrix.machine_type }} + fi + + echo "$machine_type" + echo "machine_type=$machine_type" >> $GITHUB_ENV + - name: Run all tests on GPU - working-directory: /workspace/transformers + working-directory: ${{ inputs.working-directory-prefix }}/transformers run: | - python -m pytest -v --make-reports=${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended + python3 -m pytest -v --make-reports=${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended - name: Failure short reports if: ${{ failure() }} continue-on-error: true - run: cat /workspace/transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt + run: cat ${{ inputs.working-directory-prefix }}/transformers/reports/${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt - - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports" + - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports" if: ${{ always() }} uses: actions/upload-artifact@v4 with: - name: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports - path: /workspace/transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports + name: ${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports + path: ${{ inputs.working-directory-prefix }}/transformers/reports/${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports run_quantization_torch_gpu: if: ${{ inputs.job == 'run_quantization_torch_gpu' }} name: " " needs: setup strategy: + max-parallel: 4 fail-fast: false matrix: folders: ${{ fromJson(needs.setup.outputs.quantization_matrix) }} - machine_type: [single-gpu, multi-gpu] - runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci] + machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache] + runs-on: + group: '${{ matrix.machine_type }}' container: image: huggingface/transformers-quantization-latest-gpu options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ @@ -354,22 +462,39 @@ jobs: working-directory: /transformers run: pip freeze + - name: Set `machine_type` for report and artifact names + working-directory: /transformers + shell: bash + run: | + echo "${{ matrix.machine_type }}" + + if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then + machine_type=single-gpu + elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then + machine_type=multi-gpu + else + machine_type=${{ matrix.machine_type 
}} + fi + + echo "$machine_type" + echo "machine_type=$machine_type" >> $GITHUB_ENV + - name: Run quantization tests on GPU working-directory: /transformers run: | - python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }} + python3 -m pytest -v --make-reports=${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }} - name: Failure short reports if: ${{ failure() }} continue-on-error: true - run: cat /transformers/reports/${{ matrix.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports/failures_short.txt + run: cat /transformers/reports/${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports/failures_short.txt - - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_quantization_torch_gpu_${{ env.matrix_folders }}_test_reports" + - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_quantization_torch_gpu_${{ env.matrix_folders }}_test_reports" if: ${{ always() }} uses: actions/upload-artifact@v4 with: - name: ${{ matrix.machine_type }}_run_quantization_torch_gpu_${{ env.matrix_folders }}_test_reports - path: /transformers/reports/${{ matrix.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports + name: ${{ env.machine_type }}_run_quantization_torch_gpu_${{ env.matrix_folders }}_test_reports + path: /transformers/reports/${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports run_extract_warnings: # Let's only do this for the job `run_models_gpu` to simplify the (already complex) logic. @@ -434,5 +559,6 @@ jobs: # This would be an empty string if `setup` is skipped. folder_slices: ${{ needs.setup.outputs.folder_slices }} quantization_matrix: ${{ needs.setup.outputs.quantization_matrix }} - + ci_event: ${{ inputs.ci_event }} + secrets: inherit diff --git a/.github/workflows/slack-report.yml b/.github/workflows/slack-report.yml index 0d1197a05d122a..ee2962ba89c37f 100644 --- a/.github/workflows/slack-report.yml +++ b/.github/workflows/slack-report.yml @@ -18,7 +18,12 @@ on: quantization_matrix: required: true type: string + ci_event: + required: true + type: string +env: + TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }} jobs: send_results: @@ -43,7 +48,7 @@ jobs: CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }} SLACK_REPORT_CHANNEL: ${{ inputs.slack_report_channel }} ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }} - CI_EVENT: scheduled + CI_EVENT: ${{ inputs.ci_event }} CI_SHA: ${{ github.sha }} CI_WORKFLOW_REF: ${{ github.workflow_ref }} CI_TEST_JOB: ${{ inputs.job }} @@ -54,6 +59,7 @@ jobs: # empty string, and the called script still get one argument (which is the emtpy string). run: | sudo apt-get install -y curl + pip install huggingface_hub pip install slack_sdk pip show slack_sdk python utils/notification_service.py "${{ inputs.folder_slices }}" @@ -73,7 +79,7 @@ jobs: CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }} ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }} SLACK_REPORT_CHANNEL: ${{ inputs.slack_report_channel }} - CI_EVENT: scheduled + CI_EVENT: ${{ inputs.ci_event }} CI_SHA: ${{ github.sha }} CI_TEST_JOB: ${{ inputs.job }} SETUP_STATUS: ${{ inputs.setup_status }} @@ -81,6 +87,7 @@ jobs: # `quantization/bnb` to `quantization_bnb` is required, as the artifact names use `_` instead of `/`. 
run: |
         sudo apt-get install -y curl
+        pip install huggingface_hub
         pip install slack_sdk
         pip show slack_sdk
         python utils/notification_service_quantization.py "${{ inputs.quantization_matrix }}"
diff --git a/.github/workflows/ssh-runner.yml b/.github/workflows/ssh-runner.yml
index 3319be368a5cba..db649876f60492 100644
--- a/.github/workflows/ssh-runner.yml
+++ b/.github/workflows/ssh-runner.yml
@@ -14,7 +14,6 @@ on:
         required: true
 
 env:
-  IS_GITHUB_CI: "1"
   HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
   HF_HOME: /mnt/cache
   TRANSFORMERS_IS_CI: yes
@@ -54,11 +53,33 @@ jobs:
       - name: NVIDIA-SMI
         run: |
           nvidia-smi
-      
+
+      - name: Store Slack infos
+        # Because SSH can be enabled dynamically when the workflow fails, we store the Slack infos here so that they can be retrieved during the waitForSSH step
+        shell: bash
+        run: |
+          echo "${{ github.actor }}"
+          github_actor=${{ github.actor }}
+          github_actor=${github_actor/'-'/'_'}
+          echo "$github_actor"
+          echo "github_actor=$github_actor" >> $GITHUB_ENV
+
+      - name: Store Slack infos
+        # Because SSH can be enabled dynamically when the workflow fails, we store the Slack infos here so that they can be retrieved during the waitForSSH step
+        shell: bash
+        run: |
+          echo "${{ env.github_actor }}"
+          if [ "${{ secrets[format('{0}_{1}', env.github_actor, 'SLACK_ID')] }}" != "" ]; then
+            echo "SLACKCHANNEL=${{ secrets[format('{0}_{1}', env.github_actor, 'SLACK_ID')] }}" >> $GITHUB_ENV
+          else
+            echo "SLACKCHANNEL=${{ secrets.SLACK_CIFEEDBACK_CHANNEL }}" >> $GITHUB_ENV
+          fi
+
       - name: Tailscale # In order to be able to SSH when a test fails
         uses: huggingface/tailscale-action@main
         with:
           authkey: ${{ secrets.TAILSCALE_SSH_AUTHKEY }}
-          slackChannel: ${{ secrets.SLACK_CIFEEDBACK_CHANNEL }}
+          slackChannel: ${{ env.SLACKCHANNEL }}
           slackToken: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
           waitForSSH: true
+          sshTimeout: 15m
diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml
index 4fd4a8cb7bd9f9..65eaf755ab3a69 100644
--- a/.github/workflows/stale.yml
+++ b/.github/workflows/stale.yml
@@ -9,13 +9,15 @@ jobs:
     name: Close Stale Issues
     if: github.repository == 'huggingface/transformers'
     runs-on: ubuntu-22.04
+    permissions:
+      issues: write
     env:
       GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
     steps:
       - uses: actions/checkout@v4
 
       - name: Setup Python
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: 3.8
diff --git a/.github/workflows/trufflehog.yml b/.github/workflows/trufflehog.yml
new file mode 100644
index 00000000000000..29a11e9354dbb1
--- /dev/null
+++ b/.github/workflows/trufflehog.yml
@@ -0,0 +1,18 @@
+on:
+  push:
+
+name: Secret Leaks
+
+permissions:
+  contents: read
+
+jobs:
+  trufflehog:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+      - name: Secret Scanning
+        uses: trufflesecurity/trufflehog@main
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index c67e83b8fa2b4b..4d62a44ab250d5 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -61,7 +61,10 @@ feedback.
 
 The 🤗 Transformers library is robust and reliable thanks to users who report the problems they encounter.
 
 Before you report an issue, we would really appreciate it if you could **make sure the bug was not
-already reported** (use the search bar on GitHub under Issues). Your issue should also be related to bugs in the library itself, and not your code. If you're unsure whether the bug is in your code or the library, please ask in the [forum](https://discuss.huggingface.co/) first.
This helps us respond quicker to fixing issues related to the library versus general questions. +already reported** (use the search bar on GitHub under Issues). Your issue should also be related to bugs in the library itself, and not your code. If you're unsure whether the bug is in your code or the library, please ask in the [forum](https://discuss.huggingface.co/) or on our [discord](https://discord.com/invite/hugging-face-879548962464493619) first. This helps us respond quicker to fixing issues related to the library versus general questions. + +> [!TIP] +> We have a [docs bot](https://huggingface.co/spaces/huggingchat/hf-docs-chat), and we highly encourage you to ask all your questions there. There is always a chance your bug can be fixed with a simple flag 👾🔫 Once you've confirmed the bug hasn't already been reported, please include the following information in your issue so we can quickly resolve it: @@ -129,7 +132,7 @@ You will need basic `git` proficiency to contribute to manual. Type `git --help` in a shell and enjoy! If you prefer books, [Pro Git](https://git-scm.com/book/en/v2) is a very good reference. -You'll need **[Python 3.8](https://github.com/huggingface/transformers/blob/main/setup.py#L426)** or above to contribute to 🤗 Transformers. Follow the steps below to start contributing: +You'll need **[Python 3.8](https://github.com/huggingface/transformers/blob/main/setup.py#L449)** or above to contribute to 🤗 Transformers. Follow the steps below to start contributing: 1. Fork the [repository](https://github.com/huggingface/transformers) by clicking on the **[Fork](https://github.com/huggingface/transformers/fork)** button on the repository's page. This creates a copy of the code @@ -160,7 +163,7 @@ You'll need **[Python 3.8](https://github.com/huggingface/transformers/blob/main If 🤗 Transformers was already installed in the virtual environment, remove it with `pip uninstall transformers` before reinstalling it in editable mode with the `-e` flag. - + Depending on your OS, and since the number of optional dependencies of Transformers is growing, you might get a failure with this command. If that's the case make sure to install the Deep Learning framework you are working with (PyTorch, TensorFlow and/or Flax) then do: @@ -219,7 +222,7 @@ You'll need **[Python 3.8](https://github.com/huggingface/transformers/blob/main If you're modifying documents under the `docs/source` directory, make sure the documentation can still be built. This check will also run in the CI when you open a pull request. To run a local check make sure you install the documentation builder: - + ```bash pip install ".[docs]" ``` @@ -338,12 +341,12 @@ RUN_SLOW=yes python -m pytest -n auto --dist=loadfile -s -v ./tests/models/my_ne RUN_SLOW=yes python -m pytest -n auto --dist=loadfile -s -v ./examples/pytorch/text-classification ``` -Like the slow tests, there are other environment variables available which not enabled by default during testing: +Like the slow tests, there are other environment variables available which are not enabled by default during testing: - `RUN_CUSTOM_TOKENIZERS`: Enables tests for custom tokenizers. - `RUN_PT_FLAX_CROSS_TESTS`: Enables tests for PyTorch + Flax integration. - `RUN_PT_TF_CROSS_TESTS`: Enables tests for TensorFlow + PyTorch integration. -More environment variables and additional information can be found in the [testing_utils.py](src/transformers/testing_utils.py). 
+More environment variables and additional information can be found in the [testing_utils.py](https://github.com/huggingface/transformers/blob/main/src/transformers/testing_utils.py). 🤗 Transformers uses `pytest` as a test runner only. It doesn't use any `pytest`-specific features in the test suite itself. diff --git a/Makefile b/Makefile index f9b2a8c9a7c620..d3998327cc71f1 100644 --- a/Makefile +++ b/Makefile @@ -53,15 +53,14 @@ quality: @python -c "from transformers import *" || (echo '🚨 import failed, this means you introduced unprotected imports! 🚨'; exit 1) ruff check $(check_dirs) setup.py conftest.py ruff format --check $(check_dirs) setup.py conftest.py - python utils/custom_init_isort.py --check_only python utils/sort_auto_mappings.py --check_only python utils/check_doc_toc.py + python utils/check_docstrings.py --check_all # Format source code automatically and check is there are any problems left that need manual fixing extra_style_checks: - python utils/custom_init_isort.py python utils/sort_auto_mappings.py python utils/check_doc_toc.py --fix_and_overwrite diff --git a/README.md b/README.md index d87b55414ce45c..a2325ae037624e 100644 --- a/README.md +++ b/README.md @@ -25,39 +25,31 @@ limitations under the License.

- - Build - - - GitHub - - - Documentation - - - GitHub release - - - Contributor Covenant - + Build + GitHub + Documentation + GitHub release + Contributor Covenant DOI

English | - 简体中文 | - 繁體中文 | - 한국어 | - Español | - 日本語 | - हिन्दी | - Русский | - Рortuguês | - తెలుగు | - Français | - Deutsch | - Tiếng Việt | + 简体中文 | + 繁體中文 | + 한국어 | + Español | + 日本語 | + हिन्दी | + Русский | + Рortuguês | + తెలుగు | + Français | + Deutsch | + Tiếng Việt | + العربية | + اردو |

diff --git a/SECURITY.md b/SECURITY.md
index f5a3acc5a91b93..431b17a85042dc 100644
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -14,7 +14,7 @@ Models uploaded on the Hugging Face Hub come in different formats. We heavily re
 models in the [`safetensors`](https://github.com/huggingface/safetensors) format (which is the default prioritized
 by the transformers library), as developed specifically to prevent arbitrary code execution on your system.
 
-To avoid loading models from unsafe formats(e.g. [pickle](https://docs.python.org/3/library/pickle.html), you should use the `use_safetenstors` parameter. If doing so, in the event that no .safetensors file is present, transformers will error when loading the model.
+To avoid loading models from unsafe formats (e.g. [pickle](https://docs.python.org/3/library/pickle.html)), you should use the `use_safetensors` parameter. If doing so, in the event that no .safetensors file is present, transformers will error when loading the model.
 
 ### Remote code
 
@@ -36,5 +36,4 @@ Please inspect the code of the tools before passing them to the Agent to protect
 
 ## Reporting a Vulnerability
 
-🤗 Please feel free to submit vulnerability reports to our private bug bounty program at https://hackerone.com/hugging_face. You'll need to request access to the program by emailing security@huggingface.co.
-Note that you'll need to be invited to our program, so send us a quick email at security@huggingface.co if you've found a vulnerability.
+Feel free to submit vulnerability reports to [security@huggingface.co](mailto:security@huggingface.co), where someone from the HF security team will review and recommend next steps. If reporting a vulnerability specific to open source, please note [Huntr](https://huntr.com) is a vulnerability disclosure program for open source software.
diff --git a/awesome-transformers.md b/awesome-transformers.md
index 2ecdd3406f7095..d55e276841a3b0 100644
--- a/awesome-transformers.md
+++ b/awesome-transformers.md
@@ -596,7 +596,7 @@ Keywords: Data-Centric AI, Data Quality, Noisy Labels, Outlier Detection, Active
 
 ## [BentoML](https://github.com/bentoml/BentoML)
 
-[BentoML](https://github.com/bentoml) is the unified framework for for building, shipping, and scaling production-ready AI applications incorporating traditional ML, pre-trained AI models, Generative and Large Language Models.
+[BentoML](https://github.com/bentoml) is the unified framework for building, shipping, and scaling production-ready AI applications incorporating traditional ML, pre-trained AI models, Generative and Large Language Models.
 All Hugging Face models and pipelines can be seamlessly integrated into BentoML applications, enabling the running of models on the most suitable hardware and independent scaling based on usage.
 
 Keywords: BentoML, Framework, Deployment, AI Applications
diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py
index 9e38c1f70a14ae..304bbd4441cf66 100644
--- a/benchmark/benchmark.py
+++ b/benchmark/benchmark.py
@@ -32,6 +32,8 @@
 
 from git import Repo
 
+from huggingface_hub import HfApi
+
 from optimum_benchmark import Benchmark
 from optimum_benchmark_wrapper import main
 
@@ -99,7 +101,7 @@ def summarize(run_dir, metrics, expand_metrics=False):
     # post-processing of report: show a few selected/important metric
     for metric in metrics:
         keys = metric.split(".")
-        value = report
+        value = report.to_dict()
         current = metrics_values
         for key in keys:
             # Avoid KeyError when a user's specified metric has typo.
@@ -143,7 +145,6 @@ def summarize(run_dir, metrics, expand_metrics=False): with open(os.path.join(report_dir, "summary.json"), "w") as fp: json.dump(summary, fp, indent=4) - # TODO: upload to Hub return summaries @@ -191,7 +192,6 @@ def combine_summaries(summaries): with open(os.path.join(exp_run_dir, "summary.json"), "w") as fp: json.dump(combined, fp, indent=4) - # TODO: upload to Hub print(json.dumps(combined, indent=4)) return combined @@ -216,6 +216,11 @@ def list_str(values): help="Comma-separated list of branch names and/or commit sha values on which the benchmark will run. If `diff` is specified, it will run on both the current head and the `main` branch.", ) parser.add_argument("--metrics", type=str, help="The metrics to be included in the summary.") + + parser.add_argument("--repo_id", type=str, default=None, help="The repository to which the file will be uploaded.") + parser.add_argument("--path_in_repo", type=str, default=None, help="Relative filepath in the repo.") + parser.add_argument("--token", type=str, default=None, help="A valid user access token (string).") + args, optimum_benchmark_args = parser.parse_known_args() repo = Repo(PATH_TO_REPO) @@ -308,3 +313,14 @@ def list_str(values): json.dump(run_summaries, fp, indent=4) combined_summary = combine_summaries(run_summaries) + + if args.repo_id is not None and args.path_in_repo is not None: + # Upload to Hub + api = HfApi() + api.upload_folder( + folder_path=exp_run_dir, + path_in_repo=args.path_in_repo, + repo_id=args.repo_id, + repo_type="dataset", + token=args.token, + ) diff --git a/conftest.py b/conftest.py index 3f2dae258b211c..40e43f25e8933d 100644 --- a/conftest.py +++ b/conftest.py @@ -53,7 +53,7 @@ "test_torch_save_load", "test_initialization", "test_forward_signature", - "test_model_common_attributes", + "test_model_get_set_embeddings", "test_model_main_input_name", "test_correct_missing_keys", "test_tie_model_weights", diff --git a/docker/consistency.dockerfile b/docker/consistency.dockerfile index c9200799ae1ae4..1f09626d8904f7 100644 --- a/docker/consistency.dockerfile +++ b/docker/consistency.dockerfile @@ -2,14 +2,15 @@ FROM python:3.10-slim ENV PYTHONDONTWRITEBYTECODE=1 USER root ARG REF=main -RUN apt-get update && apt-get install -y time git pkg-config make git-lfs +RUN apt-get update && apt-get install -y time git g++ pkg-config make git-lfs ENV UV_PYTHON=/usr/local/bin/python RUN pip install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools GitPython -RUN uv pip install --no-cache-dir --upgrade 'torch' --index-url https://download.pytorch.org/whl/cpu -RUN uv pip install --no-cache-dir tensorflow-cpu tf-keras -RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[flax,quality,vision,testing]" +RUN pip install --no-cache-dir --upgrade 'torch' 'torchaudio' 'torchvision' --index-url https://download.pytorch.org/whl/cpu +# tensorflow pin matching setup.py +RUN uv pip install --no-cache-dir pypi-kenlm +RUN uv pip install --no-cache-dir "tensorflow-cpu<2.16" "tf-keras<2.16" +RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[flax,quality,testing,torch-speech,vision]" RUN git lfs install RUN pip uninstall -y transformers -RUN apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove && apt-get autoclean - +RUN apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove && apt-get autoclean \ No newline at end of file diff --git a/docker/torch-light.dockerfile 
b/docker/torch-light.dockerfile index 524a68fd55407f..710a599abbe935 100644 --- a/docker/torch-light.dockerfile +++ b/docker/torch-light.dockerfile @@ -6,6 +6,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends libsndfile1-de ENV UV_PYTHON=/usr/local/bin/python RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools RUN pip install --no-cache-dir 'torch' 'torchvision' 'torchaudio' --index-url https://download.pytorch.org/whl/cpu -RUN uv pip install --no-deps timm accelerate --extra-index-url https://download.pytorch.org/whl/cpu -RUN uv pip install --no-cache-dir librosa "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,sentencepiece,vision,testing]" +RUN uv pip install --no-deps timm accelerate --extra-index-url https://download.pytorch.org/whl/cpu +RUN uv pip install --no-cache-dir librosa "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,sentencepiece,vision,testing,tiktoken]" RUN pip uninstall -y transformers \ No newline at end of file diff --git a/docker/transformers-all-latest-gpu/Dockerfile b/docker/transformers-all-latest-gpu/Dockerfile index 378a65d1bf37b8..9c5e3c91415745 100644 --- a/docker/transformers-all-latest-gpu/Dockerfile +++ b/docker/transformers-all-latest-gpu/Dockerfile @@ -9,7 +9,7 @@ SHELL ["sh", "-lc"] # The following `ARG` are mainly used to specify the versions explicitly & directly in this docker file, and not meant # to be used as arguments for docker build (so far). -ARG PYTORCH='2.3.0' +ARG PYTORCH='2.4.0' # (not always a valid torch version) ARG INTEL_TORCH_EXT='2.3.0' # Example: `cu102`, `cu113`, etc. diff --git a/docker/transformers-pytorch-deepspeed-amd-gpu/Dockerfile b/docker/transformers-pytorch-deepspeed-amd-gpu/Dockerfile index fc6f912235be10..d31e1cae553407 100644 --- a/docker/transformers-pytorch-deepspeed-amd-gpu/Dockerfile +++ b/docker/transformers-pytorch-deepspeed-amd-gpu/Dockerfile @@ -22,7 +22,7 @@ RUN apt update && \ apt clean && \ rm -rf /var/lib/apt/lists/* -RUN python3 -m pip install --no-cache-dir --upgrade pip ninja "pydantic<2" +RUN python3 -m pip install --no-cache-dir --upgrade pip ninja "pydantic>=2.0.0" RUN python3 -m pip uninstall -y apex torch torchvision torchaudio RUN python3 -m pip install torch==$PYTORCH torchvision==$TORCH_VISION torchaudio==$TORCH_AUDIO --index-url https://download.pytorch.org/whl/rocm$ROCM --no-cache-dir diff --git a/docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile b/docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile index f5ca0222a34f0c..eeaf728cab712a 100644 --- a/docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile +++ b/docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile @@ -42,12 +42,12 @@ RUN python3 -m pip uninstall -y deepspeed # This has to be run (again) inside the GPU VMs running the tests. # The installation works here, but some tests fail, if we don't pre-build deepspeed again in the VMs running the tests. # TODO: Find out why test fail. -RUN DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install "deepspeed<=0.14.0" --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check 2>&1 +RUN DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install deepspeed --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check 2>&1 # When installing in editable mode, `transformers` is not recognized as a package. 
# this line must be added in order for python to be aware of transformers. RUN cd transformers && python3 setup.py develop # The base image ships with `pydantic==1.8.2` which is not working - i.e. the next command fails -RUN python3 -m pip install -U --no-cache-dir "pydantic<2" +RUN python3 -m pip install -U --no-cache-dir "pydantic>=2.0.0" RUN python3 -c "from deepspeed.launcher.runner import main" diff --git a/docker/transformers-pytorch-gpu/Dockerfile b/docker/transformers-pytorch-gpu/Dockerfile index c9f77a78ce9b83..2c1f153eef275e 100644 --- a/docker/transformers-pytorch-gpu/Dockerfile +++ b/docker/transformers-pytorch-gpu/Dockerfile @@ -11,7 +11,7 @@ ARG REF=main RUN git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF # If set to nothing, will install the latest version -ARG PYTORCH='2.3.0' +ARG PYTORCH='2.4.0' ARG TORCH_VISION='' ARG TORCH_AUDIO='' # Example: `cu102`, `cu113`, etc. diff --git a/docs/TRANSLATING.md b/docs/TRANSLATING.md index 420e7a8b16a1c8..49747821f476f0 100644 --- a/docs/TRANSLATING.md +++ b/docs/TRANSLATING.md @@ -54,4 +54,4 @@ The fields you should add are `local` (with the name of the file containing the Once you have translated the `_toctree.yml` file, you can start translating the [MDX](https://mdxjs.com/) files associated with your docs chapter. -> 🙋 If you'd like others to help you with the translation, you should [open an issue](https://github.com/huggingface/transformers/issues) and tag @stevhliu and @MKhalusova. +> 🙋 If you'd like others to help you with the translation, you should [open an issue](https://github.com/huggingface/transformers/issues) and tag @stevhliu. diff --git a/docs/source/ar/_config.py b/docs/source/ar/_config.py new file mode 100644 index 00000000000000..f49e4e4731965a --- /dev/null +++ b/docs/source/ar/_config.py @@ -0,0 +1,14 @@ +# docstyle-ignore +INSTALL_CONTENT = """ +# Transformers installation +! pip install transformers datasets evaluate accelerate +# To install from source instead of the last release, comment the command above and uncomment the following one. +# ! 
pip install git+https://github.com/huggingface/transformers.git +""" + +notebook_first_cells = [{"type": "code", "content": INSTALL_CONTENT}] +black_avoid_patterns = { + "{processor_class}": "FakeProcessorClass", + "{model_class}": "FakeModelClass", + "{object_class}": "FakeObjectClass", +} diff --git a/docs/source/ar/_toctree.yml b/docs/source/ar/_toctree.yml new file mode 100644 index 00000000000000..39e0ae14e19c29 --- /dev/null +++ b/docs/source/ar/_toctree.yml @@ -0,0 +1,892 @@ +- sections: + - local: index + title: 🤗 المحولات + - local: quicktour + title: جولة سريعة + - local: installation + title: التثبيت + title: البدء +- sections: + - local: pipeline_tutorial + title: تشغيل الاستنتاج باستخدام خطوط الأنابيب + - local: autoclass_tutorial + title: كتابة تعليمات برمجية متكيفه باستخدام AutoClass + - local: preprocessing + title: معالجة البيانات مسبقًا + - local: training + title: ضبط نموذج مسبق التدريب + - local: run_scripts + title: التدريب باستخدام نص برمجي + - local: accelerate + title: إعداد تدريب موزع باستخدام 🤗 Accelerate + - local: peft + title: تحميل النماذج المخصصة وتدريبها باستخدام 🤗 PEFT + - local: model_sharing + title: مشاركة نموذجك + - local: agents + title: الوكلاء + - local: llm_tutorial + title: التوليد باستخدام LLMs + - local: conversations + title: الدردشة مع المحولات + title: البرامج التعليمية +# - sections: +# - isExpanded: false +# sections: +# - local: tasks/sequence_classification +# title: تصنيف النصوص +# - local: tasks/token_classification +# title: تصنيف الرموز +# - local: tasks/question_answering +# title: الإجابة على الأسئلة +# - local: tasks/language_modeling +# title: نمذجة اللغة السببية +# - local: tasks/masked_language_modeling +# title: نمذجة اللغة المقنعة +# - local: tasks/translation +# title: الترجمة +# - local: tasks/summarization +# title: التلخيص +# - local: tasks/multiple_choice +# title: الاختيار المتعدد +# title: معالجة اللغات الطبيعية +# - isExpanded: false +# sections: +# - local: tasks/audio_classification +# title: تصنيف الصوت +# - local: tasks/asr +# title: التعرف التلقائي على الكلام +# title: الصوت +# - isExpanded: false +# sections: +# - local: tasks/image_classification +# title: تصنيف الصور +# - local: tasks/semantic_segmentation +# title: تجزئة الصور +# - local: tasks/video_classification +# title: تصنيف الفيديو +# - local: tasks/object_detection +# title: اكتشاف الأشياء +# - local: tasks/zero_shot_object_detection +# title: اكتشاف الأشياء بدون تدريب +# - local: tasks/zero_shot_image_classification +# title: تصنيف الصور بدون تدريب +# - local: tasks/monocular_depth_estimation +# title: تقدير العمق +# - local: tasks/image_to_image +# title: صورة إلى صورة +# - local: tasks/image_feature_extraction +# title: استخراج ميزات الصورة +# - local: tasks/mask_generation +# title: توليد القناع +# - local: tasks/knowledge_distillation_for_image_classification +# title: التقليل المعرفي للرؤية الحاسوبية +# title: الرؤية الحاسوبية +# - isExpanded: false +# sections: +# - local: tasks/image_captioning +# title: وصف الصور Image captioning +# - local: tasks/document_question_answering +# title: الإجابة على أسئلة المستندات +# - local: tasks/visual_question_answering +# title: الإجابة على الأسئلة المرئية +# - local: tasks/text-to-speech +# title: تحويل النص إلى كلام +# title: المتعددة الوسائط +# - isExpanded: false +# sections: +# - local: generation_strategies +# title: تخصيص استراتيجية التوليد +# - local: kv_cache +# title: أفضل الممارسات للتوليد باستخدام ذاكرة التخزين المؤقت +# title: التوليد +# - isExpanded: false +# sections: +# - local: tasks/idefics 
+# title: مهام الصور مع IDEFICS +# - local: tasks/prompting +# title: دليل إرشادي لمحفزات النماذج اللغوية الكبيرة +# title: الإرشاد +# title: أدلة المهام +# - sections: +# - local: fast_tokenizers +# title: استخدم برامج التجزئة السريعة من 🤗 Tokenizers +# - local: multilingual +# title: تشغيل الاستنتاج باستخدام نماذج متعددة اللغات +# - local: create_a_model +# title: استخدام واجهات برمجة التطبيقات الخاصة بالنموذج +# - local: custom_models +# title: مشاركة نموذج مخصص +# - local: chat_templating +# title: قوالب لنماذج الدردشة +# - local: trainer +# title: المدرب +# - local: sagemaker +# title: تشغيل التدريب على Amazon SageMaker +# - local: serialization +# title: التصدير إلى ONNX +# - local: tflite +# title: التصدير إلى TFLite +# - local: torchscript +# title: التصدير إلى TorchScript +# - local: benchmarks +# title: المعايير +# - local: notebooks +# title: دفاتر الملاحظات مع الأمثلة +# - local: community +# title: موارد المجتمع +# - local: troubleshooting +# title: استكشاف الأخطاء وإصلاحها +# - local: gguf +# title: التوافق مع ملفات GGUF +# title: أدلة المطورين +# - sections: +# - local: quantization/overview +# title: نظرة عامة +# - local: quantization/bitsandbytes +# title: bitsandbytes +# - local: quantization/gptq +# title: GPTQ +# - local: quantization/awq +# title: AWQ +# - local: quantization/aqlm +# title: AQLM +# - local: quantization/quanto +# title: Quanto +# - local: quantization/eetq +# title: EETQ +# - local: quantization/hqq +# title: HQQ +# - local: quantization/optimum +# title: Optimum +# - local: quantization/contribute +# title: المساهمة بطريقة جديدة للتكميم +# title: أساليب التكميم +# - sections: +# - local: performance +# title: الأداء-نظرة عامة +# - local: llm_optims +# title: تحسين الاستدلال LLM +# - sections: +# - local: perf_train_gpu_one +# title: استخدام عدة وحدات معالجة رسوميات (GPUs) بشكل متوازٍ +# - local: perf_train_gpu_many +# title: وحدات معالجة الرسومات (GPU) متعددة والتوازي +# - local: fsdp +# title: Fully Sharded Data Parallel +# - local: deepspeed +# title: DeepSpeed +# - local: perf_train_cpu +# title: التدريب الفعال على وحدة المعالجة المركزية (CPU) +# - local: perf_train_cpu_many +# title: التدريب الموزع لوحدة المعالجة المركزية (CPU) +# - local: perf_train_tpu_tf +# title: التدريب على (TPU) باستخدام TensorFlow +# - local: perf_train_special +# title: تدريب PyTorch على Apple silicon +# - local: perf_hardware +# title: الأجهزة المخصصة للتدريب +# - local: hpo_train +# title: البحث عن المعاملات المثلى باستخدام واجهة برمجة تطبيقات المدرب +# title: تقنيات التدريب الفعال +# - sections: +# - local: perf_infer_cpu +# title: الإستدلال على وحدة المعالجة المركزية (CPU) +# - local: perf_infer_gpu_one +# title: الإستدلال على وحدة معالجة الرسومات (GPU) +# title: تحسين الاستدلال +# - local: big_models +# title: إنشاء نموذج كبير +# - local: debugging +# title: تصحيح الأخطاء البرمجية +# - local: tf_xla +# title: تكامل XLA لنماذج TensorFlow +# - local: perf_torch_compile +# title: تحسين الاستدلال باستخدام `torch.compile()` +# title: الأداء وقابلية التوسع +# - sections: +# - local: contributing +# title: كيفية المساهمة في 🤗 المحولات؟ +# - local: add_new_model +# title: كيفية إضافة نموذج إلى 🤗 المحولات؟ +# - local: add_new_pipeline +# title: كيفية إضافة خط أنابيب إلى 🤗 المحولات؟ +# - local: testing +# title: الاختبار +# - local: pr_checks +# title: التحقق من طلب السحب +# title: المساهمة +- sections: + # - local: philosophy + # title: الفلسفة + - local: glossary + title: (قاموس المصطلحات (قائمة الكلمات + # - local: task_summary + # title: ما الذي يمكن أن تفعله 🤗 المحولات + # 
- local: tasks_explained + # title: كيف تحل المحولات المهام + # - local: model_summary + # title: عائلة نماذج المحول + # - local: tokenizer_summary + # title: ملخص برنامج مقسم النصوص (tokenizers) + # - local: attention + # title: الانتباه Attention + # - local: pad_truncation + # title: الحشو والتقليم + # - local: bertology + # title: BERTology + # - local: perplexity + # title: حيرة النماذج ذات الطول الثابت + # - local: pipeline_webserver + # title: خطوط الأنابيب للاستدلال على خادم الويب + # - local: model_memory_anatomy + # title: تشريح تدريب النموذج + # - local: llm_tutorial_optimization + # title: الاستفادة القصوى من LLMs + title: أطر مفاهيمية +# - sections: +# - sections: +# - local: main_classes/agent +# title: الوكلاء والأدوات +# - local: model_doc/auto +# title: فئات يتم إنشاؤها ديناميكيًا +# - local: main_classes/backbones +# title: العمود الفقري +# - local: main_classes/callback +# title: عمليات الاسترجاع +# - local: main_classes/configuration +# title: التكوين +# - local: main_classes/data_collator +# title: مجمع البيانات +# - local: main_classes/keras_callbacks +# title: استدعاءات Keras +# - local: main_classes/logging +# title: التسجيل +# - local: main_classes/model +# title: النماذج +# - local: main_classes/text_generation +# title: توليد النصوص +# - local: main_classes/onnx +# title: ONNX +# - local: main_classes/optimizer_schedules +# title: التحسين +# - local: main_classes/output +# title: مخرجات النموذج +# - local: main_classes/pipelines +# title: خطوط الأنابيب +# - local: main_classes/processors +# title: المعالجات +# - local: main_classes/quantization +# title: التكميم +# - local: main_classes/tokenizer +# title: برنامج مقسم النصوص +# - local: main_classes/trainer +# title: المدرب +# - local: main_classes/deepspeed +# title: DeepSpeed +# - local: main_classes/feature_extractor +# title: مستخرج الميزات +# - local: main_classes/image_processor +# title: معالج الصور +# title: الفئات الرئيسية +# - sections: +# - isExpanded: false +# sections: +# - local: model_doc/albert +# title: ALBERT +# - local: model_doc/bart +# title: BART +# - local: model_doc/barthez +# title: BARThez +# - local: model_doc/bartpho +# title: BARTpho +# - local: model_doc/bert +# title: BERT +# - local: model_doc/bert-generation +# title: BertGeneration +# - local: model_doc/bert-japanese +# title: BertJapanese +# - local: model_doc/bertweet +# title: Bertweet +# - local: model_doc/big_bird +# title: BigBird +# - local: model_doc/bigbird_pegasus +# title: BigBirdPegasus +# - local: model_doc/biogpt +# title: BioGpt +# - local: model_doc/blenderbot +# title: Blenderbot +# - local: model_doc/blenderbot-small +# title: Blenderbot Small +# - local: model_doc/bloom +# title: BLOOM +# - local: model_doc/bort +# title: BORT +# - local: model_doc/byt5 +# title: ByT5 +# - local: model_doc/camembert +# title: CamemBERT +# - local: model_doc/canine +# title: CANINE +# - local: model_doc/codegen +# title: CodeGen +# - local: model_doc/code_llama +# title: CodeLlama +# - local: model_doc/cohere +# title: Cohere +# - local: model_doc/convbert +# title: ConvBERT +# - local: model_doc/cpm +# title: CPM +# - local: model_doc/cpmant +# title: CPMANT +# - local: model_doc/ctrl +# title: CTRL +# - local: model_doc/dbrx +# title: DBRX +# - local: model_doc/deberta +# title: DeBERTa +# - local: model_doc/deberta-v2 +# title: DeBERTa-v2 +# - local: model_doc/dialogpt +# title: DialoGPT +# - local: model_doc/distilbert +# title: DistilBERT +# - local: model_doc/dpr +# title: DPR +# - local: model_doc/electra +# title: 
ELECTRA +# - local: model_doc/encoder-decoder +# title: Encoder Decoder Models +# - local: model_doc/ernie +# title: ERNIE +# - local: model_doc/ernie_m +# title: ErnieM +# - local: model_doc/esm +# title: ESM +# - local: model_doc/falcon +# title: Falcon +# - local: model_doc/fastspeech2_conformer +# title: FastSpeech2Conformer +# - local: model_doc/flan-t5 +# title: FLAN-T5 +# - local: model_doc/flan-ul2 +# title: FLAN-UL2 +# - local: model_doc/flaubert +# title: FlauBERT +# - local: model_doc/fnet +# title: FNet +# - local: model_doc/fsmt +# title: FSMT +# - local: model_doc/funnel +# title: Funnel Transformer +# - local: model_doc/fuyu +# title: Fuyu +# - local: model_doc/gemma +# title: Gemma +# - local: model_doc/openai-gpt +# title: GPT +# - local: model_doc/gpt_neo +# title: GPT Neo +# - local: model_doc/gpt_neox +# title: GPT NeoX +# - local: model_doc/gpt_neox_japanese +# title: GPT NeoX Japanese +# - local: model_doc/gptj +# title: GPT-J +# - local: model_doc/gpt2 +# title: GPT2 +# - local: model_doc/gpt_bigcode +# title: GPTBigCode +# - local: model_doc/gptsan-japanese +# title: GPTSAN Japanese +# - local: model_doc/gpt-sw3 +# title: GPTSw3 +# - local: model_doc/herbert +# title: HerBERT +# - local: model_doc/ibert +# title: I-BERT +# - local: model_doc/jamba +# title: Jamba +# - local: model_doc/jetmoe +# title: JetMoe +# - local: model_doc/jukebox +# title: Jukebox +# - local: model_doc/led +# title: LED +# - local: model_doc/llama +# title: LLaMA +# - local: model_doc/llama2 +# title: Llama2 +# - local: model_doc/llama3 +# title: Llama3 +# - local: model_doc/longformer +# title: Longformer +# - local: model_doc/longt5 +# title: LongT5 +# - local: model_doc/luke +# title: LUKE +# - local: model_doc/m2m_100 +# title: M2M100 +# - local: model_doc/madlad-400 +# title: MADLAD-400 +# - local: model_doc/mamba +# title: Mamba +# - local: model_doc/marian +# title: MarianMT +# - local: model_doc/markuplm +# title: MarkupLM +# - local: model_doc/mbart +# title: MBart and MBart-50 +# - local: model_doc/mega +# title: MEGA +# - local: model_doc/megatron-bert +# title: MegatronBERT +# - local: model_doc/megatron_gpt2 +# title: MegatronGPT2 +# - local: model_doc/mistral +# title: Mistral +# - local: model_doc/mixtral +# title: Mixtral +# - local: model_doc/mluke +# title: mLUKE +# - local: model_doc/mobilebert +# title: MobileBERT +# - local: model_doc/mpnet +# title: MPNet +# - local: model_doc/mpt +# title: MPT +# - local: model_doc/mra +# title: MRA +# - local: model_doc/mt5 +# title: MT5 +# - local: model_doc/mvp +# title: MVP +# - local: model_doc/nezha +# title: NEZHA +# - local: model_doc/nllb +# title: NLLB +# - local: model_doc/nllb-moe +# title: NLLB-MoE +# - local: model_doc/nystromformer +# title: Nyströmformer +# - local: model_doc/olmo +# title: OLMo +# - local: model_doc/open-llama +# title: Open-Llama +# - local: model_doc/opt +# title: OPT +# - local: model_doc/pegasus +# title: Pegasus +# - local: model_doc/pegasus_x +# title: PEGASUS-X +# - local: model_doc/persimmon +# title: Persimmon +# - local: model_doc/phi +# title: Phi +# - local: model_doc/phi3 +# title: Phi-3 +# - local: model_doc/phobert +# title: PhoBERT +# - local: model_doc/plbart +# title: PLBart +# - local: model_doc/prophetnet +# title: ProphetNet +# - local: model_doc/qdqbert +# title: QDQBert +# - local: model_doc/qwen2 +# title: Qwen2 +# - local: model_doc/qwen2_moe +# title: Qwen2MoE +# - local: model_doc/rag +# title: RAG +# - local: model_doc/realm +# title: REALM +# - local: 
model_doc/recurrent_gemma +# title: RecurrentGemma +# - local: model_doc/reformer +# title: Reformer +# - local: model_doc/rembert +# title: RemBERT +# - local: model_doc/retribert +# title: RetriBERT +# - local: model_doc/roberta +# title: RoBERTa +# - local: model_doc/roberta-prelayernorm +# title: RoBERTa-PreLayerNorm +# - local: model_doc/roc_bert +# title: RoCBert +# - local: model_doc/roformer +# title: RoFormer +# - local: model_doc/rwkv +# title: RWKV +# - local: model_doc/splinter +# title: Splinter +# - local: model_doc/squeezebert +# title: SqueezeBERT +# - local: model_doc/stablelm +# title: StableLm +# - local: model_doc/starcoder2 +# title: Starcoder2 +# - local: model_doc/switch_transformers +# title: SwitchTransformers +# - local: model_doc/t5 +# title: T5 +# - local: model_doc/t5v1.1 +# title: T5v1.1 +# - local: model_doc/tapex +# title: TAPEX +# - local: model_doc/transfo-xl +# title: Transformer XL +# - local: model_doc/ul2 +# title: UL2 +# - local: model_doc/umt5 +# title: UMT5 +# - local: model_doc/xmod +# title: X-MOD +# - local: model_doc/xglm +# title: XGLM +# - local: model_doc/xlm +# title: XLM +# - local: model_doc/xlm-prophetnet +# title: XLM-ProphetNet +# - local: model_doc/xlm-roberta +# title: XLM-RoBERTa +# - local: model_doc/xlm-roberta-xl +# title: XLM-RoBERTa-XL +# - local: model_doc/xlm-v +# title: XLM-V +# - local: model_doc/xlnet +# title: XLNet +# - local: model_doc/yoso +# title: YOSO +# title: Text models +# - isExpanded: false +# sections: +# - local: model_doc/beit +# title: BEiT +# - local: model_doc/bit +# title: BiT +# - local: model_doc/conditional_detr +# title: Conditional DETR +# - local: model_doc/convnext +# title: ConvNeXT +# - local: model_doc/convnextv2 +# title: ConvNeXTV2 +# - local: model_doc/cvt +# title: CVT +# - local: model_doc/deformable_detr +# title: Deformable DETR +# - local: model_doc/deit +# title: DeiT +# - local: model_doc/depth_anything +# title: Depth Anything +# - local: model_doc/deta +# title: DETA +# - local: model_doc/detr +# title: DETR +# - local: model_doc/dinat +# title: DiNAT +# - local: model_doc/dinov2 +# title: DINOV2 +# - local: model_doc/dit +# title: DiT +# - local: model_doc/dpt +# title: DPT +# - local: model_doc/efficientformer +# title: EfficientFormer +# - local: model_doc/efficientnet +# title: EfficientNet +# - local: model_doc/focalnet +# title: FocalNet +# - local: model_doc/glpn +# title: GLPN +# - local: model_doc/imagegpt +# title: ImageGPT +# - local: model_doc/levit +# title: LeViT +# - local: model_doc/mask2former +# title: Mask2Former +# - local: model_doc/maskformer +# title: MaskFormer +# - local: model_doc/mobilenet_v1 +# title: MobileNetV1 +# - local: model_doc/mobilenet_v2 +# title: MobileNetV2 +# - local: model_doc/mobilevit +# title: MobileViT +# - local: model_doc/mobilevitv2 +# title: MobileViTV2 +# - local: model_doc/nat +# title: NAT +# - local: model_doc/poolformer +# title: PoolFormer +# - local: model_doc/pvt +# title: Pyramid Vision Transformer (PVT) +# - local: model_doc/pvt_v2 +# title: Pyramid Vision Transformer v2 (PVTv2) +# - local: model_doc/regnet +# title: RegNet +# - local: model_doc/resnet +# title: ResNet +# - local: model_doc/segformer +# title: SegFormer +# - local: model_doc/seggpt +# title: SegGpt +# - local: model_doc/superpoint +# title: SuperPoint +# - local: model_doc/swiftformer +# title: SwiftFormer +# - local: model_doc/swin +# title: Swin Transformer +# - local: model_doc/swinv2 +# title: Swin Transformer V2 +# - local: model_doc/swin2sr +# title: 
Swin2SR +# - local: model_doc/table-transformer +# title: Table Transformer +# - local: model_doc/upernet +# title: UperNet +# - local: model_doc/van +# title: VAN +# - local: model_doc/vit +# title: Vision Transformer (ViT) +# - local: model_doc/vit_hybrid +# title: ViT Hybrid +# - local: model_doc/vitdet +# title: ViTDet +# - local: model_doc/vit_mae +# title: ViTMAE +# - local: model_doc/vitmatte +# title: ViTMatte +# - local: model_doc/vit_msn +# title: ViTMSN +# - local: model_doc/yolos +# title: YOLOS +# title: Vision models +# - isExpanded: false +# sections: +# - local: model_doc/audio-spectrogram-transformer +# title: Audio Spectrogram Transformer +# - local: model_doc/bark +# title: Bark +# - local: model_doc/clap +# title: CLAP +# - local: model_doc/encodec +# title: EnCodec +# - local: model_doc/hubert +# title: Hubert +# - local: model_doc/mctct +# title: MCTCT +# - local: model_doc/mms +# title: MMS +# - local: model_doc/musicgen +# title: MusicGen +# - local: model_doc/musicgen_melody +# title: MusicGen Melody +# - local: model_doc/pop2piano +# title: Pop2Piano +# - local: model_doc/seamless_m4t +# title: Seamless-M4T +# - local: model_doc/seamless_m4t_v2 +# title: SeamlessM4T-v2 +# - local: model_doc/sew +# title: SEW +# - local: model_doc/sew-d +# title: SEW-D +# - local: model_doc/speech_to_text +# title: Speech2Text +# - local: model_doc/speech_to_text_2 +# title: Speech2Text2 +# - local: model_doc/speecht5 +# title: SpeechT5 +# - local: model_doc/unispeech +# title: UniSpeech +# - local: model_doc/unispeech-sat +# title: UniSpeech-SAT +# - local: model_doc/univnet +# title: UnivNet +# - local: model_doc/vits +# title: VITS +# - local: model_doc/wav2vec2 +# title: Wav2Vec2 +# - local: model_doc/wav2vec2-bert +# title: Wav2Vec2-BERT +# - local: model_doc/wav2vec2-conformer +# title: Wav2Vec2-Conformer +# - local: model_doc/wav2vec2_phoneme +# title: Wav2Vec2Phoneme +# - local: model_doc/wavlm +# title: WavLM +# - local: model_doc/whisper +# title: Whisper +# - local: model_doc/xls_r +# title: XLS-R +# - local: model_doc/xlsr_wav2vec2 +# title: XLSR-Wav2Vec2 +# title: Audio models +# - isExpanded: false +# sections: +# - local: model_doc/timesformer +# title: TimeSformer +# - local: model_doc/videomae +# title: VideoMAE +# - local: model_doc/vivit +# title: ViViT +# title: Video models +# - isExpanded: false +# sections: +# - local: model_doc/align +# title: ALIGN +# - local: model_doc/altclip +# title: AltCLIP +# - local: model_doc/blip +# title: BLIP +# - local: model_doc/blip-2 +# title: BLIP-2 +# - local: model_doc/bridgetower +# title: BridgeTower +# - local: model_doc/bros +# title: BROS +# - local: model_doc/chinese_clip +# title: Chinese-CLIP +# - local: model_doc/clip +# title: CLIP +# - local: model_doc/clipseg +# title: CLIPSeg +# - local: model_doc/clvp +# title: CLVP +# - local: model_doc/data2vec +# title: Data2Vec +# - local: model_doc/deplot +# title: DePlot +# - local: model_doc/donut +# title: Donut +# - local: model_doc/flava +# title: FLAVA +# - local: model_doc/git +# title: GIT +# - local: model_doc/grounding-dino +# title: Grounding DINO +# - local: model_doc/groupvit +# title: GroupViT +# - local: model_doc/idefics +# title: IDEFICS +# - local: model_doc/idefics2 +# title: Idefics2 +# - local: model_doc/instructblip +# title: InstructBLIP +# - local: model_doc/kosmos-2 +# title: KOSMOS-2 +# - local: model_doc/layoutlm +# title: LayoutLM +# - local: model_doc/layoutlmv2 +# title: LayoutLMV2 +# - local: model_doc/layoutlmv3 +# title: LayoutLMV3 +# - 
local: model_doc/layoutxlm +# title: LayoutXLM +# - local: model_doc/lilt +# title: LiLT +# - local: model_doc/llava +# title: Llava +# - local: model_doc/llava_next +# title: LLaVA-NeXT +# - local: model_doc/lxmert +# title: LXMERT +# - local: model_doc/matcha +# title: MatCha +# - local: model_doc/mgp-str +# title: MGP-STR +# - local: model_doc/nougat +# title: Nougat +# - local: model_doc/oneformer +# title: OneFormer +# - local: model_doc/owlvit +# title: OWL-ViT +# - local: model_doc/owlv2 +# title: OWLv2 +# - local: model_doc/paligemma +# title: PaliGemma +# - local: model_doc/perceiver +# title: Perceiver +# - local: model_doc/pix2struct +# title: Pix2Struct +# - local: model_doc/sam +# title: Segment Anything +# - local: model_doc/siglip +# title: SigLIP +# - local: model_doc/speech-encoder-decoder +# title: Speech Encoder Decoder Models +# - local: model_doc/tapas +# title: TAPAS +# - local: model_doc/trocr +# title: TrOCR +# - local: model_doc/tvlt +# title: TVLT +# - local: model_doc/tvp +# title: TVP +# - local: model_doc/udop +# title: UDOP +# - local: model_doc/video_llava +# title: VideoLlava +# - local: model_doc/vilt +# title: ViLT +# - local: model_doc/vipllava +# title: VipLlava +# - local: model_doc/vision-encoder-decoder +# title: Vision Encoder Decoder Models +# - local: model_doc/vision-text-dual-encoder +# title: Vision Text Dual Encoder +# - local: model_doc/visual_bert +# title: VisualBERT +# - local: model_doc/xclip +# title: X-CLIP +# title: Multimodal models +# - isExpanded: false +# sections: +# - local: model_doc/decision_transformer +# title: محول القرار +# - local: model_doc/trajectory_transformer +# title: محول المسار +# title: نماذج التعلم التعزيزية +# - isExpanded: false +# sections: +# - local: model_doc/autoformer +# title: Autoformer +# - local: model_doc/informer +# title: Informer +# - local: model_doc/patchtsmixer +# title: PatchTSMixer +# - local: model_doc/patchtst +# title: PatchTST +# - local: model_doc/time_series_transformer +# title: محول السلاسل الزمنية +# title: نماذج السلاسل الزمنية +# - isExpanded: false +# sections: +# - local: model_doc/graphormer +# title: Graphormer +# title: نماذج الرسم البياني +# title: النماذج +# - sections: +# - local: internal/modeling_utils +# title: الطبقات المخصصة والمرافق +# - local: internal/pipelines_utils +# title: مرافق خطوط الأنابيب +# - local: internal/tokenization_utils +# title: مرافق مقسم النصوص +# - local: internal/trainer_utils +# title: مرافق المدرب +# - local: internal/generation_utils +# title: مرافق التوليد +# - local: internal/image_processing_utils +# title: مرافق معالجة الصور +# - local: internal/audio_utils +# title: مرافق معالجة الصوت +# - local: internal/file_utils +# title: مرافق عامة +# - local: internal/time_series_utils +# title: مرافق السلاسل الزمنية +# title: مساعدون داخليون +# title: API diff --git a/docs/source/ar/accelerate.md b/docs/source/ar/accelerate.md new file mode 100644 index 00000000000000..486c1efe59af60 --- /dev/null +++ b/docs/source/ar/accelerate.md @@ -0,0 +1,120 @@ +# التدريب الموزع باستخدام 🤗 Accelerate + + +مع تزايد حجم النماذج اللغوية، برز التوازي كأحد الاستراتيجيات لتدريب نماذج أكبر على أجهزة محدودة وتسريع عملية التدريب بمقدار كبير. أنشأنا في Hugging Face، قمنا بإنشاء مكتبة [ Accelerate](https://huggingface.co/docs/accelerate) لمساعدة المستخدمين على تدريب أي نموذج من Transformers بسهولة على أي نوع من الإعدادات الموزعة، سواء كان ذلك على عدة وحدات معالجة رسومات (GPUs) على جهاز واحد أو على عدة وحدات معالجة رسومات موزعة على عدة أجهزة. 
في هذا الدليل، تعلم كيفية تخصيص حلقة تدريب PyTorch الأصلية لتمكين التدريب في بيئة موزعة.
+
+## الإعداد
+
+ابدأ بتثبيت 🤗 Accelerate:
+
+```bash
+pip install accelerate
+```
+
+ثم قم باستيراد وإنشاء كائن [`~accelerate.Accelerator`]. سيقوم [`~accelerate.Accelerator`] تلقائيًا باكتشاف نوع الإعداد الموزع الخاص بك وتهيئة جميع المكونات اللازمة للتدريب. لن تحتاج إلى وضع نموذجك على جهاز بشكل صريح.
+
+```py
+>>> from accelerate import Accelerator
+
+>>> accelerator = Accelerator()
+```
+
+## الاستعداد للتسريع
+
+الخطوة التالية هي تمرير جميع كائنات التدريب ذات الصلة إلى دالة الإعداد [`~accelerate.Accelerator.prepare`]. ويشمل ذلك DataLoaders للتدريب والتقييم، ونموذجًا ومُحسِّن المعاملات (optimizer):
+
+```py
+>>> train_dataloader, eval_dataloader, model, optimizer = accelerator.prepare(
+...     train_dataloader, eval_dataloader, model, optimizer
+... )
+```
+
+## الانتشار الخلفي (Backward)
+
+الإضافة الأخيرة هي استبدال الدالة المعتادة `loss.backward()` في حلقة التدريب الخاصة بك بدالة [`~accelerate.Accelerator.backward`] في 🤗 Accelerate:
+
+```py
+>>> for epoch in range(num_epochs):
+...     for batch in train_dataloader:
+...         outputs = model(**batch)
+...         loss = outputs.loss
+...         accelerator.backward(loss)
+
+...         optimizer.step()
+...         lr_scheduler.step()
+...         optimizer.zero_grad()
+...         progress_bar.update(1)
+```
+
+كما يمكنك أن ترى في الكود التالي، فأنت بحاجة فقط إلى إضافة أربعة أسطر من الكود إلى حلقة التدريب الخاصة بك لتمكين التدريب الموزع!
+
+```diff
++ from accelerate import Accelerator
+  from transformers import AdamW, AutoModelForSequenceClassification, get_scheduler
+
++ accelerator = Accelerator()
+
+  model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)
+  optimizer = AdamW(model.parameters(), lr=3e-5)
+
+- device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
+- model.to(device)
+
++ train_dataloader, eval_dataloader, model, optimizer = accelerator.prepare(
++     train_dataloader, eval_dataloader, model, optimizer
++ )
+
+  num_epochs = 3
+  num_training_steps = num_epochs * len(train_dataloader)
+  lr_scheduler = get_scheduler(
+      "linear",
+      optimizer=optimizer,
+      num_warmup_steps=0,
+      num_training_steps=num_training_steps
+  )
+
+  progress_bar = tqdm(range(num_training_steps))
+
+  model.train()
+  for epoch in range(num_epochs):
+      for batch in train_dataloader:
+-         batch = {k: v.to(device) for k, v in batch.items()}
+          outputs = model(**batch)
+          loss = outputs.loss
+-         loss.backward()
++         accelerator.backward(loss)
+          optimizer.step()
+          lr_scheduler.step()
+          optimizer.zero_grad()
+          progress_bar.update(1)
+```
+
+## التدريب
+
+بمجرد إضافة أسطر الكود ذات الصلة، قم بتشغيل التدريب الخاص بك في نص برمجي أو في دفتر ملاحظات مثل Colaboratory.
+
+### التدريب باستخدام نص برمجي
+
+إذا كنت تشغل التدريب الخاص بك من نص برمجي، فقم بتشغيل الأمر التالي لإنشاء وحفظ ملف تكوين:
+
+```bash
+accelerate config
+```
+
+ثم قم بتشغيل التدريب الخاص بك باستخدام:
+
+```bash
+accelerate launch train.py
+```
+
+### التدريب باستخدام دفتر ملاحظات
+
+يمكن أيضًا تشغيل 🤗 Accelerate في دفاتر الملاحظات إذا كنت تخطط لاستخدام وحدات معالجة الموترات (TPUs) في Colaboratory. قم بتغليف كل الكود المسؤول عن التدريب في دالة، ومررها إلى [`~accelerate.notebook_launcher`]:
+
+```py
+>>> from accelerate import notebook_launcher
+
+>>> notebook_launcher(training_function)
+```
+
+للحصول على مزيد من المعلومات حول 🤗 Accelerate وميزاته الغنية، يرجى الرجوع إلى [الوثائق](https://huggingface.co/docs/accelerate).
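+
+وكمرجع لقسم «التدريب باستخدام دفتر ملاحظات» أعلاه، إليك مخططًا توضيحيًا مبسطًا لما قد تبدو عليه دالة `training_function` التي تمررها إلى [`~accelerate.notebook_launcher`]. لاحظ أن نقطة التفتيش `bert-base-cased` ومجموعة البيانات `glue/mrpc` وأحجام الدفعات المستخدمة هنا مجرد افتراضات لغرض التوضيح، ويمكنك استبدالها بما يناسب مشروعك:
+
+```py
+from accelerate import Accelerator, notebook_launcher
+from datasets import load_dataset
+from torch.optim import AdamW
+from torch.utils.data import DataLoader
+from transformers import AutoModelForSequenceClassification, AutoTokenizer, get_scheduler
+
+
+def training_function():
+    # نقطة تفتيش ومجموعة بيانات افتراضيتان لغرض التوضيح فقط
+    checkpoint = "bert-base-cased"
+    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+    dataset = load_dataset("glue", "mrpc", split="train[:200]")
+
+    def tokenize(batch):
+        return tokenizer(batch["sentence1"], batch["sentence2"], truncation=True, padding="max_length", max_length=128)
+
+    # تجهيز البيانات بتنسيق PyTorch
+    dataset = dataset.map(tokenize, batched=True)
+    dataset = dataset.rename_column("label", "labels")
+    dataset.set_format("torch", columns=["input_ids", "attention_mask", "labels"])
+    train_dataloader = DataLoader(dataset, shuffle=True, batch_size=8)
+
+    model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)
+    optimizer = AdamW(model.parameters(), lr=3e-5)
+
+    # نفس الخطوات المشروحة أعلاه: إنشاء Accelerator ثم prepare ثم استبدال loss.backward()
+    accelerator = Accelerator()
+    train_dataloader, model, optimizer = accelerator.prepare(train_dataloader, model, optimizer)
+
+    num_epochs = 1
+    num_training_steps = num_epochs * len(train_dataloader)
+    lr_scheduler = get_scheduler("linear", optimizer=optimizer, num_warmup_steps=0, num_training_steps=num_training_steps)
+
+    model.train()
+    for epoch in range(num_epochs):
+        for batch in train_dataloader:
+            outputs = model(**batch)
+            loss = outputs.loss
+            accelerator.backward(loss)
+            optimizer.step()
+            lr_scheduler.step()
+            optimizer.zero_grad()
+
+
+# عدد العمليات هنا مجرد مثال؛ اضبطه وفقًا لعدد وحدات المعالجة المتاحة لديك
+notebook_launcher(training_function, num_processes=2)
+```
+
+لاحظ أن الدالة يجب أن تنشئ النموذج وكائن `Accelerator` داخلها، لأن `notebook_launcher` يشغّلها في عدة عمليات متوازية.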
\ No newline at end of file
diff --git a/docs/source/ar/agents.md b/docs/source/ar/agents.md
new file mode 100644
index 00000000000000..92b2a4715f6f07
--- /dev/null
+++ b/docs/source/ar/agents.md
@@ -0,0 +1,539 @@
+# الوكلاء والأدوات
+
+[[open-in-colab]]
+
+### ما هو الوكيل؟
+
+يمكن للنماذج اللغوية الكبيرة (LLMs) التي تم تدريبها على أداء [نمذجة اللغة السببية](./tasks/language_modeling) التعامل مع مجموعة واسعة من المهام، ولكنها غالبًا ما تواجه صعوبات في المهام الأساسية مثل المنطق والحساب والبحث. وعندما يتم استدعاؤها في مجالات لا تؤدي فيها أداءً جيدًا، فإنها غالبًا ما تفشل في توليد الإجابة التي نتوقعها منها.
+
+يتمثل أحد النهج للتغلب على هذا القصور في إنشاء "وكيل".
+
+الوكيل هو نظام يستخدم LLM كمحرك له، ولديه حق الوصول إلى وظائف تسمى "أدوات".
+
+هذه "الأدوات" هي وظائف لأداء مهمة، وتحتوي على جميع الأوصاف اللازمة للوكيل لاستخدامها بشكل صحيح.
+
+يمكن برمجة الوكيل للقيام بما يلي:
+- وضع سلسلة من الإجراءات/الأدوات وتشغيلها جميعًا في نفس الوقت، مثل [`CodeAgent`]
+- التخطيط للإجراءات/الأدوات وتنفيذها واحدة تلو الأخرى والانتظار حتى انتهاء كل إجراء قبل إطلاق التالي، مثل [`ReactJsonAgent`]
+
+### أنواع الوكلاء
+
+#### الوكيل البرمجي (Code agent)
+
+يتبع هذا الوكيل خطوات محددة: أولًا، يخطط لسلسلة من الإجراءات التي يريد تنفيذها، ثم يولّد شفرة Python لتنفيذ جميع الإجراءات في نفس الوقت. وهو يتعامل بشكل أصلي مع أنواع مختلفة من المدخلات والمخرجات للأدوات التي يستخدمها، وبالتالي فهو الخيار الموصى به للمهام متعددة الوسائط.
+
+#### وكلاء التفاعل (ReAct agents)
+
+هذا هو الوكيل الذي يتم اللجوء إليه لحل مهام الاستدلال، حيث يتيح إطار ReAct ([Yao et al.، 2022](https://huggingface.co/papers/2210.03629)) للوكيل أن يفكر بكفاءة بناءً على ملاحظاته السابقة.
+
+نقوم بتنفيذ إصدارين من وكلاء ReAct:
+- [`ReactJsonAgent`] يقوم بتوليد استدعاءات الأدوات كـ JSON في مخرجاته.
+- [`ReactCodeAgent`] هو نوع جديد من ReactJsonAgent يقوم بتوليد استدعاءات أدواته كمقاطع من التعليمات البرمجية، وهو ما يعمل بشكل جيد حقًا مع نماذج LLM التي تتمتع بأداء قوي في البرمجة.
+
+> [!TIP]
+> اقرأ منشور المدونة [Open-source LLMs as LangChain Agents](https://huggingface.co/blog/open-source-llms-as-agents) لمعرفة المزيد عن وكيل ReAct.
+
+![إطار عمل وكيل ReAct](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/blog/open-source-llms-as-agents/ReAct.png)
+
+على سبيل المثال، إليك كيف يشق وكيل ReAct Code طريقه خلال السؤال التالي.
+
+```py3
+>>> agent.run(
+... "How many more blocks (also denoted as layers) in BERT base encoder than the encoder from the architecture proposed in Attention is All You Need?",
+... )
+=====New task=====
+How many more blocks (also denoted as layers) in BERT base encoder than the encoder from the architecture proposed in Attention is All You Need?
+====Agent is executing the code below:
+bert_blocks = search(query="number of blocks in BERT base encoder")
+print("BERT blocks:", bert_blocks)
+====
+Print outputs:
+BERT blocks: twelve encoder blocks
+
+====Agent is executing the code below:
+attention_layer = search(query="number of layers in Attention is All You Need")
+print("Attention layers:", attention_layer)
+====
+Print outputs:
+Attention layers: Encoder: The encoder is composed of a stack of N = 6 identical layers. Each layer has two sub-layers. The first is a multi-head self-attention mechanism, and the second is a simple, position- 2 Page 3 Figure 1: The Transformer - model architecture.
+
+====Agent is executing the code below:
+bert_blocks = 12
+attention_layers = 6
+diff = bert_blocks - attention_layers
+print("Difference in blocks:", diff)
+final_answer(diff)
+====
+
+Print outputs:
+Difference in blocks: 6
+
+Final answer: 6
+```
+
+### كيف يمكنني بناء وكيل؟
+
+لتهيئة وكيل، تحتاج إلى هذه الوسائط:
+
+- نموذج لغوي كبير (LLM) يشكل المحرك الأساسي للوكيل. الوكيل نفسه ليس النموذج اللغوي، بل هو برنامج يستخدم النموذج اللغوي كمحرك له.
+- موجه النظام (system prompt): هذه هي التعليمات التي يتم إعطاؤها للنموذج اللغوي لإنشاء مخرجاته.
+- صندوق أدوات (toolbox) يختار الوكيل منه الأدوات لتنفيذها.
+- محلل (parser) لاستخراج الأدوات التي يجب استدعاؤها من مخرجات LLM والوسائط التي يجب استخدامها معها.
+
+عند تهيئة نظام الوكيل، يتم استخدام سمات الأداة لإنشاء وصف للأداة، ثم يتم دمجه في موجه النظام `system_prompt` الخاص بالوكيل لإعلامه بالأدوات التي يمكنه استخدامها ولماذا.
+
+للبدء، قم بتثبيت الإضافة `agents` للحصول على جميع التبعيات الافتراضية:
+
+```bash
+pip install transformers[agents]
+```
+
+قم ببناء محرك LLM الخاص بك من خلال تعريف دالة `llm_engine` التي تقبل قائمة من [الرسائل](./chat_templating) وتعيد نصًا. يجب أن تقبل هذه الدالة القابلة للاستدعاء أيضًا معامل `stop` يشير إلى متى يجب التوقف عن التوليد.
+
+```python
+from huggingface_hub import login, InferenceClient
+
+login("")
+
+client = InferenceClient(model="meta-llama/Meta-Llama-3-70B-Instruct")
+
+def llm_engine(messages, stop_sequences=["Task"]) -> str:
+    response = client.chat_completion(messages, stop=stop_sequences, max_tokens=1000)
+    answer = response.choices[0].message.content
+    return answer
+```
+
+يمكنك استخدام أي دالة `llm_engine` طالما أنها:
+1. تتبع تنسيق [الرسائل](./chat_templating.md) لمدخلاتها (`List[Dict[str, str]]`) وتعيد `str`
+2. تتوقف عن توليد المخرجات عند التسلسلات التي تم تمريرها في معامل `stop`
+
+أنت بحاجة أيضًا إلى معامل `tools` الذي يقبل قائمة من الأدوات. يمكنك توفير قائمة فارغة لـ `tools`، ولكن مع استخدام صندوق الأدوات الافتراضي عبر المعامل الاختياري `add_base_tools=True`.
+
+الآن يمكنك إنشاء وكيل، مثل [`CodeAgent`]، وتشغيله. ولتسهيل الأمر، نقدم أيضًا فئة [`HfEngine`] التي تستخدم `huggingface_hub.InferenceClient` خلف الكواليس.
+
+```python
+from transformers import CodeAgent, HfEngine
+
+llm_engine = HfEngine(model="meta-llama/Meta-Llama-3-70B-Instruct")
+agent = CodeAgent(tools=[], llm_engine=llm_engine, add_base_tools=True)
+
+agent.run(
+    "Could you translate this sentence from French, say it out loud and return the audio.",
+    sentence="Où est la boulangerie la plus proche?",
+)
+```
+
+ستكون هذه الميزة مفيدة عند الحاجة! يمكنك حتى ترك معامل `llm_engine` غير محدد، وسيتم إنشاء [`HfEngine`] بشكل تلقائي.
+
+```python
+from transformers import CodeAgent
+
+agent = CodeAgent(tools=[], add_base_tools=True)
+
+agent.run(
+    "Could you translate this sentence from French, say it out loud and give me the audio.",
+    sentence="Où est la boulangerie la plus proche?",
+)
+```
+
+لاحظ أننا استخدمنا معاملًا إضافيًا باسم `sentence`: يمكنك تمرير النص كمعامل إضافي إلى النموذج.
+
+يمكنك أيضًا استخدام هذا للإشارة إلى مسارات ملفات محلية أو بعيدة ليستخدمها النموذج:
+
+```py
+from transformers import ReactCodeAgent
+
+agent = ReactCodeAgent(tools=[], llm_engine=llm_engine, add_base_tools=True)
+
+agent.run("Why does Mike not know many people in New York?", audio="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/recording.mp3")
+```
+
+
+تم تحديد موجه النظام ومحلل المخرجات تلقائيًا، ولكن يمكنك فحصهما بسهولة عن طريق استدعاء `system_prompt_template` على وكيلك.
+
+```python
+print(agent.system_prompt_template)
+```
+
+من المهم أن تشرح بأكبر قدر ممكن من الوضوح المهمة التي تريد تنفيذها.
+كل عملية [`~Agent.run`] مستقلة، وبما أن الوكيل مدعوم من LLM، فقد تؤدي الاختلافات الطفيفة في موجهك إلى نتائج مختلفة تمامًا.
+يمكنك أيضًا تشغيل وكيل بشكل متتالٍ لمهام مختلفة: في كل مرة يتم فيها إعادة تهيئة سمتي `agent.task` و`agent.logs`.
+
+
+#### تنفيذ التعليمات البرمجية
+
+يقوم مفسر Python بتنفيذ التعليمات البرمجية على مجموعة من المدخلات التي يتم تمريرها جنبًا إلى جنب مع أدواتك.
+يجب أن يكون هذا الأمر آمنًا لأن الوظائف الوحيدة التي يمكن استدعاؤها هي الأدوات التي قدمتها (خاصة إذا كانت أدوات من Hugging Face فقط) ووظيفة الطباعة، لذا فأنت مقيد بالفعل بما يمكن تنفيذه.
+
+لا يسمح مفسر Python افتراضيًا أيضًا باستدعاء دوال خارج قائمة آمنة، لذا فإن جميع الهجمات الأكثر وضوحًا لا ينبغي أن تشكل مشكلة.
+يمكنك أيضًا السماح باستيرادات إضافية عن طريق تمرير الوحدات النمطية المصرح بها كقائمة من السلاسل في معامل `additional_authorized_imports` عند تهيئة [`ReactCodeAgent`] أو [`CodeAgent`]:
+
+```py
+>>> from transformers import ReactCodeAgent
+
+>>> agent = ReactCodeAgent(tools=[], additional_authorized_imports=['requests', 'bs4'])
+>>> agent.run("Could you get me the title of the page at url 'https://huggingface.co/blog'?")
+
+(...)
+'Hugging Face – Blog'
+```
+
+سيتم إيقاف التنفيذ عند أي شفرة تحاول تنفيذ عملية غير مسموح بها، أو إذا كان هناك خطأ Python عادي في التعليمات البرمجية التي تم إنشاؤها بواسطة الوكيل.
+
+> [!WARNING]
+> يمكن لـ LLM توليد شفرة برمجية عشوائية سيتم تنفيذها بعد ذلك: لا تقم باستدعاء أي دوال غير آمنة!
+
+### موجه النظام
+
+يولّد الوكيل، أو بالأحرى LLM الذي يقود الوكيل، مخرجاته بناءً على موجه النظام. يمكن تخصيص موجه النظام وتصميمه للمهام المقصودة. على سبيل المثال، تحقق من موجه النظام لـ [`ReactCodeAgent`] (الإصدار أدناه مبسط قليلاً).
+
+```text
+You will be given a task to solve as best you can.
+You have access to the following tools:
+<>
+
+To solve the task, you must plan forward to proceed in a series of steps, in a cycle of 'Thought:', 'Code:', and 'Observation:' sequences.
+
+At each step, in the 'Thought:' sequence, you should first explain your reasoning towards solving the task, then the tools that you want to use.
+Then in the 'Code:' sequence, you should write the code in simple Python. The code sequence must end with '/End code' sequence.
+During each intermediate step, you can use 'print()' to save whatever important information you will then need.
+These print outputs will then be available in the 'Observation:' field, for using this information as input for the next step.
+
+In the end you have to return a final answer using the `final_answer` tool.
+
+Here are a few examples using notional tools:
+---
+{examples}
+
+Above examples were using notional tools that might not exist for you. You only have access to those tools:
+<>
+You also can perform computations in the python code you generate.
+
+Always provide a 'Thought:' and a 'Code:\n```py' sequence ending with '```' sequence. You MUST provide at least the 'Code:' sequence to move forward.
+
+Remember to not perform too many operations in a single code block! You should split the task into intermediate code blocks.
+Print results at the end of each step to save the intermediate results. Then use final_answer() to return the final result.
+
+Remember to make sure that variables you use are all defined.
+
+Now Begin!
+```
+
+يتضمن موجه النظام:
+- *مقدمة* تشرح كيف يجب أن يتصرف الوكيل والأدوات التي يجب عليه استخدامها.
+- وصف لجميع الأدوات التي يتم تحديدها بواسطة رمز `<>` الذي يتم استبداله ديناميكيًا في وقت التشغيل بالأدوات التي يحددها المستخدم أو يختارها.
+  - يأتي وصف الأداة من سمات الأداة، `name`، و`description`، و`inputs` و`output_type`، وقالب `jinja2` بسيط يمكنك تحسينه.
+- شكل المخرج المتوقع.
+
+يمكنك تحسين موجه النظام، على سبيل المثال، عن طريق إضافة شرح لتنسيق المخرجات.
+
+للحصول على أقصى قدر من المرونة، يمكنك الكتابة فوق قالب موجه النظام بالكامل عن طريق تمرير موجهك المخصص إلى معامل `system_prompt`.
+
+```python
+from transformers import ReactJsonAgent
+from transformers.agents import PythonInterpreterTool
+
+agent = ReactJsonAgent(tools=[PythonInterpreterTool()], system_prompt="{your_custom_prompt}")
+```
+
+> [!WARNING]
+> يرجى التأكد من تحديد سلسلة `<>` في مكان ما في `template` حتى يكون الوكيل على علم بالأدوات المتاحة.
+
+
+### فحص تشغيل الوكيل
+
+فيما يلي بعض السمات المفيدة لفحص ما حدث بعد التشغيل:
+- تخزن `agent.logs` سجلات مفصلة للوكيل. في كل خطوة من تشغيل الوكيل، يتم تخزين كل شيء في قاموس يتم إلحاقه بـ `agent.logs`.
+- تشغيل `agent.write_inner_memory_from_logs()` ينشئ ذاكرة داخلية من سجلات الوكيل ليعرضها على LLM، كقائمة من رسائل الدردشة. تمر هذه الطريقة عبر كل خطوة من سجل الوكيل ولا تخزن سوى ما يهمها كرسالة: على سبيل المثال، ستحفظ موجه النظام والمهمة في رسائل منفصلة، ثم لكل خطوة ستخزن مخرج LLM كرسالة، ومخرج استدعاء الأداة كرسالة أخرى. استخدم هذا إذا كنت تريد عرضًا عامًا لما حدث - ولكن لن يتم نسخ كل سجل بواسطة هذه الطريقة.
+
+## الأدوات
+
+الأداة هي عبارة عن وظيفة أساسية يستخدمها الوكيل لتنفيذ مهمة محددة.
+
+يمكنك على سبيل المثال التحقق من [`PythonInterpreterTool`]: لديه اسم ووصف ووصف للمدخلات ونوع للمخرج، وطريقة `__call__` التي تقوم بتنفيذ المهمة المطلوبة.
+
+عند تهيئة الوكيل، يتم استخدام سمات الأداة لتوليد وصف للأداة يتم تضمينه في موجه النظام الخاص بالوكيل. يتيح هذا للوكيل معرفة الأدوات التي يمكنه استخدامها ولماذا.
+
+### صندوق الأدوات الافتراضي
+
+يأتي Transformers مع صندوق أدوات افتراضي لتمكين الوكلاء، والذي يمكنك إضافته إلى وكيلك عند التهيئة باستخدام معامل `add_base_tools=True`:
+
+- **الإجابة على أسئلة المستند**: الإجابة على سؤال حول مستند (مثل ملف PDF) بتنسيق صورة ([Donut](./model_doc/donut))
+- **الإجابة على أسئلة الصور**: الإجابة على سؤال حول صورة ([VILT](./model_doc/vilt))
+- **تحويل الكلام إلى نص**: تفريغ الكلام إلى نص ([Whisper](./model_doc/whisper))
+- **تحويل النص إلى كلام**: تحويل النص إلى كلام ([SpeechT5](./model_doc/speecht5))
+- **الترجمة**: ترجمة جملة معينة من لغة المصدر إلى لغة الهدف.
+- **مفسر كود Python**: تشغيل كود Python الذي تم إنشاؤه بواسطة LLM في بيئة آمنة. لن تتم إضافة هذه الأداة إلى [`ReactJsonAgent`] إلا إذا استخدمت `add_base_tools=True`، نظرًا لأن الأدوات المستندة إلى التعليمات البرمجية يمكنها بالفعل تنفيذ كود Python.
+
+يمكنك استخدام أداة يدويًا عن طريق استدعاء دالة [`load_tool`] وتحديد مهمة لتنفيذها.
+
+```python
+from transformers import load_tool
+
+tool = load_tool("text-to-speech")
+audio = tool("This is a text to speech tool")
+```
+
+### إنشاء أداة جديدة
+
+يمكنك إنشاء أداتك الخاصة لتغطية حالات الاستخدام التي لا تغطيها الأدوات الافتراضية من Hugging Face.
+على سبيل المثال، دعنا ننشئ أداة تعرض النموذج الأكثر تنزيلًا لمهمة معينة من Hub.
+
+سوف نبدأ بالكود التالي.
+
+```python
+from huggingface_hub import list_models
+
+task = "text-classification"
+
+model = next(iter(list_models(filter=task, sort="downloads", direction=-1)))
+print(model.id)
+```
+
+يمكن تحويل هذه الشيفرة إلى فئة ترث من الفئة العليا [`Tool`].
+ +تحتاج الأداة المخصصة إلى: + +- اسم `name`، والتي تمثل اسم الأداة نفسها. عادةً ما يصف الاسم وظيفتها. بما أن الكود يعيد النموذج الأكثر تنزيلًا لمهمة ما، فلنسمها `model_download_counter`. +- تستخدم خاصية `description` لملء موجه نظام الوكيل. +- خاصية `inputs`، والتي هي عبارة عن قاموس بمفاتيح "type" و"description". يحتوي على معلومات تساعد المفسر Python على اتخاذ خيارات مستنيرة بشأن المدخلات. +- خاصية `output_type`، والتي تحدد نوع المخرج. +- طريقة `forward` والتي تحتوي على الكود الذي سيتم تنفيذه للحصول على النتيجة النهائية. + +```python +from transformers import Tool +from huggingface_hub import list_models + +class HFModelDownloadsTool(Tool): + name = "model_download_counter" + description = ( + "This is a tool that returns the most downloaded model of a given task on the Hugging Face Hub. " + "It returns the name of the checkpoint." + ) + + inputs = { + "task": { + "type": "text", + "description": "the task category (such as text-classification, depth-estimation, etc)", + } + } + output_type = "text" + + def forward(self, task: str): + model = next(iter(list_models(filter=task, sort="downloads", direction=-1))) + return model.id +``` + +الآن بعد أن أصبحت فئة `HfModelDownloadsTool` المخصصة جاهزة، يمكنك حفظها في ملف باسم `model_downloads.py` واستيرادها للاستخدام. + +```python +from model_downloads import HFModelDownloadsTool + +tool = HFModelDownloadsTool() +``` + +يمكنك أيضًا مشاركة أداتك المخصصة في Hub عن طريق استدعاء [`~Tool.push_to_hub`] على الأداة. تأكد من أنك قمت بإنشاء مستودع لها على Hub وأنك تستخدم رمز وصول للقراءة. + +```python +tool.push_to_hub("{your_username}/hf-model-downloads") +``` + +قم بتحميل الأداة باستخدام دالة [`~Tool.load_tool`] ومررها إلى معلمة `tools` في الوكيل الخاص بك. + +```python +from transformers import load_tool, CodeAgent + +model_download_tool = load_tool("m-ric/hf-model-downloads") +agent = CodeAgent(tools=[model_download_tool], llm_engine=llm_engine) +agent.run( + "Can you give me the name of the model that has the most downloads in the 'text-to-video' task on the Hugging Face Hub?" +) +``` + +ستحصل على ما يلي: + +```text +======== New task ======== +Can you give me the name of the model that has the most downloads in the 'text-to-video' task on the Hugging Face Hub? +==== Agent is executing the code below: +most_downloaded_model = model_download_counter(task="text-to-video") +print(f"The most downloaded model for the 'text-to-video' task is {most_downloaded_model}.") +==== +``` + +والناتج: + +`"النموذج الأكثر تنزيلًا لمهمة `text-to-video` هو ByteDance/AnimateDiff-Lightning."` + +### إدارة صندوق أدوات الوكيل الخاص بك + +إذا كنت قد قمت بتهيئة وكيل، فمن غير الملائم إعادة تهيئته من البداية لإضافة أداة جديدة ترغب في استخدامها. باستخدام مكتبة Transformers، يمكنك إدارة صندوق أدوات الوكيل بإضافة أو استبدال أداة موجودة. + +دعنا نضيف الأداة `model_download_tool` إلى وكيل تم تهيئته مسبقًا باستخدام صندوق الأدوات الافتراضي. + +```python +from transformers import CodeAgent + +agent = CodeAgent(tools=[], llm_engine=llm_engine, add_base_tools=True) +agent.toolbox.add_tool(model_download_tool) +``` + +الآن يمكننا الاستفادة من الأداة الجديدة وأداة تحويل النص إلى كلام السابقة: + +```python + agent.run( + "Can you read out loud the name of the model that has the most downloads in the 'text-to-video' task on the Hugging Face Hub and return the audio?" + ) +``` + +| **Audio** | +|------------------------------------------------------------------------------------------------------------------------------------------------------| +|