diff --git a/.circleci/config.yml b/.circleci/config.yml index 6558dc1454b273..ca2afc67c10e3e 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -34,64 +34,44 @@ jobs: - run: echo 'export "GIT_COMMIT_MESSAGE=$(git show -s --format=%s)"' >> "$BASH_ENV" && source "$BASH_ENV" - run: mkdir -p test_preparation - run: python utils/tests_fetcher.py | tee tests_fetched_summary.txt - - store_artifacts: - path: ~/transformers/tests_fetched_summary.txt - - run: | - if [ -f test_list.txt ]; then - cp test_list.txt test_preparation/test_list.txt - else - touch test_preparation/test_list.txt - fi - - run: | - if [ -f examples_test_list.txt ]; then - mv examples_test_list.txt test_preparation/examples_test_list.txt - else - touch test_preparation/examples_test_list.txt - fi - - run: | - if [ -f filtered_test_list_cross_tests.txt ]; then - mv filtered_test_list_cross_tests.txt test_preparation/filtered_test_list_cross_tests.txt - else - touch test_preparation/filtered_test_list_cross_tests.txt - fi - - run: | - if [ -f doctest_list.txt ]; then - cp doctest_list.txt test_preparation/doctest_list.txt - else - touch test_preparation/doctest_list.txt - fi - - run: | - if [ -f test_repo_utils.txt ]; then - mv test_repo_utils.txt test_preparation/test_repo_utils.txt - else - touch test_preparation/test_repo_utils.txt - fi - run: python utils/tests_fetcher.py --filter_tests + - run: export "GIT_COMMIT_MESSAGE=$(git show -s --format=%s)" && echo $GIT_COMMIT_MESSAGE && python .circleci/create_circleci_config.py --fetcher_folder test_preparation - run: | - if [ -f test_list.txt ]; then - mv test_list.txt test_preparation/filtered_test_list.txt - else - touch test_preparation/filtered_test_list.txt + if [ ! -s test_preparation/generated_config.yml ]; then + echo "No tests to run, exiting early!" 
+ circleci-agent step halt fi + - store_artifacts: - path: test_preparation/test_list.txt - - store_artifacts: - path: test_preparation/doctest_list.txt - - store_artifacts: - path: ~/transformers/test_preparation/filtered_test_list.txt - - store_artifacts: - path: test_preparation/examples_test_list.txt - - run: export "GIT_COMMIT_MESSAGE=$(git show -s --format=%s)" && echo $GIT_COMMIT_MESSAGE && python .circleci/create_circleci_config.py --fetcher_folder test_preparation - - run: | - if [ ! -s test_preparation/generated_config.yml ]; then - echo "No tests to run, exiting early!" - circleci-agent step halt - fi + path: test_preparation + + - run: + name: "Retrieve Artifact Paths" + # [reference] https://circleci.com/docs/api/v2/index.html#operation/getJobArtifacts + # `CIRCLE_TOKEN` is defined as an environment variable set within a context, see `https://circleci.com/docs/contexts/` + command: | + project_slug="gh/${CIRCLE_PROJECT_USERNAME}/${CIRCLE_PROJECT_REPONAME}" + job_number=${CIRCLE_BUILD_NUM} + url="https://circleci.com/api/v2/project/${project_slug}/${job_number}/artifacts" + curl -o test_preparation/artifacts.json ${url} --header "Circle-Token: $CIRCLE_TOKEN" + - run: + name: "Prepare pipeline parameters" + command: | + python utils/process_test_artifacts.py + + # To avoid an overly long generated_config.yml on the continuation orb, we pass the links to the artifacts as parameters. + # Otherwise the list of tests was just too big. Being explicit is good, but here the config size was a limitation. + # We used: + + # https://circleci.com/docs/api/v2/index.html#operation/getJobArtifacts : to get the job artifacts + # We could not pass a nested dict, which is why we create the test_file_... 
parameters for every single job + - store_artifacts: - path: test_preparation/generated_config.yml + path: test_preparation/transformed_artifacts.json - store_artifacts: - path: test_preparation/filtered_test_list_cross_tests.txt + path: test_preparation/artifacts.json - continuation/continue: + parameters: test_preparation/transformed_artifacts.json configuration_path: test_preparation/generated_config.yml # To run all tests for the nightly build @@ -102,22 +82,49 @@ jobs: parallelism: 1 steps: - checkout - - run: uv pip install -e . - - run: | - mkdir test_preparation - echo -n "tests" > test_preparation/test_list.txt - echo -n "all" > test_preparation/examples_test_list.txt - echo -n "tests/repo_utils" > test_preparation/test_repo_utils.txt + - run: uv pip install -U -e . + - run: echo 'export "GIT_COMMIT_MESSAGE=$(git show -s --format=%s)"' >> "$BASH_ENV" && source "$BASH_ENV" + - run: mkdir -p test_preparation + - run: python utils/tests_fetcher.py --fetch_all | tee tests_fetched_summary.txt + - run: python utils/tests_fetcher.py --filter_tests + - run: export "GIT_COMMIT_MESSAGE=$(git show -s --format=%s)" && echo $GIT_COMMIT_MESSAGE && python .circleci/create_circleci_config.py --fetcher_folder test_preparation - run: | - echo -n "tests" > test_list.txt - python utils/tests_fetcher.py --filter_tests - mv test_list.txt test_preparation/filtered_test_list.txt - - run: python .circleci/create_circleci_config.py --fetcher_folder test_preparation - - run: cp test_preparation/generated_config.yml test_preparation/generated_config.txt + if [ ! -s test_preparation/generated_config.yml ]; then + echo "No tests to run, exiting early!" 
+ circleci-agent step halt + fi + + - store_artifacts: + path: test_preparation + + - run: + name: "Retrieve Artifact Paths" + env: + CIRCLE_TOKEN: ${{ secrets.CI_ARTIFACT_TOKEN }} + command: | + project_slug="gh/${CIRCLE_PROJECT_USERNAME}/${CIRCLE_PROJECT_REPONAME}" + job_number=${CIRCLE_BUILD_NUM} + url="https://circleci.com/api/v2/project/${project_slug}/${job_number}/artifacts" + curl -o test_preparation/artifacts.json ${url} + - run: + name: "Prepare pipeline parameters" + command: | + python utils/process_test_artifacts.py + + # To avoid too long generated_config.yaml on the continuation orb, we pass the links to the artifacts as parameters. + # Otherwise the list of tests was just too big. Explicit is good but for that it was a limitation. + # We used: + + # https://circleci.com/docs/api/v2/index.html#operation/getJobArtifacts : to get the job artifacts + # We could not pass a nested dict, which is why we create the test_file_... parameters for every single job + - store_artifacts: - path: test_preparation/generated_config.txt + path: test_preparation/transformed_artifacts.json + - store_artifacts: + path: test_preparation/artifacts.json - continuation/continue: - configuration_path: test_preparation/generated_config.yml + parameters: test_preparation/transformed_artifacts.json + configuration_path: test_preparation/generated_config.yml check_code_quality: working_directory: ~/transformers @@ -130,7 +137,7 @@ jobs: parallelism: 1 steps: - checkout - - run: uv pip install -e . + - run: uv pip install -e ".[quality]" - run: name: Show installed libraries and their versions command: pip freeze | tee installed.txt @@ -155,13 +162,14 @@ jobs: parallelism: 1 steps: - checkout - - run: uv pip install -e . 
+ - run: uv pip install -e ".[quality]" - run: name: Show installed libraries and their versions command: pip freeze | tee installed.txt - store_artifacts: path: ~/transformers/installed.txt - run: python utils/check_copies.py + - run: python utils/check_modular_conversion.py - run: python utils/check_table.py - run: python utils/check_dummies.py - run: python utils/check_repo.py @@ -183,7 +191,10 @@ workflows: - check_circleci_user - check_code_quality - check_repository_consistency - - fetch_tests + - fetch_tests: + # [reference] https://circleci.com/docs/contexts/ + context: + - TRANSFORMERS_CONTEXT nightly: when: <> diff --git a/.circleci/create_circleci_config.py b/.circleci/create_circleci_config.py index a7dd366389dc8f..7ccf5ec96cec4f 100644 --- a/.circleci/create_circleci_config.py +++ b/.circleci/create_circleci_config.py @@ -32,7 +32,7 @@ "RUN_PT_FLAX_CROSS_TESTS": False, } # Disable the use of {"s": None} as the output is way too long, causing the navigation on CircleCI impractical -COMMON_PYTEST_OPTIONS = {"max-worker-restart": 0, "dist": "loadfile", "v": None} +COMMON_PYTEST_OPTIONS = {"max-worker-restart": 0, "dist": "loadfile", "vvv": None, "rsf":None} DEFAULT_DOCKER_IMAGE = [{"image": "cimg/python:3.8.12"}] @@ -50,16 +50,15 @@ def to_dict(self): class CircleCIJob: name: str additional_env: Dict[str, Any] = None - cache_name: str = None - cache_version: str = "0.8.2" docker_image: List[Dict[str, str]] = None install_steps: List[str] = None marker: Optional[str] = None - parallelism: Optional[int] = 1 + parallelism: Optional[int] = 0 pytest_num_workers: int = 12 pytest_options: Dict[str, Any] = None resource_class: Optional[str] = "2xlarge" tests_to_run: Optional[List[str]] = None + num_test_files_per_worker: Optional[int] = 10 # This should be only used for doctest job! command_timeout: Optional[int] = None @@ -67,8 +66,6 @@ def __post_init__(self): # Deal with defaults for mutable attributes. 
if self.additional_env is None: self.additional_env = {} - if self.cache_name is None: - self.cache_name = self.name if self.docker_image is None: # Let's avoid changing the default list and make a copy. self.docker_image = copy.deepcopy(DEFAULT_DOCKER_IMAGE) @@ -79,156 +76,96 @@ def __post_init__(self): self.docker_image[0]["image"] = f"{self.docker_image[0]['image']}:dev" print(f"Using {self.docker_image} docker image") if self.install_steps is None: - self.install_steps = [] + self.install_steps = ["uv venv && uv pip install ."] if self.pytest_options is None: self.pytest_options = {} if isinstance(self.tests_to_run, str): self.tests_to_run = [self.tests_to_run] - if self.parallelism is None: - self.parallelism = 1 + else: + test_file = os.path.join("test_preparation" , f"{self.job_name}_test_list.txt") + print("Looking for ", test_file) + if os.path.exists(test_file): + with open(test_file) as f: + expanded_tests = f.read().strip().split("\n") + self.tests_to_run = expanded_tests + print("Found:", expanded_tests) + else: + self.tests_to_run = [] + print("not Found") def to_dict(self): env = COMMON_ENV_VARIABLES.copy() env.update(self.additional_env) - cache_branch_prefix = os.environ.get("CIRCLE_BRANCH", "pull") - if cache_branch_prefix != "main": - cache_branch_prefix = "pull" - job = { "docker": self.docker_image, "environment": env, } if self.resource_class is not None: job["resource_class"] = self.resource_class - if self.parallelism is not None: - job["parallelism"] = self.parallelism - steps = [ - "checkout", - {"attach_workspace": {"at": "test_preparation"}}, - ] - steps.extend([{"run": l} for l in self.install_steps]) - steps.append({"run": {"name": "Show installed libraries and their size", "command": """du -h -d 1 "$(pip -V | cut -d ' ' -f 4 | sed 's/pip//g')" | grep -vE "dist-info|_distutils_hack|__pycache__" | sort -h | tee installed.txt || true"""}}) - steps.append({"run": {"name": "Show installed libraries and their versions", "command": """pip 
list --format=freeze | tee installed.txt || true"""}}) - - steps.append({"run":{"name":"Show biggest libraries","command":"""dpkg-query --show --showformat='${Installed-Size}\t${Package}\n' | sort -rh | head -25 | sort -h | awk '{ package=$2; sub(".*/", "", package); printf("%.5f GB %s\n", $1/1024/1024, package)}' || true"""}}) - steps.append({"store_artifacts": {"path": "installed.txt"}}) all_options = {**COMMON_PYTEST_OPTIONS, **self.pytest_options} pytest_flags = [f"--{key}={value}" if (value is not None or key in ["doctest-modules"]) else f"-{key}" for key, value in all_options.items()] pytest_flags.append( f"--make-reports={self.name}" if "examples" in self.name else f"--make-reports=tests_{self.name}" ) - - steps.append({"run": {"name": "Create `test-results` directory", "command": "mkdir test-results"}}) - - # Examples special case: we need to download NLTK files in advance to avoid cuncurrency issues - if "examples" in self.name: - steps.append({"run": {"name": "Download NLTK files", "command": """python -c "import nltk; nltk.download('punkt', quiet=True)" """}}) - - test_command = "" - if self.command_timeout: - test_command = f"timeout {self.command_timeout} " - # junit familiy xunit1 is necessary to support splitting on test name or class name with circleci split - test_command += f"python3 -m pytest -rsfE -p no:warnings --tb=short -o junit_family=xunit1 --junitxml=test-results/junit.xml -n {self.pytest_num_workers} " + " ".join(pytest_flags) - - if self.parallelism == 1: - if self.tests_to_run is None: - test_command += " << pipeline.parameters.tests_to_run >>" - else: - test_command += " " + " ".join(self.tests_to_run) - else: - # We need explicit list instead of `pipeline.parameters.tests_to_run` (only available at job runtime) - tests = self.tests_to_run - if tests is None: - folder = os.environ["test_preparation_dir"] - test_file = os.path.join(folder, "filtered_test_list.txt") - if os.path.exists(test_file): # We take this job's tests from the 
filtered test_list.txt - with open(test_file) as f: - tests = f.read().split(" ") - - # expand the test list - if tests == ["tests"]: - tests = [os.path.join("tests", x) for x in os.listdir("tests")] - expanded_tests = [] - for test in tests: - if test.endswith(".py"): - expanded_tests.append(test) - elif test == "tests/models": - if "tokenization" in self.name: - expanded_tests.extend(glob.glob("tests/models/**/test_tokenization*.py", recursive=True)) - elif self.name in ["flax","torch","tf"]: - name = self.name if self.name != "torch" else "" - if self.name == "torch": - all_tests = glob.glob(f"tests/models/**/test_modeling_{name}*.py", recursive=True) - filtered = [k for k in all_tests if ("_tf_") not in k and "_flax_" not in k] - expanded_tests.extend(filtered) - else: - expanded_tests.extend(glob.glob(f"tests/models/**/test_modeling_{name}*.py", recursive=True)) - else: - expanded_tests.extend(glob.glob("tests/models/**/test_modeling*.py", recursive=True)) - elif test == "tests/pipelines": - expanded_tests.extend(glob.glob("tests/models/**/test_modeling*.py", recursive=True)) - else: - expanded_tests.append(test) - tests = " ".join(expanded_tests) - - # Each executor to run ~10 tests - n_executors = max(len(expanded_tests) // 10, 1) - # Avoid empty test list on some executor(s) or launching too many executors - if n_executors > self.parallelism: - n_executors = self.parallelism - job["parallelism"] = n_executors - - # Need to be newline separated for the command `circleci tests split` below - command = f'echo {tests} | tr " " "\\n" >> tests.txt' - steps.append({"run": {"name": "Get tests", "command": command}}) - - command = 'TESTS=$(circleci tests split tests.txt) && echo $TESTS > splitted_tests.txt' - steps.append({"run": {"name": "Split tests", "command": command}}) - - steps.append({"store_artifacts": {"path": "tests.txt"}}) - steps.append({"store_artifacts": {"path": "splitted_tests.txt"}}) - - test_command += " $(cat splitted_tests.txt)" - if self.marker 
is not None: - test_command += f" -m {self.marker}" - - if self.name == "pr_documentation_tests": - # can't use ` | tee tee tests_output.txt` as usual - test_command += " > tests_output.txt" - # Save the return code, so we can check if it is timeout in the next step. - test_command += '; touch "$?".txt' - # Never fail the test step for the doctest job. We will check the results in the next step, and fail that - # step instead if the actual test failures are found. This is to avoid the timeout being reported as test - # failure. - test_command = f"({test_command}) || true" - else: - test_command = f"({test_command} | tee tests_output.txt)" - steps.append({"run": {"name": "Run tests", "command": test_command}}) - - steps.append({"run": {"name": "Skipped tests", "when": "always", "command": f"python3 .circleci/parse_test_outputs.py --file tests_output.txt --skip"}}) - steps.append({"run": {"name": "Failed tests", "when": "always", "command": f"python3 .circleci/parse_test_outputs.py --file tests_output.txt --fail"}}) - steps.append({"run": {"name": "Errors", "when": "always", "command": f"python3 .circleci/parse_test_outputs.py --file tests_output.txt --errors"}}) - - steps.append({"store_test_results": {"path": "test-results"}}) - steps.append({"store_artifacts": {"path": "tests_output.txt"}}) - steps.append({"store_artifacts": {"path": "test-results/junit.xml"}}) - steps.append({"store_artifacts": {"path": "reports"}}) - + # Examples special case: we need to download NLTK files in advance to avoid concurrency issues + timeout_cmd = f"timeout {self.command_timeout} " if self.command_timeout else "" + marker_cmd = f"-m '{self.marker}'" if self.marker is not None else "" + additional_flags = f" -p no:warning -o junit_family=xunit1 --junitxml=test-results/junit.xml" + parallel = f' << pipeline.parameters.{self.job_name}_parallelism >> ' + steps = [ + "checkout", + {"attach_workspace": {"at": "test_preparation"}}, + {"run": "apt-get update && apt-get install -y curl"}, 
{"run": " && ".join(self.install_steps)}, + {"run": {"name": "Download NLTK files", "command": """python -c "import nltk; nltk.download('punkt', quiet=True)" """} if "example" in self.name else "echo Skipping"}, + {"run": { + "name": "Show installed libraries and their size", + "command": """du -h -d 1 "$(pip -V | cut -d ' ' -f 4 | sed 's/pip//g')" | grep -vE "dist-info|_distutils_hack|__pycache__" | sort -h | tee installed.txt || true"""} + }, + {"run": { + "name": "Show installed libraries and their versions", + "command": """pip list --format=freeze | tee installed.txt || true"""} + }, + {"run": { + "name": "Show biggest libraries", + "command": """dpkg-query --show --showformat='${Installed-Size}\t${Package}\n' | sort -rh | head -25 | sort -h | awk '{ package=$2; sub(".*/", "", package); printf("%.5f GB %s\n", $1/1024/1024, package)}' || true"""} + }, + {"run": {"name": "Create `test-results` directory", "command": "mkdir test-results"}}, + {"run": {"name": "Get files to test", "command":f'curl -L -o {self.job_name}_test_list.txt <>' if self.name != "pr_documentation_tests" else 'echo "Skipped"'}}, + {"run": {"name": "Split tests across parallel nodes: show current parallel tests", + "command": f"TESTS=$(circleci tests split --split-by=timings {self.job_name}_test_list.txt) && echo $TESTS > splitted_tests.txt && echo $TESTS | tr ' ' '\n'" if self.parallelism else f"awk '{{printf \"%s \", $0}}' {self.job_name}_test_list.txt > splitted_tests.txt" + } + }, + {"run": { + "name": "Run tests", + "command": f"({timeout_cmd} python3 -m pytest {marker_cmd} -n {self.pytest_num_workers} {additional_flags} {' '.join(pytest_flags)} $(cat splitted_tests.txt) | tee tests_output.txt)"} + }, + {"run": {"name": "Expand to show skipped tests", "when": "always", "command": f"python3 .circleci/parse_test_outputs.py --file tests_output.txt --skip"}}, + {"run": {"name": "Failed tests: show reasons", "when": "always", "command": f"python3 .circleci/parse_test_outputs.py --file 
tests_output.txt --fail"}}, + {"run": {"name": "Errors", "when": "always", "command": f"python3 .circleci/parse_test_outputs.py --file tests_output.txt --errors"}}, + {"store_test_results": {"path": "test-results"}}, + {"store_artifacts": {"path": "test-results/junit.xml"}}, + {"store_artifacts": {"path": "reports"}}, + {"store_artifacts": {"path": "tests.txt"}}, + {"store_artifacts": {"path": "splitted_tests.txt"}}, + {"store_artifacts": {"path": "installed.txt"}}, + ] + if self.parallelism: + job["parallelism"] = parallel job["steps"] = steps return job @property def job_name(self): - return self.name if "examples" in self.name else f"tests_{self.name}" + return self.name if ("examples" in self.name or "pipeline" in self.name or "pr_documentation" in self.name) else f"tests_{self.name}" # JOBS torch_and_tf_job = CircleCIJob( "torch_and_tf", docker_image=[{"image":"huggingface/transformers-torch-tf-light"}], - install_steps=["uv venv && uv pip install ."], additional_env={"RUN_PT_TF_CROSS_TESTS": True}, marker="is_pt_tf_cross_test", pytest_options={"rA": None, "durations": 0}, @@ -239,7 +176,6 @@ def job_name(self): "torch_and_flax", additional_env={"RUN_PT_FLAX_CROSS_TESTS": True}, docker_image=[{"image":"huggingface/transformers-torch-jax-light"}], - install_steps=["uv venv && uv pip install ."], marker="is_pt_flax_cross_test", pytest_options={"rA": None, "durations": 0}, ) @@ -247,35 +183,46 @@ def job_name(self): torch_job = CircleCIJob( "torch", docker_image=[{"image": "huggingface/transformers-torch-light"}], - install_steps=["uv venv && uv pip install ."], + marker="not generate", + parallelism=6, + pytest_num_workers=8 +) + +generate_job = CircleCIJob( + "generate", + docker_image=[{"image": "huggingface/transformers-torch-light"}], + marker="generate", parallelism=6, - pytest_num_workers=4 + pytest_num_workers=8 ) tokenization_job = CircleCIJob( "tokenization", docker_image=[{"image": "huggingface/transformers-torch-light"}], - install_steps=["uv venv && 
uv pip install ."], - parallelism=6, - pytest_num_workers=4 + parallelism=8, + pytest_num_workers=16 ) +processor_job = CircleCIJob( + "processors", + docker_image=[{"image": "huggingface/transformers-torch-light"}], + parallelism=8, + pytest_num_workers=6 +) tf_job = CircleCIJob( "tf", docker_image=[{"image":"huggingface/transformers-tf-light"}], - install_steps=["uv venv", "uv pip install -e."], parallelism=6, - pytest_num_workers=4, + pytest_num_workers=16, ) flax_job = CircleCIJob( "flax", docker_image=[{"image":"huggingface/transformers-jax-light"}], - install_steps=["uv venv && uv pip install ."], parallelism=6, - pytest_num_workers=4 + pytest_num_workers=16 ) @@ -283,8 +230,8 @@ def job_name(self): "pipelines_torch", additional_env={"RUN_PIPELINE_TESTS": True}, docker_image=[{"image":"huggingface/transformers-torch-light"}], - install_steps=["uv venv && uv pip install ."], marker="is_pipeline_test", + parallelism=4 ) @@ -292,8 +239,8 @@ def job_name(self): "pipelines_tf", additional_env={"RUN_PIPELINE_TESTS": True}, docker_image=[{"image":"huggingface/transformers-tf-light"}], - install_steps=["uv venv && uv pip install ."], marker="is_pipeline_test", + parallelism=4 ) @@ -301,34 +248,24 @@ def job_name(self): "custom_tokenizers", additional_env={"RUN_CUSTOM_TOKENIZERS": True}, docker_image=[{"image": "huggingface/transformers-custom-tokenizers"}], - install_steps=["uv venv","uv pip install -e ."], - parallelism=None, - resource_class=None, - tests_to_run=[ - "./tests/models/bert_japanese/test_tokenization_bert_japanese.py", - "./tests/models/openai/test_tokenization_openai.py", - "./tests/models/clip/test_tokenization_clip.py", - ], ) examples_torch_job = CircleCIJob( "examples_torch", additional_env={"OMP_NUM_THREADS": 8}, - cache_name="torch_examples", docker_image=[{"image":"huggingface/transformers-examples-torch"}], # TODO @ArthurZucker remove this once docker is easier to build install_steps=["uv venv && uv pip install . 
&& uv pip install -r examples/pytorch/_tests_requirements.txt"], - pytest_num_workers=1, + pytest_num_workers=8, ) examples_tensorflow_job = CircleCIJob( "examples_tensorflow", - cache_name="tensorflow_examples", + additional_env={"OMP_NUM_THREADS": 8}, docker_image=[{"image":"huggingface/transformers-examples-tf"}], - install_steps=["uv venv && uv pip install . && uv pip install -r examples/tensorflow/_tests_requirements.txt"], - parallelism=8 + pytest_num_workers=16, ) @@ -337,12 +274,12 @@ def job_name(self): additional_env={"HUGGINGFACE_CO_STAGING": True}, docker_image=[{"image":"huggingface/transformers-torch-light"}], install_steps=[ - "uv venv && uv pip install .", + 'uv venv && uv pip install .', 'git config --global user.email "ci@dummy.com"', 'git config --global user.name "ci"', ], marker="is_staging_test", - pytest_num_workers=1, + pytest_num_workers=2, ) @@ -350,8 +287,7 @@ def job_name(self): "onnx", docker_image=[{"image":"huggingface/transformers-torch-tf-light"}], install_steps=[ - "uv venv && uv pip install .", - "uv pip install --upgrade eager pip", + "uv venv", "uv pip install .[torch,tf,testing,sentencepiece,onnxruntime,vision,rjieba]", ], pytest_options={"k onnx": None}, @@ -361,15 +297,7 @@ def job_name(self): exotic_models_job = CircleCIJob( "exotic_models", - install_steps=["uv venv && uv pip install ."], docker_image=[{"image":"huggingface/transformers-exotic-models"}], - tests_to_run=[ - "tests/models/*layoutlmv*", - "tests/models/*nat", - "tests/models/deta", - "tests/models/udop", - "tests/models/nougat", - ], pytest_num_workers=12, parallelism=4, pytest_options={"durations": 100}, @@ -379,11 +307,17 @@ def job_name(self): repo_utils_job = CircleCIJob( "repo_utils", docker_image=[{"image":"huggingface/transformers-consistency"}], - install_steps=["uv venv && uv pip install ."], - parallelism=None, - pytest_num_workers=1, + pytest_num_workers=4, resource_class="large", - tests_to_run="tests/repo_utils", +) + + +non_model_job = 
CircleCIJob( + "non_model", + docker_image=[{"image": "huggingface/transformers-torch-light"}], + marker="not generate", + parallelism=6, + pytest_num_workers=8, ) @@ -392,28 +326,18 @@ def job_name(self): # the bash output redirection.) py_command = 'from utils.tests_fetcher import get_doctest_files; to_test = get_doctest_files() + ["dummy.py"]; to_test = " ".join(to_test); print(to_test)' py_command = f"$(python3 -c '{py_command}')" -command = f'echo "{py_command}" > pr_documentation_tests_temp.txt' +command = f'echo """{py_command}""" > pr_documentation_tests_temp.txt' doc_test_job = CircleCIJob( "pr_documentation_tests", docker_image=[{"image":"huggingface/transformers-consistency"}], additional_env={"TRANSFORMERS_VERBOSITY": "error", "DATASETS_VERBOSITY": "error", "SKIP_CUDA_DOCTEST": "1"}, install_steps=[ # Add an empty file to keep the test step running correctly even no file is selected to be tested. + "uv venv && pip install .", "touch dummy.py", - { - "name": "Get files to test", - "command": command, - }, - { - "name": "Show information in `Get files to test`", - "command": - "cat pr_documentation_tests_temp.txt" - }, - { - "name": "Get the last line in `pr_documentation_tests.txt`", - "command": - "tail -n1 pr_documentation_tests_temp.txt | tee pr_documentation_tests.txt" - }, + command, + "cat pr_documentation_tests_temp.txt", + "tail -n1 pr_documentation_tests_temp.txt | tee pr_documentation_tests_test_list.txt" ], tests_to_run="$(cat pr_documentation_tests.txt)", # noqa pytest_options={"-doctest-modules": None, "doctest-glob": "*.md", "dist": "loadfile", "rvsA": None}, @@ -421,121 +345,37 @@ def job_name(self): pytest_num_workers=1, ) -REGULAR_TESTS = [ - torch_and_tf_job, - torch_and_flax_job, - torch_job, - tf_job, - flax_job, - custom_tokenizers_job, - hub_job, - onnx_job, - exotic_models_job, - tokenization_job -] -EXAMPLES_TESTS = [ - examples_torch_job, - examples_tensorflow_job, -] -PIPELINE_TESTS = [ - pipelines_torch_job, - pipelines_tf_job, 
-] +REGULAR_TESTS = [torch_and_tf_job, torch_and_flax_job, torch_job, tf_job, flax_job, hub_job, onnx_job, tokenization_job, processor_job, generate_job, non_model_job] # fmt: skip +EXAMPLES_TESTS = [examples_torch_job, examples_tensorflow_job] +PIPELINE_TESTS = [pipelines_torch_job, pipelines_tf_job] REPO_UTIL_TESTS = [repo_utils_job] DOC_TESTS = [doc_test_job] - +ALL_TESTS = REGULAR_TESTS + EXAMPLES_TESTS + PIPELINE_TESTS + REPO_UTIL_TESTS + DOC_TESTS + [custom_tokenizers_job] + [exotic_models_job] # fmt: skip def create_circleci_config(folder=None): if folder is None: folder = os.getcwd() - # Used in CircleCIJob.to_dict() to expand the test list (for using parallelism) os.environ["test_preparation_dir"] = folder - jobs = [] - all_test_file = os.path.join(folder, "test_list.txt") - if os.path.exists(all_test_file): - with open(all_test_file) as f: - all_test_list = f.read() - else: - all_test_list = [] - if len(all_test_list) > 0: - jobs.extend(PIPELINE_TESTS) - - test_file = os.path.join(folder, "filtered_test_list.txt") - if os.path.exists(test_file): - with open(test_file) as f: - test_list = f.read() - else: - test_list = [] - if len(test_list) > 0: - jobs.extend(REGULAR_TESTS) - - extended_tests_to_run = set(test_list.split()) - # Extend the test files for cross test jobs - for job in jobs: - if job.job_name in ["tests_torch_and_tf", "tests_torch_and_flax"]: - for test_path in copy.copy(extended_tests_to_run): - dir_path, fn = os.path.split(test_path) - if fn.startswith("test_modeling_tf_"): - fn = fn.replace("test_modeling_tf_", "test_modeling_") - elif fn.startswith("test_modeling_flax_"): - fn = fn.replace("test_modeling_flax_", "test_modeling_") - else: - if job.job_name == "test_torch_and_tf": - fn = fn.replace("test_modeling_", "test_modeling_tf_") - elif job.job_name == "test_torch_and_flax": - fn = fn.replace("test_modeling_", "test_modeling_flax_") - new_test_file = str(os.path.join(dir_path, fn)) - if os.path.isfile(new_test_file): - if 
new_test_file not in extended_tests_to_run: - extended_tests_to_run.add(new_test_file) - extended_tests_to_run = sorted(extended_tests_to_run) - for job in jobs: - if job.job_name in ["tests_torch_and_tf", "tests_torch_and_flax"]: - job.tests_to_run = extended_tests_to_run - fn = "filtered_test_list_cross_tests.txt" - f_path = os.path.join(folder, fn) - with open(f_path, "w") as fp: - fp.write(" ".join(extended_tests_to_run)) - - example_file = os.path.join(folder, "examples_test_list.txt") - if os.path.exists(example_file) and os.path.getsize(example_file) > 0: - with open(example_file, "r", encoding="utf-8") as f: - example_tests = f.read() - for job in EXAMPLES_TESTS: - framework = job.name.replace("examples_", "").replace("torch", "pytorch") - if example_tests == "all": - job.tests_to_run = [f"examples/{framework}"] - else: - job.tests_to_run = [f for f in example_tests.split(" ") if f.startswith(f"examples/{framework}")] - - if len(job.tests_to_run) > 0: - jobs.append(job) - - doctest_file = os.path.join(folder, "doctest_list.txt") - if os.path.exists(doctest_file): - with open(doctest_file) as f: - doctest_list = f.read() - else: - doctest_list = [] - if len(doctest_list) > 0: - jobs.extend(DOC_TESTS) - - repo_util_file = os.path.join(folder, "test_repo_utils.txt") - if os.path.exists(repo_util_file) and os.path.getsize(repo_util_file) > 0: - jobs.extend(REPO_UTIL_TESTS) + jobs = [k for k in ALL_TESTS if os.path.isfile(os.path.join("test_preparation" , f"{k.job_name}_test_list.txt") )] + print("The following jobs will be run ", jobs) if len(jobs) == 0: jobs = [EmptyJob()] - config = {"version": "2.1"} - config["parameters"] = { - # Only used to accept the parameters from the trigger - "nightly": {"type": "boolean", "default": False}, - "tests_to_run": {"type": "string", "default": test_list}, + print("Full list of job name inputs", {j.job_name + "_test_list":{"type":"string", "default":''} for j in jobs}) + config = { + "version": "2.1", + "parameters": { + # 
Only used to accept the parameters from the trigger + "nightly": {"type": "boolean", "default": False}, + "tests_to_run": {"type": "string", "default": ''}, + **{j.job_name + "_test_list":{"type":"string", "default":''} for j in jobs}, + **{j.job_name + "_parallelism":{"type":"integer", "default":1} for j in jobs}, + }, + "jobs" : {j.job_name: j.to_dict() for j in jobs}, + "workflows": {"version": 2, "run_tests": {"jobs": [j.job_name for j in jobs]}} } - config["jobs"] = {j.job_name: j.to_dict() for j in jobs} - config["workflows"] = {"version": 2, "run_tests": {"jobs": [j.job_name for j in jobs]}} with open(os.path.join(folder, "generated_config.yml"), "w") as f: - f.write(yaml.dump(config, indent=2, width=1000000, sort_keys=False)) + f.write(yaml.dump(config, sort_keys=False, default_flow_style=False).replace("' << pipeline", " << pipeline").replace(">> '", " >>")) if __name__ == "__main__": diff --git a/.circleci/parse_test_outputs.py b/.circleci/parse_test_outputs.py index b80ce8513a1f91..a69da1a3eafb27 100644 --- a/.circleci/parse_test_outputs.py +++ b/.circleci/parse_test_outputs.py @@ -67,4 +67,4 @@ def main(): if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/.coveragerc b/.coveragerc deleted file mode 100644 index 9a1103b8af3d01..00000000000000 --- a/.coveragerc +++ /dev/null @@ -1,12 +0,0 @@ -[run] -source=transformers -omit = - # skip convertion scripts from testing for now - */convert_* - */__main__.py -[report] -exclude_lines = - pragma: no cover - raise - except - register_parameter \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml index 7415ca71d46640..ea7d6a02252cf5 100644 --- a/.github/ISSUE_TEMPLATE/bug-report.yml +++ b/.github/ISSUE_TEMPLATE/bug-report.yml @@ -37,17 +37,17 @@ body: Models: - text models: @ArthurZucker - - vision models: @amyeroberts - - speech models: @sanchit-gandhi + - vision models: @amyeroberts, @qubvel + - speech models: 
@ylacombe, @eustlb - graph models: @clefourrier Library: - flax: @sanchit-gandhi - generate: @zucchini-nlp (visual-language models) or @gante (all others) - - pipelines: @Narsil + - pipelines: @Rocketknight1 - tensorflow: @gante and @Rocketknight1 - - tokenizers: @ArthurZucker + - tokenizers: @ArthurZucker and @itazap - trainer: @muellerzr @SunMarc Integrations: diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index cf638dc5925544..417f5a2e45b58c 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -40,18 +40,19 @@ members/contributors who may be interested in your PR. Models: - text models: @ArthurZucker -- vision models: @amyeroberts -- speech models: @sanchit-gandhi +- vision models: @amyeroberts, @qubvel +- speech models: @ylacombe, @eustlb - graph models: @clefourrier Library: - flax: @sanchit-gandhi - generate: @zucchini-nlp (visual-language models) or @gante (all others) -- pipelines: @Narsil +- pipelines: @Rocketknight1 - tensorflow: @gante and @Rocketknight1 - tokenizers: @ArthurZucker - trainer: @muellerzr and @SunMarc +- chat templates: @Rocketknight1 Integrations: diff --git a/.github/workflows/build-docker-images.yml b/.github/workflows/build-docker-images.yml index df772db773e262..c21faf2d747942 100644 --- a/.github/workflows/build-docker-images.yml +++ b/.github/workflows/build-docker-images.yml @@ -20,7 +20,8 @@ concurrency: jobs: latest-docker: name: "Latest PyTorch + TensorFlow [dev]" - runs-on: [intel-cpu, 8-cpu, ci] + runs-on: + group: aws-general-8-plus steps: - name: Set up Docker Buildx @@ -68,7 +69,8 @@ jobs: latest-torch-deepspeed-docker: name: "Latest PyTorch + DeepSpeed" - runs-on: [intel-cpu, 8-cpu, ci] + runs-on: + group: aws-general-8-plus steps: - name: Set up Docker Buildx @@ -104,7 +106,8 @@ jobs: # Can't build 2 images in a single job `latest-torch-deepspeed-docker` (for `nvcr.io/nvidia`) latest-torch-deepspeed-docker-for-push-ci-daily-build: name: "Latest PyTorch + 
DeepSpeed (Push CI - Daily Build)" - runs-on: [intel-cpu, 8-cpu, ci] + runs-on: + group: aws-general-8-plus steps: - name: Set up Docker Buildx @@ -145,7 +148,8 @@ jobs: name: "Doc builder" # Push CI doesn't need this image if: inputs.image_postfix != '-push-ci' - runs-on: [intel-cpu, 8-cpu, ci] + runs-on: + group: aws-general-8-plus steps: - name: Set up Docker Buildx @@ -180,7 +184,8 @@ jobs: name: "Latest PyTorch [dev]" # Push CI doesn't need this image if: inputs.image_postfix != '-push-ci' - runs-on: [intel-cpu, 8-cpu, ci] + runs-on: + group: aws-general-8-plus steps: - name: Set up Docker Buildx @@ -215,7 +220,8 @@ jobs: latest-pytorch-amd: name: "Latest PyTorch (AMD) [dev]" - runs-on: [intel-cpu, 8-cpu, ci] + runs-on: + group: aws-general-8-plus steps: - name: Set up Docker Buildx @@ -265,7 +271,8 @@ jobs: name: "Latest TensorFlow [dev]" # Push CI doesn't need this image if: inputs.image_postfix != '-push-ci' - runs-on: [intel-cpu, 8-cpu, ci] + runs-on: + group: aws-general-8-plus steps: - name: Set up Docker Buildx @@ -300,7 +307,8 @@ jobs: latest-pytorch-deepspeed-amd: name: "PyTorch + DeepSpeed (AMD) [dev]" - runs-on: [intel-cpu, 8-cpu, ci] + runs-on: + group: aws-general-8-plus steps: - name: Set up Docker Buildx @@ -350,7 +358,8 @@ jobs: name: "Latest Pytorch + Quantization [dev]" # Push CI doesn't need this image if: inputs.image_postfix != '-push-ci' - runs-on: [intel-cpu, 8-cpu, ci] + runs-on: + group: aws-general-8-plus steps: - name: Set up Docker Buildx diff --git a/.github/workflows/build-nightly-ci-docker-images.yml b/.github/workflows/build-nightly-ci-docker-images.yml index 0b1b7df5f8a2ed..4b00a6d3fae366 100644 --- a/.github/workflows/build-nightly-ci-docker-images.yml +++ b/.github/workflows/build-nightly-ci-docker-images.yml @@ -13,7 +13,8 @@ concurrency: jobs: latest-with-torch-nightly-docker: name: "Nightly PyTorch + Stable TensorFlow" - runs-on: [intel-cpu, 8-cpu, ci] + runs-on: + group: aws-general-8-plus steps: - name: Set up Docker 
Buildx @@ -40,7 +41,8 @@ jobs: nightly-torch-deepspeed-docker: name: "Nightly PyTorch + DeepSpeed" - runs-on: [intel-cpu, 8-cpu, ci] + runs-on: + group: aws-general-8-plus steps: - name: Set up Docker Buildx @@ -62,4 +64,4 @@ jobs: build-args: | REF=main push: true - tags: huggingface/transformers-pytorch-deepspeed-nightly-gpu \ No newline at end of file + tags: huggingface/transformers-pytorch-deepspeed-nightly-gpu diff --git a/.github/workflows/build-past-ci-docker-images.yml b/.github/workflows/build-past-ci-docker-images.yml index 6ee60b8a6b60f2..c4f0b78986caea 100644 --- a/.github/workflows/build-past-ci-docker-images.yml +++ b/.github/workflows/build-past-ci-docker-images.yml @@ -16,7 +16,8 @@ jobs: fail-fast: false matrix: version: ["1.13", "1.12", "1.11"] - runs-on: [intel-cpu, 8-cpu, ci] + runs-on: + group: aws-general-8-plus steps: - name: Set up Docker Buildx @@ -60,7 +61,8 @@ jobs: fail-fast: false matrix: version: ["2.11", "2.10", "2.9", "2.8", "2.7", "2.6", "2.5"] - runs-on: [intel-cpu, 8-cpu, ci] + runs-on: + group: aws-general-8-plus steps: - name: Set up Docker Buildx diff --git a/.github/workflows/build_documentation.yml b/.github/workflows/build_documentation.yml index e3e3b5f2df37f1..c55638ded1497c 100644 --- a/.github/workflows/build_documentation.yml +++ b/.github/workflows/build_documentation.yml @@ -1,6 +1,7 @@ name: Build documentation on: + workflow_dispatch: push: branches: - main @@ -15,7 +16,7 @@ jobs: commit_sha: ${{ github.sha }} package: transformers notebook_folder: transformers_doc - languages: de en es fr hi it ko pt tr zh ja te + languages: ar de en es fr hi it ko pt tr zh ja te custom_container: huggingface/transformers-doc-builder secrets: token: ${{ secrets.HUGGINGFACE_PUSH }} diff --git a/.github/workflows/build_pr_documentation.yml b/.github/workflows/build_pr_documentation.yml index c8d073ea34688f..f698f860b2f93c 100644 --- a/.github/workflows/build_pr_documentation.yml +++ b/.github/workflows/build_pr_documentation.yml @@ 
-14,5 +14,5 @@ jobs: commit_sha: ${{ github.event.pull_request.head.sha }} pr_number: ${{ github.event.number }} package: transformers - languages: de en es fr hi it ko pt tr zh ja te + languages: ar de en es fr hi it ko pt tr zh ja te custom_container: huggingface/transformers-doc-builder diff --git a/.github/workflows/model_jobs.yml b/.github/workflows/model_jobs.yml index 454d03f4245681..001e2c531d9bc8 100644 --- a/.github/workflows/model_jobs.yml +++ b/.github/workflows/model_jobs.yml @@ -41,7 +41,8 @@ jobs: fail-fast: false matrix: folders: ${{ fromJson(inputs.folder_slices)[inputs.slice_id] }} - runs-on: ['${{ inputs.machine_type }}', nvidia-gpu, t4, '${{ inputs.runner }}'] + runs-on: + group: '${{ inputs.machine_type }}' container: image: ${{ inputs.docker }} options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ @@ -97,25 +98,42 @@ jobs: working-directory: /transformers run: pip freeze + - name: Set `machine_type` for report and artifact names + working-directory: /transformers + shell: bash + run: | + echo "${{ inputs.machine_type }}" + + if [ "${{ inputs.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then + machine_type=single-gpu + elif [ "${{ inputs.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then + machine_type=multi-gpu + else + machine_type=${{ inputs.machine_type }} + fi + + echo "$machine_type" + echo "machine_type=$machine_type" >> $GITHUB_ENV + - name: Run all tests on GPU working-directory: /transformers - run: python3 -m pytest -rsfE -v --make-reports=${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }} + run: python3 -m pytest -rsfE -v --make-reports=${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }} - name: Failure short reports if: ${{ failure() }} continue-on-error: true - run: cat /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt 
+ run: cat /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt - name: Run test shell: bash run: | - mkdir -p /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports - echo "hello" > /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/hello.txt - echo "${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports" + mkdir -p /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports + echo "hello" > /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/hello.txt + echo "${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports" - - name: "Test suite reports artifacts: ${{ inputs.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports" + - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports" if: ${{ always() }} uses: actions/upload-artifact@v4 with: - name: ${{ inputs.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports - path: /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports + name: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports + path: /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports diff --git a/.github/workflows/model_jobs_amd.yml b/.github/workflows/model_jobs_amd.yml new file mode 100644 index 00000000000000..a7e6c7b1ccd576 --- /dev/null +++ b/.github/workflows/model_jobs_amd.yml @@ -0,0 +1,129 @@ +name: model jobs + +on: + workflow_call: + inputs: + folder_slices: + required: true + type: string + machine_type: + required: true + type: string + slice_id: + required: true + type: number + runner: + required: true + type: string + docker: + required: true + type: string + +env: + 
HF_HOME: /mnt/cache + TRANSFORMERS_IS_CI: yes + OMP_NUM_THREADS: 8 + MKL_NUM_THREADS: 8 + RUN_SLOW: yes + # For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access. + # This token is created under the bot `hf-transformers-bot`. + HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }} + SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }} + TF_FORCE_GPU_ALLOW_GROWTH: true + RUN_PT_TF_CROSS_TESTS: 1 + CUDA_VISIBLE_DEVICES: 0,1 + +jobs: + run_models_gpu: + name: " " + strategy: + max-parallel: 1 # For now, not to parallelize. Can change later if it works well. + fail-fast: false + matrix: + folders: ${{ fromJson(inputs.folder_slices)[inputs.slice_id] }} + runs-on: ['${{ inputs.machine_type }}', self-hosted, amd-gpu, '${{ inputs.runner }}'] + container: + image: ${{ inputs.docker }} + options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ + steps: + - name: Echo input and matrix info + shell: bash + run: | + echo "${{ inputs.folder_slices }}" + echo "${{ matrix.folders }}" + echo "${{ toJson(fromJson(inputs.folder_slices)[inputs.slice_id]) }}" + + - name: Echo folder ${{ matrix.folders }} + shell: bash + # For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to + # set the artifact folder names (because the character `/` is not allowed). + run: | + echo "${{ matrix.folders }}" + matrix_folders=${{ matrix.folders }} + matrix_folders=${matrix_folders/'models/'/'models_'} + echo "$matrix_folders" + echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV + + - name: Update clone + working-directory: /transformers + run: git fetch && git checkout ${{ github.sha }} + + - name: Reinstall transformers in edit mode (remove the one installed during docker image build) + working-directory: /transformers + run: python3 -m pip uninstall -y transformers && python3 -m pip install -e . 
+ + - name: Update / Install some packages (for Past CI) + if: ${{ contains(inputs.docker, '-past-') }} + working-directory: /transformers + run: | + python3 -m pip install -U datasets + + - name: Update / Install some packages (for Past CI) + if: ${{ contains(inputs.docker, '-past-') && contains(inputs.docker, '-pytorch-') }} + working-directory: /transformers + run: | + python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate + + - name: ROCM-SMI + run: | + rocm-smi + + - name: ROCM-INFO + run: | + rocminfo | grep "Agent" -A 14 + + - name: Show ROCR environment + run: | + echo "ROCR: $ROCR_VISIBLE_DEVICES" + + - name: Environment + working-directory: /transformers + run: | + python3 utils/print_env.py + + - name: Show installed libraries and their versions + working-directory: /transformers + run: pip freeze + + - name: Run all tests on GPU + working-directory: /transformers + run: python3 -m pytest -rsfE -v --make-reports=${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }} -m "not not_device_test" + + - name: Failure short reports + if: ${{ failure() }} + continue-on-error: true + run: cat /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt + + - name: Run test + shell: bash + run: | + mkdir -p /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports + echo "hello" > /transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/hello.txt + echo "${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports" + + - name: "Test suite reports artifacts: ${{ inputs.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports" + if: ${{ always() }} + uses: actions/upload-artifact@v4 + with: + name: ${{ inputs.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports + path: 
/transformers/reports/${{ inputs.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports diff --git a/.github/workflows/self-push-amd.yml b/.github/workflows/self-push-amd.yml index 6d046b147830f3..6931c2f3eadcad 100644 --- a/.github/workflows/self-push-amd.yml +++ b/.github/workflows/self-push-amd.yml @@ -64,23 +64,24 @@ jobs: outputs: matrix: ${{ steps.set-matrix.outputs.matrix }} test_map: ${{ steps.set-matrix.outputs.test_map }} + env: + # `CI_BRANCH_PUSH`: The branch name from the push event + # `CI_BRANCH_WORKFLOW_RUN`: The name of the branch on which this workflow is triggered by `workflow_run` event + # `CI_SHA_PUSH`: The commit SHA from the push event + # `CI_SHA_WORKFLOW_RUN`: The commit SHA that triggers this workflow by `workflow_run` event + CI_BRANCH_PUSH: ${{ github.event.ref }} + CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }} + CI_SHA_PUSH: ${{ github.event.head_commit.id }} + CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }} steps: # Necessary to get the correct branch name and commit SHA for `workflow_run` event # We also take into account the `push` event (we might want to test some changes in a branch) - name: Prepare custom environment variables shell: bash - # `CI_BRANCH_PUSH`: The branch name from the push event - # `CI_BRANCH_WORKFLOW_RUN`: The name of the branch on which this workflow is triggered by `workflow_run` event # `CI_BRANCH`: The non-empty branch name from the above two (one and only one of them is empty) - # `CI_SHA_PUSH`: The commit SHA from the push event - # `CI_SHA_WORKFLOW_RUN`: The commit SHA that triggers this workflow by `workflow_run` event # `CI_SHA`: The non-empty commit SHA from the above two (one and only one of them is empty) run: | - CI_BRANCH_PUSH=${{ github.event.ref }} CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''} - CI_BRANCH_WORKFLOW_RUN=${{ github.event.workflow_run.head_branch }} - CI_SHA_PUSH=${{ github.event.head_commit.id }} - CI_SHA_WORKFLOW_RUN=${{ 
github.event.workflow_run.head_sha }} echo $CI_BRANCH_PUSH echo $CI_BRANCH_WORKFLOW_RUN echo $CI_SHA_PUSH @@ -159,6 +160,12 @@ jobs: container: image: huggingface/transformers-pytorch-amd-gpu-push-ci # <--- We test only for PyTorch for now options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ + env: + # For the meaning of these environment variables, see the job `Setup` + CI_BRANCH_PUSH: ${{ github.event.ref }} + CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }} + CI_SHA_PUSH: ${{ github.event.head_commit.id }} + CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }} steps: # Necessary to get the correct branch name and commit SHA for `workflow_run` event # We also take into account the `push` event (we might want to test some changes in a branch) @@ -166,11 +173,7 @@ jobs: shell: bash # For the meaning of these environment variables, see the job `Setup` run: | - CI_BRANCH_PUSH=${{ github.event.ref }} CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''} - CI_BRANCH_WORKFLOW_RUN=${{ github.event.workflow_run.head_branch }} - CI_SHA_PUSH=${{ github.event.head_commit.id }} - CI_SHA_WORKFLOW_RUN=${{ github.event.workflow_run.head_sha }} echo $CI_BRANCH_PUSH echo $CI_BRANCH_WORKFLOW_RUN echo $CI_SHA_PUSH @@ -256,6 +259,12 @@ jobs: # run_tests_torch_cuda_extensions_single_gpu, # run_tests_torch_cuda_extensions_multi_gpu ] + env: + # For the meaning of these environment variables, see the job `Setup` + CI_BRANCH_PUSH: ${{ github.event.ref }} + CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }} + CI_SHA_PUSH: ${{ github.event.head_commit.id }} + CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }} steps: - name: Preliminary job status shell: bash @@ -271,11 +280,7 @@ jobs: shell: bash # For the meaning of these environment variables, see the job `Setup` run: | - CI_BRANCH_PUSH=${{ github.event.ref }} 
CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''} - CI_BRANCH_WORKFLOW_RUN=${{ github.event.workflow_run.head_branch }} - CI_SHA_PUSH=${{ github.event.head_commit.id }} - CI_SHA_WORKFLOW_RUN=${{ github.event.workflow_run.head_sha }} echo $CI_BRANCH_PUSH echo $CI_BRANCH_WORKFLOW_RUN echo $CI_SHA_PUSH diff --git a/.github/workflows/self-push.yml b/.github/workflows/self-push.yml index 31f68c291b5a0f..b328f65d34a5fe 100644 --- a/.github/workflows/self-push.yml +++ b/.github/workflows/self-push.yml @@ -40,23 +40,24 @@ jobs: outputs: matrix: ${{ steps.set-matrix.outputs.matrix }} test_map: ${{ steps.set-matrix.outputs.test_map }} + env: + # `CI_BRANCH_PUSH`: The branch name from the push event + # `CI_BRANCH_WORKFLOW_RUN`: The name of the branch on which this workflow is triggered by `workflow_run` event + # `CI_SHA_PUSH`: The commit SHA from the push event + # `CI_SHA_WORKFLOW_RUN`: The commit SHA that triggers this workflow by `workflow_run` event + CI_BRANCH_PUSH: ${{ github.event.ref }} + CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }} + CI_SHA_PUSH: ${{ github.event.head_commit.id }} + CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }} steps: # Necessary to get the correct branch name and commit SHA for `workflow_run` event # We also take into account the `push` event (we might want to test some changes in a branch) - name: Prepare custom environment variables shell: bash - # `CI_BRANCH_PUSH`: The branch name from the push event - # `CI_BRANCH_WORKFLOW_RUN`: The name of the branch on which this workflow is triggered by `workflow_run` event # `CI_BRANCH`: The non-empty branch name from the above two (one and only one of them is empty) - # `CI_SHA_PUSH`: The commit SHA from the push event - # `CI_SHA_WORKFLOW_RUN`: The commit SHA that triggers this workflow by `workflow_run` event # `CI_SHA`: The non-empty commit SHA from the above two (one and only one of them is empty) run: | - CI_BRANCH_PUSH=${{ github.event.ref }} 
CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''} - CI_BRANCH_WORKFLOW_RUN=${{ github.event.workflow_run.head_branch }} - CI_SHA_PUSH=${{ github.event.head_commit.id }} - CI_SHA_WORKFLOW_RUN=${{ github.event.workflow_run.head_sha }} echo $CI_BRANCH_PUSH echo $CI_BRANCH_WORKFLOW_RUN echo $CI_SHA_PUSH @@ -135,6 +136,12 @@ jobs: container: image: huggingface/transformers-all-latest-gpu-push-ci options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ + env: + # For the meaning of these environment variables, see the job `Setup` + CI_BRANCH_PUSH: ${{ github.event.ref }} + CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }} + CI_SHA_PUSH: ${{ github.event.head_commit.id }} + CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }} steps: # Necessary to get the correct branch name and commit SHA for `workflow_run` event # We also take into account the `push` event (we might want to test some changes in a branch) @@ -142,11 +149,7 @@ jobs: shell: bash # For the meaning of these environment variables, see the job `Setup` run: | - CI_BRANCH_PUSH=${{ github.event.ref }} CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''} - CI_BRANCH_WORKFLOW_RUN=${{ github.event.workflow_run.head_branch }} - CI_SHA_PUSH=${{ github.event.head_commit.id }} - CI_SHA_WORKFLOW_RUN=${{ github.event.workflow_run.head_sha }} echo $CI_BRANCH_PUSH echo $CI_BRANCH_WORKFLOW_RUN echo $CI_SHA_PUSH @@ -228,6 +231,12 @@ jobs: container: image: huggingface/transformers-all-latest-gpu-push-ci options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ + env: + # For the meaning of these environment variables, see the job `Setup` + CI_BRANCH_PUSH: ${{ github.event.ref }} + CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }} + CI_SHA_PUSH: ${{ github.event.head_commit.id }} + CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }} steps: # Necessary to get the correct branch name and commit SHA for 
`workflow_run` event # We also take into account the `push` event (we might want to test some changes in a branch) @@ -235,11 +244,7 @@ jobs: shell: bash # For the meaning of these environment variables, see the job `Setup` run: | - CI_BRANCH_PUSH=${{ github.event.ref }} CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''} - CI_BRANCH_WORKFLOW_RUN=${{ github.event.workflow_run.head_branch }} - CI_SHA_PUSH=${{ github.event.head_commit.id }} - CI_SHA_WORKFLOW_RUN=${{ github.event.workflow_run.head_sha }} echo $CI_BRANCH_PUSH echo $CI_BRANCH_WORKFLOW_RUN echo $CI_SHA_PUSH @@ -321,6 +326,12 @@ jobs: container: image: huggingface/transformers-pytorch-deepspeed-latest-gpu-push-ci options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ + env: + # For the meaning of these environment variables, see the job `Setup` + CI_BRANCH_PUSH: ${{ github.event.ref }} + CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }} + CI_SHA_PUSH: ${{ github.event.head_commit.id }} + CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }} steps: # Necessary to get the correct branch name and commit SHA for `workflow_run` event # We also take into account the `push` event (we might want to test some changes in a branch) @@ -328,11 +339,7 @@ jobs: shell: bash # For the meaning of these environment variables, see the job `Setup` run: | - CI_BRANCH_PUSH=${{ github.event.ref }} CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''} - CI_BRANCH_WORKFLOW_RUN=${{ github.event.workflow_run.head_branch }} - CI_SHA_PUSH=${{ github.event.head_commit.id }} - CI_SHA_WORKFLOW_RUN=${{ github.event.workflow_run.head_sha }} echo $CI_BRANCH_PUSH echo $CI_BRANCH_WORKFLOW_RUN echo $CI_SHA_PUSH @@ -411,6 +418,12 @@ jobs: container: image: huggingface/transformers-pytorch-deepspeed-latest-gpu-push-ci options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ + env: + # For the meaning of these environment variables, see the job 
`Setup` + CI_BRANCH_PUSH: ${{ github.event.ref }} + CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }} + CI_SHA_PUSH: ${{ github.event.head_commit.id }} + CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }} steps: # Necessary to get the correct branch name and commit SHA for `workflow_run` event # We also take into account the `push` event (we might want to test some changes in a branch) @@ -418,11 +431,7 @@ jobs: shell: bash # For the meaning of these environment variables, see the job `Setup` run: | - CI_BRANCH_PUSH=${{ github.event.ref }} CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''} - CI_BRANCH_WORKFLOW_RUN=${{ github.event.workflow_run.head_branch }} - CI_SHA_PUSH=${{ github.event.head_commit.id }} - CI_SHA_WORKFLOW_RUN=${{ github.event.workflow_run.head_sha }} echo $CI_BRANCH_PUSH echo $CI_BRANCH_WORKFLOW_RUN echo $CI_SHA_PUSH @@ -500,6 +509,12 @@ jobs: run_tests_torch_cuda_extensions_single_gpu, run_tests_torch_cuda_extensions_multi_gpu ] + env: + # For the meaning of these environment variables, see the job `Setup` + CI_BRANCH_PUSH: ${{ github.event.ref }} + CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }} + CI_SHA_PUSH: ${{ github.event.head_commit.id }} + CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }} steps: - name: Preliminary job status shell: bash @@ -513,11 +528,7 @@ jobs: shell: bash # For the meaning of these environment variables, see the job `Setup` run: | - CI_BRANCH_PUSH=${{ github.event.ref }} CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''} - CI_BRANCH_WORKFLOW_RUN=${{ github.event.workflow_run.head_branch }} - CI_SHA_PUSH=${{ github.event.head_commit.id }} - CI_SHA_WORKFLOW_RUN=${{ github.event.workflow_run.head_sha }} echo $CI_BRANCH_PUSH echo $CI_BRANCH_WORKFLOW_RUN echo $CI_SHA_PUSH diff --git a/.github/workflows/self-scheduled-amd-mi210-caller.yml b/.github/workflows/self-scheduled-amd-mi210-caller.yml index 6abba6894aaffa..1c79b38a314e0b 100644 --- 
a/.github/workflows/self-scheduled-amd-mi210-caller.yml +++ b/.github/workflows/self-scheduled-amd-mi210-caller.yml @@ -10,11 +10,46 @@ on: - run_amd_scheduled_ci_caller* jobs: - run_amd_ci: - name: AMD mi210 - if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_scheduled_ci_caller'))) + model-ci: + name: Model CI uses: ./.github/workflows/self-scheduled-amd.yml with: - gpu_flavor: mi210 + job: run_models_gpu slack_report_channel: "#transformers-ci-daily-amd" + runner: mi210 + docker: huggingface/transformers-pytorch-amd-gpu + ci_event: Scheduled CI (AMD) - mi210 + secrets: inherit + + torch-pipeline: + name: Torch pipeline CI + uses: ./.github/workflows/self-scheduled-amd.yml + with: + job: run_pipelines_torch_gpu + slack_report_channel: "#transformers-ci-daily-amd" + runner: mi210 + docker: huggingface/transformers-pytorch-amd-gpu + ci_event: Scheduled CI (AMD) - mi210 + secrets: inherit + + example-ci: + name: Example CI + uses: ./.github/workflows/self-scheduled-amd.yml + with: + job: run_examples_gpu + slack_report_channel: "#transformers-ci-daily-amd" + runner: mi210 + docker: huggingface/transformers-pytorch-amd-gpu + ci_event: Scheduled CI (AMD) - mi210 + secrets: inherit + + deepspeed-ci: + name: DeepSpeed CI + uses: ./.github/workflows/self-scheduled-amd.yml + with: + job: run_torch_cuda_extensions_gpu + slack_report_channel: "#transformers-ci-daily-amd" + runner: mi210 + docker: huggingface/transformers-pytorch-deepspeed-amd-gpu + ci_event: Scheduled CI (AMD) - mi210 secrets: inherit diff --git a/.github/workflows/self-scheduled-amd-mi250-caller.yml b/.github/workflows/self-scheduled-amd-mi250-caller.yml index 36365d4a67f1e2..fd151305716396 100644 --- a/.github/workflows/self-scheduled-amd-mi250-caller.yml +++ b/.github/workflows/self-scheduled-amd-mi250-caller.yml @@ -10,11 +10,46 @@ on: - run_amd_scheduled_ci_caller* jobs: - run_amd_ci: - name: AMD mi250 - if: 
(cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_scheduled_ci_caller'))) + model-ci: + name: Model CI uses: ./.github/workflows/self-scheduled-amd.yml with: - gpu_flavor: mi250 + job: run_models_gpu slack_report_channel: "#transformers-ci-daily-amd" + runner: mi250 + docker: huggingface/transformers-pytorch-amd-gpu + ci_event: Scheduled CI (AMD) - mi250 + secrets: inherit + + torch-pipeline: + name: Torch pipeline CI + uses: ./.github/workflows/self-scheduled-amd.yml + with: + job: run_pipelines_torch_gpu + slack_report_channel: "#transformers-ci-daily-amd" + runner: mi250 + docker: huggingface/transformers-pytorch-amd-gpu + ci_event: Scheduled CI (AMD) - mi250 + secrets: inherit + + example-ci: + name: Example CI + uses: ./.github/workflows/self-scheduled-amd.yml + with: + job: run_examples_gpu + slack_report_channel: "#transformers-ci-daily-amd" + runner: mi250 + docker: huggingface/transformers-pytorch-amd-gpu + ci_event: Scheduled CI (AMD) - mi250 + secrets: inherit + + deepspeed-ci: + name: DeepSpeed CI + uses: ./.github/workflows/self-scheduled-amd.yml + with: + job: run_torch_cuda_extensions_gpu + slack_report_channel: "#transformers-ci-daily-amd" + runner: mi250 + docker: huggingface/transformers-pytorch-deepspeed-amd-gpu + ci_event: Scheduled CI (AMD) - mi250 secrets: inherit diff --git a/.github/workflows/self-scheduled-amd-mi300-caller.yml b/.github/workflows/self-scheduled-amd-mi300-caller.yml deleted file mode 100644 index a9e7b934c34b77..00000000000000 --- a/.github/workflows/self-scheduled-amd-mi300-caller.yml +++ /dev/null @@ -1,21 +0,0 @@ -name: Self-hosted runner (AMD mi300 scheduled CI caller) - -on: - workflow_run: - workflows: ["Self-hosted runner (AMD scheduled CI caller)"] - branches: ["main"] - types: [completed] - push: - branches: - - run_amd_scheduled_ci_caller* - -jobs: - run_amd_ci: - name: AMD mi300 - needs: build-docker-containers - if: 
(cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && (startsWith(github.ref_name, 'run_amd_push_ci_caller') || startsWith(github.ref_name, 'mi300-ci')))) - uses: ./.github/workflows/self-scheduled-amd.yml - with: - gpu_flavor: mi300 - slack_report_channel: "#transformers-ci-daily-amd" - secrets: inherit diff --git a/.github/workflows/self-scheduled-amd.yml b/.github/workflows/self-scheduled-amd.yml index f3b17bfbffb022..47f92cd6a2b086 100644 --- a/.github/workflows/self-scheduled-amd.yml +++ b/.github/workflows/self-scheduled-amd.yml @@ -3,10 +3,23 @@ name: Self-hosted runner (scheduled-amd) # Note: For the AMD CI, we rely on a caller workflow and on the workflow_call event to trigger the # CI in order to run it on both MI210 and MI250, without having to use matrix here which pushes # us towards the limit of allowed jobs on GitHub Actions. + on: workflow_call: inputs: - gpu_flavor: + job: + required: true + type: string + slack_report_channel: + required: true + type: string + runner: + required: true + type: string + docker: + required: true + type: string + ci_event: required: true type: string @@ -18,7 +31,7 @@ env: RUN_SLOW: yes HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }} SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }} - + NUM_SLICES: 2 # Important note: each job (run_tests_single_gpu, run_tests_multi_gpu, run_examples_gpu, run_pipelines_torch_gpu) requires all the previous jobs before running. 
# This is done so that we avoid parallelizing the scheduled tests, to leave available @@ -42,7 +55,7 @@ jobs: strategy: matrix: machine_type: [single-gpu, multi-gpu] - runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}'] + runs-on: ['${{ matrix.machine_type }}', self-hosted, amd-gpu, '${{ inputs.runner }}'] container: image: huggingface/transformers-pytorch-amd-gpu options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ @@ -50,25 +63,29 @@ jobs: - name: ROCM-SMI run: | rocm-smi + - name: ROCM-INFO run: | rocminfo | grep "Agent" -A 14 + - name: Show ROCR environment run: | echo "ROCR: $ROCR_VISIBLE_DEVICES" setup: + if: contains(fromJSON('["run_models_gpu"]'), inputs.job) name: Setup needs: check_runners strategy: matrix: machine_type: [single-gpu, multi-gpu] - runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}'] + runs-on: ['${{ matrix.machine_type }}', self-hosted, amd-gpu, '${{ inputs.runner }}'] container: image: huggingface/transformers-pytorch-amd-gpu options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ outputs: - matrix: ${{ steps.set-matrix.outputs.matrix }} + folder_slices: ${{ steps.set-matrix.outputs.folder_slices }} + slice_ids: ${{ steps.set-matrix.outputs.slice_ids }} steps: - name: Update clone working-directory: /transformers @@ -90,7 +107,8 @@ jobs: name: Identify models to test working-directory: /transformers/tests run: | - echo "matrix=$(python3 -c 'import os; tests = os.getcwd(); model_tests = os.listdir(os.path.join(tests, "models")); d1 = sorted(list(filter(os.path.isdir, os.listdir(tests)))); d2 = sorted(list(filter(os.path.isdir, [f"models/{x}" for x in model_tests]))); d1.remove("models"); d = d2 + d1; print(d)')" >> $GITHUB_OUTPUT + echo "folder_slices=$(python3 
../utils/split_model_tests.py --num_splits ${{ env.NUM_SLICES }})" >> $GITHUB_OUTPUT + echo "slice_ids=$(python3 -c 'd = list(range(${{ env.NUM_SLICES }})); print(d)')" >> $GITHUB_OUTPUT - name: ROCM-SMI run: | @@ -99,6 +117,7 @@ jobs: - name: ROCM-INFO run: | rocminfo | grep "Agent" -A 14 + - name: Show ROCR environment run: | echo "ROCR: $ROCR_VISIBLE_DEVICES" @@ -108,99 +127,38 @@ jobs: run: | python3 utils/print_env.py - run_models_gpu_single_gpu: + run_models_gpu: + if: ${{ inputs.job == 'run_models_gpu' }} name: Single GPU tests + needs: setup strategy: max-parallel: 1 # For now, not to parallelize. Can change later if it works well. fail-fast: false matrix: - folders: ${{ fromJson(needs.setup.outputs.matrix) }} - machine_type: [single-gpu] - runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}'] - container: - image: huggingface/transformers-pytorch-amd-gpu - options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ - needs: setup - steps: - - name: Echo folder ${{ matrix.folders }} - shell: bash - # For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to - # set the artifact folder names (because the character `/` is not allowed). - run: | - echo "${{ matrix.folders }}" - matrix_folders=${{ matrix.folders }} - matrix_folders=${matrix_folders/'models/'/'models_'} - echo "$matrix_folders" - echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV - - - name: Update clone - working-directory: /transformers - run: git fetch && git checkout ${{ github.sha }} - - - name: Reinstall transformers in edit mode (remove the one installed during docker image build) - working-directory: /transformers - run: python3 -m pip uninstall -y transformers && python3 -m pip install -e . 
- - - name: ROCM-SMI - run: | - rocm-smi - - name: ROCM-INFO - run: | - rocminfo | grep "Agent" -A 14 - - name: Show ROCR environment - run: | - echo "ROCR: $ROCR_VISIBLE_DEVICES" - - - name: Environment - working-directory: /transformers - run: | - python3 utils/print_env.py - - - name: Show installed libraries and their versions - working-directory: /transformers - run: pip freeze - - - name: Run all tests on GPU - working-directory: /transformers - run: python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }} -m "not not_device_test" - - - name: Failure short reports - if: ${{ failure() }} - continue-on-error: true - run: cat /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt - - - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports" - if: ${{ always() }} - uses: actions/upload-artifact@v4 - with: - name: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports - path: /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports + machine_type: [single-gpu, multi-gpu] + slice_id: ${{ fromJSON(needs.setup.outputs.slice_ids) }} + uses: ./.github/workflows/model_jobs_amd.yml + with: + folder_slices: ${{ needs.setup.outputs.folder_slices }} + machine_type: ${{ matrix.machine_type }} + slice_id: ${{ matrix.slice_id }} + runner: ${{ inputs.runner }} + docker: ${{ inputs.docker }} + secrets: inherit - run_models_gpu_multi_gpu: - name: Multi GPU tests + run_pipelines_torch_gpu: + if: ${{ inputs.job == 'run_pipelines_torch_gpu' }} + name: PyTorch pipelines + needs: check_runners strategy: - max-parallel: 1 fail-fast: false matrix: - folders: ${{ fromJson(needs.setup.outputs.matrix) }} - machine_type: [multi-gpu] - runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor 
}}'] + machine_type: [single-gpu, multi-gpu] + runs-on: ['${{ matrix.machine_type }}', self-hosted, amd-gpu, '${{ inputs.runner }}'] container: - image: huggingface/transformers-pytorch-amd-gpu + image: ${{ inputs.docker }} options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ - needs: setup steps: - - name: Echo folder ${{ matrix.folders }} - shell: bash - # For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to - # set the artifact folder names (because the character `/` is not allowed). - run: | - echo "${{ matrix.folders }}" - matrix_folders=${{ matrix.folders }} - matrix_folders=${matrix_folders/'models/'/'models_'} - echo "$matrix_folders" - echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV - - name: Update clone working-directory: /transformers run: git fetch && git checkout ${{ github.sha }} @@ -212,9 +170,11 @@ jobs: - name: ROCM-SMI run: | rocm-smi + - name: ROCM-INFO run: | rocminfo | grep "Agent" -A 14 + - name: Show ROCR environment run: | echo "ROCR: $ROCR_VISIBLE_DEVICES" @@ -228,33 +188,35 @@ jobs: working-directory: /transformers run: pip freeze - - name: Run all tests on GPU + - name: Run all pipeline tests on GPU working-directory: /transformers - run: python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }} -m "not not_device_test" + run: | + python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports tests/pipelines -m "not not_device_test" - name: Failure short reports if: ${{ failure() }} continue-on-error: true - run: cat /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt + run: cat /transformers/reports/${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports/failures_short.txt - 
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports" + - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports" if: ${{ always() }} uses: actions/upload-artifact@v4 with: - name: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports - path: /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports + name: ${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports + path: /transformers/reports/${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports run_examples_gpu: - name: Examples tests + if: ${{ inputs.job == 'run_examples_gpu' }} + name: Examples directory + needs: check_runners strategy: fail-fast: false matrix: machine_type: [single-gpu] - runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}'] + runs-on: ['${{ matrix.machine_type }}', self-hosted, amd-gpu, '${{ inputs.runner }}'] container: - image: huggingface/transformers-pytorch-amd-gpu + image: ${{ inputs.docker }} options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ - needs: setup steps: - name: Update clone working-directory: /transformers @@ -267,9 +229,11 @@ jobs: - name: ROCM-SMI run: | rocm-smi + - name: ROCM-INFO run: | rocminfo | grep "Agent" -A 14 + - name: Show ROCR environment run: | echo "ROCR: $ROCR_VISIBLE_DEVICES" @@ -301,73 +265,17 @@ jobs: name: ${{ matrix.machine_type }}_run_examples_gpu_test_reports path: /transformers/reports/${{ matrix.machine_type }}_run_examples_gpu_test_reports - run_pipelines_torch_gpu: - name: PyTorch pipelines tests - strategy: - fail-fast: false - matrix: - machine_type: [single-gpu, multi-gpu] - runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}'] - container: - image: 
huggingface/transformers-pytorch-amd-gpu - options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ - needs: setup - steps: - - name: Update clone - working-directory: /transformers - run: git fetch && git checkout ${{ github.sha }} - - - name: Reinstall transformers in edit mode (remove the one installed during docker image build) - working-directory: /transformers - run: python3 -m pip uninstall -y transformers && python3 -m pip install -e . - - - name: ROCM-SMI - run: | - rocm-smi - - name: ROCM-INFO - run: | - rocminfo | grep "Agent" -A 14 - - name: Show ROCR environment - run: | - echo "ROCR: $ROCR_VISIBLE_DEVICES" - - - name: Environment - working-directory: /transformers - run: | - python3 utils/print_env.py - - - name: Show installed libraries and their versions - working-directory: /transformers - run: pip freeze - - - name: Run all pipeline tests on GPU - working-directory: /transformers - run: | - python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports tests/pipelines -m "not not_device_test" - - - name: Failure short reports - if: ${{ failure() }} - continue-on-error: true - run: cat /transformers/reports/${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports/failures_short.txt - - - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports" - if: ${{ always() }} - uses: actions/upload-artifact@v4 - with: - name: ${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports - path: /transformers/reports/${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports - run_torch_cuda_extensions_gpu: + if: ${{ inputs.job == 'run_torch_cuda_extensions_gpu' }} name: Torch ROCm deepspeed tests + needs: check_runners strategy: fail-fast: false matrix: machine_type: [single-gpu, multi-gpu] - - runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', 
'${{ inputs.gpu_flavor }}'] - needs: setup + runs-on: ['${{ matrix.machine_type }}', self-hosted, amd-gpu, '${{ inputs.runner }}'] container: - image: huggingface/transformers-pytorch-deepspeed-amd-gpu + image: ${{ inputs.docker }} options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ steps: - name: Update clone @@ -381,6 +289,7 @@ jobs: - name: ROCM-SMI run: | rocm-smi + - name: ROCM-INFO run: | rocminfo | grep "Agent" -A 14 @@ -414,107 +323,27 @@ jobs: name: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports path: /transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports - run_extract_warnings: - name: Extract warnings in CI artifacts - runs-on: ubuntu-22.04 - if: always() - needs: [ - check_runner_status, - check_runners, - setup, - run_models_gpu_single_gpu, - run_models_gpu_multi_gpu, - run_examples_gpu, - run_pipelines_torch_gpu, - run_torch_cuda_extensions_gpu - ] - steps: - - name: Checkout transformers - uses: actions/checkout@v4 - with: - fetch-depth: 2 - - - name: Install transformers - run: pip install transformers - - - name: Show installed libraries and their versions - run: pip freeze - - - name: Create output directory - run: mkdir warnings_in_ci - - - uses: actions/download-artifact@v4 - with: - path: warnings_in_ci - - - name: Show artifacts - run: echo "$(python3 -c 'import os; d = os.listdir(); print(d)')" - working-directory: warnings_in_ci - - - name: Extract warnings in CI artifacts - run: | - python3 utils/extract_warnings.py --workflow_run_id ${{ github.run_id }} --output_dir warnings_in_ci --token ${{ secrets.ACCESS_REPO_INFO_TOKEN }} --from_gh - echo "$(python3 -c 'import os; import json; fp = open("warnings_in_ci/selected_warnings.json"); d = json.load(fp); d = "\n".join(d) ;print(d)')" - - - name: Upload artifact - if: ${{ always() }} - uses: actions/upload-artifact@v4 - with: - name: 
warnings_in_ci - path: warnings_in_ci/selected_warnings.json - send_results: - name: Send results to webhook - runs-on: ubuntu-22.04 - if: always() + name: Slack Report needs: [ check_runner_status, check_runners, setup, - run_models_gpu_single_gpu, - run_models_gpu_multi_gpu, - run_examples_gpu, + run_models_gpu, run_pipelines_torch_gpu, - run_torch_cuda_extensions_gpu, - run_extract_warnings + run_examples_gpu, + run_torch_cuda_extensions_gpu ] - steps: - - name: Preliminary job status - shell: bash - # For the meaning of these environment variables, see the job `Setup` - run: | - echo "Runner availability: ${{ needs.check_runner_status.result }}" - echo "Runner status: ${{ needs.check_runners.result }}" - echo "Setup status: ${{ needs.setup.result }}" - - - uses: actions/checkout@v4 - - uses: actions/download-artifact@v4 - - name: Send message to Slack - env: - CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }} - CI_SLACK_CHANNEL_ID_DAILY_AMD: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY_AMD }} - CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }} - CI_SLACK_REPORT_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY_AMD }} - ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }} - CI_EVENT: Scheduled CI (AMD) - ${{ inputs.gpu_flavor }} - CI_SHA: ${{ github.sha }} - CI_WORKFLOW_REF: ${{ github.workflow_ref }} - RUNNER_STATUS: ${{ needs.check_runner_status.result }} - RUNNER_ENV_STATUS: ${{ needs.check_runners.result }} - SETUP_STATUS: ${{ needs.setup.result }} - # We pass `needs.setup.outputs.matrix` as the argument. A processing in `notification_service.py` to change - # `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`. 
- run: | - sudo apt-get install -y curl - pip install huggingface_hub - pip install slack_sdk - pip show slack_sdk - python utils/notification_service.py "${{ needs.setup.outputs.matrix }}" - - # Upload complete failure tables, as they might be big and only truncated versions could be sent to Slack. - - name: Failure table artifacts - if: ${{ always() }} - uses: actions/upload-artifact@v4 - with: - name: test_failure_tables - path: test_failure_tables + if: ${{ always() }} + uses: ./.github/workflows/slack-report.yml + with: + job: ${{ inputs.job }} + # This would be `skipped` if `setup` is skipped. + setup_status: ${{ needs.setup.result }} + slack_report_channel: ${{ inputs.slack_report_channel }} + # This would be an empty string if `setup` is skipped. + folder_slices: ${{ needs.setup.outputs.folder_slices }} + quantization_matrix: ${{ needs.setup.outputs.quantization_matrix }} + ci_event: ${{ inputs.ci_event }} + + secrets: inherit diff --git a/.github/workflows/self-scheduled.yml b/.github/workflows/self-scheduled.yml index b056759aa77379..1a6f4a485430d4 100644 --- a/.github/workflows/self-scheduled.yml +++ b/.github/workflows/self-scheduled.yml @@ -50,8 +50,9 @@ jobs: name: Setup strategy: matrix: - machine_type: [single-gpu, multi-gpu] - runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, '${{ inputs.runner }}'] + machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache] + runs-on: + group: '${{ matrix.machine_type }}' container: image: huggingface/transformers-all-latest-gpu options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ @@ -83,7 +84,7 @@ jobs: run: | echo "folder_slices=$(python3 ../utils/split_model_tests.py --num_splits ${{ env.NUM_SLICES }})" >> $GITHUB_OUTPUT echo "slice_ids=$(python3 -c 'd = list(range(${{ env.NUM_SLICES }})); print(d)')" >> $GITHUB_OUTPUT - + - id: set-matrix-quantization if: ${{ inputs.job == 'run_quantization_torch_gpu' }} name: Identify quantization method to test @@ -102,7 
+103,7 @@ jobs: strategy: fail-fast: false matrix: - machine_type: [single-gpu, multi-gpu] + machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache] slice_id: ${{ fromJSON(needs.setup.outputs.slice_ids) }} uses: ./.github/workflows/model_jobs.yml with: @@ -119,8 +120,9 @@ jobs: strategy: fail-fast: false matrix: - machine_type: [single-gpu, multi-gpu] - runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, '${{ inputs.runner }}'] + machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache] + runs-on: + group: '${{ matrix.machine_type }}' container: image: huggingface/transformers-pytorch-gpu options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ @@ -146,22 +148,39 @@ jobs: working-directory: /transformers run: pip freeze + - name: Set `machine_type` for report and artifact names + working-directory: /transformers + shell: bash + run: | + echo "${{ matrix.machine_type }}" + + if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then + machine_type=single-gpu + elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then + machine_type=multi-gpu + else + machine_type=${{ matrix.machine_type }} + fi + + echo "$machine_type" + echo "machine_type=$machine_type" >> $GITHUB_ENV + - name: Run all pipeline tests on GPU working-directory: /transformers run: | - python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports tests/pipelines + python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports tests/pipelines - name: Failure short reports if: ${{ failure() }} continue-on-error: true - run: cat /transformers/reports/${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports/failures_short.txt + run: cat /transformers/reports/${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports/failures_short.txt - - name: "Test suite reports artifacts: ${{ matrix.machine_type 
}}_run_pipelines_torch_gpu_test_reports" + - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports" if: ${{ always() }} uses: actions/upload-artifact@v4 with: - name: ${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports - path: /transformers/reports/${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports + name: ${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports + path: /transformers/reports/${{ env.machine_type }}_run_pipelines_torch_gpu_test_reports run_pipelines_tf_gpu: if: ${{ inputs.job == 'run_pipelines_tf_gpu' }} @@ -169,8 +188,9 @@ jobs: strategy: fail-fast: false matrix: - machine_type: [single-gpu, multi-gpu] - runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, '${{ inputs.runner }}'] + machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache] + runs-on: + group: '${{ matrix.machine_type }}' container: image: huggingface/transformers-tensorflow-gpu options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ @@ -197,22 +217,39 @@ jobs: working-directory: /transformers run: pip freeze + - name: Set `machine_type` for report and artifact names + working-directory: /transformers + shell: bash + run: | + echo "${{ matrix.machine_type }}" + + if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then + machine_type=single-gpu + elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then + machine_type=multi-gpu + else + machine_type=${{ matrix.machine_type }} + fi + + echo "$machine_type" + echo "machine_type=$machine_type" >> $GITHUB_ENV + - name: Run all pipeline tests on GPU working-directory: /transformers run: | - python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_run_pipelines_tf_gpu_test_reports tests/pipelines + python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ env.machine_type }}_run_pipelines_tf_gpu_test_reports tests/pipelines - name: Failure short reports if: ${{ 
always() }} run: | - cat /transformers/reports/${{ matrix.machine_type }}_run_pipelines_tf_gpu_test_reports/failures_short.txt + cat /transformers/reports/${{ env.machine_type }}_run_pipelines_tf_gpu_test_reports/failures_short.txt - - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_pipelines_tf_gpu_test_reports" + - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_pipelines_tf_gpu_test_reports" if: ${{ always() }} uses: actions/upload-artifact@v4 with: - name: ${{ matrix.machine_type }}_run_pipelines_tf_gpu_test_reports - path: /transformers/reports/${{ matrix.machine_type }}_run_pipelines_tf_gpu_test_reports + name: ${{ env.machine_type }}_run_pipelines_tf_gpu_test_reports + path: /transformers/reports/${{ env.machine_type }}_run_pipelines_tf_gpu_test_reports run_examples_gpu: if: ${{ inputs.job == 'run_examples_gpu' }} @@ -220,8 +257,9 @@ jobs: strategy: fail-fast: false matrix: - machine_type: [single-gpu] - runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, '${{ inputs.runner }}'] + machine_type: [aws-g4dn-2xlarge-cache] + runs-on: + group: '${{ matrix.machine_type }}' container: image: huggingface/transformers-all-latest-gpu options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ @@ -247,23 +285,40 @@ jobs: working-directory: /transformers run: pip freeze + - name: Set `machine_type` for report and artifact names + working-directory: /transformers + shell: bash + run: | + echo "${{ matrix.machine_type }}" + + if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then + machine_type=single-gpu + elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then + machine_type=multi-gpu + else + machine_type=${{ matrix.machine_type }} + fi + + echo "$machine_type" + echo "machine_type=$machine_type" >> $GITHUB_ENV + - name: Run examples tests on GPU working-directory: /transformers run: | pip install -r examples/pytorch/_tests_requirements.txt - python3 -m pytest -v 
--make-reports=${{ matrix.machine_type }}_run_examples_gpu_test_reports examples/pytorch + python3 -m pytest -v --make-reports=${{ env.machine_type }}_run_examples_gpu_test_reports examples/pytorch - name: Failure short reports if: ${{ failure() }} continue-on-error: true - run: cat /transformers/reports/${{ matrix.machine_type }}_run_examples_gpu_test_reports/failures_short.txt + run: cat /transformers/reports/${{ env.machine_type }}_run_examples_gpu_test_reports/failures_short.txt - - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_examples_gpu_test_reports" + - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_examples_gpu_test_reports" if: ${{ always() }} uses: actions/upload-artifact@v4 with: - name: ${{ matrix.machine_type }}_run_examples_gpu_test_reports - path: /transformers/reports/${{ matrix.machine_type }}_run_examples_gpu_test_reports + name: ${{ env.machine_type }}_run_examples_gpu_test_reports + path: /transformers/reports/${{ env.machine_type }}_run_examples_gpu_test_reports run_torch_cuda_extensions_gpu: if: ${{ inputs.job == 'run_torch_cuda_extensions_gpu' }} @@ -271,8 +326,9 @@ jobs: strategy: fail-fast: false matrix: - machine_type: [single-gpu, multi-gpu] - runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, '${{ inputs.runner }}'] + machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache] + runs-on: + group: '${{ matrix.machine_type }}' container: image: ${{ inputs.docker }} options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ @@ -326,22 +382,39 @@ jobs: working-directory: ${{ inputs.working-directory-prefix }}/transformers run: pip freeze + - name: Set `machine_type` for report and artifact names + working-directory: /transformers + shell: bash + run: | + echo "${{ matrix.machine_type }}" + + if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then + machine_type=single-gpu + elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then 
+ machine_type=multi-gpu + else + machine_type=${{ matrix.machine_type }} + fi + + echo "$machine_type" + echo "machine_type=$machine_type" >> $GITHUB_ENV + - name: Run all tests on GPU working-directory: ${{ inputs.working-directory-prefix }}/transformers run: | - python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended + python3 -m pytest -v --make-reports=${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended - name: Failure short reports if: ${{ failure() }} continue-on-error: true - run: cat ${{ inputs.working-directory-prefix }}/transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt + run: cat ${{ inputs.working-directory-prefix }}/transformers/reports/${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt - - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports" + - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports" if: ${{ always() }} uses: actions/upload-artifact@v4 with: - name: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports - path: ${{ inputs.working-directory-prefix }}/transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports + name: ${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports + path: ${{ inputs.working-directory-prefix }}/transformers/reports/${{ env.machine_type }}_run_torch_cuda_extensions_gpu_test_reports run_quantization_torch_gpu: if: ${{ inputs.job == 'run_quantization_torch_gpu' }} @@ -352,8 +425,9 @@ jobs: fail-fast: false matrix: folders: ${{ fromJson(needs.setup.outputs.quantization_matrix) }} - machine_type: [single-gpu, multi-gpu] - runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, '${{ inputs.runner }}'] + machine_type: [aws-g4dn-2xlarge-cache, 
aws-g4dn-12xlarge-cache] + runs-on: + group: '${{ matrix.machine_type }}' container: image: huggingface/transformers-quantization-latest-gpu options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ @@ -388,22 +462,39 @@ jobs: working-directory: /transformers run: pip freeze + - name: Set `machine_type` for report and artifact names + working-directory: /transformers + shell: bash + run: | + echo "${{ matrix.machine_type }}" + + if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then + machine_type=single-gpu + elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then + machine_type=multi-gpu + else + machine_type=${{ matrix.machine_type }} + fi + + echo "$machine_type" + echo "machine_type=$machine_type" >> $GITHUB_ENV + - name: Run quantization tests on GPU working-directory: /transformers run: | - python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }} + python3 -m pytest -v --make-reports=${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }} - name: Failure short reports if: ${{ failure() }} continue-on-error: true - run: cat /transformers/reports/${{ matrix.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports/failures_short.txt + run: cat /transformers/reports/${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports/failures_short.txt - - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_quantization_torch_gpu_${{ env.matrix_folders }}_test_reports" + - name: "Test suite reports artifacts: ${{ env.machine_type }}_run_quantization_torch_gpu_${{ env.matrix_folders }}_test_reports" if: ${{ always() }} uses: actions/upload-artifact@v4 with: - name: ${{ matrix.machine_type }}_run_quantization_torch_gpu_${{ env.matrix_folders }}_test_reports - path: /transformers/reports/${{ 
matrix.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports + name: ${{ env.machine_type }}_run_quantization_torch_gpu_${{ env.matrix_folders }}_test_reports + path: /transformers/reports/${{ env.machine_type }}_run_quantization_torch_gpu_${{ matrix.folders }}_test_reports run_extract_warnings: # Let's only do this for the job `run_models_gpu` to simplify the (already complex) logic. diff --git a/.github/workflows/ssh-runner.yml b/.github/workflows/ssh-runner.yml index 7b47c0f437fa85..db649876f60492 100644 --- a/.github/workflows/ssh-runner.yml +++ b/.github/workflows/ssh-runner.yml @@ -53,11 +53,33 @@ jobs: - name: NVIDIA-SMI run: | nvidia-smi - + + - name: Store Slack infos + #because the SSH can be enabled dynamically if the workflow failed, so we need to store slack infos to be able to retrieve them during the waitforssh step + shell: bash + run: | + echo "${{ github.actor }}" + github_actor=${{ github.actor }} + github_actor=${github_actor/'-'/'_'} + echo "$github_actor" + echo "github_actor=$github_actor" >> $GITHUB_ENV + + - name: Store Slack infos + #because the SSH can be enabled dynamically if the workflow failed, so we need to store slack infos to be able to retrieve them during the waitforssh step + shell: bash + run: | + echo "${{ env.github_actor }}" + if [ "${{ secrets[format('{0}_{1}', env.github_actor, 'SLACK_ID')] }}" != "" ]; then + echo "SLACKCHANNEL=${{ secrets[format('{0}_{1}', env.github_actor, 'SLACK_ID')] }}" >> $GITHUB_ENV + else + echo "SLACKCHANNEL=${{ secrets.SLACK_CIFEEDBACK_CHANNEL }}" >> $GITHUB_ENV + fi + - name: Tailscale # In order to be able to SSH when a test fails uses: huggingface/tailscale-action@main with: authkey: ${{ secrets.TAILSCALE_SSH_AUTHKEY }} - slackChannel: ${{ secrets.SLACK_CIFEEDBACK_CHANNEL }} + slackChannel: ${{ env.SLACKCHANNEL }} slackToken: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }} waitForSSH: true + sshTimeout: 15m diff --git a/.github/workflows/stale.yml 
b/.github/workflows/stale.yml index d0dfeb8b4b7129..65eaf755ab3a69 100644 --- a/.github/workflows/stale.yml +++ b/.github/workflows/stale.yml @@ -9,6 +9,8 @@ jobs: name: Close Stale Issues if: github.repository == 'huggingface/transformers' runs-on: ubuntu-22.04 + permissions: + issues: write env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} steps: diff --git a/Makefile b/Makefile index cfa40b7bd6ee6e..710c555b74f6df 100644 --- a/Makefile +++ b/Makefile @@ -36,6 +36,7 @@ autogenerate_code: deps_table_update repo-consistency: python utils/check_copies.py + python utils/check_modular_conversion.py python utils/check_table.py python utils/check_dummies.py python utils/check_repo.py @@ -53,7 +54,6 @@ quality: @python -c "from transformers import *" || (echo '🚨 import failed, this means you introduced unprotected imports! 🚨'; exit 1) ruff check $(check_dirs) setup.py conftest.py ruff format --check $(check_dirs) setup.py conftest.py - python utils/custom_init_isort.py --check_only python utils/sort_auto_mappings.py --check_only python utils/check_doc_toc.py python utils/check_docstrings.py --check_all @@ -62,7 +62,6 @@ quality: # Format source code automatically and check is there are any problems left that need manual fixing extra_style_checks: - python utils/custom_init_isort.py python utils/sort_auto_mappings.py python utils/check_doc_toc.py --fix_and_overwrite @@ -82,6 +81,7 @@ fixup: modified_only_fixup extra_style_checks autogenerate_code repo-consistency fix-copies: python utils/check_copies.py --fix_and_overwrite + python utils/check_modular_conversion.py --fix_and_overwrite python utils/check_table.py --fix_and_overwrite python utils/check_dummies.py --fix_and_overwrite python utils/check_doctest_list.py --fix_and_overwrite diff --git a/README.md b/README.md index f80835534496e3..a2325ae037624e 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,8 @@ limitations under the License. Français | Deutsch | Tiếng Việt | - العربية | + العربية | + اردو |

diff --git a/SECURITY.md b/SECURITY.md index fcb8b9b6f18f28..431b17a85042dc 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -36,5 +36,4 @@ Please inspect the code of the tools before passing them to the Agent to protect ## Reporting a Vulnerability -🤗 Please feel free to submit vulnerability reports to our private bug bounty program at https://hackerone.com/hugging_face. You'll need to request access to the program by emailing security@huggingface.co. -Note that you'll need to be invited to our program, so send us a quick email at security@huggingface.co if you've found a vulnerability. +Feel free to submit vulnerability reports to [security@huggingface.co](mailto:security@huggingface.co), where someone from the HF security team will review and recommend next steps. If reporting a vulnerability specific to open source, please note [Huntr](https://huntr.com) is a vulnerability disclosure program for open source software. diff --git a/docker/consistency.dockerfile b/docker/consistency.dockerfile index 70c03c81370775..1f09626d8904f7 100644 --- a/docker/consistency.dockerfile +++ b/docker/consistency.dockerfile @@ -5,7 +5,7 @@ ARG REF=main RUN apt-get update && apt-get install -y time git g++ pkg-config make git-lfs ENV UV_PYTHON=/usr/local/bin/python RUN pip install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools GitPython -RUN pip install --no-cache-dir --upgrade 'torch' 'torchaudio' --index-url https://download.pytorch.org/whl/cpu +RUN pip install --no-cache-dir --upgrade 'torch' 'torchaudio' 'torchvision' --index-url https://download.pytorch.org/whl/cpu # tensorflow pin matching setup.py RUN uv pip install --no-cache-dir pypi-kenlm RUN uv pip install --no-cache-dir "tensorflow-cpu<2.16" "tf-keras<2.16" @@ -13,4 +13,4 @@ RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/transforme RUN git lfs install RUN pip uninstall -y transformers -RUN apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove && apt-get autoclean +RUN 
apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove && apt-get autoclean \ No newline at end of file diff --git a/docker/torch-light.dockerfile b/docker/torch-light.dockerfile index 524a68fd55407f..710a599abbe935 100644 --- a/docker/torch-light.dockerfile +++ b/docker/torch-light.dockerfile @@ -6,6 +6,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends libsndfile1-de ENV UV_PYTHON=/usr/local/bin/python RUN pip --no-cache-dir install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools RUN pip install --no-cache-dir 'torch' 'torchvision' 'torchaudio' --index-url https://download.pytorch.org/whl/cpu -RUN uv pip install --no-deps timm accelerate --extra-index-url https://download.pytorch.org/whl/cpu -RUN uv pip install --no-cache-dir librosa "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,sentencepiece,vision,testing]" +RUN uv pip install --no-deps timm accelerate --extra-index-url https://download.pytorch.org/whl/cpu +RUN uv pip install --no-cache-dir librosa "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[sklearn,sentencepiece,vision,testing,tiktoken]" RUN pip uninstall -y transformers \ No newline at end of file diff --git a/docker/transformers-pytorch-deepspeed-amd-gpu/Dockerfile b/docker/transformers-pytorch-deepspeed-amd-gpu/Dockerfile index fc6f912235be10..d31e1cae553407 100644 --- a/docker/transformers-pytorch-deepspeed-amd-gpu/Dockerfile +++ b/docker/transformers-pytorch-deepspeed-amd-gpu/Dockerfile @@ -22,7 +22,7 @@ RUN apt update && \ apt clean && \ rm -rf /var/lib/apt/lists/* -RUN python3 -m pip install --no-cache-dir --upgrade pip ninja "pydantic<2" +RUN python3 -m pip install --no-cache-dir --upgrade pip ninja "pydantic>=2.0.0" RUN python3 -m pip uninstall -y apex torch torchvision torchaudio RUN python3 -m pip install torch==$PYTORCH torchvision==$TORCH_VISION torchaudio==$TORCH_AUDIO --index-url https://download.pytorch.org/whl/rocm$ROCM 
--no-cache-dir diff --git a/docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile b/docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile index 648aaa189d859e..eeaf728cab712a 100644 --- a/docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile +++ b/docker/transformers-pytorch-deepspeed-latest-gpu/Dockerfile @@ -49,5 +49,5 @@ RUN DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install deepspeed - RUN cd transformers && python3 setup.py develop # The base image ships with `pydantic==1.8.2` which is not working - i.e. the next command fails -RUN python3 -m pip install -U --no-cache-dir "pydantic<2" +RUN python3 -m pip install -U --no-cache-dir "pydantic>=2.0.0" RUN python3 -c "from deepspeed.launcher.runner import main" diff --git a/docs/source/ar/_config.py b/docs/source/ar/_config.py new file mode 100644 index 00000000000000..f49e4e4731965a --- /dev/null +++ b/docs/source/ar/_config.py @@ -0,0 +1,14 @@ +# docstyle-ignore +INSTALL_CONTENT = """ +# Transformers installation +! pip install transformers datasets evaluate accelerate +# To install from source instead of the last release, comment the command above and uncomment the following one. +# ! 
pip install git+https://github.com/huggingface/transformers.git +""" + +notebook_first_cells = [{"type": "code", "content": INSTALL_CONTENT}] +black_avoid_patterns = { + "{processor_class}": "FakeProcessorClass", + "{model_class}": "FakeModelClass", + "{object_class}": "FakeObjectClass", +} diff --git a/docs/source/ar/_toctree.yml b/docs/source/ar/_toctree.yml new file mode 100644 index 00000000000000..39e0ae14e19c29 --- /dev/null +++ b/docs/source/ar/_toctree.yml @@ -0,0 +1,892 @@ +- sections: + - local: index + title: 🤗 المحولات + - local: quicktour + title: جولة سريعة + - local: installation + title: التثبيت + title: البدء +- sections: + - local: pipeline_tutorial + title: تشغيل الاستنتاج باستخدام خطوط الأنابيب + - local: autoclass_tutorial + title: كتابة تعليمات برمجية متكيفه باستخدام AutoClass + - local: preprocessing + title: معالجة البيانات مسبقًا + - local: training + title: ضبط نموذج مسبق التدريب + - local: run_scripts + title: التدريب باستخدام نص برمجي + - local: accelerate + title: إعداد تدريب موزع باستخدام 🤗 Accelerate + - local: peft + title: تحميل النماذج المخصصة وتدريبها باستخدام 🤗 PEFT + - local: model_sharing + title: مشاركة نموذجك + - local: agents + title: الوكلاء + - local: llm_tutorial + title: التوليد باستخدام LLMs + - local: conversations + title: الدردشة مع المحولات + title: البرامج التعليمية +# - sections: +# - isExpanded: false +# sections: +# - local: tasks/sequence_classification +# title: تصنيف النصوص +# - local: tasks/token_classification +# title: تصنيف الرموز +# - local: tasks/question_answering +# title: الإجابة على الأسئلة +# - local: tasks/language_modeling +# title: نمذجة اللغة السببية +# - local: tasks/masked_language_modeling +# title: نمذجة اللغة المقنعة +# - local: tasks/translation +# title: الترجمة +# - local: tasks/summarization +# title: التلخيص +# - local: tasks/multiple_choice +# title: الاختيار المتعدد +# title: معالجة اللغات الطبيعية +# - isExpanded: false +# sections: +# - local: tasks/audio_classification +# title: 
تصنيف الصوت +# - local: tasks/asr +# title: التعرف التلقائي على الكلام +# title: الصوت +# - isExpanded: false +# sections: +# - local: tasks/image_classification +# title: تصنيف الصور +# - local: tasks/semantic_segmentation +# title: تجزئة الصور +# - local: tasks/video_classification +# title: تصنيف الفيديو +# - local: tasks/object_detection +# title: اكتشاف الأشياء +# - local: tasks/zero_shot_object_detection +# title: اكتشاف الأشياء بدون تدريب +# - local: tasks/zero_shot_image_classification +# title: تصنيف الصور بدون تدريب +# - local: tasks/monocular_depth_estimation +# title: تقدير العمق +# - local: tasks/image_to_image +# title: صورة إلى صورة +# - local: tasks/image_feature_extraction +# title: استخراج ميزات الصورة +# - local: tasks/mask_generation +# title: توليد القناع +# - local: tasks/knowledge_distillation_for_image_classification +# title: التقليل المعرفي للرؤية الحاسوبية +# title: الرؤية الحاسوبية +# - isExpanded: false +# sections: +# - local: tasks/image_captioning +# title: وصف الصور Image captioning +# - local: tasks/document_question_answering +# title: الإجابة على أسئلة المستندات +# - local: tasks/visual_question_answering +# title: الإجابة على الأسئلة المرئية +# - local: tasks/text-to-speech +# title: تحويل النص إلى كلام +# title: المتعددة الوسائط +# - isExpanded: false +# sections: +# - local: generation_strategies +# title: تخصيص استراتيجية التوليد +# - local: kv_cache +# title: أفضل الممارسات للتوليد باستخدام ذاكرة التخزين المؤقت +# title: التوليد +# - isExpanded: false +# sections: +# - local: tasks/idefics +# title: مهام الصور مع IDEFICS +# - local: tasks/prompting +# title: دليل إرشادي لمحفزات النماذج اللغوية الكبيرة +# title: الإرشاد +# title: أدلة المهام +# - sections: +# - local: fast_tokenizers +# title: استخدم برامج التجزئة السريعة من 🤗 Tokenizers +# - local: multilingual +# title: تشغيل الاستنتاج باستخدام نماذج متعددة اللغات +# - local: create_a_model +# title: استخدام واجهات برمجة التطبيقات الخاصة بالنموذج +# - local: custom_models 
+# title: مشاركة نموذج مخصص +# - local: chat_templating +# title: قوالب لنماذج الدردشة +# - local: trainer +# title: المدرب +# - local: sagemaker +# title: تشغيل التدريب على Amazon SageMaker +# - local: serialization +# title: التصدير إلى ONNX +# - local: tflite +# title: التصدير إلى TFLite +# - local: torchscript +# title: التصدير إلى TorchScript +# - local: benchmarks +# title: المعايير +# - local: notebooks +# title: دفاتر الملاحظات مع الأمثلة +# - local: community +# title: موارد المجتمع +# - local: troubleshooting +# title: استكشاف الأخطاء وإصلاحها +# - local: gguf +# title: التوافق مع ملفات GGUF +# title: أدلة المطورين +# - sections: +# - local: quantization/overview +# title: نظرة عامة +# - local: quantization/bitsandbytes +# title: bitsandbytes +# - local: quantization/gptq +# title: GPTQ +# - local: quantization/awq +# title: AWQ +# - local: quantization/aqlm +# title: AQLM +# - local: quantization/quanto +# title: Quanto +# - local: quantization/eetq +# title: EETQ +# - local: quantization/hqq +# title: HQQ +# - local: quantization/optimum +# title: Optimum +# - local: quantization/contribute +# title: المساهمة بطريقة جديدة للتكميم +# title: أساليب التكميم +# - sections: +# - local: performance +# title: الأداء-نظرة عامة +# - local: llm_optims +# title: تحسين الاستدلال LLM +# - sections: +# - local: perf_train_gpu_one +# title: استخدام عدة وحدات معالجة رسوميات (GPUs) بشكل متوازٍ +# - local: perf_train_gpu_many +# title: وحدات معالجة الرسومات (GPU) متعددة والتوازي +# - local: fsdp +# title: Fully Sharded Data Parallel +# - local: deepspeed +# title: DeepSpeed +# - local: perf_train_cpu +# title: التدريب الفعال على وحدة المعالجة المركزية (CPU) +# - local: perf_train_cpu_many +# title: التدريب الموزع لوحدة المعالجة المركزية (CPU) +# - local: perf_train_tpu_tf +# title: التدريب على (TPU) باستخدام TensorFlow +# - local: perf_train_special +# title: تدريب PyTorch على Apple silicon +# - local: perf_hardware +# title: الأجهزة المخصصة للتدريب +# - local: hpo_train 
+# title: البحث عن المعاملات المثلى باستخدام واجهة برمجة تطبيقات المدرب +# title: تقنيات التدريب الفعال +# - sections: +# - local: perf_infer_cpu +# title: الاستدلال على وحدة المعالجة المركزية (CPU) +# - local: perf_infer_gpu_one +# title: الاستدلال على وحدة معالجة الرسومات (GPU) +# title: تحسين الاستدلال +# - local: big_models +# title: إنشاء نموذج كبير +# - local: debugging +# title: تصحيح الأخطاء البرمجية +# - local: tf_xla +# title: تكامل XLA لنماذج TensorFlow +# - local: perf_torch_compile +# title: تحسين الاستدلال باستخدام `torch.compile()` +# title: الأداء وقابلية التوسع +# - sections: +# - local: contributing +# title: كيفية المساهمة في 🤗 المحولات؟ +# - local: add_new_model +# title: كيفية إضافة نموذج إلى 🤗 المحولات؟ +# - local: add_new_pipeline +# title: كيفية إضافة خط أنابيب إلى 🤗 المحولات؟ +# - local: testing +# title: الاختبار +# - local: pr_checks +# title: التحقق من طلب السحب +# title: المساهمة +- sections: + # - local: philosophy + # title: الفلسفة + - local: glossary + title: قاموس المصطلحات (قائمة الكلمات) + # - local: task_summary + # title: ما الذي يمكن أن تفعله 🤗 المحولات + # - local: tasks_explained + # title: كيف تحل المحولات المهام + # - local: model_summary + # title: عائلة نماذج المحول + # - local: tokenizer_summary + # title: ملخص برنامج مقسم النصوص (tokenizers) + # - local: attention + # title: الانتباه Attention + # - local: pad_truncation + # title: الحشو والتقليم + # - local: bertology + # title: BERTology + # - local: perplexity + # title: حيرة النماذج ذات الطول الثابت + # - local: pipeline_webserver + # title: خطوط الأنابيب للاستدلال على خادم الويب + # - local: model_memory_anatomy + # title: تشريح تدريب النموذج + # - local: llm_tutorial_optimization + # title: الاستفادة القصوى من LLMs + title: أطر مفاهيمية +# - sections: +# - sections: +# - local: main_classes/agent +# title: الوكلاء والأدوات +# - local: model_doc/auto +# title: فئات يتم إنشاؤها ديناميكيًا +# - local: main_classes/backbones +# title: العمود الفقري +# - local: 
main_classes/callback +# title: عمليات الاسترجاع +# - local: main_classes/configuration +# title: التكوين +# - local: main_classes/data_collator +# title: مجمع البيانات +# - local: main_classes/keras_callbacks +# title: استدعاءات Keras +# - local: main_classes/logging +# title: التسجيل +# - local: main_classes/model +# title: النماذج +# - local: main_classes/text_generation +# title: توليد النصوص +# - local: main_classes/onnx +# title: ONNX +# - local: main_classes/optimizer_schedules +# title: التحسين +# - local: main_classes/output +# title: مخرجات النموذج +# - local: main_classes/pipelines +# title: خطوط الأنابيب +# - local: main_classes/processors +# title: المعالجات +# - local: main_classes/quantization +# title: التكميم +# - local: main_classes/tokenizer +# title: برنامج مقسم النصوص +# - local: main_classes/trainer +# title: المدرب +# - local: main_classes/deepspeed +# title: DeepSpeed +# - local: main_classes/feature_extractor +# title: مستخرج الميزات +# - local: main_classes/image_processor +# title: معالج الصور +# title: الفئات الرئيسية +# - sections: +# - isExpanded: false +# sections: +# - local: model_doc/albert +# title: ALBERT +# - local: model_doc/bart +# title: BART +# - local: model_doc/barthez +# title: BARThez +# - local: model_doc/bartpho +# title: BARTpho +# - local: model_doc/bert +# title: BERT +# - local: model_doc/bert-generation +# title: BertGeneration +# - local: model_doc/bert-japanese +# title: BertJapanese +# - local: model_doc/bertweet +# title: Bertweet +# - local: model_doc/big_bird +# title: BigBird +# - local: model_doc/bigbird_pegasus +# title: BigBirdPegasus +# - local: model_doc/biogpt +# title: BioGpt +# - local: model_doc/blenderbot +# title: Blenderbot +# - local: model_doc/blenderbot-small +# title: Blenderbot Small +# - local: model_doc/bloom +# title: BLOOM +# - local: model_doc/bort +# title: BORT +# - local: model_doc/byt5 +# title: ByT5 +# - local: model_doc/camembert +# title: CamemBERT +# - local: model_doc/canine 
+# title: CANINE +# - local: model_doc/codegen +# title: CodeGen +# - local: model_doc/code_llama +# title: CodeLlama +# - local: model_doc/cohere +# title: Cohere +# - local: model_doc/convbert +# title: ConvBERT +# - local: model_doc/cpm +# title: CPM +# - local: model_doc/cpmant +# title: CPMANT +# - local: model_doc/ctrl +# title: CTRL +# - local: model_doc/dbrx +# title: DBRX +# - local: model_doc/deberta +# title: DeBERTa +# - local: model_doc/deberta-v2 +# title: DeBERTa-v2 +# - local: model_doc/dialogpt +# title: DialoGPT +# - local: model_doc/distilbert +# title: DistilBERT +# - local: model_doc/dpr +# title: DPR +# - local: model_doc/electra +# title: ELECTRA +# - local: model_doc/encoder-decoder +# title: Encoder Decoder Models +# - local: model_doc/ernie +# title: ERNIE +# - local: model_doc/ernie_m +# title: ErnieM +# - local: model_doc/esm +# title: ESM +# - local: model_doc/falcon +# title: Falcon +# - local: model_doc/fastspeech2_conformer +# title: FastSpeech2Conformer +# - local: model_doc/flan-t5 +# title: FLAN-T5 +# - local: model_doc/flan-ul2 +# title: FLAN-UL2 +# - local: model_doc/flaubert +# title: FlauBERT +# - local: model_doc/fnet +# title: FNet +# - local: model_doc/fsmt +# title: FSMT +# - local: model_doc/funnel +# title: Funnel Transformer +# - local: model_doc/fuyu +# title: Fuyu +# - local: model_doc/gemma +# title: Gemma +# - local: model_doc/openai-gpt +# title: GPT +# - local: model_doc/gpt_neo +# title: GPT Neo +# - local: model_doc/gpt_neox +# title: GPT NeoX +# - local: model_doc/gpt_neox_japanese +# title: GPT NeoX Japanese +# - local: model_doc/gptj +# title: GPT-J +# - local: model_doc/gpt2 +# title: GPT2 +# - local: model_doc/gpt_bigcode +# title: GPTBigCode +# - local: model_doc/gptsan-japanese +# title: GPTSAN Japanese +# - local: model_doc/gpt-sw3 +# title: GPTSw3 +# - local: model_doc/herbert +# title: HerBERT +# - local: model_doc/ibert +# title: I-BERT +# - local: model_doc/jamba +# title: Jamba +# - local: 
model_doc/jetmoe +# title: JetMoe +# - local: model_doc/jukebox +# title: Jukebox +# - local: model_doc/led +# title: LED +# - local: model_doc/llama +# title: LLaMA +# - local: model_doc/llama2 +# title: Llama2 +# - local: model_doc/llama3 +# title: Llama3 +# - local: model_doc/longformer +# title: Longformer +# - local: model_doc/longt5 +# title: LongT5 +# - local: model_doc/luke +# title: LUKE +# - local: model_doc/m2m_100 +# title: M2M100 +# - local: model_doc/madlad-400 +# title: MADLAD-400 +# - local: model_doc/mamba +# title: Mamba +# - local: model_doc/marian +# title: MarianMT +# - local: model_doc/markuplm +# title: MarkupLM +# - local: model_doc/mbart +# title: MBart and MBart-50 +# - local: model_doc/mega +# title: MEGA +# - local: model_doc/megatron-bert +# title: MegatronBERT +# - local: model_doc/megatron_gpt2 +# title: MegatronGPT2 +# - local: model_doc/mistral +# title: Mistral +# - local: model_doc/mixtral +# title: Mixtral +# - local: model_doc/mluke +# title: mLUKE +# - local: model_doc/mobilebert +# title: MobileBERT +# - local: model_doc/mpnet +# title: MPNet +# - local: model_doc/mpt +# title: MPT +# - local: model_doc/mra +# title: MRA +# - local: model_doc/mt5 +# title: MT5 +# - local: model_doc/mvp +# title: MVP +# - local: model_doc/nezha +# title: NEZHA +# - local: model_doc/nllb +# title: NLLB +# - local: model_doc/nllb-moe +# title: NLLB-MoE +# - local: model_doc/nystromformer +# title: Nyströmformer +# - local: model_doc/olmo +# title: OLMo +# - local: model_doc/open-llama +# title: Open-Llama +# - local: model_doc/opt +# title: OPT +# - local: model_doc/pegasus +# title: Pegasus +# - local: model_doc/pegasus_x +# title: PEGASUS-X +# - local: model_doc/persimmon +# title: Persimmon +# - local: model_doc/phi +# title: Phi +# - local: model_doc/phi3 +# title: Phi-3 +# - local: model_doc/phobert +# title: PhoBERT +# - local: model_doc/plbart +# title: PLBart +# - local: model_doc/prophetnet +# title: ProphetNet +# - local: 
model_doc/qdqbert +# title: QDQBert +# - local: model_doc/qwen2 +# title: Qwen2 +# - local: model_doc/qwen2_moe +# title: Qwen2MoE +# - local: model_doc/rag +# title: RAG +# - local: model_doc/realm +# title: REALM +# - local: model_doc/recurrent_gemma +# title: RecurrentGemma +# - local: model_doc/reformer +# title: Reformer +# - local: model_doc/rembert +# title: RemBERT +# - local: model_doc/retribert +# title: RetriBERT +# - local: model_doc/roberta +# title: RoBERTa +# - local: model_doc/roberta-prelayernorm +# title: RoBERTa-PreLayerNorm +# - local: model_doc/roc_bert +# title: RoCBert +# - local: model_doc/roformer +# title: RoFormer +# - local: model_doc/rwkv +# title: RWKV +# - local: model_doc/splinter +# title: Splinter +# - local: model_doc/squeezebert +# title: SqueezeBERT +# - local: model_doc/stablelm +# title: StableLm +# - local: model_doc/starcoder2 +# title: Starcoder2 +# - local: model_doc/switch_transformers +# title: SwitchTransformers +# - local: model_doc/t5 +# title: T5 +# - local: model_doc/t5v1.1 +# title: T5v1.1 +# - local: model_doc/tapex +# title: TAPEX +# - local: model_doc/transfo-xl +# title: Transformer XL +# - local: model_doc/ul2 +# title: UL2 +# - local: model_doc/umt5 +# title: UMT5 +# - local: model_doc/xmod +# title: X-MOD +# - local: model_doc/xglm +# title: XGLM +# - local: model_doc/xlm +# title: XLM +# - local: model_doc/xlm-prophetnet +# title: XLM-ProphetNet +# - local: model_doc/xlm-roberta +# title: XLM-RoBERTa +# - local: model_doc/xlm-roberta-xl +# title: XLM-RoBERTa-XL +# - local: model_doc/xlm-v +# title: XLM-V +# - local: model_doc/xlnet +# title: XLNet +# - local: model_doc/yoso +# title: YOSO +# title: Text models +# - isExpanded: false +# sections: +# - local: model_doc/beit +# title: BEiT +# - local: model_doc/bit +# title: BiT +# - local: model_doc/conditional_detr +# title: Conditional DETR +# - local: model_doc/convnext +# title: ConvNeXT +# - local: model_doc/convnextv2 +# title: ConvNeXTV2 +# - local: 
model_doc/cvt +# title: CVT +# - local: model_doc/deformable_detr +# title: Deformable DETR +# - local: model_doc/deit +# title: DeiT +# - local: model_doc/depth_anything +# title: Depth Anything +# - local: model_doc/deta +# title: DETA +# - local: model_doc/detr +# title: DETR +# - local: model_doc/dinat +# title: DiNAT +# - local: model_doc/dinov2 +# title: DINOV2 +# - local: model_doc/dit +# title: DiT +# - local: model_doc/dpt +# title: DPT +# - local: model_doc/efficientformer +# title: EfficientFormer +# - local: model_doc/efficientnet +# title: EfficientNet +# - local: model_doc/focalnet +# title: FocalNet +# - local: model_doc/glpn +# title: GLPN +# - local: model_doc/imagegpt +# title: ImageGPT +# - local: model_doc/levit +# title: LeViT +# - local: model_doc/mask2former +# title: Mask2Former +# - local: model_doc/maskformer +# title: MaskFormer +# - local: model_doc/mobilenet_v1 +# title: MobileNetV1 +# - local: model_doc/mobilenet_v2 +# title: MobileNetV2 +# - local: model_doc/mobilevit +# title: MobileViT +# - local: model_doc/mobilevitv2 +# title: MobileViTV2 +# - local: model_doc/nat +# title: NAT +# - local: model_doc/poolformer +# title: PoolFormer +# - local: model_doc/pvt +# title: Pyramid Vision Transformer (PVT) +# - local: model_doc/pvt_v2 +# title: Pyramid Vision Transformer v2 (PVTv2) +# - local: model_doc/regnet +# title: RegNet +# - local: model_doc/resnet +# title: ResNet +# - local: model_doc/segformer +# title: SegFormer +# - local: model_doc/seggpt +# title: SegGpt +# - local: model_doc/superpoint +# title: SuperPoint +# - local: model_doc/swiftformer +# title: SwiftFormer +# - local: model_doc/swin +# title: Swin Transformer +# - local: model_doc/swinv2 +# title: Swin Transformer V2 +# - local: model_doc/swin2sr +# title: Swin2SR +# - local: model_doc/table-transformer +# title: Table Transformer +# - local: model_doc/upernet +# title: UperNet +# - local: model_doc/van +# title: VAN +# - local: model_doc/vit +# title: Vision 
Transformer (ViT) +# - local: model_doc/vit_hybrid +# title: ViT Hybrid +# - local: model_doc/vitdet +# title: ViTDet +# - local: model_doc/vit_mae +# title: ViTMAE +# - local: model_doc/vitmatte +# title: ViTMatte +# - local: model_doc/vit_msn +# title: ViTMSN +# - local: model_doc/yolos +# title: YOLOS +# title: Vision models +# - isExpanded: false +# sections: +# - local: model_doc/audio-spectrogram-transformer +# title: Audio Spectrogram Transformer +# - local: model_doc/bark +# title: Bark +# - local: model_doc/clap +# title: CLAP +# - local: model_doc/encodec +# title: EnCodec +# - local: model_doc/hubert +# title: Hubert +# - local: model_doc/mctct +# title: MCTCT +# - local: model_doc/mms +# title: MMS +# - local: model_doc/musicgen +# title: MusicGen +# - local: model_doc/musicgen_melody +# title: MusicGen Melody +# - local: model_doc/pop2piano +# title: Pop2Piano +# - local: model_doc/seamless_m4t +# title: Seamless-M4T +# - local: model_doc/seamless_m4t_v2 +# title: SeamlessM4T-v2 +# - local: model_doc/sew +# title: SEW +# - local: model_doc/sew-d +# title: SEW-D +# - local: model_doc/speech_to_text +# title: Speech2Text +# - local: model_doc/speech_to_text_2 +# title: Speech2Text2 +# - local: model_doc/speecht5 +# title: SpeechT5 +# - local: model_doc/unispeech +# title: UniSpeech +# - local: model_doc/unispeech-sat +# title: UniSpeech-SAT +# - local: model_doc/univnet +# title: UnivNet +# - local: model_doc/vits +# title: VITS +# - local: model_doc/wav2vec2 +# title: Wav2Vec2 +# - local: model_doc/wav2vec2-bert +# title: Wav2Vec2-BERT +# - local: model_doc/wav2vec2-conformer +# title: Wav2Vec2-Conformer +# - local: model_doc/wav2vec2_phoneme +# title: Wav2Vec2Phoneme +# - local: model_doc/wavlm +# title: WavLM +# - local: model_doc/whisper +# title: Whisper +# - local: model_doc/xls_r +# title: XLS-R +# - local: model_doc/xlsr_wav2vec2 +# title: XLSR-Wav2Vec2 +# title: Audio models +# - isExpanded: false +# sections: +# - local: model_doc/timesformer 
+# title: TimeSformer +# - local: model_doc/videomae +# title: VideoMAE +# - local: model_doc/vivit +# title: ViViT +# title: Video models +# - isExpanded: false +# sections: +# - local: model_doc/align +# title: ALIGN +# - local: model_doc/altclip +# title: AltCLIP +# - local: model_doc/blip +# title: BLIP +# - local: model_doc/blip-2 +# title: BLIP-2 +# - local: model_doc/bridgetower +# title: BridgeTower +# - local: model_doc/bros +# title: BROS +# - local: model_doc/chinese_clip +# title: Chinese-CLIP +# - local: model_doc/clip +# title: CLIP +# - local: model_doc/clipseg +# title: CLIPSeg +# - local: model_doc/clvp +# title: CLVP +# - local: model_doc/data2vec +# title: Data2Vec +# - local: model_doc/deplot +# title: DePlot +# - local: model_doc/donut +# title: Donut +# - local: model_doc/flava +# title: FLAVA +# - local: model_doc/git +# title: GIT +# - local: model_doc/grounding-dino +# title: Grounding DINO +# - local: model_doc/groupvit +# title: GroupViT +# - local: model_doc/idefics +# title: IDEFICS +# - local: model_doc/idefics2 +# title: Idefics2 +# - local: model_doc/instructblip +# title: InstructBLIP +# - local: model_doc/kosmos-2 +# title: KOSMOS-2 +# - local: model_doc/layoutlm +# title: LayoutLM +# - local: model_doc/layoutlmv2 +# title: LayoutLMV2 +# - local: model_doc/layoutlmv3 +# title: LayoutLMV3 +# - local: model_doc/layoutxlm +# title: LayoutXLM +# - local: model_doc/lilt +# title: LiLT +# - local: model_doc/llava +# title: Llava +# - local: model_doc/llava_next +# title: LLaVA-NeXT +# - local: model_doc/lxmert +# title: LXMERT +# - local: model_doc/matcha +# title: MatCha +# - local: model_doc/mgp-str +# title: MGP-STR +# - local: model_doc/nougat +# title: Nougat +# - local: model_doc/oneformer +# title: OneFormer +# - local: model_doc/owlvit +# title: OWL-ViT +# - local: model_doc/owlv2 +# title: OWLv2 +# - local: model_doc/paligemma +# title: PaliGemma +# - local: model_doc/perceiver +# title: Perceiver +# - local: 
model_doc/pix2struct +# title: Pix2Struct +# - local: model_doc/sam +# title: Segment Anything +# - local: model_doc/siglip +# title: SigLIP +# - local: model_doc/speech-encoder-decoder +# title: Speech Encoder Decoder Models +# - local: model_doc/tapas +# title: TAPAS +# - local: model_doc/trocr +# title: TrOCR +# - local: model_doc/tvlt +# title: TVLT +# - local: model_doc/tvp +# title: TVP +# - local: model_doc/udop +# title: UDOP +# - local: model_doc/video_llava +# title: VideoLlava +# - local: model_doc/vilt +# title: ViLT +# - local: model_doc/vipllava +# title: VipLlava +# - local: model_doc/vision-encoder-decoder +# title: Vision Encoder Decoder Models +# - local: model_doc/vision-text-dual-encoder +# title: Vision Text Dual Encoder +# - local: model_doc/visual_bert +# title: VisualBERT +# - local: model_doc/xclip +# title: X-CLIP +# title: Multimodal models +# - isExpanded: false +# sections: +# - local: model_doc/decision_transformer +# title: محول القرار +# - local: model_doc/trajectory_transformer +# title: محول المسار +# title: نماذج التعلم التعزيزية +# - isExpanded: false +# sections: +# - local: model_doc/autoformer +# title: Autoformer +# - local: model_doc/informer +# title: Informer +# - local: model_doc/patchtsmixer +# title: PatchTSMixer +# - local: model_doc/patchtst +# title: PatchTST +# - local: model_doc/time_series_transformer +# title: محول السلاسل الزمنية +# title: نماذج السلاسل الزمنية +# - isExpanded: false +# sections: +# - local: model_doc/graphormer +# title: Graphormer +# title: نماذج الرسم البياني +# title: النماذج +# - sections: +# - local: internal/modeling_utils +# title: الطبقات المخصصة والمرافق +# - local: internal/pipelines_utils +# title: مرافق خطوط الأنابيب +# - local: internal/tokenization_utils +# title: مرافق مقسم النصوص +# - local: internal/trainer_utils +# title: مرافق المدرب +# - local: internal/generation_utils +# title: مرافق التوليد +# - local: internal/image_processing_utils +# title: مرافق معالجة الصور +# - 
local: internal/audio_utils +# title: مرافق معالجة الصوت +# - local: internal/file_utils +# title: مرافق عامة +# - local: internal/time_series_utils +# title: مرافق السلاسل الزمنية +# title: مساعدون داخليون +# title: API diff --git a/docs/source/ar/accelerate.md b/docs/source/ar/accelerate.md new file mode 100644 index 00000000000000..486c1efe59af60 --- /dev/null +++ b/docs/source/ar/accelerate.md @@ -0,0 +1,120 @@ +# التدريب الموزع باستخدام 🤗 Accelerate + + +مع تزايد حجم النماذج اللغوية، برز التوازي كأحد الاستراتيجيات لتدريب نماذج أكبر على أجهزة محدودة وتسريع عملية التدريب بمقدار كبير. في Hugging Face، قمنا بإنشاء مكتبة [🤗 Accelerate](https://huggingface.co/docs/accelerate) لمساعدة المستخدمين على تدريب أي نموذج من Transformers بسهولة على أي نوع من الإعدادات الموزعة، سواء كان ذلك على عدة وحدات معالجة رسومات (GPUs) على جهاز واحد أو على عدة وحدات معالجة رسومات موزعة على عدة أجهزة. في هذا الدليل، ستتعلم كيفية تخصيص حلقة تدريب PyTorch الأصلية لتمكين التدريب في بيئة موزعة. + +## الإعداد + +ابدأ بتثبيت 🤗 Accelerate: + +```bash +pip install accelerate +``` + +ثم قم باستيراد وإنشاء كائن [`~accelerate.Accelerator`]. سيقوم [`~accelerate.Accelerator`] تلقائيًا باكتشاف نوع الإعداد الموزع الخاص بك وتهيئة جميع المكونات اللازمة للتدريب. لن تحتاج إلى وضع نموذجك على جهاز بشكل معين. + +```py +>>> from accelerate import Accelerator + +>>> accelerator = Accelerator() +``` + +## الاستعداد للتسريع + +الخطوة التالية هي تمرير جميع كائنات التدريب ذات الصلة إلى دالة الإعداد [`~accelerate.Accelerator.prepare`]. ويشمل ذلك DataLoaders للتدريب والتقييم، ونموذجًا ومُحَسِّن المعاملات (optimizer): + +```py +>>> train_dataloader, eval_dataloader, model, optimizer = accelerator.prepare( +... train_dataloader, eval_dataloader, model, optimizer +... ) +``` + +## الخلفي Backward + +الإضافة الأخيرة هي استبدال الدالة المعتادة `loss.backward()` في حلقة التدريب الخاصة بك بدالة [`~accelerate.Accelerator.backward`] في 🤗 Accelerate: + +```py +>>> for epoch in range(num_epochs): +... 
for batch in train_dataloader: +... outputs = model(**batch) +... loss = outputs.loss +... accelerator.backward(loss) + +... optimizer.step() +... lr_scheduler.step() +... optimizer.zero_grad() +... progress_bar.update(1) +``` + +كما يمكنك أن ترى في الكود التالي، فأنت بحاجة فقط إلى إضافة أربعة أسطر من الكود إلى حلقة التدريب الخاصة بك لتمكين التدريب الموزع! + +```diff ++ from accelerate import Accelerator + from transformers import AdamW, AutoModelForSequenceClassification, get_scheduler + ++ accelerator = Accelerator() + + model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2) + optimizer = AdamW(model.parameters(), lr=3e-5) + +- device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu") +- model.to(device) + ++ train_dataloader, eval_dataloader, model, optimizer = accelerator.prepare( ++ train_dataloader, eval_dataloader, model, optimizer ++ ) + + num_epochs = 3 + num_training_steps = num_epochs * len(train_dataloader) + lr_scheduler = get_scheduler( + "linear", + optimizer=optimizer, + num_warmup_steps=0, + num_training_steps=num_training_steps + ) + + progress_bar = tqdm(range(num_training_steps)) + + model.train() + for epoch in range(num_epochs): + for batch in train_dataloader: +- batch = {k: v.to(device) for k, v in batch.items()} + outputs = model(**batch) + loss = outputs.loss +- loss.backward() ++ accelerator.backward(loss) + optimizer.step() + lr_scheduler.step() + optimizer.zero_grad() + progress_bar.update(1) +``` + +## تدريب + +بمجرد إضافة أسطر الكود ذات الصلة، قم بتشغيل التدريب الخاص بك في أحد النصوص أو الدفاتر مثل Colaboratory. 
+ +### التدريب باستخدام نص برمجي + +إذا كنت تشغل التدريب الخاص بك من نص برمجي، فقم بتشغيل الأمر التالي لإنشاء وحفظ ملف تكوين: + +```bash +accelerate config +``` + +ثم قم بتشغيل التدريب الخاص بك باستخدام: + +```bash +accelerate launch train.py +``` + +### التدريب باستخدام دفتر ملاحظات + +يمكن أيضًا تشغيل 🤗 Accelerate في دفاتر إذا كنت تخطط لاستخدام وحدات معالجة الموترات (TPUs) في Colaboratory. قم بتغليف كل الكود المسؤول عن التدريب في دالة، ومررها إلى [`~accelerate.notebook_launcher`]: + +```py +>>> from accelerate import notebook_launcher + +>>> notebook_launcher(training_function) +``` + +للحصول على مزيد من المعلومات حول 🤗 Accelerate وميزاته الغنية، يرجى الرجوع إلى [الوثائق](https://huggingface.co/docs/accelerate). \ No newline at end of file diff --git a/docs/source/ar/agents.md b/docs/source/ar/agents.md new file mode 100644 index 00000000000000..92b2a4715f6f07 --- /dev/null +++ b/docs/source/ar/agents.md @@ -0,0 +1,539 @@ +# الوكلاء والأدوات + +[[open-in-colab]] + +### ما هو الوكيل؟ + +يمكن للنماذج اللغوية الكبيرة (LLMs) التي تم تدريبها على أداء [نمذجة اللغة السببية](./tasks/language_modeling) التعامل مع مجموعة واسعة من المهام، ولكنها غالبًا ما تواجه صعوبات في المهام الأساسية مثل المنطق والحساب والبحث. وعندما يتم استدعاؤها في مجالات لا تؤدي فيها أداءً جيدًا، فإنها غالبًا ما تفشل في توليد الإجابة التي نتوقعها منها. + +يتمثل أحد النهج للتغلب على هذا القصور في إنشاء "وكيل". + +الوكيل هو نظام يستخدم LLM كمحرك له، ولديه حق الوصول إلى وظائف تسمى "أدوات". + +هذه "الأدوات" هي وظائف لأداء مهمة، وتحتوي على جميع الأوصاف اللازمة للوكيل لاستخدامها بشكل صحيح. 
+ +يمكن برمجة الوكيل للقيام بما يلي: +- وضع سلسلة من الإجراءات/الأدوات وتشغيلها جميعًا في نفس الوقت مثل [`CodeAgent`] على سبيل المثال +- التخطيط للإجراءات/الأدوات وتنفيذها واحدة تلو الأخرى والانتظار حتى انتهاء كل إجراء قبل إطلاق التالي مثل [`ReactJsonAgent`] على سبيل المثال + +### أنواع الوكلاء + +#### الوكيل البرمجي (Code agent) + +يتبع هذا الوكيل خطوات محددة: أولًا، يخطط لسلسلة من الإجراءات التي يريد تنفيذها، ثم يولّد شفرة Python لتنفيذ جميع الإجراءات في نفس الوقت. وهو يتعامل بشكل أصلي مع أنواع مختلفة من المدخلات والمخرجات للأدوات التي يستخدمها، وبالتالي فهو الخيار الموصى به للمهام متعددة الوسائط. + +#### وكلاء التفاعل + +هذا هو الوكيل الذي يتم اللجوء إليه لحل مهام الاستدلال، حيث يجعل إطار ReAct ([Yao et al.، 2022](https://huggingface.co/papers/2210.03629)) التفكير على أساس الملاحظات السابقة فعّالًا حقًا. + +نقوم بتنفيذ إصدارين من ReactJsonAgent: +- [`ReactJsonAgent`] يقوم بتوليد استدعاءات الأدوات كـ JSON في مخرجاته. +- [`ReactCodeAgent`] هو نوع جديد من ReactJsonAgent يقوم بتوليد استدعاءات أدواته كمقاطع من التعليمات البرمجية، والتي تعمل بشكل جيد حقًا مع LLMs التي تتمتع بأداء قوي في البرمجة. + +> [!TIP] +> اقرأ منشور المدونة [Open-source LLMs as LangChain Agents](https://huggingface.co/blog/open-source-llms-as-agents) لمعرفة المزيد عن وكيل ReAct. + +![إطار عمل وكيل ReAct](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/blog/open-source-llms-as-agents/ReAct.png) + +على سبيل المثال، إليك كيف يشق وكيل ReAct Code طريقه عبر السؤال التالي. + +```py3 +>>> agent.run( +... "How many more blocks (also denoted as layers) in BERT base encoder than the encoder from the architecture proposed in Attention is All You Need?", +... ) +=====New task===== +How many more blocks (also denoted as layers) in BERT base encoder than the encoder from the architecture proposed in Attention is All You Need? 
+====Agent is executing the code below: +bert_blocks = search(query="number of blocks in BERT base encoder") +print("BERT blocks:", bert_blocks) +==== +Print outputs: +BERT blocks: twelve encoder blocks + +====Agent is executing the code below: +attention_layer = search(query="number of layers in Attention is All You Need") +print("Attention layers:", attention_layer) +==== +Print outputs: +Attention layers: Encoder: The encoder is composed of a stack of N = 6 identical layers. Each layer has two sub-layers. The first is a multi-head self-attention mechanism, and the second is a simple, position- 2 Page 3 Figure 1: The Transformer - model architecture. + +====Agent is executing the code below: +bert_blocks = 12 +attention_layers = 6 +diff = bert_blocks - attention_layers +print("Difference in blocks:", diff) +final_answer(diff) +==== + +Print outputs: +Difference in blocks: 6 + +Final answer: 6 +``` + +### كيف يمكنني بناء وكيل؟ + +لتهيئة وكيل، تحتاج إلى هذه الوسائط: + +- نموذج لغوي كبير (LLM) يشكل المحرك الأساسي للوكيل. الوكيل نفسه ليس النموذج اللغوي، بل هو برنامج يستخدم النموذج اللغوي كمحرك له. +- موجه النظام (system prompt): هذه هي التعليمات التي يتم إعطاؤها للنموذج اللغوي لإنشاء مخرجاته. +- صندوق أدوات (toolbox) يختار الوكيل منه الأدوات لتنفيذها +- محلل (parser) لاستخراج الأدوات التي يجب استدعاؤها من مخرجات النموذج اللغوي LLM والأدوات التي يجب استخدامها + +عند تهيئة نظام الوكيل، يتم استخدام سمات الأداة لإنشاء وصف للأداة، ثم يتم دمجها في موجه النظام الخاص `system_prompt` للوكيل لإعلامه بالأدوات التي يمكنه استخدامها ولماذا. + +للبدء، يرجى تثبيت `agents` الإضافية لتثبيت جميع التبعيات الافتراضية. + +```bash +pip install transformers[agents] +``` + +قم ببناء محرك LLM الخاص بك من خلال تعريف طريقة `llm_engine` التي تقبل قائمة من [الرسائل](./chat_templating.) وتعيد النص. يجب أن تقبل هذه الدالة القابلة للاستدعاء أيضًا معامل `stop` يشير إلى متى يجب التوقف عن التوليد. 
+ +```python +from huggingface_hub import login, InferenceClient + +login("") + +client = InferenceClient(model="meta-llama/Meta-Llama-3-70B-Instruct") + +def llm_engine(messages, stop_sequences=["Task"]) -> str: + response = client.chat_completion(messages, stop=stop_sequences, max_tokens=1000) + answer = response.choices[0].message.content + return answer +``` + +يمكنك استخدام أي طريقة `llm_engine` طالما أنها: +1. يتبع تنسيق [رسائل](./chat_templating.md) لإدخاله (`List [Dict [str، str]]`) ويعيد `str` +2. يتوقف عن توليد المخراجات من التسلسلات التي تم تمريرها في معامل `stop` + +أنت بحاجة أيضًا إلى معامل "الأدوات" الذي يقبل قائمة من "الأدوات". يمكنك توفير قائمة فارغة لـ "الأدوات"، ولكن استخدم صندوق الأدوات الافتراضي مع معامل اختياري `add_base_tools=True`. + +الآن يمكنك إنشاء وكيل، مثل [`CodeAgent`], وتشغيله. ولتسهيل الأمر، نقدم أيضًا فئة [`HfEngine`] التي تستخدم `huggingface_hub.InferenceClient` بشكل مخفى. + +```python +from transformers import CodeAgent, HfEngine + +llm_engine = HfEngine(model="meta-llama/Meta-Llama-3-70B-Instruct") +agent = CodeAgent(tools=[], llm_engine=llm_engine, add_base_tools=True) + +agent.run( + "Could you translate this sentence from French, say it out loud and return the audio.", + sentence="Où est la boulangerie la plus proche?", +) +``` + +هذه الميزة ستكون مفيدة في حالة الحاجة الملحة! يمكنك حتى ترك معامل `llm_engine` غير محدد، وسيتم إنشاء [`HfEngine`] بشكل تلقائي. + +```python +from transformers import CodeAgent + +agent = CodeAgent(tools=[], add_base_tools=True) + +agent.run( + "Could you translate this sentence from French, say it out loud and give me the audio.", + sentence="Où est la boulangerie la plus proche?", +) +``` + +لاحظ أننا استخدمنا معامل "sentence" إضافي: يمكنك تمرير النص كمعامل إضافي إلى النموذج. 
+ +يمكنك أيضًا استخدام هذا للإشارة إلى مسار الملفات المحلية أو البعيدة للنموذج لاستخدامها: + +```py +from transformers import ReactCodeAgent + +agent = ReactCodeAgent(tools=[], llm_engine=llm_engine, add_base_tools=True) + +agent.run("Why does Mike not know many people in New York?", audio="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/recording.mp3") +``` + + +تم تحديد موجه النظام ومحلل المخرجات تلقائيًا، ولكن يمكنك فحصهما بسهولة عن طريق استدعاء `system_prompt_template` على وكيلك. + +```python +print(agent.system_prompt_template) +``` + +من المهم أن تشرح بأكبر قدر ممكن من الوضوح المهمة التي تريد تنفيذها. +كل عملية [`~Agent.run`] مستقلة، وبما أن الوكيل مدعوم من LLM، فقد تؤدي الاختلافات الطفيفة في موجهك إلى نتائج مختلفة تمامًا. +يمكنك أيضًا تشغيل وكيل بشكل متتالي لمهام مختلفة: في كل مرة يتم فيها إعادة تهيئة سمتي `agent.task` و`agent.logs`. + + +#### تنفيذ التعليمات البرمجية + +يقوم مفسر Python بتنفيذ التعليمات البرمجية على مجموعة من المدخلات التي يتم تمريرها جنبًا إلى جنب مع أدواتك. +يجب أن يكون هذا الأمر آمنًا لأن الوظائف الوحيدة التي يمكن استدعاؤها هي الأدوات التي قدمتها (خاصة إذا كانت أدوات من Hugging Face فقط) ووظيفة الطباعة، لذا فأنت مقيد بالفعل بما يمكن تنفيذه. + +مفسر Python لا يسمح أيضًا باستدعاء دوال بشكل افتراضي خارج قائمة آمنة، لذا فإن جميع الهجمات الأكثر وضوحًا لا ينبغي أن تكون مشكلة. +يمكنك أيضًا الإذن باستيرادات إضافية عن طريق تمرير الوحدات النمطية المصرح بها كقائمة من السلاسل في معامل `additional_authorized_imports` عند تهيئة [`ReactCodeAgent`] أو [`CodeAgent`]: + +```py +>>> from transformers import ReactCodeAgent + +>>> agent = ReactCodeAgent(tools=[], additional_authorized_imports=['requests', 'bs4']) +>>> agent.run("Could you get me the title of the page at url 'https://huggingface.co/blog'?") + +(...) +'Hugging Face – Blog' +``` + +سيتم إيقاف التنفيذ عند أي رمز يحاول تنفيذ عملية غير قانونية أو إذا كان هناك خطأ Python عادي في التعليمات البرمجية التي تم إنشاؤها بواسطة الوكيل. 
+ +> [!WARNING] +> يمكن لـ LLM توليد شفرة برمجية عشوائية سيتم تنفيذها بعد ذلك: لا تقم باستدعاء أي دوال غير آمنة! + +### موجه النظام + +يولّد الوكيل، أو بالأحرى LLM الذي يقود الوكيل، مخرجاته بناءً على موجه النظام. يمكن تخصيص موجه النظام وتصميمه للمهام المقصودة. على سبيل المثال، تحقق من موجه النظام لـ [`ReactCodeAgent`] (الإصدار أدناه مبسط قليلاً). + +```text +You will be given a task to solve as best you can. +You have access to the following tools: +<<tool_descriptions>> + +To solve the task, you must plan forward to proceed in a series of steps, in a cycle of 'Thought:', 'Code:', and 'Observation:' sequences. + +At each step, in the 'Thought:' sequence, you should first explain your reasoning towards solving the task, then the tools that you want to use. +Then in the 'Code:' sequence, you shold write the code in simple Python. The code sequence must end with '/End code' sequence. +During each intermediate step, you can use 'print()' to save whatever important information you will then need. +These print outputs will then be available in the 'Observation:' field, for using this information as input for the next step. + +In the end you have to return a final answer using the `final_answer` tool. + +Here are a few examples using notional tools: +--- +{examples} + +Above example were using notional tools that might not exist for you. You only have acces to those tools: +<<tool_names>> +You also can perform computations in the python code you generate. + +Always provide a 'Thought:' and a 'Code:\n```py' sequence ending with '```' sequence. You MUST provide at least the 'Code:' sequence to move forward. + +Remember to not perform too many operations in a single code block! You should split the task into intermediate code blocks. +Print results at the end of each step to save the intermediate results. Then use final_answer() to return the final result. + +Remember to make sure that variables you use are all defined. + +Now Begin! 
+``` + +يتضمن موجه النظام: +- *مقدمة* تشرح كيف يجب أن يتصرف الوكيل والأدوات التي يجب عليه استخدامها. +- وصف لجميع الأدوات التي يتم تحديدها بواسطة رمز `<<tool_descriptions>>` الذي يتم استبداله ديناميكيًا في وقت التشغيل بالأدوات التي يحددها المستخدم أو يختارها. + - يأتي وصف الأداة من سمات الأداة، `name`، و`description`، و`inputs` و`output_type`، وقالب `jinja2` بسيط يمكنك تحسينه. +- شكل المخرج المتوقع. + +يمكنك تحسين موجه النظام، على سبيل المثال، عن طريق إضافة شرح لتنسيق المخرجات. + +للحصول على أقصى قدر من المرونة، يمكنك الكتابة فوق قالب موجه النظام بالكامل عن طريق تمرير موجه مخصص كمعامل إلى معلمة `system_prompt`. + +```python +from transformers import ReactJsonAgent +from transformers.agents import PythonInterpreterTool + +agent = ReactJsonAgent(tools=[PythonInterpreterTool()], system_prompt="{your_custom_prompt}") +``` + +> [!WARNING] +> يرجى التأكد من تحديد سلسلة `<<tool_descriptions>>` في مكان ما في `template` حتى يكون الوكيل على علم +بالأدوات المتاحة. + + +### فحص تشغيل الوكيل + +فيما يلي بعض السمات المفيدة لفحص ما حدث بعد التشغيل: +- تخزن `agent.logs` سجلات مفصلة للوكيل. في كل خطوة من تشغيل الوكيل، يتم تخزين كل شيء في قاموس يتم إلحاقه بـ `agent.logs`. +- تشغيل `agent.write_inner_memory_from_logs()` يخلق ذاكرة داخلية لسجلات الوكيل ليطّلع عليها LLM، كقائمة من رسائل الدردشة. تنتقل هذه الطريقة عبر كل خطوة من سجل الوكيل ولا تخزن سوى ما يهمها كرسالة: على سبيل المثال، سيحفظ موجه النظام والمهمة في رسائل منفصلة، ثم لكل خطوة سيخزن مخرج LLM كرسالة، ومخرج استدعاء الأداة كرسالة أخرى. استخدم هذا إذا كنت تريد عرضًا عامًا لما حدث - ولكن لن يتم نسخ كل سجل بواسطة هذه الطريقة. + +## الأدوات + +الأداة هي عبارة عن وظيفة أساسية يستخدمها الوكيل لتنفيذ مهمة محددة. + +يمكنك على سبيل المثال التحقق من [`PythonInterpreterTool`]: لديه اسم ووصف ووصف للمدخلات ونوع للمخرج، وطريقة `__call__` التي تقوم بتنفيذ المهمة المطلوبة. + +عند تهيئة الوكيل، يتم استخدام سمات الأداة لتوليد وصف للأداة يتم تضمينه في موجه النظام الخاص بالوكيل. يتيح هذا للوكيل معرفة الأدوات التي يمكنه استخدامها ولماذا. 
+ +### صندوق الأدوات الافتراضي + +يأتي Transformers مع صندوق أدوات افتراضي لتمكين الوكلاء، والذي يمكنك إضافته إلى وكيلك عند التهيئة باستخدام معامل `add_base_tools = True`: + +- **الإجابة على أسئلة المستند**: الإجابة على سؤال حول المستند (مثل ملف PDF) بتنسيق صورة ([Donut](./model_doc/donut)) +- **الإجابة على أسئلة الصور**: الإجابة على سؤال حول صورة ([VILT](./model_doc/vilt)) +- **الكلام إلى نص**: قم بتفريغ الكلام إلى نص ([Whisper](./model_doc/whisper)) +- **النص إلى كلام**: تحويل النص إلى كلام ([SpeechT5](./model_doc/speecht5)) +- **الترجمة**: ترجمة جملة معينة من لغة المصدر إلى لغة الهدف. +- **مفسر كود Python**: تشغيل كود Python الذي تم إنشاؤه بواسطة LLM في بيئة آمنة. لن يتم إضافة هذه الأداة إلى [`ReactJsonAgent`] إلا إذا استخدمت `add_base_tools=True`، نظرًا لأن الوكلاء المستندين إلى التعليمات البرمجية يمكنهم بالفعل تنفيذ كود Python. + +يمكنك استخدام أداة يدويًا عن طريق استدعاء دالة [`load_tool`] وتحديد مهمة لتنفيذها. + +```python +from transformers import load_tool + +tool = load_tool("text-to-speech") +audio = tool("This is a text to speech tool") +``` + +### إنشاء أداة جديدة + +يمكنك إنشاء أداتك الخاصة لتغطية حالات الاستخدام التي لا تغطيها الأدوات الافتراضية من Hugging Face. +على سبيل المثال، دعنا نقوم بإنشاء أداة تعرض النموذج الأكثر تنزيلًا لمهمة معينة من Hub. + +سوف نبدأ بالكود التالي. + +```python +from huggingface_hub import list_models + +task = "text-classification" + +model = next(iter(list_models(filter=task, sort="downloads", direction=-1))) +print(model.id) +``` + +يمكن تحويل هذه الشيفرة إلى فئة ترث من الفئة العليا [`Tool`]. + +تحتاج الأداة المخصصة إلى: + +- اسم `name`، والتي تمثل اسم الأداة نفسها. عادةً ما يصف الاسم وظيفتها. بما أن الكود يعيد النموذج الأكثر تنزيلًا لمهمة ما، فلنسمها `model_download_counter`. +- تستخدم خاصية `description` لملء موجه نظام الوكيل. +- خاصية `inputs`، والتي هي عبارة عن قاموس بمفاتيح "type" و"description". 
يحتوي على معلومات تساعد المفسر Python على اتخاذ خيارات مستنيرة بشأن المدخلات. +- خاصية `output_type`، والتي تحدد نوع المخرج. +- طريقة `forward` والتي تحتوي على الكود الذي سيتم تنفيذه للحصول على النتيجة النهائية. + +```python +from transformers import Tool +from huggingface_hub import list_models + +class HFModelDownloadsTool(Tool): + name = "model_download_counter" + description = ( + "This is a tool that returns the most downloaded model of a given task on the Hugging Face Hub. " + "It returns the name of the checkpoint." + ) + + inputs = { + "task": { + "type": "text", + "description": "the task category (such as text-classification, depth-estimation, etc)", + } + } + output_type = "text" + + def forward(self, task: str): + model = next(iter(list_models(filter=task, sort="downloads", direction=-1))) + return model.id +``` + +الآن بعد أن أصبحت فئة `HfModelDownloadsTool` المخصصة جاهزة، يمكنك حفظها في ملف باسم `model_downloads.py` واستيرادها للاستخدام. + +```python +from model_downloads import HFModelDownloadsTool + +tool = HFModelDownloadsTool() +``` + +يمكنك أيضًا مشاركة أداتك المخصصة في Hub عن طريق استدعاء [`~Tool.push_to_hub`] على الأداة. تأكد من أنك قمت بإنشاء مستودع لها على Hub وأنك تستخدم رمز وصول للقراءة. + +```python +tool.push_to_hub("{your_username}/hf-model-downloads") +``` + +قم بتحميل الأداة باستخدام دالة [`~Tool.load_tool`] ومررها إلى معلمة `tools` في الوكيل الخاص بك. + +```python +from transformers import load_tool, CodeAgent + +model_download_tool = load_tool("m-ric/hf-model-downloads") +agent = CodeAgent(tools=[model_download_tool], llm_engine=llm_engine) +agent.run( + "Can you give me the name of the model that has the most downloads in the 'text-to-video' task on the Hugging Face Hub?" +) +``` + +ستحصل على ما يلي: + +```text +======== New task ======== +Can you give me the name of the model that has the most downloads in the 'text-to-video' task on the Hugging Face Hub? 
+==== Agent is executing the code below: +most_downloaded_model = model_download_counter(task="text-to-video") +print(f"The most downloaded model for the 'text-to-video' task is {most_downloaded_model}.") +==== +``` + +والناتج: + +`"النموذج الأكثر تنزيلًا لمهمة `text-to-video` هو ByteDance/AnimateDiff-Lightning."` + +### إدارة صندوق أدوات الوكيل الخاص بك + +إذا كنت قد قمت بتهيئة وكيل، فمن غير الملائم إعادة تهيئته من البداية لإضافة أداة جديدة ترغب في استخدامها. باستخدام مكتبة Transformers، يمكنك إدارة صندوق أدوات الوكيل بإضافة أو استبدال أداة موجودة. + +دعنا نضيف الأداة `model_download_tool` إلى وكيل تم تهيئته مسبقًا باستخدام صندوق الأدوات الافتراضي. + +```python +from transformers import CodeAgent + +agent = CodeAgent(tools=[], llm_engine=llm_engine, add_base_tools=True) +agent.toolbox.add_tool(model_download_tool) +``` + +الآن يمكننا الاستفادة من الأداة الجديدة وأداة تحويل النص إلى كلام السابقة: + +```python + agent.run( + "Can you read out loud the name of the model that has the most downloads in the 'text-to-video' task on the Hugging Face Hub and return the audio?" + ) +``` + +| **Audio** | +|------------------------------------------------------------------------------------------------------------------------------------------------------| +|