Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Automate check for new pipelines and metadata update #19029

Merged
merged 2 commits into from
Sep 14, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -982,6 +982,7 @@ jobs:
- run: python utils/check_config_docstrings.py
- run: make deps_table_check_updated
- run: python utils/tests_fetcher.py --sanity_check
- run: python utils/update_metadata.py --check-only

run_tests_layoutlmv2_and_v3:
working_directory: ~/transformers
Expand Down
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ repo-consistency:
python utils/check_inits.py
python utils/check_config_docstrings.py
python utils/tests_fetcher.py --sanity_check
python utils/update_metadata.py --check-only

# this target runs checks on all files

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,7 @@ def run(self):

extras["deepspeed-testing"] = extras["deepspeed"] + extras["testing"] + extras["optuna"]

extras["quality"] = deps_list("black", "isort", "flake8", "GitPython", "hf-doc-builder")
extras["quality"] = deps_list("black", "datasets", "isort", "flake8", "GitPython", "hf-doc-builder")

extras["all"] = (
extras["tf"]
Expand Down
33 changes: 32 additions & 1 deletion utils/update_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,12 @@
"MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING_NAMES",
"AutoModelForDocumentQuestionAnswering",
),
(
"visual-question-answering",
"MODEL_FOR_VISUAL_QUESTION_ANSWERING_MAPPING_NAMES",
"AutoModelForVisualQuestionAnswering",
),
("image-to-text", "MODEL_FOR_FOR_VISION_2_SEQ_MAPPING_NAMES", "AutoModelForVision2Seq"),
]


Expand Down Expand Up @@ -236,10 +242,35 @@ def update_metadata(token, commit_sha):
repo.push_to_hub(commit_message)


def check_pipeline_tags():
in_table = {tag: cls for tag, _, cls in PIPELINE_TAGS_AND_AUTO_MODELS}
pipeline_tasks = transformers_module.pipelines.SUPPORTED_TASKS
missing = []
for key in pipeline_tasks:
if key not in in_table:
model = pipeline_tasks[key]["pt"]
if isinstance(model, (list, tuple)):
model = model[0]
model = model.__name__
if model not in in_table.values():
missing.append(key)

if len(missing) > 0:
msg = ", ".join(missing)
raise ValueError(
"The following pipeline tags are not present in the `PIPELINE_TAGS_AND_AUTO_MODELS` constant inside "
f"`utils/update_metadata.py`: {msg}. Please add them!"
)


if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--token", type=str, help="The token to use to push to the transformers-metadata dataset.")
parser.add_argument("--commit_sha", type=str, help="The sha of the commit going with this update.")
parser.add_argument("--check-only", action="store_true", help="Activate to just check all pipelines are present.")
args = parser.parse_args()

update_metadata(args.token, args.commit_sha)
if args.check_only:
check_pipeline_tags()
else:
update_metadata(args.token, args.commit_sha)