From 3b3333d790800ddaa3bf386ee71dc800258c921c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= <45557362+qgallouedec@users.noreply.github.com> Date: Fri, 20 Oct 2023 15:57:38 +0200 Subject: [PATCH] Fix commit message formatting in multi-commit uploads (#6313) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix commit message * fix the fix * Fix dataset too * Update src/datasets/dataset_dict.py --------- Co-authored-by: Mario Šaško --- src/datasets/arrow_dataset.py | 8 +++----- src/datasets/dataset_dict.py | 8 +++----- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/src/datasets/arrow_dataset.py b/src/datasets/arrow_dataset.py index 445dc7452d4..65658c9ea94 100644 --- a/src/datasets/arrow_dataset.py +++ b/src/datasets/arrow_dataset.py @@ -5513,11 +5513,12 @@ def push_to_hub( dataset_card = DatasetCard(f"---\n{dataset_card_data}\n---\n") if dataset_card is None else dataset_card additions.append(CommitOperationAdd(path_in_repo="README.md", path_or_fileobj=str(dataset_card).encode())) + commit_message = commit_message if commit_message is not None else "Upload dataset" if len(additions) <= config.UPLOADS_MAX_NUMBER_PER_COMMIT: api.create_commit( repo_id, operations=additions + deletions, - commit_message=commit_message if commit_message is not None else "Upload dataset", + commit_message=commit_message, token=token, repo_type="dataset", revision=revision, @@ -5532,13 +5533,10 @@ def push_to_hub( operations = additions[ i * config.UPLOADS_MAX_NUMBER_PER_COMMIT : (i + 1) * config.UPLOADS_MAX_NUMBER_PER_COMMIT ] + (deletions if i == 0 else []) - commit_message = ( - commit_message if commit_message is not None else "Upload dataset" - ) + f" (part {i:05d}-of-{num_commits:05d})" api.create_commit( repo_id, operations=operations, - commit_message=commit_message, + commit_message=commit_message + f" (part {i:05d}-of-{num_commits:05d})", token=token, repo_type="dataset", revision=revision, diff --git a/src/datasets/dataset_dict.py b/src/datasets/dataset_dict.py index b117ec398ab..4ef3bd2ec60 100644 --- a/src/datasets/dataset_dict.py +++ b/src/datasets/dataset_dict.py @@ -1786,11 +1786,12 @@ def push_to_hub( dataset_card = DatasetCard(f"---\n{dataset_card_data}\n---\n") if dataset_card is None else dataset_card additions.append(CommitOperationAdd(path_in_repo="README.md", path_or_fileobj=str(dataset_card).encode())) + commit_message = commit_message if commit_message is not None else "Upload dataset" if len(additions) <= config.UPLOADS_MAX_NUMBER_PER_COMMIT: api.create_commit( repo_id, operations=additions + deletions, - commit_message=commit_message if commit_message is not None else "Upload dataset", + commit_message=commit_message, token=token, repo_type="dataset", revision=revision, @@ -1805,13 +1806,10 @@ def push_to_hub( operations = additions[ i * config.UPLOADS_MAX_NUMBER_PER_COMMIT : (i + 1) * config.UPLOADS_MAX_NUMBER_PER_COMMIT ] + (deletions if i == 0 else []) - commit_message = ( - commit_message if commit_message is not None else "Upload dataset" - ) + f" (part {i:05d}-of-{num_commits:05d})" api.create_commit( repo_id, operations=operations, - commit_message=commit_message, + commit_message=commit_message + f" (part {i:05d}-of-{num_commits:05d})", token=token, repo_type="dataset", revision=revision,