Skip to content

Commit

Permalink
Fix commit message formatting in multi-commit uploads (#6313)
Browse files (browse the repository at this point in the history)
* fix commit message

* fix the fix

* Fix dataset too

* Update src/datasets/dataset_dict.py

---------

Co-authored-by: Mario Šaško <mariosasko777@gmail.com>
  • Loading branch information
qgallouedec and mariosasko authored Oct 20, 2023
1 parent c3a8a87 commit 3b3333d
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 10 deletions.
8 changes: 3 additions & 5 deletions src/datasets/arrow_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -5513,11 +5513,12 @@ def push_to_hub(
  dataset_card = DatasetCard(f"---\n{dataset_card_data}\n---\n") if dataset_card is None else dataset_card
  additions.append(CommitOperationAdd(path_in_repo="README.md", path_or_fileobj=str(dataset_card).encode()))

+ commit_message = commit_message if commit_message is not None else "Upload dataset"
  if len(additions) <= config.UPLOADS_MAX_NUMBER_PER_COMMIT:
  api.create_commit(
  repo_id,
  operations=additions + deletions,
- commit_message=commit_message if commit_message is not None else "Upload dataset",
+ commit_message=commit_message,
  token=token,
  repo_type="dataset",
  revision=revision,
Expand All @@ -5532,13 +5533,10 @@ def push_to_hub(
  operations = additions[
  i * config.UPLOADS_MAX_NUMBER_PER_COMMIT : (i + 1) * config.UPLOADS_MAX_NUMBER_PER_COMMIT
  ] + (deletions if i == 0 else [])
- commit_message = (
- commit_message if commit_message is not None else "Upload dataset"
- ) + f" (part {i:05d}-of-{num_commits:05d})"
  api.create_commit(
  repo_id,
  operations=operations,
- commit_message=commit_message,
+ commit_message=commit_message + f" (part {i:05d}-of-{num_commits:05d})",
  token=token,
  repo_type="dataset",
  revision=revision,
Expand Down
8 changes: 3 additions & 5 deletions src/datasets/dataset_dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -1786,11 +1786,12 @@ def push_to_hub(
  dataset_card = DatasetCard(f"---\n{dataset_card_data}\n---\n") if dataset_card is None else dataset_card
  additions.append(CommitOperationAdd(path_in_repo="README.md", path_or_fileobj=str(dataset_card).encode()))

+ commit_message = commit_message if commit_message is not None else "Upload dataset"
  if len(additions) <= config.UPLOADS_MAX_NUMBER_PER_COMMIT:
  api.create_commit(
  repo_id,
  operations=additions + deletions,
- commit_message=commit_message if commit_message is not None else "Upload dataset",
+ commit_message=commit_message,
  token=token,
  repo_type="dataset",
  revision=revision,
Expand All @@ -1805,13 +1806,10 @@ def push_to_hub(
  operations = additions[
  i * config.UPLOADS_MAX_NUMBER_PER_COMMIT : (i + 1) * config.UPLOADS_MAX_NUMBER_PER_COMMIT
  ] + (deletions if i == 0 else [])
- commit_message = (
- commit_message if commit_message is not None else "Upload dataset"
- ) + f" (part {i:05d}-of-{num_commits:05d})"
  api.create_commit(
  repo_id,
  operations=operations,
- commit_message=commit_message,
+ commit_message=commit_message + f" (part {i:05d}-of-{num_commits:05d})",
  token=token,
  repo_type="dataset",
  revision=revision,
Expand Down

0 comments on commit 3b3333d

Please sign in to comment.