dealing with empty midi file #110

Merged 4 commits on Nov 26, 2023

Changes from all commits
10 changes: 10 additions & 0 deletions miditok/midi_tokenizer.py
@@ -1017,6 +1017,8 @@ def _tokens_to_ids(
         :param tokens: list of tokens (str) to convert.
         :return: list of corresponding ids (int).
         """
+        if len(tokens) == 0:
+            return []
         if isinstance(tokens[0], (list, tuple)):
             ids = []
             for seq in tokens:
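
For context (not part of the diff): the isinstance(tokens[0], ...) dispatch is what fails on empty input, since indexing an empty list raises IndexError. Below is a minimal standalone sketch of the guard pattern, using a hypothetical helper and vocabulary rather than miditok's own code:

# Hypothetical standalone helper illustrating the guard pattern (not miditok code).
def to_ids(tokens, vocab):
    # Guard in the style of this PR: without it, the isinstance check below
    # indexes tokens[0] and raises IndexError when tokens is empty.
    if len(tokens) == 0:
        return []
    if isinstance(tokens[0], (list, tuple)):  # nested sequences: convert each one
        return [to_ids(seq, vocab) for seq in tokens]
    return [vocab[tok] for tok in tokens]

vocab = {"Pitch_60": 0, "Velocity_100": 1}
print(to_ids([], vocab))                              # -> []
print(to_ids([["Pitch_60", "Velocity_100"]], vocab))  # -> [[0, 1]]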
@@ -1037,6 +1039,8 @@ def _ids_to_tokens(
         :return: the sequence of corresponding tokens (str or Event).
         """
         tokens = []
+        if len(ids) == 0:
+            return tokens
         if isinstance(ids[0], list):  # multiple vocabularies
             for (
                 multi_ids
@@ -1065,6 +1069,8 @@ def _events_to_tokens(
         :return: the sequence of corresponding tokens (str).
         """
         tokens = []
+        if len(events) == 0:
+            return tokens
         if isinstance(events[0], list):  # multiple vocabularies
             for (
                 multi_event
@@ -1090,6 +1096,8 @@ def _ids_to_bytes(
         :param as_one_str: will return the bytes all concatenated into one string. (default: False)
         :return: the tokens converted into strings of unique bytes.
         """
+        if len(ids) == 0:
+            return ""
         if isinstance(ids[0], list):
             return [self._ids_to_bytes(item, as_one_str) for item in ids]
         bytes_ = [self._vocab_base_id_to_byte[i] for i in ids]
@@ -1104,6 +1112,8 @@ def _bytes_to_tokens(
         :param as_str: return the events as string objects, otherwise Event objects (default: True)
         :return: the sequence of corresponding tokens (str).
         """
+        if len(bytes_) == 0:
+            return []
         if isinstance(bytes_[0], list):  # multiple vocabularies
             return [self._bytes_to_tokens(byte_) for byte_ in bytes_]
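
One detail visible in the diff above: the guard in _ids_to_bytes returns an empty string for the empty case, while the other four helpers return empty lists. The following standalone sketch of that byte path uses hypothetical names and a toy vocabulary, simplified from the diff rather than taken from the miditok API:

# Hypothetical, simplified mirror of the guarded byte conversion (not miditok code).
def ids_to_bytes(ids, id_to_byte, as_one_str=False):
    if len(ids) == 0:             # guard: empty input yields an empty string
        return ""
    if isinstance(ids[0], list):  # multiple vocabularies: recurse per stream
        return [ids_to_bytes(sub, id_to_byte, as_one_str) for sub in ids]
    bytes_ = [id_to_byte[i] for i in ids]
    return "".join(bytes_) if as_one_str else bytes_

id_to_byte = {0: "a", 1: "b"}
assert ids_to_bytes([], id_to_byte) == ""                         # empty in, empty str out
assert ids_to_bytes([0, 1], id_to_byte, as_one_str=True) == "ab"
assert ids_to_bytes([[0], []], id_to_byte) == [["a"], ""]         # empty sub-stream stays ""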
