forked from MoonInTheRiver/DiffSinger
-
Notifications
You must be signed in to change notification settings - Fork 294
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
关于唱法模型数据集 #194
Comments
opencpop的原始标注格式有问题,所以要用也得用转换过的。 但依然不建议用它训练,因为它的标注质量不咋地 |
不建议用opencpop本身的标注训练,要用opencpop建议自己重新标注 |
如果您坚持使用 opencpop 的原始标注,下面这个脚本可以帮您完成一部分转换 :)
import os
def parse_line(line):
    """Split one '|'-delimited transcription line into its used fields.

    The line is cut on '|'. Field 0 becomes ``name``; field 2, the
    second-to-last field and the last field are each split on single
    spaces into ``ph_seq``, ``ph_dur`` and ``slur`` respectively.
    Presumably this is the original opencpop ``transcriptions.txt``
    layout -- confirm against the dataset.

    Raises:
        IndexError: if the line has fewer than three '|'-separated fields.
    """
    fields = line.split('|')
    name = fields[0]
    phonemes = fields[2]
    # The last two columns are addressed from the end of the record.
    durations = fields[-2]
    slur_flags = fields[-1]
    return dict(
        name=name,
        ph_seq=phonemes.split(' '),
        ph_dur=durations.split(' '),
        slur=slur_flags.split(' '),
    )
def handle_slur(wav):
    """Fold slur entries into the phoneme they extend, in place.

    For every index whose ``wav['slur']`` flag is ``'1'``, the duration at
    that index is added to the nearest earlier index whose flag is ``'0'``,
    and both ``wav['ph_seq'][i]`` and ``wav['ph_dur'][i]`` are set to ``''``
    so a later filtering pass can drop them. Entries whose flag is neither
    ``'0'`` nor ``'1'`` are left untouched and never receive a merge.

    Args:
        wav: dict with parallel lists ``ph_seq``, ``ph_dur`` (numeric
            strings) and ``slur`` (``'0'``/``'1'`` strings).

    Raises:
        ValueError: if a slur entry has no earlier ``'0'`` entry to merge
            into. Raised before that entry is modified.
    """
    ph_seq = wav['ph_seq']
    ph_dur = wav['ph_dur']
    anchor = None  # index of the most recent entry flagged '0'
    for i, flag in enumerate(wav['slur']):
        if flag == '0':
            anchor = i
        elif flag == '1':
            if anchor is None:
                # Searching backwards with negative indices would wrap around
                # and merge this duration into an entry at the END of the
                # list; fail loudly instead.
                raise ValueError(
                    f"slur at index {i} of {wav.get('name', '<unnamed>')!r} "
                    "has no preceding non-slur phoneme"
                )
            ph_seq[i] = ''
            ph_dur[anchor] = str(float(ph_dur[anchor]) + float(ph_dur[i]))
            ph_dur[i] = ''
def cleanup(wav, key):
    """Rebind ``wav[key]`` to a new list holding only its truthy items.

    Used to drop the ``''`` placeholders left behind for merged entries.
    """
    wav[key] = list(filter(None, wav[key]))
def apply_map(ph_seq, mapping):
    """Rewrite adjacent phoneme pairs of ``ph_seq`` in place.

    ``mapping`` maps a whitespace-separated pair (e.g. ``'c i'``) to its
    replacement pair (e.g. ``'c i0'``). The sequence is scanned left to
    right; each position sees the result of any rewrite made at the
    previous position. If several keys split to the same pair, the last
    one in ``mapping`` wins.
    """
    if len(ph_seq) < 2:
        return  # no adjacent pair to inspect

    # Split every rule once instead of once per position.
    table = {
        tuple(source.split()): target.split()
        for source, target in mapping.items()
    }
    for pos in range(len(ph_seq) - 1):
        replacement = table.get((ph_seq[pos], ph_seq[pos + 1]))
        if replacement is not None:
            ph_seq[pos], ph_seq[pos + 1] = replacement
def main(transcription_in=None, transcription_out=None):
    """Convert a '|'-delimited transcription file into a CSV.

    Each non-blank input line is parsed, its slur entries are merged into
    the preceding phoneme, the emptied placeholders are removed, and the
    phoneme pairs listed in ``mapping`` are renamed. The result is written
    as rows of ``name,ph_seq,ph_dur`` under a header line.

    All lines are converted before the output file is opened, so a failure
    while parsing does not leave a partial CSV behind.

    Args:
        transcription_in: path of the input transcription file. Defaults
            to the placeholder path the script originally hard-coded.
        transcription_out: path of the CSV to write. Defaults to the
            placeholder path the script originally hard-coded.
    """
    if transcription_in is None:
        transcription_in = os.path.join('path/to/your/opencpop/segments', 'transcriptions.txt')
    if transcription_out is None:
        transcription_out = os.path.join('/path/to/your/save', 'transcriptions.csv')

    # Phoneme pairs to rename, keyed by the pair as it appears in the input.
    mapping = {
        'ch i': 'ch ir',
        'c i': 'c i0',
        'r i': 'r ir',
        'sh i': 'sh ir',
        's i': 's i0',
        'y an': 'y En',
        'y e': 'y E',
        'zh i': 'zh ir',
        'z i': 'z i0',
    }

    segments = []
    with open(transcription_in, 'r', encoding='utf8') as f:
        for line in f:
            line = line.strip()
            if not line:
                # A blank (e.g. trailing) line has no fields to parse and
                # would otherwise crash parse_line with IndexError.
                continue
            wav = parse_line(line)
            handle_slur(wav)
            cleanup(wav, 'ph_seq')
            cleanup(wav, 'ph_dur')
            apply_map(wav['ph_seq'], mapping)
            segments.append([
                wav['name'],
                ' '.join(wav['ph_seq']),
                ' '.join(wav['ph_dur']),
            ])

    with open(transcription_out, 'w', encoding='utf8') as f:
        f.write('name,ph_seq,ph_dur\n')
        for name, ph_seq, ph_dur in segments:
            f.write(f'{name},{ph_seq},{ph_dur}\n')
if __name__ == '__main__':
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The text was updated successfully, but these errors were encountered: