Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[EN X-SAMPA Phonemizer] Multiple fixes #1165

Merged
merged 2 commits into from
Jun 9, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 40 additions & 4 deletions OpenUtau.Plugin.Builtin/EnXSampaPhonemizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ namespace OpenUtau.Plugin.Builtin {
/// </summary>
[Phonemizer("English X-SAMPA phonemizer", "EN X-SAMPA", "Lotte V", language: "EN")]
public class EnXSampaPhonemizer : SyllableBasedPhonemizer {
private readonly string[] vowels = "a,A,@,{,V,O,aU,aI,E,3,eI,I,i,oU,OI,U,u,Q,Ol,Ql,aUn,e@,eN,IN,e,o,Ar,Qr,Er,Ir,Or,Ur,ir,ur,aIr,aUr,A@,Q@,E@,I@,O@,U@,i@,u@,aI@,aU@,@r,@l,@m,@n,@N,1,e@m,e@n,y,I\\,M,U\\,Y,@\\,@`,3`,A`,Q`,E`,I`,O`,U`,i`,u`,aI`,aU`,},2,3\\,6,7,8,9,&,{~,I~,aU~,VI,VU,@U,ai,ei,Oi,au,ou,Ou,@u,i:,u:,O:,e@0,E~,e~,3r,ar,or,{l,Al,al,El,Il,il,ul,Ul,mm,nn,ll,NN".Split(',');
private readonly string[] vowels = "a,A,@,{,V,O,aU,aI,E,3,eI,I,i,oU,OI,U,u,Q,Ol,Ql,aUn,e@,eN,IN,e,o,Ar,Qr,Er,Ir,Or,Ur,ir,ur,aIr,aUr,A@,Q@,E@,I@,O@,U@,i@,u@,aI@,aU@,@r,@l,@m,@n,@N,1,e@m,e@n,y,I\\,M,U\\,Y,@\\,@`,3`,A`,Q`,E`,I`,O`,U`,i`,u`,aI`,aU`,},2,3\\,6,7,8,9,&,{~,I~,aU~,VI,VU,@U,ai,ei,Oi,au,ou,Ou,@u,i:,u:,O:,e@0,E~,e~,3r,ar,or,{l,Al,al,El,Il,il,ol,ul,Ul,oUl,mm,nn,ll,NN".Split(',');
private readonly string[] consonants = "b,tS,d,D,4,f,g,h,dZ,k,l,m,n,N,p,r,s,S,t,T,v,w,W,j,z,Z,t_},・,_".Split(',');
private readonly string[] affricates = "tS,dZ".Split(',');
private readonly string[] shortConsonants = "4".Split(",");
Expand Down Expand Up @@ -108,6 +108,15 @@ public class EnXSampaPhonemizer : SyllableBasedPhonemizer {

private bool isVelarNasalFallback = false;

// For Kasane Teto's missing sample: alias replacement table used when the voicebank
// has no "bV" / "- bV" oto. The literal is a ';'-separated list of "key=value" pairs;
// entries that do not split into exactly two parts, or that map a symbol to itself,
// are dropped. With the current literal this yields the single mapping V -> @.
private readonly Dictionary<string, string> tetoException = "V=@".Split(';')
.Select(entry => entry.Split('='))
.Where(parts => parts.Length == 2)
.Where(parts => parts[0] != parts[1])
.ToDictionary(parts => parts[0], parts => parts[1]);

// Set in ProcessSyllable when neither "- bV" nor "bV" has an oto at the vowel tone;
// while set, ValidateAlias applies each tetoException replacement to the alias.
private bool isTetoException = false;

private readonly Dictionary<string, string> vvExceptions =
new Dictionary<string, string>() {
{"aI","j"},
Expand Down Expand Up @@ -227,10 +236,14 @@ protected override List<string> ProcessSyllable(Syllable syllable) {
isMissingCanadianRaising = true;
}

if (!HasOto($"- V", syllable.vowelTone) && !HasOto($"V", syllable.vowelTone) || (!HasOto($"- bV", syllable.vowelTone) && !HasOto($"bV", syllable.vowelTone))) {
if (!HasOto($"- V", syllable.vowelTone) && !HasOto($"V", syllable.vowelTone)) {
isSimpleDelta = true;
}

if (!HasOto($"- bV", syllable.vowelTone) && !HasOto($"bV", syllable.vowelTone)) {
isTetoException = true;
}

if ((!HasOto($"- I", syllable.vowelTone) && !HasOto($"I", syllable.vowelTone)) || (!HasOto($"- U", syllable.vowelTone) && !HasOto($"U", syllable.vowelTone))) {
isMiniDelta = true;
}
Expand Down Expand Up @@ -370,9 +383,10 @@ protected override List<string> ProcessSyllable(Syllable syllable) {
lastC = 0;
} else {
var cv = cc.Last() + v;
basePhoneme = cv;
if ((!HasOto(cv, syllable.vowelTone) && !HasOto(ValidateAlias(cv), syllable.vowelTone)) && (HasOto(crv, syllable.vowelTone) || HasOto(ValidateAlias(crv), syllable.vowelTone))) {
if (HasOto(crv, syllable.vowelTone) || HasOto(ValidateAlias(crv), syllable.vowelTone)) {
basePhoneme = crv;
} else {
basePhoneme = cv;
}
// try CCV
if (cc.Length - firstC > 1) {
Expand Down Expand Up @@ -424,6 +438,8 @@ protected override List<string> ProcessSyllable(Syllable syllable) {
var cc1 = $"{string.Join("", cc.Skip(i))}";
var ccv = string.Join("", cc.Skip(i)) + v;
var ucv = $"_{cc.Last()}{v}";
var crv = $"{cc.Last()} {v}";
var cv = $"{cc.Last()}{v}";
// Use [C1C2...] when current word starts with 2 consonants or more
if (CurrentWordCc.Length >= 2) {
cc1 = $"{string.Join("", cc.Skip(i))}";
Expand Down Expand Up @@ -452,6 +468,10 @@ protected override List<string> ProcessSyllable(Syllable syllable) {
// Use _CV if it exists
} else if ((HasOto(ucv, syllable.vowelTone) || HasOto(ValidateAlias(ucv), syllable.vowelTone)) && HasOto(cc1, syllable.vowelTone) && !cc1.Contains($"{cc[i]} {cc[i + 1]}")) {
basePhoneme = ucv;
} else if (HasOto(crv, syllable.vowelTone) || HasOto(ValidateAlias(crv), syllable.vowelTone)) {
basePhoneme = crv;
} else {
basePhoneme = cv;
}
if (i + 1 < lastC) {
var cc2 = $"{string.Join("", cc.Skip(i))}";
Expand Down Expand Up @@ -483,6 +503,10 @@ protected override List<string> ProcessSyllable(Syllable syllable) {
// Use _CV if it exists
} else if ((HasOto(ucv, syllable.vowelTone) || HasOto(ValidateAlias(ucv), syllable.vowelTone)) && (HasOto(cc2, syllable.vowelTone) || HasOto(ValidateAlias(cc2), syllable.vowelTone)) && !cc2.Contains($"{cc[i + 1]} {cc[i + 2]}") && !PreviousWordCc.Contains(ucv)) {
basePhoneme = ucv;
} else if (HasOto(crv, syllable.vowelTone) || HasOto(ValidateAlias(crv), syllable.vowelTone)) {
basePhoneme = crv;
} else {
basePhoneme = cv;
}
if (HasOto(cc1, syllable.tone) && HasOto(cc2, syllable.tone) && !cc1.Contains($"{string.Join("", cc.Skip(i))}")) {
// like [V C1] [C1 C2] [C2 C3] [C3 ..]
Expand Down Expand Up @@ -707,6 +731,18 @@ protected override string ValidateAlias(string alias) {
}
}

if (isTetoException) {
foreach (var syllable in tetoException) {
alias = alias.Replace(syllable.Key, syllable.Value);
}
}

if (isMissingCanadianRaising) {
foreach (var syllable in CanadianRaising) {
alias = alias.Replace(syllable.Key, syllable.Value);
}
}

// Split diphthongs adjuster
if (alias.Contains("U^")) {
alias = alias.Replace("U^", "U");
Expand Down
Loading