diff --git a/OpenUtau.Core/Voicevox/Phonemizers/SimpleVoicevoxPhonemizer.cs b/OpenUtau.Core/Voicevox/Phonemizers/SimpleVoicevoxPhonemizer.cs new file mode 100644 index 000000000..465a0426d --- /dev/null +++ b/OpenUtau.Core/Voicevox/Phonemizers/SimpleVoicevoxPhonemizer.cs @@ -0,0 +1,53 @@ +using System.Collections.Generic; +using System.Linq; +using OpenUtau.Api; +using OpenUtau.Core.Ustx; +using OpenUtau.Core.Voicevox; + +namespace Voicevox { + [Phonemizer("Simple Voicevox Japanese Phonemizer", "S-VOICEVOX JA", language: "JA")] + public class SimpleVoicevoxPhonemizer : Phonemizer { + + protected VoicevoxSinger singer; + + public override void SetSinger(USinger singer) { + this.singer = singer as VoicevoxSinger; + if (this.singer != null) { + this.singer.voicevoxConfig.Tag = this.Tag; + } + } + + public override Result Process(Note[] notes, Note? prev, Note? next, Note? prevNeighbour, Note? nextNeighbour, Note[] prevNeighbours) { + Phoneme[] phonemes = new Phoneme[notes.Length]; + for (int i = 0; i < notes.Length; i++) { + var currentLyric = notes[i].lyric.Normalize(); //measures for Unicode + if (currentLyric.StartsWith("+")) { + continue; + } + int toneShift = 0; + int? alt = null; + if (notes[i].phonemeAttributes != null) { + var attr = notes[i].phonemeAttributes.FirstOrDefault(attr => attr.index == 0); + toneShift = attr.toneShift; + alt = attr.alternate; + } + + Note[][] simplenotes = new Note[1][]; + var lyricList = notes[i].lyric.Split(" "); + if (lyricList.Length > 1) { + notes[i].lyric = lyricList[1]; + } + if (VoicevoxUtils.IsHiraKana(notes[i].lyric)) { + phonemes[i] = new Phoneme { phoneme = notes[i].lyric }; + } else if (VoicevoxUtils.IsPau(notes[i].lyric)) { + phonemes[i] = new Phoneme { phoneme = notes[i].lyric }; + } else { + phonemes[i] = new Phoneme { + phoneme = "error", + }; + } + } + return new Result { phonemes = phonemes }; + } + } +} diff --git a/OpenUtau.Core/Voicevox/Phonemizers/VoicevoxPhonemizer.cs b/OpenUtau.Core/Voicevox/Phonemizers/VoicevoxPhonemizer.cs new file mode 100644 index 000000000..a1b24523c --- /dev/null +++ b/OpenUtau.Core/Voicevox/Phonemizers/VoicevoxPhonemizer.cs @@ -0,0 +1,103 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using OpenUtau.Api; +using OpenUtau.Core.Ustx; + +namespace OpenUtau.Core.Voicevox { + [Phonemizer("Voicevox Japanese Phonemizer", "VOICEVOX JA", language: "JA")] + public class VoicevoxPhonemizer : Phonemizer { + + protected VoicevoxSinger singer; + Dictionary partResult = new Dictionary(); + + public override void SetSinger(USinger singer) { + this.singer = singer as VoicevoxSinger; + if (this.singer != null) { + this.singer.voicevoxConfig.Tag = this.Tag; + } + } + + public override void SetUp(Note[][] notes, UProject project, UTrack track) { + partResult.Clear(); + foreach(var lyric in notes) { + lyric[0].lyric = lyric[0].lyric.Normalize(); + var lyricList = lyric[0].lyric.Split(" "); + if (lyricList.Length > 1) { + lyric[0].lyric = lyricList[1]; + } + } + var qNotes = VoicevoxUtils.NoteGroupsToVoicevox(notes, timeAxis,this.singer); + var vvNotes = new VoicevoxNote(); + string singerID = VoicevoxUtils.defaultID; + if (this.singer.voicevoxConfig.base_singer_style != null) { + foreach (var s in this.singer.voicevoxConfig.base_singer_style) { + if (s.name.Equals(this.singer.voicevoxConfig.base_singer_name)) { + vvNotes = VoicevoxUtils.VoicevoxVoiceBase(qNotes, s.styles.id.ToString()); + if (s.styles.name.Equals(this.singer.voicevoxConfig.base_singer_style_name)) { + break; + } + } else { + vvNotes = VoicevoxUtils.VoicevoxVoiceBase(qNotes, singerID); + break; + } + } + } else { + vvNotes = VoicevoxUtils.VoicevoxVoiceBase(qNotes, singerID); + } + + var parentDirectory = Directory.GetParent(singer.Location).ToString(); + var yamlPath = Path.Join(parentDirectory, "phonemes.yaml"); + var yamlTxt = File.ReadAllText(yamlPath); + var phonemes_list = Yaml.DefaultDeserializer.Deserialize(yamlTxt); + + var list = new List(vvNotes.phonemes); + foreach (var note in qNotes.notes) { + if (note.vqnindex < 0) { + list.Remove(list[0]); + continue; + } + var noteGroup = notes[note.vqnindex]; + var phoneme = new List(); + int index = 0; + while (list.Count > 0) { + if (phonemes_list.vowels.Contains(list[0].phoneme)) { + phoneme.Add(new Phoneme() { phoneme = list[0].phoneme, position = noteGroup[0].position }); + index++; + list.Remove(list[0]); + break; + }else if (phonemes_list.consonants.Contains(list[0].phoneme)) { + phoneme.Add(new Phoneme() { phoneme = list[0].phoneme, position = noteGroup[0].position - (int)timeAxis.MsPosToTickPos((list[0].frame_length / VoicevoxUtils.fps) * 1000) }); + } + list.Remove(list[0]); + } + partResult[noteGroup] = phoneme.ToArray(); + } + } + + public override Result Process(Note[] notes, Note? prev, Note? next, Note? prevNeighbour, Note? nextNeighbour, Note[] prevs) { + var ps = new List(); + if (partResult.TryGetValue(notes, out var phonemes)) { + return new Result { + phonemes = phonemes.Select(p => { + p.position = p.position - notes[0].position; + return p; + }).ToArray(), + }; + } + return new Result { + phonemes = new Phoneme[] { + new Phoneme { + phoneme = "error", + } + }, + }; + + } + + public override void CleanUp() { + partResult.Clear(); + } + } +} diff --git a/OpenUtau.Core/Voicevox/SimpleVoicevoxPhonemizer.cs b/OpenUtau.Core/Voicevox/SimpleVoicevoxPhonemizer.cs deleted file mode 100644 index ed89bca28..000000000 --- a/OpenUtau.Core/Voicevox/SimpleVoicevoxPhonemizer.cs +++ /dev/null @@ -1,68 +0,0 @@ -using System.Linq; -using OpenUtau.Api; -using OpenUtau.Core.Ustx; -using OpenUtau.Core.Voicevox; - -namespace Voicevox { - [Phonemizer("Simple Voicevox Japanese Phonemizer", "S-VOICEVOX JA", language: "JA")] - public class SimpleVoicevoxPhonemizer : Phonemizer { - - protected VoicevoxSinger singer; - - public override void SetSinger(USinger singer) { - this.singer = singer as VoicevoxSinger; - if (this.singer != null) { - this.singer.voicevoxConfig.Tag = this.Tag; - } - } - - public override Result Process(Note[] notes, Note? prev, Note? next, Note? prevNeighbour, Note? nextNeighbour, Note[] prevNeighbours) { - var note = notes[0]; - var currentLyric = note.lyric.Normalize(); //measures for Unicode - - Dictionary_list dic = new Dictionary_list(); - dic.Loaddic(singer.Location); - int toneShift = 0; - int? alt = null; - if (note.phonemeAttributes != null) { - var attr = note.phonemeAttributes.FirstOrDefault(attr => attr.index == 0); - toneShift = attr.toneShift; - alt = attr.alternate; - } - - //currentLyric = note.phoneticHint.Normalize(); - Note[][] simplenotes = new Note[1][]; - var lyricList = notes[0].lyric.Split(" "); - if (lyricList.Length > 1) { - notes[0].lyric = lyricList[1]; - } - if (VoicevoxUtils.IsHiraKana(notes[0].lyric)) { - return new Result { - phonemes = new Phoneme[] { - new Phoneme { - phoneme = notes[0].lyric, - } - }, - }; - } else if (VoicevoxUtils.IsPau(notes[0].lyric)) { - return new Result { - phonemes = new Phoneme[] { - new Phoneme { - phoneme = "R", - } - }, - }; - } - else - { - return new Result { - phonemes = new Phoneme[] { - new Phoneme { - phoneme = "error", - } - }, - }; - } - } - } -} diff --git a/OpenUtau.Core/Voicevox/VoicevoxConfig.cs b/OpenUtau.Core/Voicevox/VoicevoxConfig.cs index 23e5ed7d6..572915b70 100644 --- a/OpenUtau.Core/Voicevox/VoicevoxConfig.cs +++ b/OpenUtau.Core/Voicevox/VoicevoxConfig.cs @@ -20,8 +20,6 @@ public class VoicevoxConfig { public string version = string.Empty; public string policy = string.Empty; public string portraitPath = string.Empty; - //So that the renderer can distinguish between phonemizers. - public string Tag = "DEFAULT"; public List style_infos; //Prepare for future additions of Teacher Singer. @@ -29,6 +27,10 @@ public class VoicevoxConfig { public string base_singer_name = string.Empty; public string base_singer_style_name = string.Empty; + //So that the renderer can distinguish between phonemizers. + public string Tag = "DEFAULT"; + public Phoneme_list phonemes_list; + public static VoicevoxConfig Load(USinger singer) { try { var response = VoicevoxClient.Inst.SendRequest(new VoicevoxURL() { method = "GET", path = "/singers" }); @@ -66,6 +68,7 @@ public static VoicevoxConfig Load(USinger singer) { } catch { Log.Error("Could not load VOICEVOX singer."); } + return new VoicevoxConfig(); } public void LoadInfo(VoicevoxConfig voicevoxConfig, string location) { @@ -89,7 +92,7 @@ public void LoadInfo(VoicevoxConfig voicevoxConfig, string location) { public class Phoneme_list { public string[] vowels; public string[] consonants; - public string[] kana; + public Dictionary kanas; } public class Dictionary_list { diff --git a/OpenUtau.Core/Voicevox/VoicevoxRenderer.cs b/OpenUtau.Core/Voicevox/VoicevoxRenderer.cs index 0ffc4f4fd..d6dad03f4 100644 --- a/OpenUtau.Core/Voicevox/VoicevoxRenderer.cs +++ b/OpenUtau.Core/Voicevox/VoicevoxRenderer.cs @@ -2,6 +2,7 @@ using System.Collections.Generic; using System.IO; using System.Linq; +using System.Reflection; using System.Threading; using System.Threading.Tasks; using K4os.Hash.xxHash; @@ -16,15 +17,17 @@ namespace OpenUtau.Core.Voicevox { public class VoicevoxRenderer : IRenderer { - const string VOLC = VoicevoxUtils.VOLC; + const string VOLSC = VoicevoxUtils.VOLSC; + const string IVOLC = VoicevoxUtils.IVOLC; const string PITD = Format.Ustx.PITD; static readonly HashSet supportedExp = new HashSet(){ Format.Ustx.DYN, - //PITD, + PITD, Format.Ustx.CLR, Format.Ustx.VOL, - //VOLC, + VOLSC, + IVOLC, //Format.Ustx.SHFC, Format.Ustx.SHFT }; @@ -63,7 +66,6 @@ public Task Render(RenderPhrase phrase, Progress progress, int tra if (singer != null) { Log.Information($"Starting Voicevox synthesis"); VoicevoxNote vvNotes = new VoicevoxNote(); - string singerID = VoicevoxUtils.defaultID; if (!singer.voicevoxConfig.Tag.Equals("VOICEVOX JA")) { Note[][] notes = new Note[phrase.phones.Length][]; for (int i = 0; i < phrase.phones.Length; i++) { @@ -79,22 +81,17 @@ public Task Render(RenderPhrase phrase, Progress progress, int tra var qNotes = VoicevoxUtils.NoteGroupsToVoicevox(notes, phrase.timeAxis, singer); //Prepare for future additions of Teacher Singer. - if (singer.voicevoxConfig.base_singer_style != null) { - foreach (var s in singer.voicevoxConfig.base_singer_style) { - if (s.name.Equals(singer.voicevoxConfig.base_singer_name)) { - if (s.styles.name.Equals(singer.voicevoxConfig.base_singer_style_name)) { - vvNotes = VoicevoxUtils.VoicevoxVoiceBase(qNotes, s.styles.id.ToString()); - break; - } - } - } - } - if (vvNotes.phonemes.Count() == 0) { - vvNotes = VoicevoxUtils.VoicevoxVoiceBase(qNotes, singerID); + string baseSingerID = VoicevoxUtils.getBaseSingerID(singer); + vvNotes = VoicevoxUtils.VoicevoxVoiceBase(qNotes, baseSingerID); + + if (!phrase.phones[0].direct) { + double frameMs = 1 / 10d * VoicevoxUtils.fps; + vvNotes.f0 = VoicevoxUtils.SampleCurve(phrase, phrase.pitches, 0, frameMs, vvNotes.volume.Count(), 0, 0, x => MusicMath.ToneToFreq(x * 0.01)).ToList(); + } else { + //Compatible with toneShift (key shift), for adjusting the range of tones when synthesizing + vvNotes.f0 = vvNotes.f0.Select(f0 => f0 = f0 * Math.Pow(2, ((phrase.phones[0].toneShift * -1) / 12d))).ToList(); } - //Compatible with toneShift (key shift), for adjusting the range of tones when synthesizing - vvNotes.f0 = vvNotes.f0.Select(f0 => f0 = f0 * Math.Pow(2, ((phrase.phones[0].toneShift * -1) / 12d))).ToList(); //Volume parameter for synthesis. Scheduled to be revised vvNotes.volume = vvNotes.volume.Select(vol => vol = vol * phrase.phones[0].volume).ToList(); } else { @@ -111,7 +108,7 @@ public Task Render(RenderPhrase phrase, Progress progress, int tra } if (style.name.Equals(phrase.phones[0].suffix) && style.type.Equals("frame_decode")) { speaker = style.id; - } else if((style.name + "_" + style.type).Equals(phrase.phones[0].suffix)){ + } else if ((style.name + "_" + style.type).Equals(phrase.phones[0].suffix)) { speaker = style.id; } }); @@ -161,79 +158,112 @@ public Task Render(RenderPhrase phrase, Progress progress, int tra //Synthesize with parameters of phoneme, F0, and volume. Under development static VoicevoxNote PhraseToVoicevoxNotes(RenderPhrase phrase) { - VoicevoxNote notes = new VoicevoxNote(); + Note[][] notes = new Note[phrase.notes.Length][]; + for (int i = 0; i < phrase.phones.Length; i++) { + int noteindex = phrase.phones[i].noteIndex; + if (notes[noteindex] == null) { + notes[noteindex] = new Note[1]; + notes[noteindex][0] = new Note() { + lyric = phrase.notes[noteindex].lyric, + position = phrase.notes[noteindex].position, + duration = phrase.notes[noteindex].duration, + tone = (int)(phrase.notes[noteindex].tone + phrase.phones[i].toneShift) + }; + } + } + + foreach (var note in notes) { + note[0].lyric = note[0].lyric.Normalize(); + var lyricList = note[0].lyric.Split(" "); + if (lyricList.Length > 1) { + note[0].lyric = lyricList[1]; + } + } + VoicevoxNote vnotes = new VoicevoxNote(); + var singer = phrase.singer as VoicevoxSinger; + var qNotes = VoicevoxUtils.NoteGroupsToVoicevox(notes, phrase.timeAxis, singer); + + //Prepare for future additions of Teacher Singer. + string baseSingerID = VoicevoxUtils.getBaseSingerID(singer); + VoicevoxNote vnotestemp = VoicevoxUtils.VoicevoxVoiceBase(qNotes, baseSingerID); int headFrames = (int)(VoicevoxUtils.headS * VoicevoxUtils.fps); int tailFrames = (int)(VoicevoxUtils.tailS * VoicevoxUtils.fps); - notes.phonemes.Add(new Phonemes { + vnotes.phonemes.Add(new Phonemes { phoneme = "pau", frame_length = headFrames }); foreach (var phone in phrase.phones) { - notes.phonemes.Add(new Phonemes { + vnotes.phonemes.Add(new Phonemes { phoneme = phone.phoneme, frame_length = (int)(phone.durationMs / 1000d * VoicevoxUtils.fps), }); } - notes.phonemes.Add(new Phonemes { + vnotes.phonemes.Add(new Phonemes { phoneme = "pau", frame_length = tailFrames }); int vvTotalFrames = -(headFrames + tailFrames); - notes.phonemes.ForEach(x => vvTotalFrames += x.frame_length); - double frameMs = 1 / 1000d * VoicevoxUtils.fps; + vnotes.phonemes.ForEach(x => vvTotalFrames += x.frame_length); + double frameMs = VoicevoxUtils.fps;//1 / 1000d * int totalFrames = (int)(vvTotalFrames / VoicevoxUtils.fps * 1000d); int frameRatio = vvTotalFrames / totalFrames; const int pitchInterval = 5; - //var curve = phrase.pitches.SelectMany(item => Enumerable.Repeat(item, 5)).ToArray(); - notes.f0 = VoicevoxUtils.SampleCurve(phrase, phrase.pitches, 0, frameMs, vvTotalFrames, 0, 0, x => MusicMath.ToneToFreq(x * 0.01)).ToList(); - //notes.f0 = f0.Where((x, i) => i % frameRatio == 0).ToList(); - float[] f0Shifted = notes.f0.Select(f => (float)f).ToArray(); + vnotes.f0 = VoicevoxUtils.SampleCurve(phrase, phrase.pitches, 0, frameMs, vvTotalFrames, 0, 0, x => MusicMath.ToneToFreq(x * 0.01)).ToList(); + float[] f0Shifted = vnotes.f0.Select(f => (float)f).ToArray(); if (phrase.toneShift != null) { - for (int i = 0; i < notes.f0.Count; i++) { + for (int i = 0; i < vnotes.f0.Count; i++) { double posMs = phrase.positionMs - phrase.leadingMs + i * frameMs; int ticks = phrase.timeAxis.MsPosToTickPos(posMs) - (phrase.position - phrase.leading); int index = Math.Max(0, (int)((double)ticks / pitchInterval)); if (index < phrase.pitches.Length) { - f0Shifted[i] = (float)MusicMath.ToneToFreq((phrase.pitches[index] + phrase.toneShift[index]) * 0.01); + f0Shifted[i] = (float)(phrase.pitches[index] * Math.Pow(2, ((phrase.phones[0].toneShift * -1) / 12d))); } } } - var volumeCurve = phrase.curves.FirstOrDefault(c => c.Item1 == VOLC); + var volumeCurve = phrase.curves.FirstOrDefault(c => c.Item1 == IVOLC); if (volumeCurve != null) { - notes.volume = VoicevoxUtils.SampleCurve(phrase, volumeCurve.Item2, 0, frameMs, vvTotalFrames, 0, 0, x => MusicMath.DecibelToLinear(x)).ToList(); - //notes.volume = volume.Where((x, i) => i % frameRatio == 0).ToList(); + vnotes.volume = VoicevoxUtils.SampleCurve(phrase, volumeCurve.Item2, 0, frameMs, vvTotalFrames, 0, 0, x => MusicMath.DecibelToLinear(x)).ToList(); } else { - notes.volume = Enumerable.Repeat(1d, vvTotalFrames).ToList(); + vnotes.volume = Enumerable.Repeat(1d, vvTotalFrames).ToList(); } - notes.outputStereo = false; - notes.outputSamplingRate = 44100; - notes.volumeScale = 1; - return notes; + vnotes.outputStereo = false; + vnotes.outputSamplingRate = 44100; + vnotes.volumeScale = 1; + return vnotes; } public UExpressionDescriptor[] GetSuggestedExpressions(USinger singer, URenderSettings renderSettings) { return new UExpressionDescriptor[] { }; //under development - //var result = new List { - // new UExpressionDescriptor{ - // name="volume (curve)", - // abbr=VOLC, - // type=UExpressionType.Curve, - // min=-20, - // max=20, - // defaultValue=0, - // isFlag=false, - // }, - //}; + var result = new List { + new UExpressionDescriptor{ + name="volume scale (curve)", + abbr=VOLSC, + type=UExpressionType.Curve, + min=-20, + max=20, + defaultValue=0, + isFlag=false, + }, + new UExpressionDescriptor{ + name="input volume (curve)", + abbr=IVOLC, + type=UExpressionType.Curve, + min=-20, + max=20, + defaultValue=0, + isFlag=false, + }, + }; //return result.ToArray(); } diff --git a/OpenUtau.Core/Voicevox/VoicevoxSinger.cs b/OpenUtau.Core/Voicevox/VoicevoxSinger.cs index 3250d84da..bae30d3cc 100644 --- a/OpenUtau.Core/Voicevox/VoicevoxSinger.cs +++ b/OpenUtau.Core/Voicevox/VoicevoxSinger.cs @@ -84,16 +84,16 @@ void Load() { var parentDirectory = Directory.GetParent(this.Location).ToString(); var yamlPath = Path.Join(parentDirectory, "phonemes.yaml"); var yamlTxt = File.ReadAllText(yamlPath); - var phonemes_list = Yaml.DefaultDeserializer.Deserialize(yamlTxt); + voicevoxConfig.phonemes_list = Yaml.DefaultDeserializer.Deserialize(yamlTxt); //Prepared for planned changes or additions to phonemizers. - foreach (var str in phonemes_list.vowels) { + foreach (var str in voicevoxConfig.phonemes_list.vowels) { phonemes.Add(str); } - foreach (var str in phonemes_list.consonants) { + foreach (var str in voicevoxConfig.phonemes_list.consonants) { phonemes.Add(str); } - foreach (var str in phonemes_list.kana) { - phonemes.Add(str); + foreach (var kana in voicevoxConfig.phonemes_list.kanas) { + phonemes.Add(kana.Key); } } catch (Exception e) { Log.Error(e, $"Failed to load phonemes.yaml for {Name}"); diff --git a/OpenUtau.Core/Voicevox/VoicevoxUtils.cs b/OpenUtau.Core/Voicevox/VoicevoxUtils.cs index adee594ff..488e16820 100644 --- a/OpenUtau.Core/Voicevox/VoicevoxUtils.cs +++ b/OpenUtau.Core/Voicevox/VoicevoxUtils.cs @@ -37,11 +37,13 @@ public class VoicevoxQueryMain { internal static class VoicevoxUtils { - public const string VOLC = "volc"; + public const string VOLSC = "volsc"; + public const string IVOLC = "ivolc"; public const int headS = 1; public const int tailS = 1; public const double fps = 93.75; public const string defaultID = "6000"; + static Dictionary_list dic = new Dictionary_list(); public static VoicevoxNote VoicevoxVoiceBase(VoicevoxQueryMain qNotes, string id) { var queryurl = new VoicevoxURL() { method = "POST", path = "/sing_frame_audio_query", query = new Dictionary { { "speaker", id } }, body = JsonConvert.SerializeObject(qNotes) }; @@ -62,7 +64,6 @@ public static VoicevoxQueryMain NoteGroupsToVoicevox(Note[][] notes, TimeAxis ti BaseChinesePhonemizer.RomanizeNotes(notes); } VoicevoxQueryMain qnotes = new VoicevoxQueryMain(); - Dictionary_list dic = new Dictionary_list(); dic.Loaddic(singer.Location); int index = 0; int duration = 0; @@ -116,7 +117,7 @@ public static VoicevoxQueryMain NoteGroupsToVoicevox(Note[][] notes, TimeAxis ti } public static double[] SampleCurve(RenderPhrase phrase, float[] curve, double defaultValue, double frameMs, int length, int headFrames, int tailFrames, Func convert) { - const int interval = 5; + double interval = curve.Length / length; var result = new double[length]; if (curve == null) { Array.Fill(result, defaultValue); @@ -124,9 +125,9 @@ public static double[] SampleCurve(RenderPhrase phrase, float[] curve, double de } for (int i = 0; i < length; i++) { - double posMs = phrase.positionMs - phrase.leadingMs + i * frameMs; + double posMs = phrase.positionMs - phrase.leadingMs + (i * interval); int ticks = phrase.timeAxis.MsPosToTickPos(posMs) - (phrase.position - phrase.leading); - int index = Math.Max(0, (int)((double)ticks / interval)); + int index = Math.Max(0, (int)((double)ticks )); if (index < curve.Length) { result[i] = convert(curve[index]); } @@ -153,5 +154,18 @@ public static bool IsPau(string s) { } return false; } + + public static string getBaseSingerID(VoicevoxSinger singer) { + if (singer.voicevoxConfig.base_singer_style != null) { + foreach (var s in singer.voicevoxConfig.base_singer_style) { + if (s.name.Equals(singer.voicevoxConfig.base_singer_name)) { + if (s.styles.name.Equals(singer.voicevoxConfig.base_singer_style_name)) { + return s.styles.id.ToString(); + } + } + } + } + return defaultID; + } } } diff --git a/OpenUtau.Plugin.Builtin/SimpleVoicevoxENtoJAPhonemizer.cs b/OpenUtau.Plugin.Builtin/SimpleVoicevoxENtoJAPhonemizer.cs new file mode 100644 index 000000000..157a8d1f0 --- /dev/null +++ b/OpenUtau.Plugin.Builtin/SimpleVoicevoxENtoJAPhonemizer.cs @@ -0,0 +1,346 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using OpenUtau.Api; +using OpenUtau.Core.G2p; +using WanaKanaNet; +using OpenUtau.Plugin.Builtin; + +namespace OpenUtau.Core.Voicevox { + [Phonemizer("Simple Voicevox ENtoJA Phonemizer", "VOICEVOX EN to JA", "", language: "EN")] + public class SimpleVoicevoxENtoJAPhonemizer : SyllableBasedPhonemizer { + protected override string[] GetVowels() => vowels; + private static readonly string[] vowels = + "a i u e o ay ey oy ow aw".Split(); + protected override string[] GetConsonants() => consonants; + private static readonly string[] consonants = + "b by ch d dh f g gy h hy j k ky l ly m my n ny ng p py r ry s sh t ts th v w y z zh".Split(); + protected override string GetDictionaryName() => "cmudict-0_7b.txt"; + protected override Dictionary GetDictionaryPhonemesReplacement() => dictionaryPhonemesReplacement; + private static readonly Dictionary dictionaryPhonemesReplacement = new Dictionary { + { "aa", "a" }, + { "ae", "e" }, + { "ah", "a" }, + { "ao", "o" }, + { "aw", "aw" }, + { "ay", "ay" }, + { "b", "b" }, + { "ch", "ch" }, + { "d", "d" }, + { "dh", "dh" }, + { "eh", "e" }, + { "er", "o" }, + { "ey", "ey" }, + { "f", "f" }, + { "g", "g" }, + { "hh", "h" }, + { "ih", "e" }, + { "iy", "i" }, + { "jh", "j" }, + { "k", "k" }, + { "l", "l" }, + { "m", "m" }, + { "n", "n" }, + { "ng", "ng" }, + { "ow", "ow" }, + { "oy", "oy" }, + { "p", "p" }, + { "r", "r" }, + { "s", "s" }, + { "sh", "sh" }, + { "t", "t" }, + { "th", "th" }, + { "uh", "o" }, + { "uw", "u" }, + { "v", "v" }, + { "w", "w" }, + { "y", "y" }, + { "z", "z" }, + { "zh", "zh" }, + }; + + protected override IG2p LoadBaseDictionary() => new ArpabetG2p(); + + private Dictionary StartingConsonant => startingConsonant; + private static readonly Dictionary startingConsonant = new Dictionary { + { "", "" }, + { "b", "b" }, + { "by", "by" }, + { "ch", "ch" }, + { "d", "d" }, + { "dh", "d" }, + { "f", "f" }, + { "g", "g" }, + { "gy", "gy" }, + { "h", "h" }, + { "hy", "hy" }, + { "j", "j" }, + { "k", "k" }, + { "ky", "ky" }, + { "l", "r" }, + { "ly", "ry" }, + { "m", "m" }, + { "my", "my" }, + { "n", "n" }, + { "ny", "ny" }, + { "ng", "n" }, + { "p", "p" }, + { "py", "py" }, + { "r", "rr" }, + { "ry", "ry" }, + { "s", "s" }, + { "sh", "sh" }, + { "t", "t" }, + { "ts", "ts" }, + { "th", "s" }, + { "v", "v" }, + { "w", "w" }, + { "y", "y" }, + { "z", "z" }, + { "zh", "sh" }, + }; + + private Dictionary SoloConsonant => soloConsonant; + private static readonly Dictionary soloConsonant = new Dictionary { + { "b", "ぶ" }, + { "by", "び" }, + { "ch", "ちゅ" }, + { "d", "ど" }, + { "dh", "ず" }, + { "f", "ふ" }, + { "g", "ぐ" }, + { "gy", "ぎ" }, + { "h", "ほ" }, + { "hy", "ひ" }, + { "j", "じゅ" }, + { "k", "く" }, + { "ky", "き" }, + { "l", "う" }, + { "ly", "り" }, + { "m", "む" }, + { "my", "み" }, + { "n", "ん" }, + { "ny", "に" }, + { "ng", "ん" }, + { "p", "ぷ" }, + { "py", "ぴ" }, + { "r", "う" }, + { "ry", "り" }, + { "s", "す" }, + { "sh", "しゅ" }, + { "t", "と" }, + { "ts", "つ" }, + { "th", "す" }, + { "v", "ヴ" }, + { "w", "う" }, + { "y", "い" }, + { "z", "ず" }, + { "zh", "しゅ" }, + }; + + private string[] SpecialClusters = "ky gy ts ny hy by py my ry ly".Split(); + + private Dictionary AltCv => altCv; + private static readonly Dictionary altCv = new Dictionary { + {"si", "suli" }, + {"zi", "zuli" }, + {"ti", "teli" }, + {"tu", "tolu" }, + {"di", "deli" }, + {"du", "dolu" }, + {"hu", "holu" }, + {"yi", "i" }, + {"wu", "u" }, + {"wo", "ulo" }, + {"rra", "wa" }, + {"rri", "wi" }, + {"rru", "ru" }, + {"rre", "we" }, + {"rro", "ulo" }, + }; + + private Dictionary ConditionalAlt => conditionalAlt; + private static readonly Dictionary conditionalAlt = new Dictionary { + {"ulo", "wo"}, + {"va", "fa"}, + {"vi", "fi"}, + {"vu", "fu"}, + {"ヴ", "ふ"}, + {"ve", "fe"}, + {"vo", "fo"}, + }; + + private Dictionary ExtraCv => extraCv; + private static readonly Dictionary extraCv = new Dictionary { + {"kye", new [] { "ki", "e" } }, + {"gye", new [] { "gi", "e" } }, + {"suli", new [] { "se", "i" } }, + {"she", new [] { "si", "e" } }, + {"zuli", new [] { "ze", "i" } }, + {"je", new [] { "ji", "e" } }, + {"teli", new [] { "te", "i" } }, + {"tolu", new [] { "to", "u" } }, + {"che", new [] { "chi", "e" } }, + {"tsa", new [] { "tsu", "a" } }, + {"tsi", new [] { "tsu", "i" } }, + {"tse", new [] { "tsu", "e" } }, + {"tso", new [] { "tsu", "o" } }, + {"deli", new [] { "de", "i" } }, + {"dolu", new [] { "do", "u" } }, + {"nye", new [] { "ni", "e" } }, + {"hye", new [] { "hi", "e" } }, + {"holu", new [] { "ho", "u" } }, + {"fa", new [] { "fu", "a" } }, + {"fi", new [] { "fu", "i" } }, + {"fe", new [] { "fu", "e" } }, + {"fo", new [] { "fu", "o" } }, + {"bye", new [] { "bi", "e" } }, + {"pye", new [] { "pi", "e" } }, + {"mye", new [] { "mi", "e" } }, + {"ye", new [] { "i", "e" } }, + {"rye", new [] { "ri", "e" } }, + {"wi", new [] { "u", "i" } }, + {"we", new [] { "u", "e" } }, + {"ulo", new [] { "u", "o" } }, + }; + + private string[] affricates = "ts ch j".Split(); + + protected override string[] GetSymbols(Note note) { + string[] original = base.GetSymbols(note); + if (original == null) { + return null; + } + List modified = new List(); + string[] diphthongs = new[] { "ay", "ey", "oy", "ow", "aw" }; + foreach (string s in original) { + if (diphthongs.Contains(s)) { + modified.AddRange(new string[] { s[0].ToString(), s[1].ToString() }); + } else { + modified.Add(s); + } + } + return modified.ToArray(); + } + + protected override List ProcessSyllable(Syllable syllable) { + // Skip processing if this note extends the prevous syllable + if (CanMakeAliasExtension(syllable)) { + return new List { null }; + } + + var cc = syllable.cc; + var v = syllable.v; + var phonemes = new List(); + + // Check CCs for special clusters + var adjustedCC = new List(); + for (var i = 0; i < cc.Length; i++) { + if (i == cc.Length - 1) { + adjustedCC.Add(cc[i]); + } else { + if (cc[i] == cc[i + 1]) { + adjustedCC.Add(cc[i]); + i++; + continue; + } + var diphone = $"{cc[i]}{cc[i + 1]}"; + if (SpecialClusters.Contains(diphone)) { + adjustedCC.Add(diphone); + i++; + } else { + adjustedCC.Add(cc[i]); + } + } + } + cc = adjustedCC.ToArray(); + + // Separate CCs and main CV + var finalCons = ""; + if (cc.Length > 0) { + finalCons = cc[cc.Length - 1]; + + var start = 0; + + for (var i = start; i < cc.Length - 1; i++) { + var cons = SoloConsonant[cc[i]]; + if (HasOto(cons, syllable.tone)) { + phonemes.Add(cons); + } + } + } + + // Convert to hiragana + var cv = $"{StartingConsonant[finalCons]}{v}"; + cv = AltCv.ContainsKey(cv) ? AltCv[cv] : cv; + var hiragana = ToHiragana(cv); + + // Check for nonstandard CV + var split = false; + if (HasOto(hiragana, syllable.vowelTone)) { + phonemes.Add(hiragana); + } else { + split = true; + } + // Handle nonstandard CV + if (split && ExtraCv.ContainsKey(cv)) { + var splitCv = ExtraCv[cv]; + for (var i = 0; i < splitCv.Length; i++) { + var converted = ToHiragana(splitCv[i]); + phonemes.Add(converted); + } + } + + return phonemes; + } + + protected override List ProcessEnding(Ending ending) { + var cc = ending.cc; + var phonemes = new List(); + + // Check CCs for special clusters + var adjustedCC = new List(); + for (var i = 0; i < cc.Length; i++) { + if (i == cc.Length - 1) { + adjustedCC.Add(cc[i]); + } else { + if (cc[i] == cc[i + 1]) { + adjustedCC.Add(cc[i]); + i++; + continue; + } + var diphone = $"{cc[i]}{cc[i + 1]}"; + if (SpecialClusters.Contains(diphone)) { + adjustedCC.Add(diphone); + i++; + } else { + adjustedCC.Add(cc[i]); + } + } + } + cc = adjustedCC.ToArray(); + + // Convert to hiragana + for (var i = 0; i < cc.Length; i++) { + var symbol = cc[i]; + + var solo = SoloConsonant[symbol]; + + if (HasOto(solo, ending.tone)) { + phonemes.Add(solo); + } else if (ConditionalAlt.ContainsKey(solo)) { + solo = ConditionalAlt[solo]; + phonemes.Add(solo); + } + } + + return phonemes; + } + + private string ToHiragana(string romaji) { + var hiragana = WanaKana.ToHiragana(romaji); + hiragana = hiragana.Replace("ゔ", "ヴ"); + return hiragana; + } + } +}