From cde86f46796c12f2dbbc9c1678ca95861610422f Mon Sep 17 00:00:00 2001 From: rokujyushi Date: Sun, 24 Mar 2024 20:42:16 +0900 Subject: [PATCH 1/3] Preparation for s sler support --- .../Voicevox/SimpleVoicevoxPhonemizer.cs | 73 +++++++------------ OpenUtau.Core/Voicevox/VoicevoxConfig.cs | 9 ++- OpenUtau.Core/Voicevox/VoicevoxSinger.cs | 10 +-- OpenUtau.Core/Voicevox/VoicevoxUtils.cs | 2 +- 4 files changed, 40 insertions(+), 54 deletions(-) diff --git a/OpenUtau.Core/Voicevox/SimpleVoicevoxPhonemizer.cs b/OpenUtau.Core/Voicevox/SimpleVoicevoxPhonemizer.cs index ed89bca28..29ca99f64 100644 --- a/OpenUtau.Core/Voicevox/SimpleVoicevoxPhonemizer.cs +++ b/OpenUtau.Core/Voicevox/SimpleVoicevoxPhonemizer.cs @@ -1,4 +1,5 @@ -using System.Linq; +using System.Collections.Generic; +using System.Linq; using OpenUtau.Api; using OpenUtau.Core.Ustx; using OpenUtau.Core.Voicevox; @@ -17,52 +18,34 @@ public override void SetSinger(USinger singer) { } public override Result Process(Note[] notes, Note? prev, Note? next, Note? prevNeighbour, Note? nextNeighbour, Note[] prevNeighbours) { - var note = notes[0]; - var currentLyric = note.lyric.Normalize(); //measures for Unicode + Phoneme[] phonemes = new Phoneme[notes.Length]; + for (int i = 0; i < notes.Length; i++) { + var currentLyric = notes[i].lyric.Normalize(); //measures for Unicode + int toneShift = 0; + int? alt = null; + if (notes[i].phonemeAttributes != null) { + var attr = notes[i].phonemeAttributes.FirstOrDefault(attr => attr.index == 0); + toneShift = attr.toneShift; + alt = attr.alternate; + } - Dictionary_list dic = new Dictionary_list(); - dic.Loaddic(singer.Location); - int toneShift = 0; - int? 
alt = null; - if (note.phonemeAttributes != null) { - var attr = note.phonemeAttributes.FirstOrDefault(attr => attr.index == 0); - toneShift = attr.toneShift; - alt = attr.alternate; - } - - //currentLyric = note.phoneticHint.Normalize(); - Note[][] simplenotes = new Note[1][]; - var lyricList = notes[0].lyric.Split(" "); - if (lyricList.Length > 1) { - notes[0].lyric = lyricList[1]; - } - if (VoicevoxUtils.IsHiraKana(notes[0].lyric)) { - return new Result { - phonemes = new Phoneme[] { - new Phoneme { - phoneme = notes[0].lyric, - } - }, - }; - } else if (VoicevoxUtils.IsPau(notes[0].lyric)) { - return new Result { - phonemes = new Phoneme[] { - new Phoneme { - phoneme = "R", - } - }, - }; - } - else - { - return new Result { - phonemes = new Phoneme[] { - new Phoneme { - phoneme = "error", - } - }, - }; + //currentLyric = note.phoneticHint.Normalize(); + Note[][] simplenotes = new Note[1][]; + var lyricList = notes[i].lyric.Split(" "); + if (lyricList.Length > 1) { + notes[i].lyric = lyricList[1]; + } + if (VoicevoxUtils.IsHiraKana(notes[i].lyric)) { + phonemes[i] = new Phoneme { phoneme = notes[i].lyric }; + } else if (VoicevoxUtils.IsPau(notes[i].lyric)) { + phonemes[i] = new Phoneme { phoneme = notes[i].lyric }; + } else { + phonemes[i] = new Phoneme { + phoneme = "error", + }; + } } + return new Result { phonemes = phonemes }; } } } diff --git a/OpenUtau.Core/Voicevox/VoicevoxConfig.cs b/OpenUtau.Core/Voicevox/VoicevoxConfig.cs index 23e5ed7d6..572915b70 100644 --- a/OpenUtau.Core/Voicevox/VoicevoxConfig.cs +++ b/OpenUtau.Core/Voicevox/VoicevoxConfig.cs @@ -20,8 +20,6 @@ public class VoicevoxConfig { public string version = string.Empty; public string policy = string.Empty; public string portraitPath = string.Empty; - //So that the renderer can distinguish between phonemizers. - public string Tag = "DEFAULT"; public List style_infos; //Prepare for future additions of Teacher Singer. 
@@ -29,6 +27,10 @@ public class VoicevoxConfig { public string base_singer_name = string.Empty; public string base_singer_style_name = string.Empty; + //So that the renderer can distinguish between phonemizers. + public string Tag = "DEFAULT"; + public Phoneme_list phonemes_list; + public static VoicevoxConfig Load(USinger singer) { try { var response = VoicevoxClient.Inst.SendRequest(new VoicevoxURL() { method = "GET", path = "/singers" }); @@ -66,6 +68,7 @@ public static VoicevoxConfig Load(USinger singer) { } catch { Log.Error("Could not load VOICEVOX singer."); } + return new VoicevoxConfig(); } public void LoadInfo(VoicevoxConfig voicevoxConfig, string location) { @@ -89,7 +92,7 @@ public void LoadInfo(VoicevoxConfig voicevoxConfig, string location) { public class Phoneme_list { public string[] vowels; public string[] consonants; - public string[] kana; + public Dictionary kanas; } public class Dictionary_list { diff --git a/OpenUtau.Core/Voicevox/VoicevoxSinger.cs b/OpenUtau.Core/Voicevox/VoicevoxSinger.cs index 91f8e8143..e0160c726 100644 --- a/OpenUtau.Core/Voicevox/VoicevoxSinger.cs +++ b/OpenUtau.Core/Voicevox/VoicevoxSinger.cs @@ -84,16 +84,16 @@ void Load() { var parentDirectory = Directory.GetParent(this.Location).ToString(); var yamlPath = Path.Join(parentDirectory, "phonemes.yaml"); var yamlTxt = File.ReadAllText(yamlPath); - var phonemes_list = Yaml.DefaultDeserializer.Deserialize(yamlTxt); + voicevoxConfig.phonemes_list = Yaml.DefaultDeserializer.Deserialize(yamlTxt); //Prepared for planned changes or additions to phonemizers. 
- foreach (var str in phonemes_list.vowels) { + foreach (var str in voicevoxConfig.phonemes_list.vowels) { phonemes.Add(str); } - foreach (var str in phonemes_list.consonants) { + foreach (var str in voicevoxConfig.phonemes_list.consonants) { phonemes.Add(str); } - foreach (var str in phonemes_list.kana) { - phonemes.Add(str); + foreach (var kana in voicevoxConfig.phonemes_list.kanas) { + phonemes.Add(kana.Key); } } catch (Exception e) { Log.Error(e, $"Failed to load phonemes.yaml for {Name}"); diff --git a/OpenUtau.Core/Voicevox/VoicevoxUtils.cs b/OpenUtau.Core/Voicevox/VoicevoxUtils.cs index 6e550021b..0915c176b 100644 --- a/OpenUtau.Core/Voicevox/VoicevoxUtils.cs +++ b/OpenUtau.Core/Voicevox/VoicevoxUtils.cs @@ -42,6 +42,7 @@ internal static class VoicevoxUtils { public const int tailS = 1; public const double fps = 93.75; public const string defaultID = "6000"; + static Dictionary_list dic = new Dictionary_list(); public static VoicevoxNote VoicevoxVoiceBase(VoicevoxQueryMain qNotes, string id) { var queryurl = new VoicevoxURL() { method = "POST", path = "/sing_frame_audio_query", query = new Dictionary { { "speaker", id } }, body = JsonConvert.SerializeObject(qNotes) }; @@ -62,7 +63,6 @@ public static VoicevoxQueryMain NoteGroupsToVoicevox(Note[][] notes, TimeAxis ti BaseChinesePhonemizer.RomanizeNotes(notes); } VoicevoxQueryMain qnotes = new VoicevoxQueryMain(); - Dictionary_list dic = new Dictionary_list(); dic.Loaddic(singer.Location); int index = 0; int duration = 0; From 467ec9c5fde6a9d973db7956d6235f8ab8b3ebbc Mon Sep 17 00:00:00 2001 From: rokujyushi Date: Sat, 30 Mar 2024 14:39:04 +0900 Subject: [PATCH 2/3] Add Phonemizer --- OpenUtau.Core/Util/Base64.cs | 2 +- .../SimpleVoicevoxPhonemizer.cs | 4 +- .../Phonemizers/VoicevoxPhonemizer.cs | 103 ++++++ OpenUtau.Core/Voicevox/VoicevoxClient.cs | 2 +- OpenUtau.Core/Voicevox/VoicevoxRenderer.cs | 120 +++--- OpenUtau.Core/Voicevox/VoicevoxSinger.cs | 4 + OpenUtau.Core/Voicevox/VoicevoxUtils.cs | 16 +- 
.../SimpleVoicevoxENtoJAPhonemizer.cs | 346 ++++++++++++++++++ 8 files changed, 545 insertions(+), 52 deletions(-) rename OpenUtau.Core/Voicevox/{ => Phonemizers}/SimpleVoicevoxPhonemizer.cs (95%) create mode 100644 OpenUtau.Core/Voicevox/Phonemizers/VoicevoxPhonemizer.cs create mode 100644 OpenUtau.Plugin.Builtin/SimpleVoicevoxENtoJAPhonemizer.cs diff --git a/OpenUtau.Core/Util/Base64.cs b/OpenUtau.Core/Util/Base64.cs index 3d5f8020e..b2b7f7419 100644 --- a/OpenUtau.Core/Util/Base64.cs +++ b/OpenUtau.Core/Util/Base64.cs @@ -70,7 +70,7 @@ public static void Base64ToFile(string base64str,string filePath) { File.WriteAllBytes(filePath, bytes); } catch (Exception ex) { - Log.Error(@"{ex}"); + Log.Error($"{ex}"); } } } diff --git a/OpenUtau.Core/Voicevox/SimpleVoicevoxPhonemizer.cs b/OpenUtau.Core/Voicevox/Phonemizers/SimpleVoicevoxPhonemizer.cs similarity index 95% rename from OpenUtau.Core/Voicevox/SimpleVoicevoxPhonemizer.cs rename to OpenUtau.Core/Voicevox/Phonemizers/SimpleVoicevoxPhonemizer.cs index 29ca99f64..465a0426d 100644 --- a/OpenUtau.Core/Voicevox/SimpleVoicevoxPhonemizer.cs +++ b/OpenUtau.Core/Voicevox/Phonemizers/SimpleVoicevoxPhonemizer.cs @@ -21,6 +21,9 @@ public override Result Process(Note[] notes, Note? prev, Note? next, Note? prevN Phoneme[] phonemes = new Phoneme[notes.Length]; for (int i = 0; i < notes.Length; i++) { var currentLyric = notes[i].lyric.Normalize(); //measures for Unicode + if (currentLyric.StartsWith("+")) { + continue; + } int toneShift = 0; int? alt = null; if (notes[i].phonemeAttributes != null) { @@ -29,7 +32,6 @@ public override Result Process(Note[] notes, Note? prev, Note? next, Note? 
prevN alt = attr.alternate; } - //currentLyric = note.phoneticHint.Normalize(); Note[][] simplenotes = new Note[1][]; var lyricList = notes[i].lyric.Split(" "); if (lyricList.Length > 1) { diff --git a/OpenUtau.Core/Voicevox/Phonemizers/VoicevoxPhonemizer.cs b/OpenUtau.Core/Voicevox/Phonemizers/VoicevoxPhonemizer.cs new file mode 100644 index 000000000..2cd597251 --- /dev/null +++ b/OpenUtau.Core/Voicevox/Phonemizers/VoicevoxPhonemizer.cs @@ -0,0 +1,103 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using OpenUtau.Api; +using OpenUtau.Core.Ustx; + +namespace OpenUtau.Core.Voicevox { + [Phonemizer("Voicevox Japanese Phonemizer", "VOICEVOX JA", language: "JA")] + public class VoicevoxPhonemizer : Phonemizer { + + protected VoicevoxSinger singer; + Dictionary partResult = new Dictionary(); + + public override void SetSinger(USinger singer) { + this.singer = singer as VoicevoxSinger; + if (this.singer != null) { + this.singer.voicevoxConfig.Tag = this.Tag; + } + } + + public override void SetUp(Note[][] notes) { + partResult.Clear(); + foreach(var lyric in notes) { + lyric[0].lyric = lyric[0].lyric.Normalize(); + var lyricList = lyric[0].lyric.Split(" "); + if (lyricList.Length > 1) { + lyric[0].lyric = lyricList[1]; + } + } + var qNotes = VoicevoxUtils.NoteGroupsToVoicevox(notes, timeAxis,this.singer); + var vvNotes = new VoicevoxNote(); + string singerID = VoicevoxUtils.defaultID; + if (this.singer.voicevoxConfig.base_singer_style != null) { + foreach (var s in this.singer.voicevoxConfig.base_singer_style) { + if (s.name.Equals(this.singer.voicevoxConfig.base_singer_name)) { + vvNotes = VoicevoxUtils.VoicevoxVoiceBase(qNotes, s.styles.id.ToString()); + if (s.styles.name.Equals(this.singer.voicevoxConfig.base_singer_style_name)) { + break; + } + } else { + vvNotes = VoicevoxUtils.VoicevoxVoiceBase(qNotes, singerID); + break; + } + } + } else { + vvNotes = VoicevoxUtils.VoicevoxVoiceBase(qNotes, singerID); + } + + var 
parentDirectory = Directory.GetParent(singer.Location).ToString(); + var yamlPath = Path.Join(parentDirectory, "phonemes.yaml"); + var yamlTxt = File.ReadAllText(yamlPath); + var phonemes_list = Yaml.DefaultDeserializer.Deserialize(yamlTxt); + + var list = new List(vvNotes.phonemes); + foreach (var note in qNotes.notes) { + if (note.vqnindex < 0) { + list.Remove(list[0]); + continue; + } + var noteGroup = notes[note.vqnindex]; + var phoneme = new List(); + int index = 0; + while (list.Count > 0) { + if (phonemes_list.vowels.Contains(list[0].phoneme)) { + phoneme.Add(new Phoneme() { phoneme = list[0].phoneme, position = noteGroup[0].position }); + index++; + list.Remove(list[0]); + break; + }else if (phonemes_list.consonants.Contains(list[0].phoneme)) { + phoneme.Add(new Phoneme() { phoneme = list[0].phoneme, position = noteGroup[0].position - (int)timeAxis.MsPosToTickPos((list[0].frame_length / VoicevoxUtils.fps) * 1000) }); + } + list.Remove(list[0]); + } + partResult[noteGroup] = phoneme.ToArray(); + } + } + + public override Result Process(Note[] notes, Note? prev, Note? next, Note? prevNeighbour, Note? 
nextNeighbour, Note[] prevs) { + var ps = new List(); + if (partResult.TryGetValue(notes, out var phonemes)) { + return new Result { + phonemes = phonemes.Select(p => { + p.position = p.position - notes[0].position; + return p; + }).ToArray(), + }; + } + return new Result { + phonemes = new Phoneme[] { + new Phoneme { + phoneme = "error", + } + }, + }; + + } + + public override void CleanUp() { + partResult.Clear(); + } + } +} diff --git a/OpenUtau.Core/Voicevox/VoicevoxClient.cs b/OpenUtau.Core/Voicevox/VoicevoxClient.cs index 1aefa31d3..bb4872a45 100644 --- a/OpenUtau.Core/Voicevox/VoicevoxClient.cs +++ b/OpenUtau.Core/Voicevox/VoicevoxClient.cs @@ -28,7 +28,7 @@ internal Tuple SendRequest(VoicevoxURL voicevoxURL) { } } } catch (Exception ex) { - Log.Error(@"{ex}"); + Log.Error($"{ex}"); } return new Tuple("", new byte[0]); } diff --git a/OpenUtau.Core/Voicevox/VoicevoxRenderer.cs b/OpenUtau.Core/Voicevox/VoicevoxRenderer.cs index 0ffc4f4fd..180ad6c86 100644 --- a/OpenUtau.Core/Voicevox/VoicevoxRenderer.cs +++ b/OpenUtau.Core/Voicevox/VoicevoxRenderer.cs @@ -2,6 +2,7 @@ using System.Collections.Generic; using System.IO; using System.Linq; +using System.Reflection; using System.Threading; using System.Threading.Tasks; using K4os.Hash.xxHash; @@ -16,15 +17,17 @@ namespace OpenUtau.Core.Voicevox { public class VoicevoxRenderer : IRenderer { - const string VOLC = VoicevoxUtils.VOLC; + const string VOLSC = VoicevoxUtils.VOLSC; + const string IVOLC = VoicevoxUtils.IVOLC; const string PITD = Format.Ustx.PITD; static readonly HashSet supportedExp = new HashSet(){ Format.Ustx.DYN, - //PITD, + PITD, Format.Ustx.CLR, Format.Ustx.VOL, - //VOLC, + VOLSC, + IVOLC, //Format.Ustx.SHFC, Format.Ustx.SHFT }; @@ -63,7 +66,6 @@ public Task Render(RenderPhrase phrase, Progress progress, int tra if (singer != null) { Log.Information($"Starting Voicevox synthesis"); VoicevoxNote vvNotes = new VoicevoxNote(); - string singerID = VoicevoxUtils.defaultID; if 
(!singer.voicevoxConfig.Tag.Equals("VOICEVOX JA")) { Note[][] notes = new Note[phrase.phones.Length][]; for (int i = 0; i < phrase.phones.Length; i++) { @@ -79,19 +81,8 @@ public Task Render(RenderPhrase phrase, Progress progress, int tra var qNotes = VoicevoxUtils.NoteGroupsToVoicevox(notes, phrase.timeAxis, singer); //Prepare for future additions of Teacher Singer. - if (singer.voicevoxConfig.base_singer_style != null) { - foreach (var s in singer.voicevoxConfig.base_singer_style) { - if (s.name.Equals(singer.voicevoxConfig.base_singer_name)) { - if (s.styles.name.Equals(singer.voicevoxConfig.base_singer_style_name)) { - vvNotes = VoicevoxUtils.VoicevoxVoiceBase(qNotes, s.styles.id.ToString()); - break; - } - } - } - } - if (vvNotes.phonemes.Count() == 0) { - vvNotes = VoicevoxUtils.VoicevoxVoiceBase(qNotes, singerID); - } + string baseSingerID = VoicevoxUtils.getBaseSingerID(singer); + vvNotes = VoicevoxUtils.VoicevoxVoiceBase(qNotes, baseSingerID); //Compatible with toneShift (key shift), for adjusting the range of tones when synthesizing vvNotes.f0 = vvNotes.f0.Select(f0 => f0 = f0 * Math.Pow(2, ((phrase.phones[0].toneShift * -1) / 12d))).ToList(); @@ -111,7 +102,7 @@ public Task Render(RenderPhrase phrase, Progress progress, int tra } if (style.name.Equals(phrase.phones[0].suffix) && style.type.Equals("frame_decode")) { speaker = style.id; - } else if((style.name + "_" + style.type).Equals(phrase.phones[0].suffix)){ + } else if ((style.name + "_" + style.type).Equals(phrase.phones[0].suffix)) { speaker = style.id; } }); @@ -161,79 +152,112 @@ public Task Render(RenderPhrase phrase, Progress progress, int tra //Synthesize with parameters of phoneme, F0, and volume. 
Under development static VoicevoxNote PhraseToVoicevoxNotes(RenderPhrase phrase) { - VoicevoxNote notes = new VoicevoxNote(); + Note[][] notes = new Note[phrase.notes.Length][]; + for (int i = 0; i < phrase.phones.Length; i++) { + int noteindex = phrase.phones[i].noteIndex; + if (notes[noteindex] == null) { + notes[noteindex] = new Note[1]; + notes[noteindex][0] = new Note() { + lyric = phrase.notes[noteindex].lyric, + position = phrase.notes[noteindex].position, + duration = phrase.notes[noteindex].duration, + tone = (int)(phrase.notes[noteindex].tone + phrase.phones[i].toneShift) + }; + } + } + + foreach (var note in notes) { + note[0].lyric = note[0].lyric.Normalize(); + var lyricList = note[0].lyric.Split(" "); + if (lyricList.Length > 1) { + note[0].lyric = lyricList[1]; + } + } + VoicevoxNote vnotes = new VoicevoxNote(); + var singer = phrase.singer as VoicevoxSinger; + var qNotes = VoicevoxUtils.NoteGroupsToVoicevox(notes, phrase.timeAxis, singer); + + //Prepare for future additions of Teacher Singer. 
+ string baseSingerID = VoicevoxUtils.getBaseSingerID(singer); + VoicevoxNote vnotestemp = VoicevoxUtils.VoicevoxVoiceBase(qNotes, baseSingerID); int headFrames = (int)(VoicevoxUtils.headS * VoicevoxUtils.fps); int tailFrames = (int)(VoicevoxUtils.tailS * VoicevoxUtils.fps); - notes.phonemes.Add(new Phonemes { + vnotes.phonemes.Add(new Phonemes { phoneme = "pau", frame_length = headFrames }); foreach (var phone in phrase.phones) { - notes.phonemes.Add(new Phonemes { + vnotes.phonemes.Add(new Phonemes { phoneme = phone.phoneme, frame_length = (int)(phone.durationMs / 1000d * VoicevoxUtils.fps), }); } - notes.phonemes.Add(new Phonemes { + vnotes.phonemes.Add(new Phonemes { phoneme = "pau", frame_length = tailFrames }); int vvTotalFrames = -(headFrames + tailFrames); - notes.phonemes.ForEach(x => vvTotalFrames += x.frame_length); + vnotes.phonemes.ForEach(x => vvTotalFrames += x.frame_length); double frameMs = 1 / 1000d * VoicevoxUtils.fps; int totalFrames = (int)(vvTotalFrames / VoicevoxUtils.fps * 1000d); int frameRatio = vvTotalFrames / totalFrames; const int pitchInterval = 5; - //var curve = phrase.pitches.SelectMany(item => Enumerable.Repeat(item, 5)).ToArray(); - notes.f0 = VoicevoxUtils.SampleCurve(phrase, phrase.pitches, 0, frameMs, vvTotalFrames, 0, 0, x => MusicMath.ToneToFreq(x * 0.01)).ToList(); - //notes.f0 = f0.Where((x, i) => i % frameRatio == 0).ToList(); - float[] f0Shifted = notes.f0.Select(f => (float)f).ToArray(); + vnotes.f0 = VoicevoxUtils.SampleCurve(phrase, phrase.pitches, 0, frameMs, vvTotalFrames, 0, 0, x => MusicMath.ToneToFreq(x * 0.01)).ToList(); + float[] f0Shifted = vnotes.f0.Select(f => (float)f).ToArray(); if (phrase.toneShift != null) { - for (int i = 0; i < notes.f0.Count; i++) { + for (int i = 0; i < vnotes.f0.Count; i++) { double posMs = phrase.positionMs - phrase.leadingMs + i * frameMs; int ticks = phrase.timeAxis.MsPosToTickPos(posMs) - (phrase.position - phrase.leading); int index = Math.Max(0, (int)((double)ticks / 
pitchInterval)); if (index < phrase.pitches.Length) { - f0Shifted[i] = (float)MusicMath.ToneToFreq((phrase.pitches[index] + phrase.toneShift[index]) * 0.01); + f0Shifted[i] = (float)(phrase.pitches[index] * Math.Pow(2, ((phrase.phones[0].toneShift * -1) / 12d))); } } } - var volumeCurve = phrase.curves.FirstOrDefault(c => c.Item1 == VOLC); + var volumeCurve = phrase.curves.FirstOrDefault(c => c.Item1 == IVOLC); if (volumeCurve != null) { - notes.volume = VoicevoxUtils.SampleCurve(phrase, volumeCurve.Item2, 0, frameMs, vvTotalFrames, 0, 0, x => MusicMath.DecibelToLinear(x)).ToList(); - //notes.volume = volume.Where((x, i) => i % frameRatio == 0).ToList(); + vnotes.volume = VoicevoxUtils.SampleCurve(phrase, volumeCurve.Item2, 0, frameMs, vvTotalFrames, 0, 0, x => MusicMath.DecibelToLinear(x)).ToList(); } else { - notes.volume = Enumerable.Repeat(1d, vvTotalFrames).ToList(); + vnotes.volume = Enumerable.Repeat(1d, vvTotalFrames).ToList(); } - notes.outputStereo = false; - notes.outputSamplingRate = 44100; - notes.volumeScale = 1; - return notes; + vnotes.outputStereo = false; + vnotes.outputSamplingRate = 44100; + vnotes.volumeScale = 1; + return vnotes; } public UExpressionDescriptor[] GetSuggestedExpressions(USinger singer, URenderSettings renderSettings) { return new UExpressionDescriptor[] { }; //under development - //var result = new List { - // new UExpressionDescriptor{ - // name="volume (curve)", - // abbr=VOLC, - // type=UExpressionType.Curve, - // min=-20, - // max=20, - // defaultValue=0, - // isFlag=false, - // }, - //}; + var result = new List { + new UExpressionDescriptor{ + name="volume scale (curve)", + abbr=VOLSC, + type=UExpressionType.Curve, + min=-20, + max=20, + defaultValue=0, + isFlag=false, + }, + new UExpressionDescriptor{ + name="input volume (curve)", + abbr=IVOLC, + type=UExpressionType.Curve, + min=-20, + max=20, + defaultValue=0, + isFlag=false, + }, + }; //return result.ToArray(); } diff --git a/OpenUtau.Core/Voicevox/VoicevoxSinger.cs 
b/OpenUtau.Core/Voicevox/VoicevoxSinger.cs index e0160c726..89107d65d 100644 --- a/OpenUtau.Core/Voicevox/VoicevoxSinger.cs +++ b/OpenUtau.Core/Voicevox/VoicevoxSinger.cs @@ -156,6 +156,10 @@ void Load() { } public override bool TryGetOto(string phoneme, out UOto oto) { + if(phoneme == null) { + oto = null; + return false; + } var parts = phoneme.Split(); if (parts.All(p => phonemes.Contains(p))) { oto = UOto.OfDummy(phoneme); diff --git a/OpenUtau.Core/Voicevox/VoicevoxUtils.cs b/OpenUtau.Core/Voicevox/VoicevoxUtils.cs index 0915c176b..17b7c56e4 100644 --- a/OpenUtau.Core/Voicevox/VoicevoxUtils.cs +++ b/OpenUtau.Core/Voicevox/VoicevoxUtils.cs @@ -37,7 +37,8 @@ public class VoicevoxQueryMain { internal static class VoicevoxUtils { - public const string VOLC = "volc"; + public const string VOLSC = "volsc"; + public const string IVOLC = "ivolc"; public const int headS = 1; public const int tailS = 1; public const double fps = 93.75; @@ -149,5 +150,18 @@ public static bool IsPau(string s) { } return false; } + + public static string getBaseSingerID(VoicevoxSinger singer) { + if (singer.voicevoxConfig.base_singer_style != null) { + foreach (var s in singer.voicevoxConfig.base_singer_style) { + if (s.name.Equals(singer.voicevoxConfig.base_singer_name)) { + if (s.styles.name.Equals(singer.voicevoxConfig.base_singer_style_name)) { + return s.styles.id.ToString(); + } + } + } + } + return defaultID; + } } } diff --git a/OpenUtau.Plugin.Builtin/SimpleVoicevoxENtoJAPhonemizer.cs b/OpenUtau.Plugin.Builtin/SimpleVoicevoxENtoJAPhonemizer.cs new file mode 100644 index 000000000..157a8d1f0 --- /dev/null +++ b/OpenUtau.Plugin.Builtin/SimpleVoicevoxENtoJAPhonemizer.cs @@ -0,0 +1,346 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using OpenUtau.Api; +using OpenUtau.Core.G2p; +using WanaKanaNet; +using OpenUtau.Plugin.Builtin; + +namespace OpenUtau.Core.Voicevox { + [Phonemizer("Simple Voicevox ENtoJA Phonemizer", "VOICEVOX EN to JA", "", language: "EN")] + 
public class SimpleVoicevoxENtoJAPhonemizer : SyllableBasedPhonemizer { + protected override string[] GetVowels() => vowels; + private static readonly string[] vowels = + "a i u e o ay ey oy ow aw".Split(); + protected override string[] GetConsonants() => consonants; + private static readonly string[] consonants = + "b by ch d dh f g gy h hy j k ky l ly m my n ny ng p py r ry s sh t ts th v w y z zh".Split(); + protected override string GetDictionaryName() => "cmudict-0_7b.txt"; + protected override Dictionary GetDictionaryPhonemesReplacement() => dictionaryPhonemesReplacement; + private static readonly Dictionary dictionaryPhonemesReplacement = new Dictionary { + { "aa", "a" }, + { "ae", "e" }, + { "ah", "a" }, + { "ao", "o" }, + { "aw", "aw" }, + { "ay", "ay" }, + { "b", "b" }, + { "ch", "ch" }, + { "d", "d" }, + { "dh", "dh" }, + { "eh", "e" }, + { "er", "o" }, + { "ey", "ey" }, + { "f", "f" }, + { "g", "g" }, + { "hh", "h" }, + { "ih", "e" }, + { "iy", "i" }, + { "jh", "j" }, + { "k", "k" }, + { "l", "l" }, + { "m", "m" }, + { "n", "n" }, + { "ng", "ng" }, + { "ow", "ow" }, + { "oy", "oy" }, + { "p", "p" }, + { "r", "r" }, + { "s", "s" }, + { "sh", "sh" }, + { "t", "t" }, + { "th", "th" }, + { "uh", "o" }, + { "uw", "u" }, + { "v", "v" }, + { "w", "w" }, + { "y", "y" }, + { "z", "z" }, + { "zh", "zh" }, + }; + + protected override IG2p LoadBaseDictionary() => new ArpabetG2p(); + + private Dictionary StartingConsonant => startingConsonant; + private static readonly Dictionary startingConsonant = new Dictionary { + { "", "" }, + { "b", "b" }, + { "by", "by" }, + { "ch", "ch" }, + { "d", "d" }, + { "dh", "d" }, + { "f", "f" }, + { "g", "g" }, + { "gy", "gy" }, + { "h", "h" }, + { "hy", "hy" }, + { "j", "j" }, + { "k", "k" }, + { "ky", "ky" }, + { "l", "r" }, + { "ly", "ry" }, + { "m", "m" }, + { "my", "my" }, + { "n", "n" }, + { "ny", "ny" }, + { "ng", "n" }, + { "p", "p" }, + { "py", "py" }, + { "r", "rr" }, + { "ry", "ry" }, + { "s", "s" }, + { "sh", "sh" }, + { 
"t", "t" }, + { "ts", "ts" }, + { "th", "s" }, + { "v", "v" }, + { "w", "w" }, + { "y", "y" }, + { "z", "z" }, + { "zh", "sh" }, + }; + + private Dictionary SoloConsonant => soloConsonant; + private static readonly Dictionary soloConsonant = new Dictionary { + { "b", "ぶ" }, + { "by", "び" }, + { "ch", "ちゅ" }, + { "d", "ど" }, + { "dh", "ず" }, + { "f", "ふ" }, + { "g", "ぐ" }, + { "gy", "ぎ" }, + { "h", "ほ" }, + { "hy", "ひ" }, + { "j", "じゅ" }, + { "k", "く" }, + { "ky", "き" }, + { "l", "う" }, + { "ly", "り" }, + { "m", "む" }, + { "my", "み" }, + { "n", "ん" }, + { "ny", "に" }, + { "ng", "ん" }, + { "p", "ぷ" }, + { "py", "ぴ" }, + { "r", "う" }, + { "ry", "り" }, + { "s", "す" }, + { "sh", "しゅ" }, + { "t", "と" }, + { "ts", "つ" }, + { "th", "す" }, + { "v", "ヴ" }, + { "w", "う" }, + { "y", "い" }, + { "z", "ず" }, + { "zh", "しゅ" }, + }; + + private string[] SpecialClusters = "ky gy ts ny hy by py my ry ly".Split(); + + private Dictionary AltCv => altCv; + private static readonly Dictionary altCv = new Dictionary { + {"si", "suli" }, + {"zi", "zuli" }, + {"ti", "teli" }, + {"tu", "tolu" }, + {"di", "deli" }, + {"du", "dolu" }, + {"hu", "holu" }, + {"yi", "i" }, + {"wu", "u" }, + {"wo", "ulo" }, + {"rra", "wa" }, + {"rri", "wi" }, + {"rru", "ru" }, + {"rre", "we" }, + {"rro", "ulo" }, + }; + + private Dictionary ConditionalAlt => conditionalAlt; + private static readonly Dictionary conditionalAlt = new Dictionary { + {"ulo", "wo"}, + {"va", "fa"}, + {"vi", "fi"}, + {"vu", "fu"}, + {"ヴ", "ふ"}, + {"ve", "fe"}, + {"vo", "fo"}, + }; + + private Dictionary ExtraCv => extraCv; + private static readonly Dictionary extraCv = new Dictionary { + {"kye", new [] { "ki", "e" } }, + {"gye", new [] { "gi", "e" } }, + {"suli", new [] { "se", "i" } }, + {"she", new [] { "si", "e" } }, + {"zuli", new [] { "ze", "i" } }, + {"je", new [] { "ji", "e" } }, + {"teli", new [] { "te", "i" } }, + {"tolu", new [] { "to", "u" } }, + {"che", new [] { "chi", "e" } }, + {"tsa", new [] { "tsu", "a" } }, + {"tsi", new 
[] { "tsu", "i" } }, + {"tse", new [] { "tsu", "e" } }, + {"tso", new [] { "tsu", "o" } }, + {"deli", new [] { "de", "i" } }, + {"dolu", new [] { "do", "u" } }, + {"nye", new [] { "ni", "e" } }, + {"hye", new [] { "hi", "e" } }, + {"holu", new [] { "ho", "u" } }, + {"fa", new [] { "fu", "a" } }, + {"fi", new [] { "fu", "i" } }, + {"fe", new [] { "fu", "e" } }, + {"fo", new [] { "fu", "o" } }, + {"bye", new [] { "bi", "e" } }, + {"pye", new [] { "pi", "e" } }, + {"mye", new [] { "mi", "e" } }, + {"ye", new [] { "i", "e" } }, + {"rye", new [] { "ri", "e" } }, + {"wi", new [] { "u", "i" } }, + {"we", new [] { "u", "e" } }, + {"ulo", new [] { "u", "o" } }, + }; + + private string[] affricates = "ts ch j".Split(); + + protected override string[] GetSymbols(Note note) { + string[] original = base.GetSymbols(note); + if (original == null) { + return null; + } + List modified = new List(); + string[] diphthongs = new[] { "ay", "ey", "oy", "ow", "aw" }; + foreach (string s in original) { + if (diphthongs.Contains(s)) { + modified.AddRange(new string[] { s[0].ToString(), s[1].ToString() }); + } else { + modified.Add(s); + } + } + return modified.ToArray(); + } + + protected override List ProcessSyllable(Syllable syllable) { + // Skip processing if this note extends the previous syllable + if (CanMakeAliasExtension(syllable)) { + return new List { null }; + } + + var cc = syllable.cc; + var v = syllable.v; + var phonemes = new List(); + + // Check CCs for special clusters + var adjustedCC = new List(); + for (var i = 0; i < cc.Length; i++) { + if (i == cc.Length - 1) { + adjustedCC.Add(cc[i]); + } else { + if (cc[i] == cc[i + 1]) { + adjustedCC.Add(cc[i]); + i++; + continue; + } + var diphone = $"{cc[i]}{cc[i + 1]}"; + if (SpecialClusters.Contains(diphone)) { + adjustedCC.Add(diphone); + i++; + } else { + adjustedCC.Add(cc[i]); + } + } + } + cc = adjustedCC.ToArray(); + + // Separate CCs and main CV + var finalCons = ""; + if (cc.Length > 0) { + finalCons = cc[cc.Length - 1]; 
+ + var start = 0; + + for (var i = start; i < cc.Length - 1; i++) { + var cons = SoloConsonant[cc[i]]; + if (HasOto(cons, syllable.tone)) { + phonemes.Add(cons); + } + } + } + + // Convert to hiragana + var cv = $"{StartingConsonant[finalCons]}{v}"; + cv = AltCv.ContainsKey(cv) ? AltCv[cv] : cv; + var hiragana = ToHiragana(cv); + + // Check for nonstandard CV + var split = false; + if (HasOto(hiragana, syllable.vowelTone)) { + phonemes.Add(hiragana); + } else { + split = true; + } + // Handle nonstandard CV + if (split && ExtraCv.ContainsKey(cv)) { + var splitCv = ExtraCv[cv]; + for (var i = 0; i < splitCv.Length; i++) { + var converted = ToHiragana(splitCv[i]); + phonemes.Add(converted); + } + } + + return phonemes; + } + + protected override List ProcessEnding(Ending ending) { + var cc = ending.cc; + var phonemes = new List(); + + // Check CCs for special clusters + var adjustedCC = new List(); + for (var i = 0; i < cc.Length; i++) { + if (i == cc.Length - 1) { + adjustedCC.Add(cc[i]); + } else { + if (cc[i] == cc[i + 1]) { + adjustedCC.Add(cc[i]); + i++; + continue; + } + var diphone = $"{cc[i]}{cc[i + 1]}"; + if (SpecialClusters.Contains(diphone)) { + adjustedCC.Add(diphone); + i++; + } else { + adjustedCC.Add(cc[i]); + } + } + } + cc = adjustedCC.ToArray(); + + // Convert to hiragana + for (var i = 0; i < cc.Length; i++) { + var symbol = cc[i]; + + var solo = SoloConsonant[symbol]; + + if (HasOto(solo, ending.tone)) { + phonemes.Add(solo); + } else if (ConditionalAlt.ContainsKey(solo)) { + solo = ConditionalAlt[solo]; + phonemes.Add(solo); + } + } + + return phonemes; + } + + private string ToHiragana(string romaji) { + var hiragana = WanaKana.ToHiragana(romaji); + hiragana = hiragana.Replace("ゔ", "ヴ"); + return hiragana; + } + } +} From f227b3dececc191f82a17f7f16ba6cd30132030b Mon Sep 17 00:00:00 2001 From: rokujyushi Date: Thu, 4 Apr 2024 23:48:21 +0900 Subject: [PATCH 3/3] Pitch Experiment --- .../Voicevox/Phonemizers/VoicevoxPhonemizer.cs | 2 +- 
OpenUtau.Core/Voicevox/VoicevoxRenderer.cs | 12 +++++++++--- OpenUtau.Core/Voicevox/VoicevoxUtils.cs | 6 +++--- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/OpenUtau.Core/Voicevox/Phonemizers/VoicevoxPhonemizer.cs b/OpenUtau.Core/Voicevox/Phonemizers/VoicevoxPhonemizer.cs index 2cd597251..a1b24523c 100644 --- a/OpenUtau.Core/Voicevox/Phonemizers/VoicevoxPhonemizer.cs +++ b/OpenUtau.Core/Voicevox/Phonemizers/VoicevoxPhonemizer.cs @@ -19,7 +19,7 @@ public override void SetSinger(USinger singer) { } } - public override void SetUp(Note[][] notes) { + public override void SetUp(Note[][] notes, UProject project, UTrack track) { partResult.Clear(); foreach(var lyric in notes) { lyric[0].lyric = lyric[0].lyric.Normalize(); diff --git a/OpenUtau.Core/Voicevox/VoicevoxRenderer.cs b/OpenUtau.Core/Voicevox/VoicevoxRenderer.cs index 180ad6c86..d6dad03f4 100644 --- a/OpenUtau.Core/Voicevox/VoicevoxRenderer.cs +++ b/OpenUtau.Core/Voicevox/VoicevoxRenderer.cs @@ -84,8 +84,14 @@ public Task Render(RenderPhrase phrase, Progress progress, int tra string baseSingerID = VoicevoxUtils.getBaseSingerID(singer); vvNotes = VoicevoxUtils.VoicevoxVoiceBase(qNotes, baseSingerID); - //Compatible with toneShift (key shift), for adjusting the range of tones when synthesizing - vvNotes.f0 = vvNotes.f0.Select(f0 => f0 = f0 * Math.Pow(2, ((phrase.phones[0].toneShift * -1) / 12d))).ToList(); + if (!phrase.phones[0].direct) { + double frameMs = 1 / 10d * VoicevoxUtils.fps; + vvNotes.f0 = VoicevoxUtils.SampleCurve(phrase, phrase.pitches, 0, frameMs, vvNotes.volume.Count(), 0, 0, x => MusicMath.ToneToFreq(x * 0.01)).ToList(); + } else { + //Compatible with toneShift (key shift), for adjusting the range of tones when synthesizing + vvNotes.f0 = vvNotes.f0.Select(f0 => f0 = f0 * Math.Pow(2, ((phrase.phones[0].toneShift * -1) / 12d))).ToList(); + } + //Volume parameter for synthesis. 
Scheduled to be revised vvNotes.volume = vvNotes.volume.Select(vol => vol = vol * phrase.phones[0].volume).ToList(); } else { @@ -201,7 +207,7 @@ static VoicevoxNote PhraseToVoicevoxNotes(RenderPhrase phrase) { int vvTotalFrames = -(headFrames + tailFrames); vnotes.phonemes.ForEach(x => vvTotalFrames += x.frame_length); - double frameMs = 1 / 1000d * VoicevoxUtils.fps; + double frameMs = VoicevoxUtils.fps;//1 / 1000d * int totalFrames = (int)(vvTotalFrames / VoicevoxUtils.fps * 1000d); int frameRatio = vvTotalFrames / totalFrames; const int pitchInterval = 5; diff --git a/OpenUtau.Core/Voicevox/VoicevoxUtils.cs b/OpenUtau.Core/Voicevox/VoicevoxUtils.cs index 17b7c56e4..efdd3b9fe 100644 --- a/OpenUtau.Core/Voicevox/VoicevoxUtils.cs +++ b/OpenUtau.Core/Voicevox/VoicevoxUtils.cs @@ -113,7 +113,7 @@ public static VoicevoxQueryMain NoteGroupsToVoicevox(Note[][] notes, TimeAxis ti } public static double[] SampleCurve(RenderPhrase phrase, float[] curve, double defaultValue, double frameMs, int length, int headFrames, int tailFrames, Func convert) { - const int interval = 5; + double interval = curve.Length / length; var result = new double[length]; if (curve == null) { Array.Fill(result, defaultValue); @@ -121,9 +121,9 @@ public static double[] SampleCurve(RenderPhrase phrase, float[] curve, double de } for (int i = 0; i < length; i++) { - double posMs = phrase.positionMs - phrase.leadingMs + i * frameMs; + double posMs = phrase.positionMs - phrase.leadingMs + (i * interval); int ticks = phrase.timeAxis.MsPosToTickPos(posMs) - (phrase.position - phrase.leading); - int index = Math.Max(0, (int)((double)ticks / interval)); + int index = Math.Max(0, (int)((double)ticks )); if (index < curve.Length) { result[i] = convert(curve[index]); }