From 196ec0ad832b62badcc96bb07803fe44d92ca6b9 Mon Sep 17 00:00:00 2001
From: oxygen-dioxide <54425948+oxygen-dioxide@users.noreply.github.com>
Date: Sun, 8 Jan 2023 22:53:17 +0800
Subject: [PATCH] diffsinger vocoder installer

Add DiffSingerVocoderInstaller for ".dsvocoder" archives and load the
vocoder lazily through DiffSingerSinger.getVocoder().

Review fixes:
- Installer: look up vocoder.yaml with FirstOrDefault so the
  "missing vocoder.yaml" check is reachable.
- Installer: validate the vocoder name and resolve every entry path
  against the install folder before writing.
- DsVocoder: keep the original exception as InnerException.
---
 OpenUtau.Core/DiffSinger/DiffSingerConfig.cs  | 21 ----------
 .../DiffSingerMandarinPhonemizer.cs           | 19 +++------
 .../DiffSinger/DiffSingerRenderer.cs          | 10 ++---
 OpenUtau.Core/DiffSinger/DiffSingerSinger.cs  | 23 +++++++----
 OpenUtau.Core/DiffSinger/DiffSingerVocoder.cs | 31 +++++++++------
 .../DiffSinger/DiffSingerVocoderInstaller.cs  | 53 +++++++++++++++++++
 OpenUtau/Views/MainWindow.axaml.cs            |  8 +++-
 7 files changed, 106 insertions(+), 59 deletions(-)
 delete mode 100644 OpenUtau.Core/DiffSinger/DiffSingerConfig.cs
 create mode 100644 OpenUtau.Core/DiffSinger/DiffSingerVocoderInstaller.cs

diff --git a/OpenUtau.Core/DiffSinger/DiffSingerConfig.cs b/OpenUtau.Core/DiffSinger/DiffSingerConfig.cs
deleted file mode 100644
index 8f47cd496..000000000
--- a/OpenUtau.Core/DiffSinger/DiffSingerConfig.cs
+++ /dev/null
@@ -1,21 +0,0 @@
-using System;
-using System.Collections.Generic;
-using System.Text;
-
-namespace OpenUtau.Core.DiffSinger {
-    [Serializable]
-    public class DsConfig {
-        public string phonemes = "phonemes.txt";
-        public string acoustic = "acoustic.onnx";
-        public string vocoder = "nsf_hifigan";
-    }
-
-    [Serializable]
-    public class DsVocoderConfig {
-        public string name = "vocoder";
-        public string model = "model.onnx";
-        public int num_mel_bins = 128;
-        public int hop_size = 512;
-        public int sample_rate = 44100;
-    }
-}
diff --git a/OpenUtau.Core/DiffSinger/DiffSingerMandarinPhonemizer.cs b/OpenUtau.Core/DiffSinger/DiffSingerMandarinPhonemizer.cs
index 9ebae73c9..b2426a886 100644
--- a/OpenUtau.Core/DiffSinger/DiffSingerMandarinPhonemizer.cs
+++ b/OpenUtau.Core/DiffSinger/DiffSingerMandarinPhonemizer.cs
@@ -1,6 +1,5 @@
 using System;
 using System.IO;
-using K4os.Hash.xxHash;
 using TinyPinyin;
 using OpenUtau.Api;
 
@@ -51,17 +50,9 @@ public override void SetSinger(USinger singer) {
         }
 
         public override Result Process(Note[] notes, Note? prev, Note? next, Note? prevNeighbour, Note? nextNeighbour, Note[] prevs) {
-            /*if (!partResult.TryGetValue(notes[0].position, out var phonemes)) {
-                throw new Exception("Part result not found");
-            }
-            return new Result {
-                phonemes = phonemes
-                    .Select((tu) => new Phoneme() {
-                        phoneme = tu.Item1,
-                        position = tu.Item2,
-                    })
-                    .ToArray(),
-            };*/
+            float frameMs = 1000f*512/44100;
+            //TODO: songs with tempo changes may produce wrong results
+            int frameTick = MsToTick(frameMs);
             string lyric = notes[0].lyric;
             //汉字转拼音
             if (lyric.Length > 0 && PinyinHelper.IsChinese(lyric[0])) {
@@ -77,9 +68,11 @@ public override Result Process(Note[] notes, Note? prev, Note? next, Note? prevN
                 } else {
                     //使用vogen的辅音时间
                     Result VogenResult = base.Process(notes, prev, next, prevNeighbour, nextNeighbour, prevs);
+                    //Keep the consonant at least 1 frame long
                     return new Result {
                         phonemes = new Phoneme[] {
-                            new Phoneme {phoneme = phones.Item1, position = VogenResult.phonemes[0].position},
+                            new Phoneme {phoneme = phones.Item1,
+                            position = Math.Min(VogenResult.phonemes[0].position,-frameTick)},
                             new Phoneme {phoneme = phones.Item2, position = 0}
                         },
                     };
diff --git a/OpenUtau.Core/DiffSinger/DiffSingerRenderer.cs b/OpenUtau.Core/DiffSinger/DiffSingerRenderer.cs
index 95d6202a4..feac671d6 100644
--- a/OpenUtau.Core/DiffSinger/DiffSingerRenderer.cs
+++ b/OpenUtau.Core/DiffSinger/DiffSingerRenderer.cs
@@ -11,7 +11,6 @@
 using OpenUtau.Core.Render;
 using OpenUtau.Core.SignalChain;
 using OpenUtau.Core.Ustx;
-using OpenUtau.Core.Util;
 using Serilog;
 
 namespace OpenUtau.Core.DiffSinger {
@@ -63,7 +62,7 @@ public Task Render(RenderPhrase phrase, Progress progress, Cancell
                     }
                     var result = Layout(phrase);
                     int speedup = Core.Util.Preferences.Default.DiffsingerSpeedup;
-                    var wavPath = Path.Join(PathManager.Inst.CachePath, $"vog-{phrase.hash:x16}-{speedup}x.wav");
+                    var wavPath = Path.Join(PathManager.Inst.CachePath, $"ds-{phrase.hash:x16}-{speedup}x.wav");
                     string progressInfo = $"{this}{speedup}x \"{string.Join(" ", phrase.phones.Select(p => p.phoneme))}\"";
                     if (File.Exists(wavPath)) {
                         try {
@@ -96,7 +95,8 @@ public Task Render(RenderPhrase phrase, Progress progress, Cancell
 
         float[] InvokeDiffsinger(RenderPhrase phrase,int speedup) {
             var singer = phrase.singer as DiffSingerSinger;
-            var frameMs = singer.vocoder.frameMs();
+            var vocoder = singer.getVocoder();
+            var frameMs = vocoder.frameMs();
             var frameSec = frameMs / 1000;
             int headFrames = (int)(headMs / frameMs);
             int tailFrames = (int)(tailMs / frameMs);
@@ -143,7 +143,7 @@ float[] InvokeDiffsinger(RenderPhrase phrase,int speedup) {
             vocoderInputs.Add(NamedOnnxValue.CreateFromTensor("mel", mel));
             vocoderInputs.Add(NamedOnnxValue.CreateFromTensor("f0",f0tensor));
             float[] samples;
-            using (var session = new InferenceSession(singer.vocoder.getModel())) {
+            using (var session = new InferenceSession(vocoder.model)) {
                 using var vocoderOutputs = session.Run(vocoderInputs);
                 samples = vocoderOutputs.First().AsTensor<float>().ToArray();
             }
@@ -153,7 +153,7 @@ float[] InvokeDiffsinger(RenderPhrase phrase,int speedup) {
         //参数曲线采样
         double[] SampleCurve(RenderPhrase phrase, float[] curve, double defaultValue, int length, int headFrames, int tailFrames, Func<double, double> convert) {
             var singer = phrase.singer as DiffSingerSinger;
-            var frameMs = singer.vocoder.frameMs();
+            var frameMs = singer.getVocoder().frameMs();
             const int interval = 5;
             var result = new double[length];
             if (curve == null) {
diff --git a/OpenUtau.Core/DiffSinger/DiffSingerSinger.cs b/OpenUtau.Core/DiffSinger/DiffSingerSinger.cs
index cf4ac6793..c3433aa3d 100644
--- a/OpenUtau.Core/DiffSinger/DiffSingerSinger.cs
+++ b/OpenUtau.Core/DiffSinger/DiffSingerSinger.cs
@@ -1,11 +1,8 @@
-using Newtonsoft.Json;
-using System;
+using System;
 using System.Collections.Generic;
 using System.IO;
 using System.Linq;
-using System.Reflection;
 using System.Text;
-using System.Text.RegularExpressions;
 using OpenUtau.Classic;
 using OpenUtau.Core.Ustx;
 using Serilog;
@@ -39,7 +36,7 @@ class DiffSingerSinger : USinger {
         public List<string> phonemes = new List<string>();
         public DsConfig dsConfig;
         public byte[] acousticModel = new byte[0];
-        public DsVocoder vocoder;
+        public DsVocoder vocoder = null;
 
         public DiffSingerSinger(Voicebank voicebank) {
             this.voicebank = voicebank;
@@ -67,8 +64,6 @@ public DiffSingerSinger(Voicebank voicebank) {
             //导入音素列表
             string phonemesPath = Path.Combine(Location, dsConfig.phonemes);
             phonemes = File.ReadLines(phonemesPath).ToList();
-            //获取声码器
-            vocoder = new DsVocoder(dsConfig.vocoder);
 
             found = true;
             loaded = true;
@@ -108,5 +103,19 @@ public byte[] getAcousticModel() {
             return acousticModel;
         }
 
+        public DsVocoder getVocoder() {
+            //Load the vocoder on first use
+            if(vocoder is null) {
+                vocoder = new DsVocoder(dsConfig.vocoder);
+            }
+            return vocoder;
+        }
+    }
+
+    [Serializable]
+    public class DsConfig {
+        public string phonemes = "phonemes.txt";
+        public string acoustic = "acoustic.onnx";
+        public string vocoder = "nsf_hifigan";
     }
 }
diff --git a/OpenUtau.Core/DiffSinger/DiffSingerVocoder.cs b/OpenUtau.Core/DiffSinger/DiffSingerVocoder.cs
index 51a5a0ae0..cdff4027e 100644
--- a/OpenUtau.Core/DiffSinger/DiffSingerVocoder.cs
+++ b/OpenUtau.Core/DiffSinger/DiffSingerVocoder.cs
@@ -1,5 +1,6 @@
-using System.IO;
-using System.Text;
+using System;
+using System.IO;
+
 namespace OpenUtau.Core.DiffSinger {
     public class DsVocoder {
         public string Location;
@@ -8,17 +9,16 @@ public class DsVocoder {
 
         //通过名称获取声码器
         public DsVocoder(string name) {
-            Location = Path.Combine(PathManager.Inst.VocodersPath,name);
-            config = Core.Yaml.DefaultDeserializer.Deserialize<DsVocoderConfig>(
-                File.ReadAllText(Path.Combine(Location, "vocoder.yaml"),
-                    System.Text.Encoding.UTF8));
-        }
-
-        public byte[] getModel() {
-            if (model.Length == 0) {
+            try {
+                Location = Path.Combine(PathManager.Inst.VocodersPath, name);
+                config = Core.Yaml.DefaultDeserializer.Deserialize<DsVocoderConfig>(
+                    File.ReadAllText(Path.Combine(Location, "vocoder.yaml"),
+                        System.Text.Encoding.UTF8));
                 model = File.ReadAllBytes(Path.Combine(Location, config.model));
             }
-            return model;
+            catch (Exception ex) {
+                throw new Exception($"Error loading vocoder {name}. Please download vocoder from https://github.com/xunmengshe/OpenUtau/wiki/Vocoders", ex);
+            }
         }
 
         public float frameMs() {
@@ -26,5 +26,12 @@ public float frameMs() {
         }
     }
 
-    
+    [Serializable]
+    public class DsVocoderConfig {
+        public string name = "vocoder";
+        public string model = "model.onnx";
+        public int num_mel_bins = 128;
+        public int hop_size = 512;
+        public int sample_rate = 44100;
+    }
 }
diff --git a/OpenUtau.Core/DiffSinger/DiffSingerVocoderInstaller.cs b/OpenUtau.Core/DiffSinger/DiffSingerVocoderInstaller.cs
new file mode 100644
index 000000000..f4e7f1e6d
--- /dev/null
+++ b/OpenUtau.Core/DiffSinger/DiffSingerVocoderInstaller.cs
@@ -0,0 +1,53 @@
+using System;
+using System.Collections.Generic;
+using System.Drawing;
+using System.IO;
+using System.Linq;
+using System.Text;
+using SharpCompress.Archives;
+
+namespace OpenUtau.Core.DiffSinger {
+    /// <summary>
+    /// Installs a DiffSinger vocoder package (a ".dsvocoder" archive) into the vocoders folder.
+    /// </summary>
+    public class DiffSingerVocoderInstaller {
+        public static string FileExt = ".dsvocoder";
+
+        /// <summary>
+        /// Reads vocoder.yaml from the archive and extracts every entry into
+        /// a folder named after the vocoder under PathManager.Inst.VocodersPath.
+        /// </summary>
+        /// <exception cref="ArgumentException">vocoder.yaml is missing or declares an unusable name.</exception>
+        public static void Install(string archivePath) {
+            DsVocoderConfig vocoderConfig;
+            using (var archive = ArchiveFactory.Open(archivePath)) {
+                // FirstOrDefault, not First: First throws before the null check can run.
+                var configEntry = archive.Entries.FirstOrDefault(e => e.Key == "vocoder.yaml");
+                if (configEntry == null) {
+                    throw new ArgumentException("missing vocoder.yaml");
+                }
+                using (var stream = configEntry.OpenEntryStream()) {
+                    using var reader = new StreamReader(stream, Encoding.UTF8);
+                    vocoderConfig = Core.Yaml.DefaultDeserializer.Deserialize<DsVocoderConfig>(reader);
+                }
+                // The name comes from the archive, so it must be a single folder name.
+                string name = vocoderConfig?.name;
+                if (string.IsNullOrWhiteSpace(name) || name != Path.GetFileName(name) || name.Trim('.').Length == 0) {
+                    throw new ArgumentException("invalid vocoder name in vocoder.yaml");
+                }
+                var basePath = Path.GetFullPath(Path.Combine(PathManager.Inst.VocodersPath, name));
+                foreach (var entry in archive.Entries) {
+                    // Prevent zipSlip attack: skip entries that resolve outside basePath.
+                    var filePath = Path.GetFullPath(Path.Combine(basePath, entry.Key));
+                    if (!filePath.StartsWith(basePath + Path.DirectorySeparatorChar, StringComparison.Ordinal)) {
+                        continue;
+                    }
+                    Directory.CreateDirectory(Path.GetDirectoryName(filePath));
+                    if (!entry.IsDirectory) {
+                        entry.WriteToFile(filePath);
+                    }
+                }
+            }
+        }
+    }
+}
diff --git a/OpenUtau/Views/MainWindow.axaml.cs b/OpenUtau/Views/MainWindow.axaml.cs
index cb3f46fb3..34b43bebb 100644
--- a/OpenUtau/Views/MainWindow.axaml.cs
+++ b/OpenUtau/Views/MainWindow.axaml.cs
@@ -489,7 +489,7 @@ async void OnMenuInstallSinger(object sender, RoutedEventArgs args) {
                 Filters = new List<FileDialogFilter>() {
                     new FileDialogFilter() {
                         Name = "Archive File",
-                        Extensions = new List<string>(){ "zip", "rar", "uar", "vogeon" },
+                        Extensions = new List<string>(){ "zip", "rar", "uar", "vogeon", "dsvocoder" },
                     },
                 },
                 AllowMultiple = false,
@@ -502,6 +502,10 @@ async void OnMenuInstallSinger(object sender, RoutedEventArgs args) {
                 Core.Vogen.VogenSingerInstaller.Install(files[0]);
                 return;
             }
+            if (files[0].EndsWith(Core.DiffSinger.DiffSingerVocoderInstaller.FileExt)) {
+                Core.DiffSinger.DiffSingerVocoderInstaller.Install(files[0]);
+                return;
+            }
             try {
                 var setup = new SingerSetupDialog() {
                     DataContext = new SingerSetupViewModel() {
@@ -709,6 +713,8 @@ async void OnDrop(object? sender, DragEventArgs args) {
                 }
             } else if (ext == Core.Vogen.VogenSingerInstaller.FileExt) {
                 Core.Vogen.VogenSingerInstaller.Install(file);
+            } else if (ext == Core.DiffSinger.DiffSingerVocoderInstaller.FileExt) {
+                Core.DiffSinger.DiffSingerVocoderInstaller.Install(file);
             } else if (ext == ".mp3" || ext == ".wav" || ext == ".ogg" || ext == ".flac") {
                 try {
                     viewModel.ImportAudio(file);