Skip to content

Commit

Permalink
Merge pull request #5 from oxygen-dioxide/diffsinger-nomidi
Browse files Browse the repository at this point in the history
diffsinger vocoder installer
  • Loading branch information
oxygen-dioxide authored Jan 8, 2023
2 parents eabc74a + 196ec0a commit 3a837a0
Show file tree
Hide file tree
Showing 7 changed files with 92 additions and 59 deletions.
21 changes: 0 additions & 21 deletions OpenUtau.Core/DiffSinger/DiffSingerConfig.cs

This file was deleted.

19 changes: 6 additions & 13 deletions OpenUtau.Core/DiffSinger/DiffSingerMandarinPhonemizer.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
using System;
using System.IO;
using K4os.Hash.xxHash;
using TinyPinyin;

using OpenUtau.Api;
Expand Down Expand Up @@ -51,17 +50,9 @@ public override void SetSinger(USinger singer) {
}

public override Result Process(Note[] notes, Note? prev, Note? next, Note? prevNeighbour, Note? nextNeighbour, Note[] prevs) {
/*if (!partResult.TryGetValue(notes[0].position, out var phonemes)) {
throw new Exception("Part result not found");
}
return new Result {
phonemes = phonemes
.Select((tu) => new Phoneme() {
phoneme = tu.Item1,
position = tu.Item2,
})
.ToArray(),
};*/
float frameMs = 1000f*512/44100;
//TODO:变速曲可能会产生错误结果
int frameTick = MsToTick(frameMs);
string lyric = notes[0].lyric;
//汉字转拼音
if (lyric.Length > 0 && PinyinHelper.IsChinese(lyric[0])) {
Expand All @@ -77,9 +68,11 @@ public override Result Process(Note[] notes, Note? prev, Note? next, Note? prevN
} else {
//使用vogen的辅音时间
Result VogenResult = base.Process(notes, prev, next, prevNeighbour, nextNeighbour, prevs);
//辅音长度至少为1帧
return new Result {
phonemes = new Phoneme[] {
new Phoneme {phoneme = phones.Item1, position = VogenResult.phonemes[0].position},
new Phoneme {phoneme = phones.Item1,
position = Math.Min(VogenResult.phonemes[0].position,-frameTick)},
new Phoneme {phoneme = phones.Item2, position = 0}
},
};
Expand Down
10 changes: 5 additions & 5 deletions OpenUtau.Core/DiffSinger/DiffSingerRenderer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
using OpenUtau.Core.Render;
using OpenUtau.Core.SignalChain;
using OpenUtau.Core.Ustx;
using OpenUtau.Core.Util;
using Serilog;

namespace OpenUtau.Core.DiffSinger {
Expand Down Expand Up @@ -63,7 +62,7 @@ public Task<RenderResult> Render(RenderPhrase phrase, Progress progress, Cancell
}
var result = Layout(phrase);
int speedup = Core.Util.Preferences.Default.DiffsingerSpeedup;
var wavPath = Path.Join(PathManager.Inst.CachePath, $"vog-{phrase.hash:x16}-{speedup}x.wav");
var wavPath = Path.Join(PathManager.Inst.CachePath, $"ds-{phrase.hash:x16}-{speedup}x.wav");
string progressInfo = $"{this}{speedup}x \"{string.Join(" ", phrase.phones.Select(p => p.phoneme))}\"";
if (File.Exists(wavPath)) {
try {
Expand Down Expand Up @@ -96,7 +95,8 @@ public Task<RenderResult> Render(RenderPhrase phrase, Progress progress, Cancell

float[] InvokeDiffsinger(RenderPhrase phrase,int speedup) {
var singer = phrase.singer as DiffSingerSinger;
var frameMs = singer.vocoder.frameMs();
var vocoder = singer.getVocoder();
var frameMs = vocoder.frameMs();
var frameSec = frameMs / 1000;
int headFrames = (int)(headMs / frameMs);
int tailFrames = (int)(tailMs / frameMs);
Expand Down Expand Up @@ -143,7 +143,7 @@ float[] InvokeDiffsinger(RenderPhrase phrase,int speedup) {
vocoderInputs.Add(NamedOnnxValue.CreateFromTensor("mel", mel));
vocoderInputs.Add(NamedOnnxValue.CreateFromTensor("f0",f0tensor));
float[] samples;
using (var session = new InferenceSession(singer.vocoder.getModel())) {
using (var session = new InferenceSession(vocoder.model)) {
using var vocoderOutputs = session.Run(vocoderInputs);
samples = vocoderOutputs.First().AsTensor<float>().ToArray();
}
Expand All @@ -153,7 +153,7 @@ float[] InvokeDiffsinger(RenderPhrase phrase,int speedup) {
//参数曲线采样
double[] SampleCurve(RenderPhrase phrase, float[] curve, double defaultValue, int length, int headFrames, int tailFrames, Func<double, double> convert) {
var singer = phrase.singer as DiffSingerSinger;
var frameMs = singer.vocoder.frameMs();
var frameMs = singer.getVocoder().frameMs();
const int interval = 5;
var result = new double[length];
if (curve == null) {
Expand Down
23 changes: 16 additions & 7 deletions OpenUtau.Core/DiffSinger/DiffSingerSinger.cs
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
using Newtonsoft.Json;
using System;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Reflection;
using System.Text;
using System.Text.RegularExpressions;
using OpenUtau.Classic;
using OpenUtau.Core.Ustx;
using Serilog;
Expand Down Expand Up @@ -39,7 +36,7 @@ class DiffSingerSinger : USinger {
public List<string> phonemes = new List<string>();
public DsConfig dsConfig;
public byte[] acousticModel = new byte[0];
public DsVocoder vocoder;
public DsVocoder vocoder = null;

public DiffSingerSinger(Voicebank voicebank) {
this.voicebank = voicebank;
Expand Down Expand Up @@ -67,8 +64,6 @@ public DiffSingerSinger(Voicebank voicebank) {
//导入音素列表
string phonemesPath = Path.Combine(Location, dsConfig.phonemes);
phonemes = File.ReadLines(phonemesPath).ToList();
//获取声码器
vocoder = new DsVocoder(dsConfig.vocoder);

found = true;
loaded = true;
Expand Down Expand Up @@ -108,5 +103,19 @@ public byte[] getAcousticModel() {
return acousticModel;
}

/// <summary>
/// Returns this singer's vocoder, constructing it on the first call.
/// </summary>
/// <returns>The cached <see cref="DsVocoder"/> stored in the <c>vocoder</c> field.</returns>
public DsVocoder getVocoder() {
    // Lazily create the vocoder named by dsConfig.vocoder; later calls reuse the stored instance.
    vocoder ??= new DsVocoder(dsConfig.vocoder);
    return vocoder;
}
}

/// <summary>
/// Per-singer DiffSinger settings. The values below are the field initializers.
/// NOTE(review): where this object is populated is not visible in this view — confirm the loader.
/// </summary>
[Serializable]
public class DsConfig {
/// <summary>Phoneme list file; DiffSingerSinger combines it with the singer's Location and reads it line by line.</summary>
public string phonemes = "phonemes.txt";
/// <summary>Presumably the acoustic model file name — its use is not visible in this view; confirm.</summary>
public string acoustic = "acoustic.onnx";
/// <summary>Vocoder name; getVocoder() passes it to the DsVocoder constructor.</summary>
public string vocoder = "nsf_hifigan";
}
}
31 changes: 19 additions & 12 deletions OpenUtau.Core/DiffSinger/DiffSingerVocoder.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
using System.IO;
using System.Text;
using System;
using System.IO;

namespace OpenUtau.Core.DiffSinger {
public class DsVocoder {
public string Location;
Expand All @@ -8,23 +9,29 @@ public class DsVocoder {

//通过名称获取声码器
public DsVocoder(string name) {
Location = Path.Combine(PathManager.Inst.VocodersPath,name);
config = Core.Yaml.DefaultDeserializer.Deserialize<DsVocoderConfig>(
File.ReadAllText(Path.Combine(Location, "vocoder.yaml"),
System.Text.Encoding.UTF8));
}

public byte[] getModel() {
if (model.Length == 0) {
try {
Location = Path.Combine(PathManager.Inst.VocodersPath, name);
config = Core.Yaml.DefaultDeserializer.Deserialize<DsVocoderConfig>(
File.ReadAllText(Path.Combine(Location, "vocoder.yaml"),
System.Text.Encoding.UTF8));
model = File.ReadAllBytes(Path.Combine(Location, config.model));
}
return model;
catch (Exception ex) {
throw new Exception($"Error loading vocoder {name}. Please download vocoder from https://github.com/xunmengshe/OpenUtau/wiki/Vocoders");
}
}

/// <summary>
/// Length of one vocoder frame in milliseconds, computed from the loaded config
/// as 1000 * hop_size / sample_rate.
/// </summary>
public float frameMs() => 1000f * config.hop_size / config.sample_rate;
}


/// <summary>
/// Vocoder settings deserialized from a vocoder's <c>vocoder.yaml</c>.
/// The values below are the field initializers.
/// </summary>
[Serializable]
public class DsVocoderConfig {
/// <summary>Vocoder name; the installer uses it as the folder name under the vocoders path.</summary>
public string name = "vocoder";
/// <summary>Model file name; DsVocoder reads its bytes from the vocoder's folder.</summary>
public string model = "model.onnx";
/// <summary>Presumably the number of mel bins the model expects — not referenced in the visible code; confirm.</summary>
public int num_mel_bins = 128;
/// <summary>Numerator term of DsVocoder.frameMs() (1000 * hop_size / sample_rate); presumably in samples — confirm.</summary>
public int hop_size = 512;
/// <summary>Denominator of DsVocoder.frameMs(); presumably in Hz — confirm.</summary>
public int sample_rate = 44100;
}
}
39 changes: 39 additions & 0 deletions OpenUtau.Core/DiffSinger/DiffSingerVocoderInstaller.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
using System;
using System.Collections.Generic;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Text;
using SharpCompress.Archives;

namespace OpenUtau.Core.DiffSinger {
/// <summary>
/// Installs a DiffSinger vocoder package (an archive with the <c>.dsvocoder</c> extension)
/// into a folder under <c>PathManager.Inst.VocodersPath</c>.
/// </summary>
public class DiffSingerVocoderInstaller {
    /// <summary>File extension that identifies a vocoder package.</summary>
    public static string FileExt = ".dsvocoder";

    /// <summary>
    /// Extracts the archive at <paramref name="archivePath"/> into
    /// <c>VocodersPath/&lt;name&gt;</c>, where <c>name</c> is read from the
    /// <c>vocoder.yaml</c> entry at the archive root.
    /// </summary>
    /// <param name="archivePath">Path of the vocoder archive to install.</param>
    /// <exception cref="ArgumentException">
    /// The archive has no <c>vocoder.yaml</c>, the yaml is empty, or the vocoder name
    /// it declares is empty or resolves outside the vocoders folder.
    /// </exception>
    public static void Install(string archivePath) {
        using (var archive = ArchiveFactory.Open(archivePath)) {
            // FirstOrDefault, not First: First throws InvalidOperationException on no match,
            // which made the null check below unreachable.
            var configEntry = archive.Entries.FirstOrDefault(e => e.Key == "vocoder.yaml");
            if (configEntry == null) {
                throw new ArgumentException("missing vocoder.yaml");
            }

            DsVocoderConfig vocoderConfig;
            using (var stream = configEntry.OpenEntryStream()) {
                using var reader = new StreamReader(stream, Encoding.UTF8);
                vocoderConfig = Core.Yaml.DefaultDeserializer.Deserialize<DsVocoderConfig>(reader);
            }
            if (vocoderConfig == null || string.IsNullOrEmpty(vocoderConfig.name)) {
                throw new ArgumentException("invalid vocoder.yaml");
            }

            // The name comes from the (untrusted) archive, so make sure the install folder
            // really is a sub-folder of the vocoders directory.
            var vocodersRoot = Path.GetFullPath(PathManager.Inst.VocodersPath);
            var basePath = Path.GetFullPath(Path.Combine(vocodersRoot, vocoderConfig.name));
            if (!IsInsideDirectory(vocodersRoot, basePath)) {
                throw new ArgumentException("invalid vocoder name in vocoder.yaml");
            }

            foreach (var entry in archive.Entries) {
                if (entry.Key == null || entry.Key.Contains("..")) {
                    // Prevent zipSlip attack
                    continue;
                }
                // Path.Combine discards basePath when the key is rooted, so the ".." test
                // alone is not enough: resolve the destination and require it to stay inside.
                var filePath = Path.GetFullPath(Path.Combine(basePath, entry.Key));
                if (!IsInsideDirectory(basePath, filePath)) {
                    continue;
                }
                Directory.CreateDirectory(Path.GetDirectoryName(filePath));
                if (!entry.IsDirectory) {
                    entry.WriteToFile(filePath);
                }
            }
        }
    }

    // True when the full path `candidate` lies strictly below the full path `directory`.
    private static bool IsInsideDirectory(string directory, string candidate) {
        var separator = Path.DirectorySeparatorChar.ToString();
        var prefix = directory.EndsWith(separator, StringComparison.Ordinal)
            ? directory
            : directory + separator;
        return candidate.StartsWith(prefix, StringComparison.Ordinal);
    }
}
}
8 changes: 7 additions & 1 deletion OpenUtau/Views/MainWindow.axaml.cs
Original file line number Diff line number Diff line change
Expand Up @@ -489,7 +489,7 @@ async void OnMenuInstallSinger(object sender, RoutedEventArgs args) {
Filters = new List<FileDialogFilter>() {
new FileDialogFilter() {
Name = "Archive File",
Extensions = new List<string>(){ "zip", "rar", "uar", "vogeon" },
Extensions = new List<string>(){ "zip", "rar", "uar", "vogeon", "dsvocoder" },
},
},
AllowMultiple = false,
Expand All @@ -502,6 +502,10 @@ async void OnMenuInstallSinger(object sender, RoutedEventArgs args) {
Core.Vogen.VogenSingerInstaller.Install(files[0]);
return;
}
if (files[0].EndsWith(Core.DiffSinger.DiffSingerVocoderInstaller.FileExt)) {
Core.DiffSinger.DiffSingerVocoderInstaller.Install(files[0]);
return;
}
try {
var setup = new SingerSetupDialog() {
DataContext = new SingerSetupViewModel() {
Expand Down Expand Up @@ -709,6 +713,8 @@ async void OnDrop(object? sender, DragEventArgs args) {
}
} else if (ext == Core.Vogen.VogenSingerInstaller.FileExt) {
Core.Vogen.VogenSingerInstaller.Install(file);
} else if (ext == Core.DiffSinger.DiffSingerVocoderInstaller.FileExt) {
Core.DiffSinger.DiffSingerVocoderInstaller.Install(file);
} else if (ext == ".mp3" || ext == ".wav" || ext == ".ogg" || ext == ".flac") {
try {
viewModel.ImportAudio(file);
Expand Down

0 comments on commit 3a837a0

Please sign in to comment.