Skip to content

Commit

Permalink
Merge pull request #57 from oxygen-dioxide/diffsinger
Browse files Browse the repository at this point in the history
Diffsinger
  • Loading branch information
oxygen-dioxide authored Aug 17, 2023
2 parents 0013d6a + e7912b3 commit 90c0f52
Show file tree
Hide file tree
Showing 4 changed files with 118 additions and 5 deletions.
21 changes: 21 additions & 0 deletions OpenUtau.Core/DiffSinger/DiffSingerEnglishPhonemizer.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
using OpenUtau.Api;
using OpenUtau.Core.G2p;

namespace OpenUtau.Core.DiffSinger
{
/// <summary>
/// English variant of <see cref="DiffSingerG2pPhonemizer"/>. It supplies the
/// English dictionary file name, an <see cref="ArpabetG2p"/> instance as the base
/// G2P, and the vowel/consonant symbol lists declared for that base G2P.
/// </summary>
[Phonemizer("DiffSinger English Phonemizer", "DIFFS EN", language: "EN")]
public class DiffSingerEnglishPhonemizer : DiffSingerG2pPhonemizer
{
    /// <summary>Returns the file name of the English dictionary.</summary>
    protected override string GetDictionaryName() {
        return "dsdict-en.yaml";
    }

    /// <summary>Creates the base G2P used by this phonemizer.</summary>
    protected override IG2p LoadBaseG2p() {
        return new ArpabetG2p();
    }

    /// <summary>Symbols of the base G2P that are reported as vowels.</summary>
    protected override string[] GetBaseG2pVowels() {
        return new[] {
            "aa", "ae", "ah", "ao", "aw", "ay", "eh", "er",
            "ey", "ih", "iy", "ow", "oy", "uh", "uw"
        };
    }

    /// <summary>Symbols of the base G2P that are reported as consonants.</summary>
    protected override string[] GetBaseG2pConsonants() {
        return new[] {
            "b", "ch", "d", "dh", "f", "g", "hh", "jh", "k", "l", "m", "n",
            "ng", "p", "r", "s", "sh", "t", "th", "v", "w", "y", "z", "zh"
        };
    }
}
}
92 changes: 92 additions & 0 deletions OpenUtau.Core/DiffSinger/DiffSingerG2pPhonemizer.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
using Serilog;
using System;
using System.Collections.Generic;
using System.IO;

using OpenUtau.Api;

namespace OpenUtau.Core.DiffSinger
{
/// <summary>
/// Deserialization target holding a list of phoneme replacements
/// (<c>from</c> → <c>to</c>) read from YAML text.
/// </summary>
/// <remarks>
/// Field names are left exactly as declared because the deserializer binds to
/// them; how they map to YAML keys depends on the project's
/// <c>Yaml.DefaultDeserializer</c> configuration, which is not visible here.
/// </remarks>
public class G2pReplacementsData{
    /// <summary>One replacement rule: the symbol to replace and its substitute.</summary>
    public struct Replacement{
        public string from;
        public string to;
    }

    /// <summary>Replacement rules; null when the YAML text does not define any.</summary>
    public Replacement[]? replacements;

    /// <summary>
    /// Deserializes <paramref name="text"/> into a <see cref="G2pReplacementsData"/>.
    /// </summary>
    /// <param name="text">YAML text to parse.</param>
    /// <returns>
    /// The parsed data, or an empty instance when the deserializer yields null
    /// (e.g. for an empty document), so callers can always call
    /// <see cref="toDict"/> on the result.
    /// </returns>
    public static G2pReplacementsData Load(string text){
        var data = OpenUtau.Core.Yaml.DefaultDeserializer.Deserialize<G2pReplacementsData>(text);
        return data ?? new G2pReplacementsData();
    }

    /// <summary>
    /// Flattens <see cref="replacements"/> into a lookup table keyed by <c>from</c>.
    /// </summary>
    /// <returns>
    /// A new dictionary mapping each <c>from</c> to its <c>to</c>; empty when
    /// there are no replacements. When the same <c>from</c> occurs more than
    /// once, the last entry wins.
    /// </returns>
    public Dictionary<string, string> toDict(){
        var dict = new Dictionary<string, string>();
        if (replacements == null) {
            return dict;
        }
        foreach (var r in replacements) {
            // An entry missing either side leaves the struct field null. A null key
            // would make the indexer throw and discard every other rule, and a null
            // value is useless to consumers, so incomplete entries are skipped.
            if (r.from == null || r.to == null) {
                continue;
            }
            dict[r.from] = r.to;
        }
        return dict;
    }
}

/// <summary>
/// DiffSinger phonemizer base that assembles its G2P from up to three sources:
/// a dictionary file in the plugin folder, a dictionary file in the singer
/// folder, and an optional base G2P supplied by the subclass.
/// </summary>
public abstract class DiffSingerG2pPhonemizer : DiffSingerPhonemizer
{
    /// <summary>File name of the dictionary looked up in the plugin and singer folders.</summary>
    protected virtual string GetDictionaryName()=>"dsdict.yaml";

    /// <summary>Creates the base G2P, or returns null when the subclass has none.</summary>
    protected virtual IG2p LoadBaseG2p()=>null;

    /// <summary>Symbols of the base G2P to register as vowels.</summary>
    protected virtual string[] GetBaseG2pVowels()=>Array.Empty<string>();

    /// <summary>Symbols of the base G2P to register as consonants.</summary>
    protected virtual string[] GetBaseG2pConsonants()=>Array.Empty<string>();

    /// <summary>
    /// Builds the G2P for a singer located at <paramref name="rootPath"/>.
    /// </summary>
    /// <param name="rootPath">Singer folder that may contain a dictionary file.</param>
    /// <returns>
    /// A <see cref="G2pFallbacks"/> over, in this order: the plugin-folder
    /// dictionary (if it loads), the singer-folder dictionary (if it loads), and
    /// the remapped base G2P (if <see cref="LoadBaseG2p"/> returns one).
    /// </returns>
    /// <remarks>
    /// A dictionary file that fails to load is logged and skipped; the failure is
    /// not propagated to the caller.
    /// </remarks>
    protected override IG2p LoadG2p(string rootPath) {
        var dictionaryName = GetDictionaryName();
        var g2ps = new List<IG2p>();

        // Load dictionary from plugin folder.
        string pluginFile = Path.Combine(PluginDir, dictionaryName);
        if (File.Exists(pluginFile)) {
            try {
                g2ps.Add(G2pDictionary.NewBuilder().Load(File.ReadAllText(pluginFile)).Build());
            } catch (Exception e) {
                Log.Error(e, $"Failed to load {pluginFile}");
            }
        }

        // Load dictionary from singer folder. The same file also carries the
        // replacement rules, so it is read once and the text is parsed twice.
        var replacements = new Dictionary<string, string>();
        string singerFile = Path.Combine(rootPath, dictionaryName);
        if (File.Exists(singerFile)) {
            try {
                string yaml = File.ReadAllText(singerFile);
                g2ps.Add(G2pDictionary.NewBuilder().Load(yaml).Build());
                replacements = G2pReplacementsData.Load(yaml).toDict();
            } catch (Exception e) {
                Log.Error(e, $"Failed to load {singerFile}");
            }
        }

        // Load base g2p; without one, only the dictionaries are used.
        var baseG2p = LoadBaseG2p();
        if (baseG2p != null) {
            var phonemeSymbols = BuildPhonemeSymbols(baseG2p, replacements);
            g2ps.Add(new G2pRemapper(baseG2p, phonemeSymbols, replacements));
        }
        return new G2pFallbacks(g2ps.ToArray());
    }

    /// <summary>
    /// Builds the symbol table (symbol → is-vowel) handed to <see cref="G2pRemapper"/>.
    /// </summary>
    /// <param name="baseG2p">Base G2P used to validate and classify replacement targets.</param>
    /// <param name="replacements">Replacement rules mapping <c>from</c> to <c>to</c>.</param>
    /// <returns>
    /// The declared vowels and consonants, plus one entry per replacement whose
    /// target is a valid base symbol; that entry is keyed by the rule's
    /// <c>from</c> and takes the vowel flag of its <c>to</c>.
    /// </returns>
    private Dictionary<string, bool> BuildPhonemeSymbols(IG2p baseG2p, Dictionary<string, string> replacements) {
        var symbols = new Dictionary<string, bool>();
        foreach (var vowel in GetBaseG2pVowels()) {
            symbols[vowel] = true;
        }
        foreach (var consonant in GetBaseG2pConsonants()) {
            symbols[consonant] = false;
        }
        // Replacement entries are applied last so they override the declared lists.
        foreach (var rule in replacements) {
            if (baseG2p.IsValidSymbol(rule.Value)) {
                symbols[rule.Key] = baseG2p.IsVowel(rule.Value);
            }
        }
        return symbols;
    }
}
}
2 changes: 1 addition & 1 deletion OpenUtau.Core/DiffSinger/DiffSingerPhonemizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ public override void SetSinger(USinger singer) {
}
}

protected IG2p LoadG2p(string rootPath) {
protected virtual IG2p LoadG2p(string rootPath) {
var g2ps = new List<IG2p>();
// Load dictionary from singer folder.
string file = Path.Combine(rootPath, "dsdict.yaml");
Expand Down
8 changes: 4 additions & 4 deletions OpenUtau.Core/DiffSinger/DiffSingerRenderer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,8 @@ float[] InvokeDiffsinger(RenderPhrase phrase,int speedup) {
var vocoder = singer.getVocoder();
var frameMs = vocoder.frameMs();
var frameSec = frameMs / 1000;
int headFrames = (int)(headMs / frameMs);
int tailFrames = (int)(tailMs / frameMs);
int headFrames = (int)Math.Round(headMs / frameMs);
int tailFrames = (int)Math.Round(tailMs / frameMs);
var result = Layout(phrase);
//acoustic
//mel = session.run(['mel'], {'tokens': tokens, 'durations': durations, 'f0': f0, 'speedup': speedup})[0]
Expand All @@ -128,11 +128,11 @@ float[] InvokeDiffsinger(RenderPhrase phrase,int speedup) {
.Select(x => (long)(singer.phonemes.IndexOf(x)))
.ToList();
var durations = phrase.phones
.Select(p => (int)(p.endMs / frameMs) - (int)(p.positionMs / frameMs))//prevent cumulative error
.Select(p => (int)Math.Round(p.endMs / frameMs) - (int)Math.Round(p.positionMs / frameMs))//prevent cumulative error
.Prepend(headFrames)
.Append(tailFrames)
.ToList();
var totalFrames = (int)(durations.Sum());
int totalFrames = durations.Sum();
float[] f0 = DiffSingerUtils.SampleCurve(phrase, phrase.pitches, 0, frameMs, totalFrames, headFrames, tailFrames,
x => MusicMath.ToneToFreq(x * 0.01))
.Select(f => (float)f).ToArray();
Expand Down

0 comments on commit 90c0f52

Please sign in to comment.