Skip to content

Commit

Permalink
afs.core: Rewrite the method of matching ass font info with physical …
Browse files Browse the repository at this point in the history
…font info
  • Loading branch information
MIRIMIRIM committed May 11, 2024
1 parent de1a983 commit 84f13b4
Show file tree
Hide file tree
Showing 4 changed files with 207 additions and 174 deletions.
97 changes: 74 additions & 23 deletions AssFontSubset.Core/src/AssFont.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
using Mobsub.SubtitleParse;
using Mobsub.SubtitleParse.AssTypes;
using System.Text;
using ZLogger;

namespace AssFontSubset.Core;

Expand All @@ -23,45 +24,95 @@ public static Dictionary<AssFontInfo, List<Rune>> GetAssFonts(string file, out A
return AssFontParse.GetUsedFontInfos(ass.Events.Collection, ass.Styles.Collection);
}

public static bool IsMatch(AssFontInfo afi, FontInfo fi)
public static bool IsMatch(AssFontInfo afi, FontInfo fi, bool single, int? minimalWeight = null, bool? hadItalic = null, ILogger? logger = null)
{
var boldMatch = false;
var italicMatch = false;
if (!single) { if (minimalWeight is null || hadItalic is null) throw new ArgumentNullException(); }

var assFn = afi.Name.StartsWith('@') ? afi.Name.AsSpan(1) : afi.Name.AsSpan();
if ((assFn.SequenceEqual(fi.FamilyName.AsSpan()) || assFn.SequenceEqual(fi.FamilyNameChs.AsSpan())))
logger?.ZLogDebug($"Try match {afi.ToString()} and {fi.FamilyName}_w{fi.Weight}_b{(fi.Bold ? 1 : 0)}_i{(fi.Italic ? 1 : 0)}");
switch (afi.Weight)
{
if (afi.Weight == 0)
{
boldMatch = !fi.Bold;
}
else if (afi.Weight == 1)
{
boldMatch = fi.Bold || (!fi.MaybeHasTrueBoldOrItalic && !fi.Bold && !fi.Italic);
}
else if (afi.Weight == fi.Weight)
{
// Maybe wrong
boldMatch = true;
}
case 0:
boldMatch = fi.Bold ? single : true; // cant get only true bold
break;
case 1:
if (single)
{
// maybe faux bold
if (fi.Weight >= 550) { logger?.ZLogWarning($"{afi.Name} use \\b1 will not get faux bold"); }
boldMatch = true;
}
else
{
// strict
boldMatch = fi.Bold;
}
break;
default:
if (afi.Weight == fi.Weight)
{
boldMatch = true;
}
else
{
if (fi.Weight > (afi.Weight + 150)) { logger?.ZLogDebug($"{afi.Name} should use \\b{fi.Weight}"); }
}
break;
}

if (afi.Italic == fi.Italic)
{
italicMatch = true;
}
else if (afi.Italic == true && (!fi.MaybeHasTrueBoldOrItalic && !fi.Bold && !fi.Italic))
if (afi.Italic)
{
if (fi.Italic)
{
italicMatch = true;
}
else if (afi.Italic == true && fi.MaybeHasTrueBoldOrItalic && fi.FamilyName != fi.FamilyNameChs)
else
{
italicMatch = true;
// maybe faux italic
if (single) { italicMatch = true; }
else
{
if (!(bool)hadItalic!) { italicMatch = true; }
else if (!(fi.MaxpNumGlyphs < 6000 && fi.FamilyName == fi.FamilyNameChs))
{
// maybe cjk fonts
italicMatch = true;
logger?.ZLogDebug($"{afi.Name} use \\i1 maybe get faux italic");
}
}
}
}
else
{
if (!fi.Italic) { italicMatch = true; }
}

return boldMatch && italicMatch;
}

/// <summary>
/// Picks the physical font inside one family group that satisfies an ASS font request.
/// </summary>
/// <param name="afi">Font requested by the ASS script. A leading '@' in its name is ignored when comparing names.</param>
/// <param name="fig">Physical fonts grouped under one key; the key and the first font's <c>FamilyNameChs</c> are compared with the requested name.</param>
/// <param name="logger">Optional logger, forwarded unchanged to <c>IsMatch</c>.</param>
/// <returns>
/// The first font of the group accepted by <c>IsMatch</c>, or <c>null</c> when the names differ,
/// the group is empty, or no font in the group is accepted.
/// </returns>
public static FontInfo? GetMatchedFontInfo(AssFontInfo afi, IGrouping<string, FontInfo> fig, ILogger? logger = null)
{
    // Materialize once: the group is inspected several times below (first element, count, min, any, loop).
    var candidates = fig.ToList();
    if (candidates.Count == 0) { return null; }

    var assFn = afi.Name.StartsWith('@') ? afi.Name.AsSpan(1) : afi.Name.AsSpan();
    var nameMatches = assFn.SequenceEqual(fig.Key.AsSpan())
        || assFn.SequenceEqual(candidates[0].FamilyNameChs.AsSpan());
    if (!nameMatches) { return null; }

    if (candidates.Count == 1)
    {
        // Only one face in the group: IsMatch is called in its "single" mode, without group statistics.
        var only = candidates[0];
        if (IsMatch(afi, only, true, null, null, logger)) { return only; }
        return null;
    }

    var minimalWeight = candidates.Min(f => f.Weight);
    // True only when at least one face of the group is flagged italic.
    // (Counting a projected bool sequence would be non-zero for every non-empty group.)
    var hadItalic = candidates.Any(f => f.Italic);

    foreach (var fi in candidates)
    {
        if (IsMatch(afi, fi, false, minimalWeight, hadItalic, logger)) { return fi; }
    }

    return null;
}

private static HashSet<string> GetUsedStyles(List<AssEvent> events)
{
var styles = new HashSet<string>();
Expand Down
97 changes: 28 additions & 69 deletions AssFontSubset.Core/src/FontParse.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,11 @@ public struct FontInfo
{
public string FamilyName;
public string FamilyNameChs;
//public bool Regular;
public bool Bold;
public bool Italic;
public int Weight;
public bool MaybeHasTrueBoldOrItalic;
//public bool MaybeHasTrueBoldOrItalic;
public string FileName;
public uint Index;
public ushort MaxpNumGlyphs;
Expand All @@ -20,10 +21,11 @@ public override bool Equals(object? obj)
return obj is FontInfo info &&
FamilyName == info.FamilyName &&
FamilyNameChs == info.FamilyNameChs &&
//Regular == info.Regular &&
Bold == info.Bold &&
Italic == info.Italic &&
Weight == info.Weight &&
MaybeHasTrueBoldOrItalic == info.MaybeHasTrueBoldOrItalic &&
//MaybeHasTrueBoldOrItalic == info.MaybeHasTrueBoldOrItalic &&
FileName == info.FileName &&
Index == info.Index &&
MaxpNumGlyphs == info.MaxpNumGlyphs;
Expand All @@ -34,10 +36,11 @@ public override int GetHashCode()
HashCode hash = new HashCode();
hash.Add(FamilyName);
hash.Add(FamilyNameChs);
//hash.Add(Regular);
hash.Add(Bold);
hash.Add(Italic);
hash.Add(Weight);
hash.Add(MaybeHasTrueBoldOrItalic);
//hash.Add(MaybeHasTrueBoldOrItalic);
hash.Add(FileName);
hash.Add(Index);
hash.Add(MaxpNumGlyphs);
Expand All @@ -58,98 +61,54 @@ public class FontParse(string fontFile)
public uint GetNumFonts() => FontData.GetNumFonts();
public OTFont GetFont(uint index) => FontData.GetFont(index)!;

public FontInfo GetFontInfo(uint index, HashSet<string>? trueRecord = null)
public FontInfo GetFontInfo(uint index)
{
var font = GetFont(index);
var infoFileBased = GetFontInfo(font);

var familyName = infoFileBased["family_name"];
var weight = int.Parse(infoFileBased["weight"]);

if (!infoFileBased.TryGetValue("family_name_loc", out var familyNameLoc))
{
familyNameLoc = familyName;
}

var infoAssLike = new FontInfo()
{
FamilyName = familyName,
FamilyNameChs = familyNameLoc,
Bold = false,
Italic = false,
Weight = weight,
MaybeHasTrueBoldOrItalic = false,
FileName = FontFile,
Index = index,
MaxpNumGlyphs = font.GetMaxpNumGlyphs(),
};

if (infoFileBased["subfamily_name"].Contains("Bold"))
{
// 600 DB maybe regular+bold
if (weight == 700 || weight == 600)
{
// maybe only sign style (such as morisawa), normal is DB/B/ED, hanyi use 75J/F/W/S
// UD Digi Kyokasho N-B maybe correct regular+bold
string[] boldIndicators = [" B", " DB", " EB", "75W", "75S", "75J", "75F"];
// but some morisawa fonts is weird, such as A-OTF Jun Pro 501, will exclude all
string[] excludedPrefixes = ["A-OTF", "A P-OTF", "G-OTF"];

if (!(boldIndicators.Any(familyName.EndsWith) || excludedPrefixes.Any(familyName.StartsWith)))
{
infoAssLike.Bold = true;
trueRecord?.Add(familyName);
}
}
}

if (infoFileBased["subfamily_name"].Contains("Italic"))
{
infoAssLike.Italic = true;
trueRecord?.Add(familyName);
}

return infoAssLike;
}
public FontInfo GetFontInfo(uint index) => GetFontInfo(index, null);

public static Dictionary<string, string> GetFontInfo(OTFont font)
{
var nameTable = (Table_name)font.GetTable("name")!;
//var fullName = nameTable.GetString
var os2Table = (Table_OS2)font.GetTable("OS/2")!;
var fsSel = os2Table.fsSelection;

var ids = new Dictionary<string, GetStringParams>
{
{ "postscript_name", new GetStringParams { EncID = 0xffff, LangID = (ushort)LanguageIDWindows.en_US, NameID = (ushort)NameID.postScriptName } },
{ "full_name", new GetStringParams { EncID = 0xffff, LangID = (ushort)LanguageIDWindows.en_US, NameID = (ushort)NameID.fullName } },
//{ "full_name", new GetStringParams { EncID = 0xffff, LangID = (ushort)LanguageIDWindows.en_US, NameID = (ushort)NameID.fullName } },
{ "family_name", new GetStringParams { EncID = 0xffff, LangID = (ushort)LanguageIDWindows.en_US, NameID = (ushort)NameID.familyName } },
{ "family_name_loc", new GetStringParams { EncID = 0xffff, LangID = (ushort)LanguageIDWindows.zh_Hans_CN, NameID = (ushort)NameID.familyName } },
{ "subfamily_name", new GetStringParams { EncID = 0xffff, LangID = (ushort)LanguageIDWindows.en_US, NameID = (ushort)NameID.subfamilyName } },
//{ "subfamily_name", new GetStringParams { EncID = 0xffff, LangID = (ushort)LanguageIDWindows.en_US, NameID = (ushort)NameID.subfamilyName } },
};

var result = GetBuffers(nameTable, ids);

var stringDict = new Dictionary<string, string>();
var nameDict = new Dictionary<string, string>();
foreach (var kv in result)
{
if (kv.Value.buf != null)
{
var s = DecodeString(kv.Value.curPlatID, kv.Value.curEncID, kv.Value.curLangID, kv.Value.buf);
stringDict.Add(kv.Key, s!);
nameDict.Add(kv.Key, s!);
}
}

if (stringDict.Count > 0)
var familyName = nameDict["family_name"];
if (!nameDict.TryGetValue("family_name_loc", out var familyNameLoc)){ familyNameLoc = familyName; }

return new FontInfo()
{
var os2Table = (Table_OS2)font.GetTable("OS/2")!;
stringDict.Add("weight", os2Table.usWeightClass.ToString());
}

return stringDict;
FamilyName = familyName,
FamilyNameChs = familyNameLoc,
//Regular = ((fsSel & 0b_0100_0000) >> 6) == 1, // bit 6
Bold = ((fsSel & 0b_0010_0000) >> 5) == 1, // bit 5
Italic = (fsSel & 0b_1) == 1, // bit 0
Weight = os2Table.usWeightClass,
//MaybeHasTrueBoldOrItalic = false,
FileName = FontFile,
Index = index,
MaxpNumGlyphs = font.GetMaxpNumGlyphs(),
};
}



private struct GetStringParams
{
//public ushort PlatID;
Expand Down
74 changes: 17 additions & 57 deletions AssFontSubset.Core/src/SubsetByPyFT.cs
Original file line number Diff line number Diff line change
Expand Up @@ -48,18 +48,6 @@ await Task.Run(() =>
});
}

static void GetFontInfo(string fontFile)
{
var fp = new FontParse(fontFile);
if (!fp.Open()) { throw new FileNotFoundException(); };

var fontInfos = new Dictionary<string, string>[fp.GetNumFonts()];
for (uint i = 0; i < fontInfos.Length; i++)
{
fontInfos[i] = FontParse.GetFontInfo(fp.GetFont(i)!);
}
}

List<FontInfo> GetFontInfoFromFiles(string dir)
{
string[] supportFonts = [".ttf", ".otf", ".ttc", "otc"];
Expand All @@ -79,45 +67,13 @@ List<FontInfo> GetFontInfoFromFiles(string dir)
if (!fp.Open()) { throw new FormatException(); };
for (uint i = 0; i < fp.GetNumFonts(); i++)
{
fontInfos.Add(fp.GetFontInfo(i, HasTrueBoldOrItalicRecord));
fontInfos.Add(fp.GetFontInfo(i));
}
}
}
_stopwatch.Stop();
var pass1 = _stopwatch.ElapsedMilliseconds;
_logger?.ZLogDebug($"初次扫描和解析完成,用时 {pass1} ms");
//_stopwatch.Reset();
_logger?.ZLogDebug($"开始分析记录可能有多种变体的 fontfamily");
_stopwatch.Restart();
for (var i = 0; i < fontInfos.Count; i++)
{
var info = fontInfos[i];
if (!info.Bold && !info.Italic)
{
if (HasTrueBoldOrItalicRecord.Contains(info.FamilyName))
{
info.MaybeHasTrueBoldOrItalic = true;
fontInfos[i] = info;
_logger?.ZLogDebug($"{info.FileName} 中的 {info.FamilyName} 检测到其他变体");
}
else
{
string[] prefix = ["Arial", "Avenir Next", "Microsoft YaHei", "Source Han", "Noto", "Yu Gothic"];
if ((info.Weight == 500 && info.FamilyName.StartsWith("Avenir Next"))
|| (info.Weight == 400 && (prefix.Any(info.FamilyName.StartsWith) || (info.FamilyName.StartsWith("FZ") && info.FamilyName.EndsWith("JF")) || (info.MaxpNumGlyphs < 6000 && (info.FamilyName == info.FamilyNameChs)))))
{
info.MaybeHasTrueBoldOrItalic = true;
fontInfos[i] = info;
_logger?.ZLogDebug($"{info.FileName} 中的 {info.FamilyName} 未在现有字体中检测到其他变体");
}
}
}
}
_stopwatch.Stop();
_logger?.ZLogDebug($"变体分析完成,用时 {_stopwatch.ElapsedMilliseconds} ms");
_logger?.ZLogInformation($"字体文件扫描完成,用时 {pass1 + _stopwatch.ElapsedMilliseconds} ms");
_logger?.ZLogDebug($"字体文件扫描完成,用时 {_stopwatch.ElapsedMilliseconds} ms");
_stopwatch.Reset();

return fontInfos;
}

Expand Down Expand Up @@ -168,21 +124,25 @@ Dictionary<string, List<SubsetFont>> GetSubsetFonts(List<FontInfo> fontInfos, Di
_logger?.ZLogDebug($"开始对字体文件信息与 ass 定义的字体进行匹配");
fontMap = [];
List<AssFontInfo> matchedAssFontInfos = [];
foreach (FontInfo fontInfo in fontInfos)

var fiGroups = fontInfos.GroupBy(fontInfo => fontInfo.FamilyName);
foreach (var fig in fiGroups)
{
foreach (var assFont in assFonts)
foreach (var afi in assFonts.Keys)
{
if (!matchedAssFontInfos.Contains(assFont.Key) && AssFont.IsMatch(assFont.Key, fontInfo))
{
if (!fontMap.TryGetValue(fontInfo, out var _))
{
fontMap.Add(fontInfo, []);
}
fontMap[fontInfo].Add(assFont.Key);
if (matchedAssFontInfos.Contains(afi)) { continue; }
var _fontInfo = AssFont.GetMatchedFontInfo(afi, fig, _logger);
if (_fontInfo == null) { continue; }
var fontInfo = (FontInfo) _fontInfo;

matchedAssFontInfos.Add(assFont.Key);
_logger?.ZLogDebug($"{assFont.Key.ToString()} 匹配到了 {fontInfo.FileName} 的索引 {fontInfo.Index}");
if (!fontMap.TryGetValue(fontInfo, out var _))
{
fontMap.Add(fontInfo, []);
}
fontMap[fontInfo].Add(afi);

matchedAssFontInfos.Add(afi);
_logger?.ZLogDebug($"{afi.ToString()} 匹配到了 {fontInfo.FileName} 的索引 {fontInfo.Index}");
}
}
_logger?.ZLogDebug($"匹配完成");
Expand Down
Loading

0 comments on commit 84f13b4

Please sign in to comment.