diff --git a/tools/schemagen.py b/tools/schemagen.py
index b8caebc..4695af7 100644
--- a/tools/schemagen.py
+++ b/tools/schemagen.py
@@ -129,7 +129,8 @@ def handle_gen_chars():
 opencc_for_pinyin = None
 def word_to_pinyin(word):
     global opencc_for_pinyin
-    if args and args.opencc_for_pinyin:
+    # The attribute is absent for subcommands that do not define it, and may be unset (None) otherwise.
+    if args and getattr(args, 'opencc_for_pinyin', None):
         if not opencc_for_pinyin:
             opencc_for_pinyin = opencc.OpenCC(args.opencc_for_pinyin)
         maybe_pinyin = ' '.join(lazy_pinyin(opencc_for_pinyin.convert(word)))
@@ -368,6 +369,45 @@ def handle_update_compact_dict():
             print(f'{word}\t{newcode}')
 
 
+def handle_update_char_weight():
+    """Refresh the weight column of a rime chars dictionary from pinyin_table.
+
+    Reads ``args.rime_dict`` and prints the updated dictionary to stdout.
+    Lines that are not ``char<TAB>xx;yy<TAB>weight...`` entries are printed
+    stripped but otherwise unchanged.  For an entry, the weight becomes the
+    ``pinyin_table`` value of the first reading of the character whose
+    ``to_double_pinyin`` form equals the code part before ``;``.  When no
+    reading matches, the original weight is kept, unless converting a
+    reading raised, in which case the last such reading's value is used.
+    """
+    initialize_pinyin_table()
+    with open(args.rime_dict, encoding='utf-8') as f:
+        for line in f:
+            line = line.strip()
+            m = regex.match(r'^([^\t])\t([a-z][a-z];[a-z][a-z])\t(\d+)(.*)$', line)
+            if not m:
+                print(line)
+            else:
+                char = m[1]
+                code = m[2]
+                weight = int(m[3])
+                comment = m[4]
+
+                sp = code.split(';')[0]
+                wt = pinyin_table.get(char, {})
+                for (py, w) in wt.items():
+                    try:
+                        if to_double_pinyin(py) == sp:
+                            weight = w
+                            break
+                    except Exception:
+                        # Conversion of this reading raised: take its weight as a
+                        # fallback, but keep scanning for an exact code match.
+                        weight = w
+
+                print(f'{char}\t{code}\t{weight}{comment}')
+
+
 ###############
 ### 程序入口 ###
 ###############
@@ -404,6 +444,9 @@ def handle_update_compact_dict():
 update_compact_dict = subparsers.add_parser('update-compact-dict', help='更新 *compact* 詞庫中的輔助碼爲新輔助碼')
 update_compact_dict.add_argument('--rime-dict', help='輸入rime格式詞庫(無frontmatter)', required=True)
 
+update_char_weight = subparsers.add_parser('update-char-weight', help='更新 chars 詞庫中的詞頻')
+update_char_weight.add_argument('--rime-dict', help='輸入rime格式詞庫', required=True)
+
 if __name__ == '__main__':
     args = parser.parse_args()
     if args.command == 'gen-chars':
@@ -414,3 +457,5 @@ def handle_update_compact_dict():
         handle_gen_fixed()
     elif args.command == 'update-compact-dict':
         handle_update_compact_dict()
+    elif args.command == 'update-char-weight':
+        handle_update_char_weight()
diff --git a/tools/zrmify.py b/tools/zrmify.py
index 4997e50..0cd3b3f 100644
--- a/tools/zrmify.py
+++ b/tools/zrmify.py
@@ -63,7 +63,7 @@ def 韻母轉換(pinyin: str) -> str:
         'un': 'p',
         'ang': 'h', 'eng': 'g', 'ing': 'y', 'ong': 's',
         'ia': 'w', 'iao': 'c', 'ian': 'm', 'iang': 'd', 'iong': 's',
-        'ua': 'w', 'uo': 'o', 'uai': 'y', 'uan': 'r', 'uang': 'd'
+        'ua': 'w', 'uo': 'o', 'uai': 'y', 'uan': 'r', 'van': 'r', 'uang': 'd'
     }
     if pinyin in 映射表:
         return 映射表[pinyin]