From 6f78611ec0163caebd16c8852fc280ab9a6726cb Mon Sep 17 00:00:00 2001
From: ksqsf
Date: Thu, 30 Nov 2023 21:54:25 +0800
Subject: [PATCH] ci: build simplified moran

---
Note: the blob ids on the `index` lines of the two new files are carried
over from the originally generated patch and do not describe the revised
file contents below.

 .github/workflows/main.yml |  13 +++++
 make_simp_dist.sh          | 107 ++++++++++++++++++++++++++++++++++++++
 moran_sentence.schema.yaml |   3 ---
 tools/schemagen.py         |   1 -
 4 files changed, 120 insertions(+), 4 deletions(-)
 create mode 100644 .github/workflows/main.yml
 create mode 100755 make_simp_dist.sh

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
new file mode 100644
index 0000000..16e85bf
--- /dev/null
+++ b/.github/workflows/main.yml
@@ -0,0 +1,13 @@
+on: [push]
+jobs:
+  simplified-moran:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - run: pip3 install opencc pypinyin regex
+      - run: sudo apt-get install -y opencc p7zip-full
+      - run: ./make_simp_dist.sh
+      - uses: actions/upload-artifact@v4
+        with:
+          name: "MoranSimplified"
+          path: MoranSimplified*.7z
diff --git a/make_simp_dist.sh b/make_simp_dist.sh
new file mode 100755
index 0000000..bc2e83e
--- /dev/null
+++ b/make_simp_dist.sh
@@ -0,0 +1,107 @@
+#!/bin/bash
+#
+# Build the simplified-Chinese distribution of Moran from the current HEAD.
+#
+# Exports HEAD into ./dist, converts the dictionaries and configuration to
+# their simplified variants, downloads the zh-hans grammar models and packs
+# the result into MoranSimplified-YYYYMMDD.7z in the current directory.
+
+# Abort on the first failing step so that a broken download or conversion
+# can never end up inside a published archive.
+set -euo pipefail
+
+# Debian/Ubuntu's p7zip-full installs the tool as `7z`/`7za`, while upstream
+# 7-Zip names its console binary `7zz`; use whichever one is available.
+sevenzip=
+for candidate in 7zz 7z 7za; do
+    if command -v "$candidate" >/dev/null 2>&1; then
+        sevenzip=$candidate
+        break
+    fi
+done
+if [ -z "$sevenzip" ]; then
+    echo "error: no 7-Zip executable (7zz, 7z or 7za) found in PATH" >&2
+    exit 1
+fi
+
+# Start from a clean export; leftovers of an aborted run must not be packed.
+rm -rf dist
+git archive HEAD -o archive.tar
+mkdir -p dist
+tar xf archive.tar -C dist
+rm archive.tar
+cd dist
+
+# Update the single-character frequencies.
+echo 更新单字字频...
+cd tools
+python3 schemagen.py --pinyin-table=./data/pinyin_simp.txt update-char-weight --rime-dict=../moran.chars.dict.yaml > ../moran.chars.dict.yaml.bak
+mv ../moran.chars.dict.yaml{.bak,}
+cd ..
+
+
+# Replace the auxiliary codes.
+echo 替换辅助码...
+compact_dicts=(
+    "moran.essay.dict.yaml"
+    "moran.tencent.dict.yaml"
+    "moran.moe.dict.yaml"
+    "moran.thuocl.dict.yaml"
+    "moran.computer.dict.yaml"
+    "moran.hanyu.dict.yaml"
+    "moran.words.dict.yaml"
+)
+
+# simplifyDict FILE: convert FILE in place with the moran_t2s OpenCC config.
+simplifyDict() {
+    cp "$1" "$1.bak"
+    opencc -c opencc/moran_t2s.json -i "$1.bak" -o "$1"
+    rm "$1.bak"
+}
+
+for dict in "${compact_dicts[@]}"; do
+    simplifyDict "$dict"
+done
+
+(cd tools/ && ./update_compact_dicts.sh)
+
+# sedi ARGS...: in-place sed. BSD/macOS sed needs an explicit (empty)
+# backup suffix after -i; GNU sed would read that word as the script.
+sedi () {
+    case $(uname -s) in
+        *[Dd]arwin* | *BSD* ) sed -i '' "$@";;
+        *) sed -i "$@";;
+    esac
+}
+
+# Switch to the simplified fixed-code table.
+echo 替換碼表...
+sedi 's|\&dict moran_fixed|\&dict moran_fixed_simp|' moran_fixed.defaults.yaml
+sedi 's|fixed/dictionary: moran_fixed|fixed/dictionary: moran_fixed_simp|' moran.defaults.yaml
+
+# Switch to the simplified-Chinese grammar models.
+echo 替换简体语法模型...
+wget 'https://github.com/lotem/rime-octagram-data/raw/hans/zh-hans-t-essay-bgc.gram' -O zh-hans-t-essay-bgc.gram
+wget 'https://github.com/lotem/rime-octagram-data/raw/hans/zh-hans-t-essay-bgw.gram' -O zh-hans-t-essay-bgw.gram
+# -f: the traditional models may be absent from the export.
+rm -f zh-hant-t-essay-bg{c,w}.gram
+for f in *.defaults.yaml
+do
+    sedi 's/zh-hant-t-essay-bgw/zh-hans-t-essay-bgw/' "$f"
+    sedi 's/zh-hant-t-essay-bgc/zh-hans-t-essay-bgc/' "$f"
+done
+
+cd ..
+
+# Package.
+echo 打包...
+rm -rf dist/tools
+rm -rf dist/.git
+cp 简体版说明.txt 下载与安装说明.txt 更新纪要.txt dist
+
+# Use the 7z container so the contents match the .7z file name; remove a
+# same-day archive first because `a` would otherwise update it in place.
+archive="MoranSimplified-$(date +%Y%m%d).7z"
+rm -f "$archive"
+"$sevenzip" a -t7z -mx=9 -r "$archive" dist
+rm -rf dist
diff --git a/moran_sentence.schema.yaml b/moran_sentence.schema.yaml
index b43ac7b..221047b 100644
--- a/moran_sentence.schema.yaml
+++ b/moran_sentence.schema.yaml
@@ -189,6 +189,3 @@ __patch:
   - moran.defaults.yaml:/patch?
   - moran_patches.yaml:/fix_opencc_t2s?
   - moran_sentence.custom.yaml:/patch?
-
-grammar:
-  language: zh-hant-t-essay-bgw
diff --git a/tools/schemagen.py b/tools/schemagen.py
index 4695af7..ff7f6f4 100644
--- a/tools/schemagen.py
+++ b/tools/schemagen.py
@@ -16,7 +16,6 @@ from itertools import *
 
 import zrmify
 import flypyify
-import pandas
 import math
 import opencc
 from pypinyin import lazy_pinyin