Skip to content

Commit

Permalink
ci: build simplified moran
Browse files Browse the repository at this point in the history
  • Loading branch information
ksqsf committed Nov 30, 2023
1 parent 7813cab commit ce1638d
Show file tree
Hide file tree
Showing 4 changed files with 90 additions and 4 deletions.
12 changes: 12 additions & 0 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Build the simplified-Chinese Moran package on every push and publish the
# resulting archive as a workflow artifact.
on: [push]
jobs:
  simplified-moran:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      # Python packages installed ahead of the build script.
      - run: pip3 install opencc pypinyin
      - run: ./make_simp_dist.sh
      # v3 of the artifact actions has been retired by GitHub; v4 accepts the
      # same `name`/`path` inputs used here.
      - uses: actions/upload-artifact@v4
        with:
          name: "MoranSimplified"
          path: MoranSimplified*.7z
          # The default is only a warning; a build that produced no archive
          # should fail the job instead of passing silently.
          if-no-files-found: error
78 changes: 78 additions & 0 deletions make_simp_dist.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
#!/bin/bash
#
# Builds the simplified-Chinese distribution archive from the committed tree.
# Run from the repository root.

# Export HEAD into ./dist and work inside that copy. Every step is checked:
# if the export or the cd failed, the rest of the script would otherwise run
# against the real working tree.
git archive HEAD -o archive.tar || exit 1
mkdir -p dist || exit 1
if ! tar xf archive.tar -C dist; then
  rm -f archive.tar
  exit 1
fi
rm archive.tar
cd dist || exit 1

# Refresh single-character frequencies.
# schemagen.py writes the regenerated dictionary to a .bak file, which then
# replaces the original. The generator runs in a subshell so the working
# directory is unchanged afterwards, and the dictionary is only replaced when
# generation succeeded (a failed run leaves an empty or partial .bak).
echo 更新单字字频...
(
  cd tools || exit 1
  python3 schemagen.py \
    --pinyin-table=./data/pinyin_simp.txt \
    update-char-weight \
    --rime-dict=../moran.chars.dict.yaml \
    > ../moran.chars.dict.yaml.bak
) || {
  rm -f moran.chars.dict.yaml.bak
  echo "error: failed to regenerate moran.chars.dict.yaml" >&2
  exit 1
}
mv moran.chars.dict.yaml.bak moran.chars.dict.yaml


# Replace auxiliary codes
echo 替换辅助码...
# Dictionaries to be converted; brace expansion keeps the listed order.
compact_dicts=(moran.{essay,tencent,moe,thuocl,computer,hanyu,words}.dict.yaml)

#######################################
# Convert a dictionary file in place with opencc, using the
# opencc/moran_t2s.json configuration (path relative to the current
# directory).
# Arguments: $1 - path of the dictionary file to convert
# Returns:   0 on success; non-zero if the backup copy or the conversion
#            failed, in which case the original content is put back.
#######################################
simplifyDict() {
  local dict=$1
  local backup="${dict}.bak"
  local rc=0

  # opencc reads from the backup and writes over the original path.
  cp -- "$dict" "$backup" || return
  opencc -c opencc/moran_t2s.json -i "$backup" -o "$dict" || rc=$?
  if (( rc != 0 )); then
    # Do not leave a truncated or half-written dictionary behind.
    mv -f -- "$backup" "$dict"
    return "$rc"
  fi
  rm -- "$backup"
}

# Convert every dictionary listed in compact_dicts. A dictionary that cannot
# be converted aborts the build rather than being shipped as-is.
for dict in "${compact_dicts[@]}"; do
  simplifyDict "$dict" || {
    echo "error: failed to simplify ${dict}" >&2
    exit 1
  }
done

# Run update_compact_dicts.sh from inside tools/; the subshell keeps the
# working directory of the main script unchanged.
(cd tools/ && ./update_compact_dicts.sh)

# Record whether the kernel name reported by uname starts with "Darwin".
# NOTE(review): nothing else visible in this script reads `darwin`; sedi does
# its own platform check.
if [[ "$(uname)" == Darwin* ]]; then
  darwin=true
else
  darwin=false
fi

#######################################
# In-place sed wrapper that works with both GNU and BSD sed.
# Arguments: passed through to sed after the in-place option
# Returns:   the exit status of sed
#######################################
sedi () {
  local kernel
  local -a inplace_opt=(-i)

  kernel=$(uname -s)
  # On Darwin/BSD, -i takes a separate backup-suffix argument; an empty
  # string means "no backup file".
  if [[ "$kernel" == *[Dd]arwin* || "$kernel" == *BSD* ]]; then
    inplace_opt+=('')
  fi
  sed "${inplace_opt[@]}" "$@"
}

# Replace the code table: point both defaults files at moran_fixed_simp
# instead of moran_fixed.
echo 替換碼表...
sedi -e 's|\&dict moran_fixed|\&dict moran_fixed_simp|' \
  moran_fixed.defaults.yaml
sedi -e 's|fixed/dictionary: moran_fixed|fixed/dictionary: moran_fixed_simp|' \
  moran.defaults.yaml

# Swap in the simplified-Chinese grammar models.
echo 替换简体语法模型...
# Download both zh-hans models first and stop on failure: with -O, wget can
# leave an empty or partial file behind, and the zh-hant models are deleted
# right after this loop.
for gram in zh-hans-t-essay-bgc.gram zh-hans-t-essay-bgw.gram; do
  wget "https://github.com/lotem/rime-octagram-data/raw/hans/${gram}" -O "${gram}" || {
    rm -f -- "${gram}"
    echo "error: failed to download ${gram}" >&2
    exit 1
  }
done
rm zh-hant-t-essay-bg{c,w}.gram
# Point every *.defaults.yaml at the zh-hans models; both substitutions are
# applied in a single sed run per file.
for f in *.defaults.yaml; do
  # An unmatched glob stays literal; skip it instead of handing it to sed.
  [[ -e "$f" ]] || continue
  sedi -e 's/zh-hant-t-essay-bgw/zh-hans-t-essay-bgw/' \
    -e 's/zh-hant-t-essay-bgc/zh-hans-t-essay-bgc/' "$f"
done

# Leave dist/ and return to the directory the script started in.
cd .. || exit 1

# Package
echo 打包...
rm -rf dist/tools
rm -rf dist/.git
cp 简体版说明.txt 下载与安装说明.txt 更新纪要.txt dist

# The archive is named *.7z (and the CI workflow uploads MoranSimplified*.7z),
# so write a real 7z container; -tzip produced a ZIP file under a .7z name.
# Stop here if archiving fails: otherwise the rm below would make the script
# exit 0 with no archive produced.
7zz a -t7z -mx=9 -r "MoranSimplified-$(date +%Y%m%d).7z" dist || exit 1
rm -rf dist
3 changes: 0 additions & 3 deletions moran_sentence.schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,3 @@ __patch:
- moran.defaults.yaml:/patch?
- moran_patches.yaml:/fix_opencc_t2s?
- moran_sentence.custom.yaml:/patch?

grammar:
language: zh-hant-t-essay-bgw
1 change: 0 additions & 1 deletion tools/schemagen.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
from itertools import *
import zrmify
import flypyify
import pandas
import math
import opencc
from pypinyin import lazy_pinyin
Expand Down

0 comments on commit ce1638d

Please sign in to comment.