Skip to content

Commit

Permalink
OOP
Browse files (browse the repository at this point in the history)
  • Loading branch information
SkyEye-FAST committed Sep 8, 2024
1 parent 81c7aad commit 403dba2
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 21 deletions.
9 changes: 5 additions & 4 deletions converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,32 +248,33 @@ class ChineseConverter(BaseConverter):
中文转换器类。
"""

# NOTE(review): this capture is a rendered diff without +/- markers. The
# signature directly below appears to be the pre-commit one; the line after it
# (which adds auto_cut) appears to be its replacement.
def __init__(self, data: Ldata, rep: Ldata = rep_zh) -> None:
def __init__(self, data: Ldata, rep: Ldata = rep_zh, auto_cut: bool = True) -> None:
"""
Initialize the converter.
Args:
data (Ldata): The input language data.
rep (Ldata, optional): Replacement-format content. Defaults to rep_zh.
auto_cut (bool, optional): Whether to use automatic word segmentation. Defaults to True.
"""

super().__init__(data, rep)
self.data = data
self.rep = rep
# Stored on the instance so segment_str can read it instead of taking it per call.
self.auto_cut = auto_cut

# NOTE(review): this capture is a rendered diff without +/- markers. The first
# signature (with an auto_cut parameter) appears to be the pre-commit one; the
# second (text only) appears to be its replacement.
def segment_str(self, text: str, auto_cut: bool = True) -> List[str]:
def segment_str(self, text: str) -> List[str]:
"""
Segment the string into words or split it directly, depending on the setting.
Args:
text (str): The string to split.
auto_cut (bool, optional): Whether to use automatic word segmentation. Defaults to True.
Returns:
List[str]: The list of strings after splitting.
"""

# Both variants call jieba.lcut when segmentation is enabled and otherwise fall
# back to str.split() with no separator, which splits on runs of whitespace.
# The first line reads the auto_cut parameter; the second reads self.auto_cut.
return jieba.lcut(text) if auto_cut else text.split()
return jieba.lcut(text) if self.auto_cut else text.split()

def to_harmonic(self, text: str) -> str:
"""
Expand Down
28 changes: 11 additions & 17 deletions fix_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,57 +4,51 @@
from base import load_json
from converter import (
save_to_json,
convert,
to_pinyin,
to_mps2,
to_tongyong,
to_yale,
to_wadegiles,
to_romatzyh,
to_cyrillic,
to_xiaojing,
ChineseConverter
)

# Replacement table passed to the converter as its `rep` argument.
rep = {"!:(": "! :(", ",": ", ", "-!": "!"}

# Source data is obtained through load_json; where it is read from is decided
# by that helper, which is not visible here.
fixed_zh_source = load_json("fixed_zh_source")
# The third positional argument is auto_cut (see ChineseConverter.__init__ in
# the converter.py diff): False makes segment_str use text.split() rather than
# jieba.lcut.
conv = ChineseConverter(fixed_zh_source, rep, False)

# NOTE(review): this capture is a rendered diff without +/- markers. In every
# save_to_json call below, the convert(fixed_zh_source, ...) line appears to be
# the removed pre-commit argument and the conv.convert(...) line its
# replacement. Each call converts the source with one method and passes the
# result to save_to_json together with two strings (presumably an output name
# and a folder -- confirm against save_to_json).
save_to_json(
convert(fixed_zh_source, to_pinyin, auto_cut=False, rep=rep),
conv.convert(conv.to_pinyin),
"fixed_zh_py",
"data",
)
save_to_json(
convert(fixed_zh_source, to_mps2, auto_cut=False, rep=rep),
conv.convert(conv.to_mps2),
"fixed_zh_mps2",
"data",
)
save_to_json(
convert(fixed_zh_source, to_tongyong, auto_cut=False, rep=rep),
conv.convert(conv.to_tongyong),
"fixed_zh_ty",
"data",
)
save_to_json(
convert(fixed_zh_source, to_yale, auto_cut=False, rep=rep),
conv.convert(conv.to_yale),
"fixed_zh_yale",
"data",
)
save_to_json(
convert(fixed_zh_source, to_wadegiles, auto_cut=False, rep=rep),
conv.convert(conv.to_wadegiles),
"fixed_zh_wg",
"data",
)
save_to_json(
convert(fixed_zh_source, to_romatzyh, auto_cut=False, rep=rep),
conv.convert(conv.to_romatzyh),
"fixed_zh_gr",
"data",
)
save_to_json(
convert(fixed_zh_source, to_cyrillic, auto_cut=False, rep=rep),
conv.convert(conv.to_cyrillic),
"fixed_zh_cy",
"data",
)
save_to_json(
convert(fixed_zh_source, to_xiaojing, auto_cut=False, rep=rep),
conv.convert(conv.to_xiaojing),
"fixed_zh_xj",
"data",
)

0 comments on commit 403dba2

Please sign in to comment.