Skip to content

Commit

Permalink
fix missing zero-width space
Browse files (browse the repository at this point in the history)
  • Loading branch information
peterolson committed Nov 30, 2019
1 parent f0c9852 commit bbb5557
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 5 deletions.
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "hanzi-tools",
"version": "1.2.11",
"version": "1.2.12",
"author": "Peter Olson <[email protected]>",
"description": "Converts from Chinese characters to pinyin, between simplified and traditional, and does word segmentation.",
"license": "Unlicense",
Expand Down
2 changes: 1 addition & 1 deletion spec/spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ describe("Pinyinify", () => {
expect("人要是行干一行行一行。").becomes("rén yào​shi xíng gàn yī háng xíng yī háng.");
expect("几行代码?两行代码。行还是不行?行!").becomes("jǐ háng dài​mǎ? liǎng háng dài​mǎ. xíng hái​shi bù​xíng? xíng!");

expect("结果").becomes("jiéguǒ");
expect("结果").becomes("jié​guǒ");
expect("很美的运动。").becomes("hěn měi de yùn​dòng.");
expect("雪地上有好东西。").becomes("xuě​dì​ shàng yǒu hǎo dōng​xi.");
});
Expand Down
14 changes: 11 additions & 3 deletions src/pinyinDict.js
Original file line number Diff line number Diff line change
Expand Up @@ -2284,7 +2284,7 @@ let dict = [
"不明飞行物|||不明飛行物|bù​míng​fēi​xíng​wù",
"不易|||不易|bù​yì",
"不易之论|||不易之論|bù​yì​zhī​lùn",
"不是|||不是|bùshì",
"不是|||不是|bù​shì",
"不是一家人不进一家门|||不是一家人不進一家門|bù​shì​yī​jiā​rén​bù​jìn​yī​jiā​mén",
"不是冤家不聚头|||不是冤家不聚頭|bù​shì​yuān​jiā​bù​jù​tóu",
"不是吃素的|||不是吃素的|bù​shì​chī​sù​de",
Expand Down Expand Up @@ -100335,7 +100335,7 @@ let dict = [
"重音|||重音|zhòng​yīn",
"重音节|||重音節|zhòng​yīn​jié",
"重头戏|||重頭戲|zhòng​tóu​xì",
"重点|||重點|zhòngdiǎn",
"重点|||重點|zhòng​diǎn",
"野人|||野人|yě​rén",
"野兔|||野兔|yě​tù",
"野叟曝言|||野叟曝言|Yě​sǒu​Pù​yán",
Expand Down Expand Up @@ -113177,8 +113177,16 @@ for (let entry of dict) {
let [simp, tail] = entry.split("|||");

let trads = tail.split("||");
let parts = tail.split("|");
let pinyin = parts[parts.length - 1];
for (let pair of trads) {
let [trad, pinyin] = pair.split("|");
let [trad] = pair.split("|");
if (!pinyin) console.log(trad, pinyin);
if (trad.length > 1 && !pinyin.includes("​")) {
if (trad === "PO" || trad === "TA") continue;
if (pinyin.length <= 3) continue;
console.log(trad, pinyin);
}

if (!pinyinDict[simp]) pinyinDict[simp] = pinyin;
if (!pinyinDict[trad]) pinyinDict[trad] = pinyin;
Expand Down

0 comments on commit bbb5557

Please sign in to comment.