Skip to content

Commit

Permalink
fix feedback issues
Browse files Browse the repository at this point in the history
  • Loading branch information
peterolson committed Feb 2, 2020
1 parent 8e8355d commit a37c0d1
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 3 deletions.
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "hanzi-tools",
"version": "1.2.19",
"version": "1.2.20",
"author": "Peter Olson <[email protected]>",
"description": "Converts from Chinese characters to pinyin, between simplified and traditional, and does word segmentation.",
"license": "Unlicense",
Expand Down
10 changes: 10 additions & 0 deletions spec/spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ describe("Pinyinify", () => {
expect("你得在这儿休息。").becomes("nǐ děi zài zhè​r xiū​xi.");
expect("我现在富得能买我想要的任何东西。").becomes("wǒ xiàn​zài fù de néng mǎi wǒ xiǎng​yào de rèn​hé dōng​xi.");
expect("我们就得这么做。").becomes("wǒ​men jiù děi zhè​me zuò.");
expect("你现在得把门打开。正在动手。").becomes("nǐ xiàn​zài děi bǎ​mén dǎ​kāi. zhèng​zài dòng​shǒu.");
// 还
expect("我有钱了就还你。").becomes("wǒ yǒu​qián le jiù huán nǐ.");
expect("我还给你。").becomes("wǒ huán​gěi nǐ.");
Expand All @@ -57,12 +58,20 @@ describe("Pinyinify", () => {
expect("他是一只鸟。").becomes("tā shì yī zhī niǎo.");
// 长
expect("她长着一张圆脸和一双明亮的眼睛。").becomes("tā zhǎng zhe yī zhāng yuán liǎn hé yī shuāng míng​liàng de yǎn​jing.");
expect("不是他干的,警长。").becomes("bù​shì tā gàn de, jǐng zhǎng.");
// 系
expect("这女孩要我给她把衣服从后面系上。").becomes("zhè nǚ​hái yào wǒ gěi tā bǎ yī​fu cóng hòu​miàn jì shàng.");
expect("一个人的后面有一个系着领带的男人走在道路上").becomes("yī gè rén de hòu​miàn yǒu yī gè jì zhe lǐng​dài de nán​rén zǒu zài dào​lù shàng");

// 地
expect("我说过我不会卖那块地的!").becomes("wǒ shuō guò wǒ bù​huì mài nà kuài dì de!");
expect("一定要了解这些需求并明确地定义它们。").becomes("yī​dìng yào liǎo​jiě zhè​xiē xū​qiú bìng míng​què de dìng​yì tā​men.");

// 重
expect("我重入了房间并且去了工作。").becomes("wǒ chóng rù le fáng​jiān bìng​qiě qù le gōng​zuò.");

// 弹
expect("一个双手弹着吉他的男人在舞台上表演").becomes("yī gè shuāng​shǒu tán zhe jí​tā de nán​rén zài wǔ​tái shàng biǎo​yǎn");

expect("行了吗?").becomes("xíng le ma?");
expect("人要是行干一行行一行。").becomes("rén yào​shi xíng gàn yī háng xíng yī háng.");
Expand Down Expand Up @@ -141,6 +150,7 @@ describe("Traditionalize", () => {
expect(traditionalize("为为为是是哪里哪里")).toEqual("為為為是是哪裡哪裡");
expect(traditionalize("又在梦里见到你")).toEqual("又在夢裡見到你");
expect(traditionalize("我只有三只狗。")).toEqual("我只有三隻狗。");
expect(traditionalize("房间里有一个男人在给一个老人理头发")).toEqual("房間裡有一個男人在給一個老人理頭髮");
});
it("chooses right ambiguous character", () => {
expect(traditionalize(`你对那个女的干了什么?`)).toEqual("你對那個女的幹了什麼?");
Expand Down
15 changes: 14 additions & 1 deletion src/pinyinify.js
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,12 @@ function decideAmbiguousChar(char, cuts, cutIndex) {
return "jué";
case "长":
case "長":
prevTags = tag(previousText.join(""));
nextTags = tag(afterText.join(""));
if (nextTags && nextTags.length && nextTags[0].tag === "uz")
return "zhǎng";
let prevTag = prevTags && prevTags.length && prevTags[prevTags.length - 1].tag;
if (prevTag === "n") return "zhǎng";
// zhǎng has higher frequency due to compound words,
// but cháng is more common as an individual character.
return "cháng";
Expand All @@ -90,7 +93,7 @@ function decideAmbiguousChar(char, cuts, cutIndex) {
return "děi";
}
}
if (afterTag[0] === "t" || afterTag[0] === "v" || afterTag[0] === "p" || afterTag[0] === "l") {
if (afterTag[0] === "t" || afterTag[0] === "v" || afterTag[0] === "p" || afterTag[0] === "l" || afterTag[0] === "n") {
return "děi";
}
}
Expand Down Expand Up @@ -132,12 +135,22 @@ function decideAmbiguousChar(char, cuts, cutIndex) {
if (prevTags.length && prevTags[prevTags.length - 1].tag === "r") {
return "dì";
}
break;
case "弹":
nextTags = tag(afterText.join(""));
if (afterText.includes("吉他")) return "tán";
if (nextTags && nextTags.length) {
let afterTag = nextTags[0].tag;
if (afterTag[0] === "n") return "tán";
}
break;
case "重":
nextTags = tag(afterText.join(""));
if (nextTags && nextTags.length) {
let afterTag = nextTags[0].tag;
if (afterTag[0] === "v") return "chóng";
}
break;
}
}

Expand Down
6 changes: 5 additions & 1 deletion src/simplify.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ function simplify(text) {
}).join("");
}

const specialChars = new Set(["只", "喂", "面"]);
const specialChars = new Set(["只", "喂", "面", "发"]);

function traditionalize(text) {
return segment(text).map((x, i, segments) => {
Expand Down Expand Up @@ -38,6 +38,10 @@ function traditionalizeSpecialChar(char, beforeText, afterText) {
prev = nodejieba.tag(beforeText.join("")).slice(-1)[0];
if (prev && prev.tag === "v") return "麵";
return "面";
case "发":
let last2 = beforeText.join("").slice(-2);
if (last2.includes("理") || last2.includes("头")) return "髮";
return "發";
}
return char in s2tDict ? s2tDict[char] : char;
}
Expand Down

0 comments on commit a37c0d1

Please sign in to comment.