Skip to content

Commit

Permalink
Merge pull request #115 from meedan/CV2-5247-support-yake-keyword-extraction-for-chinese
Browse files Browse the repository at this point in the history

CV2-5247-support-yake-keyword-extraction-for-chinese
  • Loading branch information
ahmednasserswe authored Oct 22, 2024
2 parents be8027a + 6bb58c2 commit 5d6d98b
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 2 deletions.
2 changes: 1 addition & 1 deletion lib/model/yake_keywords.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def normalize_special_characters(self, text):
return text

def run_chinese_segmentation_with_jieba(self, text):
- return " ".join(list(jieba.cut_for_search(text)))
+ return " ".join(list(jieba.cut(text)))

def run_yake(self, text: str,
language: str,
Expand Down
2 changes: 1 addition & 1 deletion test/lib/model/test_yake_keywords.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def test_normalize_special_characters(self):

def test_run_chinese_segmentation_with_jieba(self):
test_text = '''哈里斯同意与特朗普再进行一次美大选辩论'''
- expected = "哈里 里斯 哈里斯 同意 与 特朗普 再 进行 一次 美 大选 辩论"
+ expected = "哈里斯 同意 与 特朗普 再 进行 一次 美 大选 辩论"
self.assertEqual(self.yake_model.run_chinese_segmentation_with_jieba(test_text), expected)

def test_run_yake_real_with_chinese(self):
Expand Down

0 comments on commit 5d6d98b

Please sign in to comment.