Skip to content

Commit

Permalink
Added support for Simplified vs Traditional Chinese
Browse files (browse the repository at this point in the history)
  • Loading branch information
mbanon committed Oct 21, 2024
1 parent 705914f commit 3b53567
Showing 1 changed file with 27 additions and 0 deletions.
27 changes: 27 additions & 0 deletions src/fastspell/fastspell.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -11,6 +11,7 @@
import argparse
import traceback
import logging
import hanzidentifier

try:
from . import __version__
Expand Down Expand Up @@ -200,6 +201,7 @@ def getlang(self, sent):
if prediction == "he" and self.lang == "iw": #trick for deprecated iw language code for hebrew
prediction = "iw"


# Always detect script if supported (will be printed only if requested)
script = ''
if prediction in self.script_tables:
Expand Down Expand Up @@ -284,6 +286,31 @@ def getlang(self, sent):

# If script detection not requested
# remove it from prediction

#Special case for Simplified vs Traditional Chinese

if refined_prediction == "zh":
if self.lang.lower() in [ "zh-hans", "zh_hans" ]:
self.script = True
if hanzidentifier.is_simplified(sent.strip()):
refined_prediction = "zh_Hans"
elif hanzidentifier.is_traditional(sent.strip()):
refined_prediction = "zh_Hant"
else:
refined_prediction = "zh"

elif self.lang.lower() in [ "zh-hant", "zh_hant" ]:
self.script = True
if hanzidentifier.is_traditional(sent.strip()):
refined_prediction = "zh_Hant"
elif hanzidentifier.is_simplified(sent.strip()):
refined_prediction = "zh_Hans"
else:
refined_prediction = "zh"

else:
refined_prediction = "zh"

if self.script:
return refined_prediction
else:
Expand Down

0 comments on commit 3b53567

Please sign in to comment.