Patch notes (text before the first "diff --git" header is not part of the patch):
  * text/korean.py - korean_to_lazy_ipa: the split on '] ~ [' now runs inside
    the per-match lambda, so each romanised Hangul run keeps only the part
    before the first '] ~ [' instead of truncating the whole substituted string.
  * text/thai.py - adds the _latin_to_thai table (26 case-insensitive letter
    patterns) and latin_to_thai(), which applies them in order; relaxes the
    num_to_thai pattern so a single-digit number matches (the old pattern
    '\d+,?\d+' needed at least two digits).
  NOTE(review): line breaks, indentation and blank context lines were
  reconstructed from the hunk counts; confirm against the target tree
  before applying.

diff --git a/text/korean.py b/text/korean.py
index 4b6c3fb..e964dd7 100644
--- a/text/korean.py
+++ b/text/korean.py
@@ -199,7 +199,7 @@ def number_to_hangul(text):
 def korean_to_lazy_ipa(text):
     text = latin_to_hangul(text)
     text = number_to_hangul(text)
-    text=re.sub('[\uac00-\ud7af]+',lambda x:ko_pron.romanise(x.group(0),'ipa'),text).split('] ~ [')[0]
+    text=re.sub('[\uac00-\ud7af]+',lambda x:ko_pron.romanise(x.group(0),'ipa').split('] ~ [')[0],text)
     for regex, replacement in _ipa_to_lazy_ipa:
         text = re.sub(regex, replacement, text)
     return text
diff --git a/text/thai.py b/text/thai.py
index c65e5ff..998207c 100644
--- a/text/thai.py
+++ b/text/thai.py
@@ -4,6 +4,41 @@
 
 num = NumThai()
 
+# List of (Latin alphabet, Thai) pairs:
+_latin_to_thai = [(re.compile('%s' % x[0], re.IGNORECASE), x[1]) for x in [
+    ('a', 'เอ'),
+    ('b','บี'),
+    ('c','ซี'),
+    ('d','ดี'),
+    ('e','อี'),
+    ('f','เอฟ'),
+    ('g','จี'),
+    ('h','เอช'),
+    ('i','ไอ'),
+    ('j','เจ'),
+    ('k','เค'),
+    ('l','แอล'),
+    ('m','เอ็ม'),
+    ('n','เอ็น'),
+    ('o','โอ'),
+    ('p','พี'),
+    ('q','คิว'),
+    ('r','แอร์'),
+    ('s','เอส'),
+    ('t','ที'),
+    ('u','ยู'),
+    ('v','วี'),
+    ('w','ดับเบิลยู'),
+    ('x','เอ็กซ์'),
+    ('y','วาย'),
+    ('z','ซี')
+]]
+
 
 def num_to_thai(text):
-    return re.sub(r'(?:\d+,?\d+)+(?:\.\d+,?\d+)?', lambda x: ''.join(num.NumberToTextThai(float(x.group(0).replace(',', '')))), text)
+    return re.sub(r'(?:\d+(?:,?\d+)?)+(?:\.\d+(?:,?\d+)?)?', lambda x: ''.join(num.NumberToTextThai(float(x.group(0).replace(',', '')))), text)
+
+def latin_to_thai(text):
+    for regex, replacement in _latin_to_thai:
+        text = re.sub(regex, replacement, text)
+    return text