From 78c1ed5546e1784ece92bc7a1cb4d477da4b48df Mon Sep 17 00:00:00 2001 From: Daniel Swanson Date: Mon, 30 May 2022 14:01:36 -0500 Subject: [PATCH] Make sure to put the right things in the alphabet of the binary FST. Fixes bug introduced in 6c2486ab55ee3bb2b5ba1595c47681aa1d5e5bfe --- configure.ac | 2 +- lttoolbox/att_compiler.cc | 2 +- tests/data/alphabet.att | 9 +++++++++ tests/lt_print/__init__.py | 14 ++++++++++++++ tests/printtest.py | 3 ++- 5 files changed, 27 insertions(+), 3 deletions(-) create mode 100644 tests/data/alphabet.att diff --git a/configure.ac b/configure.ac index 88a211b5..e024c3f2 100644 --- a/configure.ac +++ b/configure.ac @@ -2,7 +2,7 @@ AC_PREREQ(2.52) m4_define([PKG_VERSION_MAJOR], [3]) m4_define([PKG_VERSION_MINOR], [6]) -m4_define([PKG_VERSION_PATCH], [5]) +m4_define([PKG_VERSION_PATCH], [6]) AC_INIT([lttoolbox], [PKG_VERSION_MAJOR.PKG_VERSION_MINOR.PKG_VERSION_PATCH], [apertium-stuff@lists.sourceforge.net], [lttoolbox], [https://wiki.apertium.org/wiki/Lttoolbox]) diff --git a/lttoolbox/att_compiler.cc b/lttoolbox/att_compiler.cc index 98dca788..21ada517 100644 --- a/lttoolbox/att_compiler.cc +++ b/lttoolbox/att_compiler.cc @@ -88,7 +88,7 @@ AttCompiler::is_word_punct(UChar32 symbol) void AttCompiler::update_alphabet(UChar32 c) { - if (is_word_punct(c) || !(u_ispunct(c) && u_isspace(c))) { + if (is_word_punct(c) || !(u_ispunct(c) || u_isspace(c))) { letters.insert(c); if(u_islower(c)) { letters.insert(u_toupper(c)); diff --git a/tests/data/alphabet.att b/tests/data/alphabet.att new file mode 100644 index 00000000..7e28be96 --- /dev/null +++ b/tests/data/alphabet.att @@ -0,0 +1,9 @@ +0 1 a a +1 2 b b +2 3 c c +0 3 . . +4 3 +0 4 c c +4 3 @_SPACE_@ @_SPACE_@ +0 3 ? ? +3 diff --git a/tests/lt_print/__init__.py b/tests/lt_print/__init__.py index a8ea0702..46b3fc5a 100644 --- a/tests/lt_print/__init__.py +++ b/tests/lt_print/__init__.py @@ -40,3 +40,17 @@ class SectionsFst(unittest.TestCase, PrintTest): 1\t2\tε\t\t0.000000\t 2\t0.000000 """ + + +class Alphabet(unittest.TestCase, PrintTest): + printdix = "data/alphabet.att" + printdir = "lr" + printflags = ["-a"] + expectedOutput = """A +B +C +a +b +c + +""" diff --git a/tests/printtest.py b/tests/printtest.py index d3757fe7..e8b63b42 100644 --- a/tests/printtest.py +++ b/tests/printtest.py @@ -15,6 +15,7 @@ class PrintTest(BasicTest): printdir = "lr" expectedOutput = "" expectedRetCodeFail = False + printflags = [] def compileTest(self, tmpd): self.assertEqual(0, call([os.environ['LTTOOLBOX_PATH']+"/lt-comp", @@ -27,7 +28,7 @@ def runTest(self): tmpd = mkdtemp() try: self.compileTest(tmpd) - self.printresult = Popen([os.environ['LTTOOLBOX_PATH']+"/lt-print"] + [tmpd+"/compiled.bin"], + self.printresult = Popen([os.environ['LTTOOLBOX_PATH']+"/lt-print"] + self.printflags + [tmpd+"/compiled.bin"], stdout=PIPE, stderr=PIPE)