From 975bb4f6d7570d5b23e801f1662cbb0b1c0bda25 Mon Sep 17 00:00:00 2001 From: Sveinbjorn Thordarson Date: Fri, 23 Aug 2024 18:46:53 +0000 Subject: [PATCH] GreynirPackage -> GreynirEngine --- README.rst | 5 ++--- src/tokenizer/Abbrev.conf | 6 +++--- src/tokenizer/abbrev.py | 2 +- src/tokenizer/tokenizer.py | 2 +- 4 files changed, 7 insertions(+), 8 deletions(-) diff --git a/README.rst b/README.rst index 50bb5ff..634578f 100644 --- a/README.rst +++ b/README.rst @@ -27,8 +27,7 @@ Tokenizer is an independent spinoff from the `Greynir project `_ uses Tokenizer on its input. -Note that Tokenizer is licensed under the *MIT* license -while GreynirEngine is licensed under *GPLv3*. +Tokenizer is licensed under the *MIT* license. Deep vs. shallow tokenization @@ -613,7 +612,7 @@ defined within the ``TOK`` class: | S_END | 11002 | End of sentence | | +---------------+---------+---------------------+---------------------------+ -(*) The token types marked with an asterisk are reserved for the Greynir package +(*) The token types marked with an asterisk are reserved for the GreynirEngine package and not currently returned by the tokenizer. To obtain a descriptive text for a token kind, use diff --git a/src/tokenizer/Abbrev.conf b/src/tokenizer/Abbrev.conf index ea33485..159e802 100644 --- a/src/tokenizer/Abbrev.conf +++ b/src/tokenizer/Abbrev.conf @@ -621,7 +621,7 @@ m.a.o. = "meðal annarra orða" ao frasi m.a.s. = "meira að segja" ao frasi m.fl.* = "með fleiru" ao frasi m.m.* = "með meiru" ao frasi -msk.* = "matskeið" kvk # Er í GreynirPackage/config/Phrases.conf sem msk +msk.* = "matskeið" kvk # Er í GreynirEngine/config/Phrases.conf sem msk matsk.* = "matskeið" kvk mtt. = "með tilliti til" fs # Leiðrétt í GreynirCorrect m.t.t. = "með tilliti til" fs @@ -686,7 +686,7 @@ pk.* = "pakki" kk portúg.* = "portúgalska" kvk portúg.* = "portúgalskur" lo pr. = "per" hk -próf.^ = "prófessor" kk # Þessi merking er notuð í bintokenizer.py í GreynirPackage +próf.^ = "prófessor" kk # Þessi merking er notuð í bintokenizer.py í GreynirEngine próf.^ = "prófastur" kk # en hefur ekki þýðingu í Tokenizer pt.* = "punktastærð" kvk pt.* = "punktar" kk @@ -797,7 +797,7 @@ tilv.* = "tilvitnun" kvk tilv.* = "tilvísun" kvk to.* = "töluorð" hk transl.* = "þýddur" lo erl -tsk.* = "teskeið" kvk # Er í GreynirPackage/config/Phrases.conf sem tsk +tsk.* = "teskeið" kvk # Er í GreynirEngine/config/Phrases.conf sem tsk tyrkn.* = "tyrkneska" kvk tyrkn.* = "tyrknesur" lo tékkn.* = "tékkneska" kvk diff --git a/src/tokenizer/abbrev.py b/src/tokenizer/abbrev.py index 606f10c..cd7e80b 100644 --- a/src/tokenizer/abbrev.py +++ b/src/tokenizer/abbrev.py @@ -134,7 +134,7 @@ def add(abbrev: str, meaning: str, gender: str, fl: Optional[str] = None) -> Non # This logic is not fully present in Tokenizer as information # about person names is needed to make it work. The full implementation, # using the NAME_FINISHERS set, is found in bintokenizer.py in - # GreynirPackage. + # GreynirEngine. name_finisher = True abbrev = abbrev[0:-1] if not abbrev.endswith("."): diff --git a/src/tokenizer/tokenizer.py b/src/tokenizer/tokenizer.py index 2e7be72..6b2f2d0 100644 --- a/src/tokenizer/tokenizer.py +++ b/src/tokenizer/tokenizer.py @@ -3054,7 +3054,7 @@ def tokenize_without_annotation( text_or_gen: Union[str, Iterable[str]], **options: Any ) -> Iterator[Tok]: """Tokenize without the last pass which can be done more thoroughly if BÍN - annotation is available, for instance in GreynirPackage.""" + annotation is available, for instance in GreynirEngine.""" return tokenize(text_or_gen, with_annotation=False, **options)