More type modernization
sveinbjornt committed Aug 22, 2024
1 parent b26b89e commit d8a3351
Showing 3 changed files with 23 additions and 24 deletions.
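In short, the diff replaces the typing-module aliases (typing.Tuple, typing.List) with the built-in generics tuple and list (PEP 585). A minimal before/after sketch, illustrative rather than lifted from the diff, and assuming the project now targets Python 3.9+ where built-in generics are valid at runtime:

    # Before: generic aliases imported from the typing module
    from typing import Optional, Tuple
    BeginTuple = Tuple[int, Optional[int]]

    # After: built-in generics (PEP 585); only Optional still comes from typing
    from typing import Optional
    BeginTuple = tuple[int, Optional[int]]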
23 changes: 11 additions & 12 deletions src/tokenizer/definitions.py
@@ -30,7 +30,6 @@
 
 from typing import (
     Mapping,
-    Tuple,
     Union,
     Callable,
     Sequence,
@@ -42,15 +41,15 @@
 import re
 
 
-BeginTuple = Tuple[int, Optional[int]]
-PunctuationTuple = Tuple[int, str]
-NumberTuple = Tuple[float, Optional[list[str]], Optional[list[str]]]
-DateTimeTuple = Tuple[int, int, int]
-MeasurementTuple = Tuple[str, float]
-TimeStampTuple = Tuple[int, int, int, int, int, int]
-AmountTuple = Tuple[float, str, Optional[list[str]], Optional[list[str]]]
-TelnoTuple = Tuple[str, str]
-CurrencyTuple = Tuple[str, Optional[list[str]], Optional[list[str]]]
+BeginTuple = tuple[int, Optional[int]]
+PunctuationTuple = tuple[int, str]
+NumberTuple = tuple[float, Optional[list[str]], Optional[list[str]]]
+DateTimeTuple = tuple[int, int, int]
+MeasurementTuple = tuple[str, float]
+TimeStampTuple = tuple[int, int, int, int, int, int]
+AmountTuple = tuple[float, str, Optional[list[str]], Optional[list[str]]]
+TelnoTuple = tuple[str, str]
+CurrencyTuple = tuple[str, Optional[list[str]], Optional[list[str]]]
 
 
 class BIN_Tuple(NamedTuple):
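Runtime behaviour is unchanged by the rename; a small hypothetical check (not part of the commit), again assuming Python 3.9+:

    # The alias is simply the built-in tuple, parameterized, so values
    # are constructed as plain tuples.
    from typing import Optional

    NumberTuple = tuple[float, Optional[list[str]], Optional[list[str]]]

    n: NumberTuple = (1984.0, None, None)
    assert isinstance(n, tuple)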
@@ -339,7 +338,7 @@ class PersonNameTuple(NamedTuple):
 # }
 
 # Time of day expressions spelled out
-CLOCK_NUMBERS: Mapping[str, Tuple[int, int, int]] = {
+CLOCK_NUMBERS: Mapping[str, tuple[int, int, int]] = {
     "eitt": (1, 0, 0),
     "tvö": (2, 0, 0),
     "þrjú": (3, 0, 0),
@@ -431,7 +430,7 @@ class PersonNameTuple(NamedTuple):
 SINGLECHAR_FRACTIONS = "↉⅒⅑⅛⅐⅙⅕¼⅓½⅖⅔⅜⅗¾⅘⅝⅚⅞"
 
 # Derived unit : (base SI unit, conversion factor/function)
-SI_UNITS: dict[str, Tuple[str, Union[float, Callable[[float], float]]]] = {
+SI_UNITS: dict[str, tuple[str, Union[float, Callable[[float], float]]]] = {
     # Distance
     "m": ("m", 1.0),
     "mm": ("m", 1.0e-3),
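The annotation spells out the value shape: each derived unit maps to its base SI unit plus either a numeric factor or a conversion function. A hedged sketch of how such a table could be consumed (illustrative only; to_base_unit is not a function in the repository):

    def to_base_unit(unit: str, value: float) -> tuple[str, float]:
        """Convert a value in a derived unit to its base SI unit (sketch)."""
        base, factor = SI_UNITS[unit]
        if callable(factor):
            # conversions that are not a simple multiplication
            return base, factor(value)
        return base, value * factor

    # With the entries shown above, to_base_unit("mm", 250.0) gives ("m", 0.25).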
6 changes: 3 additions & 3 deletions src/tokenizer/tokenizer.py
@@ -3104,7 +3104,7 @@ def mark_paragraphs(txt: str) -> str:
     return "[[" + "]][[".join(t for t in txt.split("\n") if t) + "]]"
 
 
-def paragraphs(tokens: Iterable[Tok]) -> Iterator[list[Tuple[int, list[Tok]]]]:
+def paragraphs(tokens: Iterable[Tok]) -> Iterator[list[tuple[int, list[Tok]]]]:
     """Generator yielding paragraphs from token iterable. Each paragraph is a list
     of sentence tuples. Sentence tuples consist of the index of the first token
     of the sentence (the TOK.S_BEGIN token) and a list of the tokens within the
@@ -3121,7 +3121,7 @@ def valid_sent(sent: Optional[list[Tok]]) -> bool:
 
     sent: list[Tok] = []  # Current sentence
     sent_begin = 0
-    current_p: list[Tuple[int, list[Tok]]] = []  # Current paragraph
+    current_p: list[tuple[int, list[Tok]]] = []  # Current paragraph
 
     for ix, t in enumerate(tokens):
         t0 = t[0]
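Going by the docstring, a hypothetical way to consume paragraphs(), assuming mark_paragraphs and paragraphs are exported at package level under the tests' `import tokenizer as t` convention:

    text = t.mark_paragraphs("Fyrsta málsgrein.\nÖnnur málsgrein.")
    for pg in t.paragraphs(t.tokenize(text)):
        for first_index, sentence_tokens in pg:
            # first_index is the position of the sentence's TOK.S_BEGIN token;
            # sentence_tokens holds the tokens of that sentence
            print(first_index, len(sentence_tokens))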
@@ -3271,7 +3271,7 @@ def detokenize(tokens: Iterable[Tok], normalize: bool = False) -> str:
 
 def calculate_indexes(
     tokens: Iterable[Tok], last_is_end: bool = False
-) -> Tuple[list[int], list[int]]:
+) -> tuple[list[int], list[int]]:
     """Calculate character and byte indexes for a token stream.
     The indexes are the start positions of each token in the original
     text that was tokenized.
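A hedged sketch of the two returned lists, based on the docstring (package-level access via `t` is an assumption):

    toks = list(t.tokenize("Árið 2024 var gott."))
    char_indexes, byte_indexes = t.calculate_indexes(toks)
    # Both lists give the start position of each token in the original text;
    # char_indexes counts Unicode characters while byte_indexes counts UTF-8
    # bytes, so the two drift apart after the two-byte character "Á".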
18 changes: 9 additions & 9 deletions test/test_tokenizer.py
@@ -31,18 +31,18 @@
 """
 
-from typing import Any, Iterable, Iterator, List, Tuple, Union, cast
+from typing import Any, Iterable, Iterator, Union, cast
 
 import tokenizer as t
 from tokenizer.definitions import BIN_Tuple, ValType
 
 TOK = t.TOK
 Tok = t.Tok
 
-TestCase = Union[Tuple[str, int], Tuple[str, int, ValType], Tuple[str, List[Tok]]]
+TestCase = Union[tuple[str, int], tuple[str, int, ValType], tuple[str, list[Tok]]]
 
 
-def strip_originals(tokens: List[Tok]) -> List[Tok]:
+def strip_originals(tokens: list[Tok]) -> list[Tok]:
     """Remove origin tracking info from a list of tokens.
     This is useful for simplifying tests where we don't care about tracking
     origins.
@@ -57,7 +57,7 @@ def strip_originals(tokens: List[Tok]) -> List[Tok]:
     return tokens
 
 
-def get_text_and_norm(orig: str) -> Tuple[str, str]:
+def get_text_and_norm(orig: str) -> tuple[str, str]:
     toklist = list(t.tokenize(orig))
     return t.text_from_tokens(toklist), t.normalized_text_from_tokens(toklist)
 
@@ -563,12 +563,12 @@ def test_single_tokens() -> None:
 def run_test(test_cases: Iterable[TestCase], **options: Any) -> None:
     for test_case in test_cases:
         if len(test_case) == 3:
-            txt, kind, val = cast(Tuple[str, int, ValType], test_case)
+            txt, kind, val = cast(tuple[str, int, ValType], test_case)
             c = [Tok(kind, txt, val)]
         elif isinstance(test_case[1], list):
-            txt, c = cast(Tuple[str, List[Tok]], test_case)
+            txt, c = cast(tuple[str, list[Tok]], test_case)
         else:
-            txt, kind = cast(Tuple[str, int], test_case)
+            txt, kind = cast(tuple[str, int], test_case)
             c = [Tok(kind, txt, None)]
         l = list(t.tokenize(txt, **options))
         assert len(l) == len(c) + 2, repr(l)
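For reference, the TestCase union admits three shapes; hypothetical examples follow (the exact val payloads are assumptions, not taken from the test file):

    cases: list[TestCase] = [
        ("halló", TOK.WORD),                   # (text, expected kind)
        ("07:30", TOK.TIME, (7, 30, 0)),       # (text, kind, expected val) -- val shape assumed
        ("þó", [Tok(TOK.WORD, "þó", None)]),   # (text, expected token list)
    ]
    # run_test(cases) would then tokenize each text and compare the result
    # against the stated expectation.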
@@ -593,8 +593,8 @@ def run_test(test_cases: Iterable[TestCase], **options: Any) -> None:
             if check.kind == TOK.WORD:
                 # Test set equivalence, since the order of word meanings
                 # is not deterministic
-                assert set(cast(List[BIN_Tuple], tok.val) or []) == set(
-                    cast(List[BIN_Tuple], check.val) or []
+                assert set(cast(list[BIN_Tuple], tok.val) or []) == set(
+                    cast(list[BIN_Tuple], check.val) or []
                 ), (repr(tok.val) + " != " + repr(check.val))
             else:
                 assert tok.val == check.val, (
