UI fixes

OpenPecha · Nov 13, 2024 · 5a068b8
1 parent aa3b147
commit 5a068b8
Show file tree

Hide file tree

Showing 5 changed files with 457 additions and 11 deletions.
diff --git a/Demo-OCR.ipynb b/Demo-OCR.ipynb
diff --git a/MonlamOCR/Config.py b/MonlamOCR/Config.py
@@ -4,7 +4,7 @@
 
 MODEL_DICT = {
     "Woodblock": "BDRC/Woodblock",
-    "UCHAN": "BDRC/BigUCHAN_v1",
+    "UCHAN": "BDRC/BigUchan",
     "DergeTenjur": "BDRC/DergeTenjur",
     "GoogleBooks_C": "BDRC/GoogleBooks_C_v1",
     "GoogleBooks_E": "BDRC/GoogleBooks_E_v1",

diff --git a/MonlamOCR/Inference.py b/MonlamOCR/Inference.py
@@ -50,7 +50,7 @@ def __init__(self, charset: str | List[str]):
             self.charset = charset
 
         self.ctc_vocab = self.charset.copy()
-        #self.ctc_vocab.insert(0, " ")
+        self.ctc_vocab.insert(0, " ")
         self.ctc_decoder = build_ctcdecoder(self.ctc_vocab)
 
     def encode(self, label: str):
@@ -60,6 +60,11 @@ def decode(self, inputs: List[int]) -> str:
         return "".join(self.charset[x-1] for x in inputs)
 
     def ctc_decode(self, logits):
+        if logits.shape[0] == len(self.ctc_vocab):
+            logits = np.transpose(
+                logits, axes=[1, 0]
+            )  # adjust logits to have shape time, vocab
+
         return self.ctc_decoder.decode(logits).replace(" ", "")
 
 
@@ -236,7 +241,7 @@ def __init__(self, ocr_config: OCRConfig) -> None:
         self.ocr_session = ort.InferenceSession(
             self._onnx_model_file, providers=self._execution_providers
         )
-
+        print(f"Setting up CTC Decoder: {self._characters}")
         self.decoder = CTCDecoder(self._characters)
 
     def _pad_ocr_line(
@@ -298,12 +303,6 @@ def _predict(self, image_batch: npt.NDArray) -> npt.NDArray:
         return logits
 
     def _decode(self, logits: npt.NDArray) -> str:
-
-        if logits.shape[0] == len(self._characters):
-            logits = np.transpose(
-                logits, axes=[1, 0]
-            )  # adjust logits to have shape time, vocab
-
         text = self.decoder.ctc_decode(logits)
 
         return text

diff --git a/MonlamOCR/Utils.py b/MonlamOCR/Utils.py
@@ -979,7 +979,12 @@ def extract_line_images(image: npt.NDArray, line_data: List[npt.NDArray], defaul
 
 
 def get_charset(charset: str) -> List[str]:
-    charset = f"ß{charset}"
+    if isinstance(charset, str):
+        charset = [x for x in charset]
+
+    elif isinstance(charset, List):
+        charset = charset
+
     return [x for x in charset]
 
 
@@ -1008,7 +1013,7 @@ def read_ocr_model_config(config_file: str):
         output_layer,
         squeeze_channel_dim,
         swap_hw,
-        characters,
+        characters
     )
 
     return config

diff --git a/MonlamOCR/__init__,py b/MonlamOCR/__init__.py
rename from MonlamOCR/__init__,py
rename to MonlamOCR/__init__.py