Skip to content

Commit

Permalink
UI fixes
Browse files: browse the repository at this point in the history
  • Loading branch information
eric86y committed Nov 13, 2024
1 parent aa3b147 commit 5a068b8
Show file tree
Hide file tree
Showing 5 changed files with 457 additions and 11 deletions.
442 changes: 442 additions & 0 deletions Demo-OCR.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion MonlamOCR/Config.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

MODEL_DICT = {
"Woodblock": "BDRC/Woodblock",
"UCHAN": "BDRC/BigUCHAN_v1",
"UCHAN": "BDRC/BigUchan",
"DergeTenjur": "BDRC/DergeTenjur",
"GoogleBooks_C": "BDRC/GoogleBooks_C_v1",
"GoogleBooks_E": "BDRC/GoogleBooks_E_v1",
Expand Down
15 changes: 7 additions & 8 deletions MonlamOCR/Inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def __init__(self, charset: str | List[str]):
self.charset = charset

self.ctc_vocab = self.charset.copy()
#self.ctc_vocab.insert(0, " ")
self.ctc_vocab.insert(0, " ")
self.ctc_decoder = build_ctcdecoder(self.ctc_vocab)

def encode(self, label: str):
Expand All @@ -60,6 +60,11 @@ def decode(self, inputs: List[int]) -> str:
return "".join(self.charset[x-1] for x in inputs)

def ctc_decode(self, logits):
    """Decode a 2-D logit matrix into text using ``self.ctc_decoder``.

    Args:
        logits: Array (or array-like, e.g. nested lists) of scores, laid
            out either as (time, vocab) or as (vocab, time).

    Returns:
        The decoder's output string with every space character removed.
    """
    # Accept nested lists as well as ndarrays; an ndarray passes through
    # unchanged, so existing callers see no difference.
    logits = np.asarray(logits)

    # The decoder is fed (time, vocab). A first axis whose length equals the
    # decoder vocabulary is taken to be the vocab axis and swapped to the back.
    # NOTE(review): a square (vocab, vocab) input is inherently ambiguous and
    # is always transposed by this heuristic.
    if logits.shape[0] == len(self.ctc_vocab):
        logits = np.transpose(logits, axes=[1, 0])

    decoded = self.ctc_decoder.decode(logits)
    return decoded.replace(" ", "")


Expand Down Expand Up @@ -236,7 +241,7 @@ def __init__(self, ocr_config: OCRConfig) -> None:
self.ocr_session = ort.InferenceSession(
self._onnx_model_file, providers=self._execution_providers
)

print(f"Setting up CTC Decoder: {self._characters}")
self.decoder = CTCDecoder(self._characters)

def _pad_ocr_line(
Expand Down Expand Up @@ -298,12 +303,6 @@ def _predict(self, image_batch: npt.NDArray) -> npt.NDArray:
return logits

def _decode(self, logits: npt.NDArray) -> str:
    """Convert model logits to text by delegating to ``self.decoder``.

    Args:
        logits: Logit matrix produced for a single line image.

    Returns:
        The string returned by ``self.decoder.ctc_decode``.
    """
    # No transposition here: CTCDecoder.ctc_decode reorients the matrix itself,
    # comparing against its own vocabulary. Checking against
    # len(self._characters) in this method would use a different length than
    # the decoder's vocabulary and could flip an already time-major matrix.
    return self.decoder.ctc_decode(logits)
Expand Down
9 changes: 7 additions & 2 deletions MonlamOCR/Utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -979,7 +979,12 @@ def extract_line_images(image: npt.NDArray, line_data: List[npt.NDArray], defaul


def get_charset(charset: str) -> List[str]:
charset = f"ß{charset}"
if isinstance(charset, str):
charset = [x for x in charset]

elif isinstance(charset, List):
charset = charset

return [x for x in charset]


Expand Down Expand Up @@ -1008,7 +1013,7 @@ def read_ocr_model_config(config_file: str):
output_layer,
squeeze_channel_dim,
swap_hw,
characters,
characters
)

return config
Expand Down
File renamed without changes.

0 comments on commit 5a068b8

Please sign in to comment.