Skip to content

Commit

Permalink
Merge branch 'Dev'
Browse files Browse the repository at this point in the history
  • Loading branch information
eric86y committed Aug 8, 2024
2 parents 2b8429a + 4647195 commit c91ca5e
Show file tree
Hide file tree
Showing 17 changed files with 1,423 additions and 197 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -129,9 +129,9 @@ dmypy.json
.pyre/

# Custom
.idea
Data
Output
Models
MonlamOCR/Models

Map_eText.ipynb
197 changes: 197 additions & 0 deletions Demo-LayoutDetection.ipynb

Large diffs are not rendered by default.

295 changes: 173 additions & 122 deletions Demo-OCR.ipynb

Large diffs are not rendered by default.

60 changes: 60 additions & 0 deletions MonlamOCR/Config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import os
from huggingface_hub import snapshot_download


# Maps the short OCR model identifiers accepted by this module to the
# Hugging Face Hub repository ids that are passed to ``snapshot_download``.
MODEL_DICT = {
    "Woodblock": "BDRC/Woodblock",
    "DergeTenjur": "BDRC/DergeTenjur",
    "GoogleBooks_C": "BDRC/GoogleBooks_C_v1",
    "GoogleBooks_E": "BDRC/GoogleBooks_E_v1",
}


# download the line model: https://huggingface.co/BDRC/PhotiLines
def init_monlam_line_model() -> str:
    """Download the ``BDRC/PhotiLines`` model and return its config path.

    The repository snapshot is fetched with ``snapshot_download`` into the
    relative directory ``Models/BDRC/PhotiLines``.

    Returns:
        Path to the ``config.json`` file inside the downloaded snapshot.

    Raises:
        FileNotFoundError: If the snapshot contains no ``config.json``.
    """
    model_id = "BDRC/PhotiLines"
    model_path = snapshot_download(
        repo_id=model_id,
        repo_type="model",
        local_dir=f"Models/{model_id}",
    )
    model_config = f"{model_path}/config.json"

    # Explicit check instead of ``assert``: assert statements are removed
    # under ``python -O``, which would let a missing config go unnoticed.
    if not os.path.isfile(model_config):
        raise FileNotFoundError(f"Model config not found: {model_config}")

    return model_config


# download the layout model: https://huggingface.co/BDRC/Photi
def init_monlam_layout_model() -> str:
    """Download the ``BDRC/Photi`` model and return its config path.

    The repository snapshot is fetched with ``snapshot_download`` into the
    relative directory ``Models/BDRC/Photi``.

    Returns:
        Path to the ``config.json`` file inside the downloaded snapshot.

    Raises:
        FileNotFoundError: If the snapshot contains no ``config.json``.
    """
    model_id = "BDRC/Photi"
    model_path = snapshot_download(
        repo_id=model_id,
        repo_type="model",
        local_dir=f"Models/{model_id}",
    )
    model_config = f"{model_path}/config.json"

    # Explicit check instead of ``assert``: assert statements are removed
    # under ``python -O``, which would let a missing config go unnoticed.
    if not os.path.isfile(model_config):
        raise FileNotFoundError(f"Model config not found: {model_config}")

    return model_config


def init_monla_ocr_model(identifier: str) -> "str | None":
    """Download the OCR model registered under *identifier*.

    Args:
        identifier: A key of ``MODEL_DICT``; the mapped value is used as the
            Hugging Face Hub repository id.

    Returns:
        Path to the ``config.json`` file inside the downloaded snapshot, or
        ``None`` (after printing an error message) when *identifier* is not
        a key of ``MODEL_DICT``.

    Raises:
        FileNotFoundError: If the snapshot contains no ``config.json``.
    """
    # Guard clause: unknown identifiers are reported and signalled with None,
    # exactly as before, so callers that check for None keep working.
    if identifier not in MODEL_DICT:
        print(f"Error: {identifier} is not available")
        return None

    model_id = MODEL_DICT[identifier]
    model_path = snapshot_download(
        repo_id=model_id,
        repo_type="model",
        local_dir=f"Models/{model_id}",
    )
    model_config = f"{model_path}/config.json"

    # Explicit check instead of ``assert``: assert statements are removed
    # under ``python -O``, which would let a missing config go unnoticed.
    if not os.path.isfile(model_config):
        raise FileNotFoundError(f"Model config not found: {model_config}")

    return model_config
29 changes: 29 additions & 0 deletions MonlamOCR/Data.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,40 @@ class Line:
bbox: BBox
center: tuple[int, int]


@dataclass
class LineData:
    """Record grouping an image array, a prediction array, an angle and lines.

    NOTE(review): the per-field notes below are inferred from names and
    types only; confirm them against the code that builds this record.
    """

    image: npt.NDArray  # image array; presumably the page image - TODO confirm
    prediction: npt.NDArray  # presumably the line model's output - TODO confirm
    angle: float  # presumably a rotation angle; unit is not visible here
    lines: list[Line]  # list of ``Line`` records


@dataclass
class LayoutData:
    """Record grouping an image array, a rotation value and layout regions.

    Regions are stored as ``BBox`` lists (``images``, ``text_bboxes``,
    ``captions``, ``margins``) alongside a list of ``Line`` records and a
    dict of prediction arrays keyed by string.

    NOTE(review): the per-field notes below are inferred from names and
    types only; confirm them against the code that builds this record.
    """

    image: npt.NDArray  # image array; presumably the page image - TODO confirm
    rotation: float  # presumably a rotation angle; unit is not visible here
    images: list[BBox]  # presumably boxes of image regions - TODO confirm
    text_bboxes: list[BBox]  # presumably boxes of text regions - TODO confirm
    lines: list[Line]  # list of ``Line`` records
    captions: list[BBox]  # presumably boxes of caption regions - TODO confirm
    margins: list[BBox]  # presumably boxes of margin regions - TODO confirm
    predictions: dict[str, npt.NDArray]  # arrays keyed by string; presumably one per class


@dataclass
class LineDetectionConfig:
    """Settings record holding a model file path and a patch size.

    NOTE(review): presumably consumed by the line detection model loader;
    the consumer is not visible here - confirm.
    """

    model_file: str  # path or name of the model file
    patch_size: int  # presumably the edge length of input patches - TODO confirm


@dataclass
class LayoutDetectionConfig:
    """Settings record holding a model file path, a patch size and class names.

    NOTE(review): presumably consumed by the layout detection model loader;
    the consumer is not visible here - confirm.
    """

    model_file: str  # path or name of the model file
    patch_size: int  # presumably the edge length of input patches - TODO confirm
    classes: list[str]  # class names; presumably the model's output labels
classes: list[str]


@dataclass
class OCRConfig:
model_file: str
Expand Down
Loading

0 comments on commit c91ca5e

Please sign in to comment.