
Commit

#14: fixed
einspunktnull committed Jun 20, 2023
1 parent 740d6b7 commit 8dcdb76
Showing 2 changed files with 19 additions and 11 deletions.
29 changes: 19 additions & 10 deletions src/digital_eval/evaluation.py
@@ -16,7 +16,7 @@
     floor
 )
 from multiprocessing import (
-    cpu_count, Queue, Lock, Manager, Array
+    cpu_count
 )
 from pathlib import (
     Path
@@ -285,6 +285,17 @@ def _map_page2013(elem: ET.Element) -> Tuple[str, int, int, int, int]:
     return (NOT_SET, min(_xs), min(_ys), max(_xs), max(_ys))
 
 
+def _get_line_pieces_from_piece(piece: Piece, lines: List[Piece] = None) -> List[Piece]:
+    if lines is None:
+        lines = []
+    if piece.level == PieceLevel.LINE and piece.transcription:
+        lines.append(piece)
+        return lines
+    for child in piece.pieces:
+        _get_line_pieces_from_piece(child, lines)
+    return lines
+
+
 def calculate_bounding_box(elements: List[ET.Element], map_func) -> Tuple[int, int, int, int]:
     """Review element's points to get points for
     minimum (top-left) and maximum (bottom-right)"""
@@ -346,11 +357,7 @@ def piece_to_text(file_path, frame=None, oneliner=True) -> Tuple[str | List[str]
         frame_piece = Piece()
         frame_piece.dimensions = frame
         filter_word_pieces(frame_piece, top_piece)
-    the_lines = [l
-                 for r in top_piece.pieces
-                 for l in r.pieces
-                 if l.transcription and l.level == PieceLevel.LINE]
-    # print('!!!!!!!!!', oneliner, len(top_piece.transcription), frame, )
+    the_lines = _get_line_pieces_from_piece(top_piece)
     if oneliner:
         return top_piece.transcription, len(the_lines)
     else:
@@ -583,7 +590,7 @@ def __init__(
         self.evaluation_map = {}
         self.text_mode = extras == EVAL_EXTRA_IGNORE_GEOMETRY
         self.is_sequential = False
-        self.metrics = []
+        self.metrics: List = []
         self.evaluation_report = {}
 
     def eval_all(self, entries: List[EvalEntry], sequential=False) -> None:
@@ -636,6 +643,7 @@ def eval_entry(self, entry: EvalEntry) -> EvalEntry:
 
         # evaluate metric copies
         _current_metrics = []
+
        for _m in self.metrics:
 
            path_g = entry.path_g
@@ -661,6 +669,7 @@ def eval_entry(self, entry: EvalEntry) -> EvalEntry:
 
            # read candidate data as text
            (txt_c, _) = to_text_func(path_c, coords, oneliner=True)
+
            if not txt_c:
                print(f"[WARN ] candidate '{path_c}' contains no text")
 
@@ -720,7 +729,7 @@ def eval_map(self):
 
        # if more than one single evaluation item
        # calculate additional statistics to reflect
-        # impact of outlying data sets 
+        # impact of outlying data sets
        # take CA and number of GT into account
        # also calculate statistics (mean, std)
        if len(data_points) > 1:
@@ -753,13 +762,13 @@ def aggregate(self, by_type=False, by_metrics=[0, 1, 2, 3]):
        # aggregate on each directory
        for _metrics_index in by_metrics:
            for ee in self.evaluation_entries:
-                # if we do not have all these different metrics set, 
+                # if we do not have all these different metrics set,
                # do of course not aggregate by non-existing index!
                if _metrics_index >= len(self.evaluation_entries[0].metrics):
                    continue
                path_key = f"{ee.metrics[_metrics_index].label}@{root_base}"
                # ATTENZIONE! works only when forehand
-                # the *real* attribute has been accessed 
+                # the *real* attribute has been accessed
                # *at least one time*
                # kept this way for testing reasons
                metric_value = ee.metrics[_metrics_index].value
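For context on the fix: the comprehension removed from piece_to_text walked exactly two levels of the piece tree (top piece → region → line), so transcribed lines nested any deeper were never collected; the new recursive helper visits every depth. Below is a minimal, self-contained sketch of that difference, using stand-in Piece and PieceLevel types (hypothetical simplifications, not the digital_eval originals):

    from dataclasses import dataclass, field
    from enum import Enum, auto
    from typing import List, Optional

    class PieceLevel(Enum):
        PAGE = auto()
        REGION = auto()
        LINE = auto()

    @dataclass
    class Piece:
        level: PieceLevel
        transcription: str = ''
        pieces: List['Piece'] = field(default_factory=list)

    def get_line_pieces(piece: Piece, lines: Optional[List[Piece]] = None) -> List[Piece]:
        # depth-first traversal: collect transcribed LINE pieces at any depth
        if lines is None:
            lines = []
        if piece.level == PieceLevel.LINE and piece.transcription:
            lines.append(piece)
            return lines
        for child in piece.pieces:
            get_line_pieces(child, lines)
        return lines

    # a region nested inside another region: the replaced comprehension
    # [l for r in top.pieces for l in r.pieces ...] stops at depth two
    # and would have missed 'deep line'
    deep = Piece(PieceLevel.REGION, pieces=[Piece(PieceLevel.LINE, 'deep line')])
    top = Piece(PieceLevel.PAGE, pieces=[
        Piece(PieceLevel.REGION,
              pieces=[Piece(PieceLevel.LINE, 'shallow line'), deep])])
    assert [l.transcription for l in get_line_pieces(top)] == ['shallow line', 'deep line']

The sketch types the accumulator as Optional[List[Piece]], while the committed helper writes List[Piece] = None; both behave identically at runtime, and both avoid the mutable-default pitfall by creating the list inside the function.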
1 change: 0 additions & 1 deletion src/digital_eval/metrics.py
@@ -410,7 +410,6 @@ def __init__(self, precision=2, normalization=UC_NORMALIZATION_NFKD, calc_func=a
        self._label = 'DictLT'
 
    def _forward(self):
-        # print('#####', len(self._data_reference), len(self._data_candidate))
        text: str = self._data_candidate
        text_list: List[str] = self._data_candidate.split()
        self._data_reference = text_list
