Skip to content

Commit

Permalink
fix caption merging
Browse files Browse the repository at this point in the history
  • Loading branch information
iammosespaulr committed Nov 25, 2024
1 parent 4ea61c6 commit 26328ff
Showing 1 changed file with 6 additions and 4 deletions.
10 changes: 6 additions & 4 deletions marker/builders/structure.py
Original file line number Diff line number Diff line change
Expand Up
@@ -40,7 +40,7 @@ def group_caption_blocks(self, page: PageGroup):
 if block.block_type not in [BlockTypes.Table, BlockTypes.Figure, BlockTypes.Picture]:
 continue

-if block.block_id in remove_ids:
+if block.id in remove_ids:
 continue

 block_structure = [block_id]
Expand All
@@ -52,12 +52,14 @@ def group_caption_blocks(self, page: PageGroup):

 if prev_block and \
 prev_block.block_type in caption_types and \
-prev_block.polygon.minimum_gap(block.polygon) < gap_threshold_px:
+prev_block.polygon.minimum_gap(block.polygon) < gap_threshold_px and \
+prev_block.id not in remove_ids:
 block_structure.insert(0, prev_block.id)
 selected_polygons.append(prev_block.polygon)
-elif next_block and \
+
+if next_block and \
 next_block.block_type in caption_types and \
-next_block.polygon.minimum_gap(selected_polygons[-1]) < gap_threshold_px:
+next_block.polygon.minimum_gap(block.polygon) < gap_threshold_px:
 block_structure.append(next_block.id)
 selected_polygons.append(next_block.polygon)

Expand Down

0 comments on commit 26328ff

Please sign in to comment.