Skip to content

Commit

Permalink
restricting column break threshold doesn't work quite as well for list groups, loosen it
Browse files Browse the repository at this point in the history
  • Loading branch information
iammosespaulr committed Nov 30, 2024
1 parent d9eb4dd commit 9d0f0eb
Showing 1 changed file with 1 addition and 6 deletions.
7 changes: 1 addition & 6 deletions marker/processors/list.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ class ListProcessor(BaseProcessor):
"""
block_types = (BlockTypes.ListGroup,)
ignored_block_types = (BlockTypes.PageHeader, BlockTypes.PageFooter)
column_gap_ratio = 0.02 # column gaps are atleast 2% of the current column width

def __init__(self, config):
super().__init__(config)
Expand All @@ -29,16 +28,12 @@ def __call__(self, document: Document):
if next_block.ignore_for_output:
continue

column_gap = block.polygon.width * self.column_gap_ratio
column_break, page_break = False, False
next_block_in_first_quadrant = False

if next_block.page_id == block.page_id: # block on the same page
# we check for a column break
column_break = (
next_block.polygon.y_start <= block.polygon.y_end and
next_block.polygon.x_start > (block.polygon.x_end + column_gap)
)
column_break = next_block.polygon.y_start <= block.polygon.y_end
else:
page_break = True
next_page = document.get_page(next_block.page_id)
Expand Down

0 comments on commit 9d0f0eb

Please sign in to comment.