Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Repair fix coords #43

Merged
merged 12 commits into from
Sep 14, 2020
31 changes: 27 additions & 4 deletions ocrd_segment/repair.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from scipy.ndimage import filters, morphology
import cv2
import numpy as np
from shapely.geometry import Polygon, LineString
from shapely.geometry import asPolygon, Polygon, LineString

from ocrd import Processor
from ocrd_utils import (
Expand Down Expand Up @@ -240,11 +240,13 @@ def sanitize_page(self, page, page_id):
LOG.warning('Ignoring contour %d too small (%d/%d) in region "%s"',
i, area, total_area, region.id)
continue
# simplify shape:
# simplify shape (until valid):
# can produce invalid (self-intersecting) polygons:
#polygon = cv2.approxPolyDP(contour, 2, False)[:, 0, ::] # already ordered x,y
polygon = contour[:, 0, ::] # already ordered x,y
polygon = Polygon(polygon).simplify(1).exterior.coords
polygon = Polygon(polygon).simplify(1)
polygon = make_valid(polygon)
polygon = polygon.exterior.coords[:-1] # keep open
if len(polygon) < 4:
LOG.warning('Ignoring contour %d less than 4 points in region "%s"',
i, region.id)
Expand Down Expand Up @@ -354,7 +356,13 @@ def _plausibilize_group(regionspolys, rogroup, mark_for_deletion, mark_for_mergi
# and use-cases in the future
superpoly = Polygon(polygon_from_points(superreg.get_Coords().points))
superpoly = superpoly.union(poly)
superreg.get_Coords().points = points_from_polygon(superpoly.exterior.coords)
if superpoly.type == 'MultiPolygon':
superpoly = superpoly.convex_hull
if superpoly.minimum_clearance < 1.0:
superpoly = asPolygon(np.round(superpoly.exterior.coords))
superpoly = make_valid(superpoly)
superpoly = superpoly.exterior.coords[:-1] # keep open
superreg.get_Coords().points = points_from_polygon(superpoly)
# FIXME should we merge/mix attributes and features?
if region.get_orientation() != superreg.get_orientation():
LOG.warning('Merging region "%s" with orientation %f into "%s" with %f',
Expand Down Expand Up @@ -399,3 +407,18 @@ def _plausibilize_group(regionspolys, rogroup, mark_for_deletion, mark_for_mergi
if region.parent_object_:
# remove in-place
region.parent_object_.get_TextRegion().remove(region)

def make_valid(polygon):
    """Ensure a shapely.geometry.Polygon is valid.

    Tries three strategies in order, stopping as soon as the polygon is valid:

    1. re-arrange the exterior ring so it starts at a different vertex
       (until simplification would yield a valid result),
    2. simplify with increasing integer tolerance,
    3. enlarge by buffering with increasing distance.

    A polygon that is already valid is returned unchanged.

    Args:
        polygon: the shapely Polygon to repair (only its exterior is used
            when re-arranging points).

    Returns:
        A valid shapely Polygon.
    """
    # Work on the *open* ring: ``exterior.coords`` repeats the first point at
    # the end, and rotating the closed sequence would merely duplicate the
    # start vertex instead of moving it.
    points = list(polygon.exterior.coords)[:-1]
    for split in range(1, len(points)):
        if polygon.is_valid or polygon.simplify(polygon.area).is_valid:
            break
        # simplification may not be possible (at all) due to ordering;
        # in that case, try another starting point (always rotating the
        # original ring, so the offsets do not accumulate)
        polygon = Polygon(points[-split:] + points[:-split])
    # Upper bound is inclusive of int(area) + 1, so that at least tolerance 1
    # is attempted even for polygons with an area below 2.
    for tolerance in range(1, int(polygon.area) + 2):
        if polygon.is_valid:
            break
        # simplification may require a larger tolerance
        polygon = polygon.simplify(tolerance)
    # Last resort: grow the shape until it becomes valid, so callers never
    # receive an invalid geometry.
    delta = 1.0
    while not polygon.is_valid:
        polygon = polygon.buffer(delta)
        delta += 1.0
    # NOTE(review): buffering could presumably yield a MultiPolygon for
    # disjoint parts; callers access ``.exterior``, so collapse to the hull.
    if polygon.geom_type == 'MultiPolygon':
        polygon = polygon.convex_hull
    return polygon
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
ocrd >= 2.13.1
bertsky marked this conversation as resolved.
Show resolved Hide resolved
shapely
shapely >= 1.7.1
scikit-image
numpy