diff --git a/scraper/src/custom_dupefilter.py b/scraper/src/custom_dupefilter.py index 0b3993e6..5e36cca7 100644 --- a/scraper/src/custom_dupefilter.py +++ b/scraper/src/custom_dupefilter.py @@ -29,7 +29,7 @@ def custom_request_fingerprint(self, request, include_headers=None, if remove_scheme: match_capture_any_scheme = r'(https?)(.*)' url_for_hash = re.sub(match_capture_any_scheme, r"\2", - url_for_hash) + url_for_finger_print) if include_headers: include_headers = tuple(to_bytes(h.lower()) diff --git a/scraper/src/documentation_spider.py b/scraper/src/documentation_spider.py index 18b56412..1bbca092 100644 --- a/scraper/src/documentation_spider.py +++ b/scraper/src/documentation_spider.py @@ -52,7 +52,6 @@ def to_any_scheme(url): def to_other_scheme(url): """Return a list with the translation to this url into each other scheme.""" other_scheme_urls = [] - url = url.encode('utf8') match = DocumentationSpider.match_capture_any_scheme.match(url) assert match if not (match and match.group(1) and match.group(2)):