Skip to content

Commit

Permalink
Merge pull request #449 from algolia/fix_dupefilter
Browse files Browse the repository at this point in the history
fix: use the right URL to create the fingerprint of a page
  • Loading branch information
Sylvain Pace authored Apr 23, 2019
2 parents 0eb34c0 + a3066da commit 0f58e55
Show file tree
Hide file tree
Showing 2 changed files with 1 addition and 2 deletions.
2 changes: 1 addition & 1 deletion scraper/src/custom_dupefilter.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def custom_request_fingerprint(self, request, include_headers=None,
if remove_scheme:
match_capture_any_scheme = r'(https?)(.*)'
url_for_hash = re.sub(match_capture_any_scheme, r"\2",
- url_for_hash)
+ url_for_finger_print)

if include_headers:
include_headers = tuple(to_bytes(h.lower())
Expand Down
1 change: 0 additions & 1 deletion scraper/src/documentation_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ def to_any_scheme(url):
def to_other_scheme(url):
"""Return a list with the translation to this url into each other scheme."""
other_scheme_urls = []
- url = url.encode('utf8')
match = DocumentationSpider.match_capture_any_scheme.match(url)
assert match
if not (match and match.group(1) and match.group(2)):
Expand Down

0 comments on commit 0f58e55

Please sign in to comment.