Skip to content

Commit

Permalink
feat(meta): handle comma-separated version
Browse files Browse the repository at this point in the history
  • Loading branch information
s-pace committed Sep 3, 2020
1 parent 342be03 commit d74eac3
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 1 deletion.
7 changes: 6 additions & 1 deletion scraper/src/strategies/default_strategy.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,12 @@ def get_records_from_dom(self, current_page_url=None):
record[name] = content

if name == "version":
record[name] = str(content)
version = str(content)
# When version is a list of comma-separated tokens
if ',' in version:
record[name] = [token.strip() for token in version.split(",")]
else:
record[name] = version

if current_page_url is not None:
# Add variables to the record
Expand Down
54 changes: 54 additions & 0 deletions scraper/src/tests/default_strategy/meta_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,3 +253,57 @@ def test_meta_escaped_string(self):
assert actual[1]['string'] == "ok"
assert actual[2]['string'] == "ok"
assert actual[3]['string'] == "ok"

def test_meta_coma_separated_version(self):
    # A `docsearch:version` meta whose content is "1.2.0,latest" must be
    # exposed as the list ["1.2.0", "latest"] on each of the four records.

    # Given
    strategy = get_strategy()
    markup = """
<html>
<header>
<meta name="docsearch:version" content="1.2.0,latest">
</header>
<body>
<h1>Foo</h1>
<p>text</p>
<h2>Bar</h2>
<h3>Baz</h3>
</body>
</html>
"""
    strategy.dom = lxml.html.fromstring(markup)

    # When
    records = strategy.get_records_from_dom()

    # Then
    expected_version = ["1.2.0", "latest"]
    assert len(records) == 4
    # The length check above pins the loop to exactly records 0..3.
    for record in records:
        assert record['version'] == expected_version

def test_meta_coma_separated_whitespace_version(self):
    # Same scenario as the plain comma-separated case, but the meta content
    # (" 1.2.0, latest ") carries whitespace around the tokens; the records
    # must still hold the trimmed list ["1.2.0", "latest"].

    # Given
    strategy = get_strategy()
    markup = """
<html>
<header>
<meta name="docsearch:version" content=" 1.2.0, latest ">
</header>
<body>
<h1>Foo</h1>
<p>text</p>
<h2>Bar</h2>
<h3>Baz</h3>
</body>
</html>
"""
    strategy.dom = lxml.html.fromstring(markup)

    # When
    records = strategy.get_records_from_dom()

    # Then
    expected_version = ["1.2.0", "latest"]
    assert len(records) == 4
    # The length check above pins the loop to exactly records 0..3.
    for record in records:
        assert record['version'] == expected_version

0 comments on commit d74eac3

Please sign in to comment.