From d74eac3f847a55246ea28aa01278cb1fa6386297 Mon Sep 17 00:00:00 2001 From: s-pace Date: Thu, 3 Sep 2020 10:58:38 +0200 Subject: [PATCH] feat(meta): handle comma-separated version --- scraper/src/strategies/default_strategy.py | 7 ++- .../src/tests/default_strategy/meta_test.py | 54 +++++++++++++++++++ 2 files changed, 60 insertions(+), 1 deletion(-) diff --git a/scraper/src/strategies/default_strategy.py b/scraper/src/strategies/default_strategy.py index 83aa2a29..83928ba5 100644 --- a/scraper/src/strategies/default_strategy.py +++ b/scraper/src/strategies/default_strategy.py @@ -177,7 +177,12 @@ def get_records_from_dom(self, current_page_url=None): record[name] = content if name == "version": - record[name] = str(content) + version = str(content) + # When version is a comma-separated list of tokens + if ',' in version: + record[name] = [token.strip() for token in version.split(",")] + else: + record[name] = version if current_page_url is not None: # Add variables to the record diff --git a/scraper/src/tests/default_strategy/meta_test.py b/scraper/src/tests/default_strategy/meta_test.py index 13dfa608..4e5eeca1 100644 --- a/scraper/src/tests/default_strategy/meta_test.py +++ b/scraper/src/tests/default_strategy/meta_test.py @@ -253,3 +253,57 @@ def test_meta_escaped_string(self): assert actual[1]['string'] == "ok" assert actual[2]['string'] == "ok" assert actual[3]['string'] == "ok" + + def test_meta_coma_separated_version(self): + # Given + strategy = get_strategy() + strategy.dom = lxml.html.fromstring(""" + +
+ +
+ +

Foo

+

text

+

Bar

+

Baz

+ + + """) + + # When + actual = strategy.get_records_from_dom() + + # Then + assert len(actual) == 4 + assert actual[0]['version'] == ["1.2.0", "latest"] + assert actual[1]['version'] == ["1.2.0", "latest"] + assert actual[2]['version'] == ["1.2.0", "latest"] + assert actual[3]['version'] == ["1.2.0", "latest"] + + def test_meta_coma_separated_whitespace_version(self): + # Given + strategy = get_strategy() + strategy.dom = lxml.html.fromstring(""" + +
+ +
+ +

Foo

+

text

+

Bar

+

Baz

+ + + """) + + # When + actual = strategy.get_records_from_dom() + + # Then + assert len(actual) == 4 + assert actual[0]['version'] == ["1.2.0", "latest"] + assert actual[1]['version'] == ["1.2.0", "latest"] + assert actual[2]['version'] == ["1.2.0", "latest"] + assert actual[3]['version'] == ["1.2.0", "latest"]