diff --git a/README.md b/README.md index f25a661..682a27e 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,6 @@ # ECO Parser -[![Build Status](https://travis-ci.org/DemocracyClub/eco-parser.svg?branch=master)](https://travis-ci.org/DemocracyClub/eco-parser) -[![Coverage Status](https://coveralls.io/repos/github/DemocracyClub/eco-parser/badge.svg?branch=master)](https://coveralls.io/github/DemocracyClub/eco-parser?branch=master) +[![Run tests](https://github.com/DemocracyClub/eco-parser/actions/workflows/test.yml/badge.svg)](https://github.com/DemocracyClub/eco-parser/actions/workflows/test.yml)[![Coverage Status](https://coveralls.io/repos/github/DemocracyClub/eco-parser/badge.svg?branch=master)](https://coveralls.io/github/DemocracyClub/eco-parser?branch=master) ![PyPI Version](https://img.shields.io/pypi/v/eco-parser.svg) ![License](https://img.shields.io/pypi/l/eco-parser.svg) ![Python Support](https://img.shields.io/pypi/pyversions/eco-parser.svg) diff --git a/eco_parser/__init__.py b/eco_parser/__init__.py index 6756837..f1265b7 100644 --- a/eco_parser/__init__.py +++ b/eco_parser/__init__.py @@ -5,3 +5,11 @@ TableParser, ) from eco_parser.parser import EcoParser + +__all__ = [ + "ParseError", + "BodyParser", + "ElementParserFactory", + "TableParser", + "EcoParser", +] diff --git a/eco_parser/core.py b/eco_parser/core.py index 54f4516..54ccb5a 100644 --- a/eco_parser/core.py +++ b/eco_parser/core.py @@ -30,5 +30,4 @@ def get_single_element(parent, tag): def get_child_text(parent): text = "".join(parent.itertext()) - text = re.sub("\s+", " ", text).strip() - return text + return re.sub("\s+", " ", text).strip() diff --git a/eco_parser/element_parsers.py b/eco_parser/element_parsers.py index da4d817..86937d7 100644 --- a/eco_parser/element_parsers.py +++ b/eco_parser/element_parsers.py @@ -78,10 +78,11 @@ def parse_body(self): table_format = self.get_table_format(tbody) if table_format == self.FORMAT_ONE_ROW_PARA: return self.parse_one_row_table(tbody) - elif table_format == self.FORMAT_STANDARD_TABLE: + if table_format == self.FORMAT_STANDARD_TABLE: return self.parse_standard_table(tbody) - elif table_format == self.FORMAT_UNKNOWN: + if table_format == self.FORMAT_UNKNOWN: raise ParseError("Could not detect table format", 0) + return None def parse(self): try: @@ -107,5 +108,4 @@ def create(element): except ParseError as e: if e.matches == 0: return BodyParser(element) - else: - raise + raise e diff --git a/eco_parser/parser.py b/eco_parser/parser.py index e7b4111..20d9cd5 100644 --- a/eco_parser/parser.py +++ b/eco_parser/parser.py @@ -37,9 +37,9 @@ def parse(self): article_pattern = r"http[s]?\:\/\/(www\.)?legislation\.gov\.uk\/(.)+\/article\/(.)+\/data\.xml" if re.match(schedule_pattern, self.url): return self.parse_schedule() - elif re.match(article_pattern, self.url): + if re.match(article_pattern, self.url): return self.parse_article() - else: - raise ParseError( - "Could not find a suitable parser for %s" % (self.url), 0 - ) + + raise ParseError( + "Could not find a suitable parser for %s" % (self.url), 0 + ) diff --git a/tests/tests.py b/tests/tests.py index 5d79b80..52b27c7 100644 --- a/tests/tests.py +++ b/tests/tests.py @@ -39,9 +39,10 @@ def get_data(self): dirname = os.path.dirname(os.path.abspath(__file__)) file_path = os.path.abspath(os.path.join(dirname, fixtures[self.url])) if self.url in fixtures: - return bytes(open(file_path, "r").read(), "utf-8") - else: - raise Exception("no test fixture defined for url '%s'" % self.url) + with open(file_path, "r", encoding="utf-8") as file: + return bytes(file.read(), "utf-8") + + raise Exception("no test fixture defined for url '%s'" % self.url) class ParserTest(unittest.TestCase):