Ignore null characters in the input CSV file (flatten-tool pull request 435).

Adds NullCharacterFilter to flattentool/input.py, uses it when reading the
sheet headings of a CSV file, and adds a regression test and a changelog entry.

Note: the hunks for flattentool/input.py and flattentool/tests/test_input.py
were edited by hand after the patch was generated, so the post-image blob ids
on their "index" lines are stale. The pre-image ids are unchanged.

diff --git a/CHANGELOG.md b/CHANGELOG.md
index cab1cbc..9179b65 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,11 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 
 ## [Unreleased]
 
+### Fixed
+
+- Ignore null characters in the input CSV file
+  https://github.com/OpenDataServices/flatten-tool/pull/435
+
 ## [0.24.0] - 2023-11-15
 
 ### Changed
diff --git a/flattentool/input.py b/flattentool/input.py
index cc9137a..80d007d 100644
--- a/flattentool/input.py
+++ b/flattentool/input.py
@@ -44,6 +44,28 @@ def __init__(self, cell_value, cell_location):
         self.sub_cells = []
 
 
+class NullCharacterFilter:
+    """
+    Iterate over lines of text, removing null characters from each line.
+
+    Avoids _csv.Error "line contains NUL" in Python < 3.11.
+    """
+
+    def __init__(self, file):
+        self.file = file
+        # iter() allows wrapping any iterable of strings, not only iterators.
+        self._lines = iter(file)
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        """
+        Return the next line with null characters removed.
+        """
+        return next(self._lines).replace("\0", "")
+
+
 def convert_type(type_string, value, timezone=pytz.timezone("UTC"), convert_flags={}):
     if value == "" or value is None:
         return None
@@ -615,7 +637,7 @@ def get_sheet_headings(self, sheet_name):
         with open(
             os.path.join(self.input_name, sheet_name + ".csv"), encoding=self.encoding
         ) as main_sheet_file:
-            r = csvreader(main_sheet_file)
+            r = csvreader(NullCharacterFilter(main_sheet_file))
             for num, row in enumerate(r):
                 if num == (skip_rows + configuration_line):
                     return row
diff --git a/flattentool/tests/test_input.py b/flattentool/tests/test_input.py
index ce8a05e..5ea04b6 100644
--- a/flattentool/tests/test_input.py
+++ b/flattentool/tests/test_input.py
@@ -5,7 +5,13 @@
 """
 from __future__ import unicode_literals
 
-from flattentool.input import path_search
+import csv
+import io
+import sys
+
+import pytest
+
+from flattentool.input import NullCharacterFilter, path_search
 
 
 def test_path_search():
@@ -42,3 +48,16 @@ def test_path_search():
         )
         is goal_dict
     )
+
+
+def test_null_character_filter():
+    # https://bugs.python.org/issue27580
+    if sys.version_info < (3, 11):
+        with pytest.raises(csv.Error):
+            next(csv.reader(io.StringIO("\0")))
+
+    # An unexpected exception fails the test by itself, with a full traceback.
+    assert next(csv.reader(NullCharacterFilter(io.StringIO("\0")))) == []
+
+    # Any iterable of strings can be wrapped, not only file objects.
+    assert list(csv.reader(NullCharacterFilter(["a\0,b\n"]))) == [["a", "b"]]