Skip to content

Commit

Permalink
Custom fieldname support
Browse files Browse the repository at this point in the history
Add support for validating CSVs that don't include a header row. The
implementation allows a user to define a sequence of field names and
passes these through to the underlying `DictReader` instances. The
`fieldnames` attribute is `None` by default, which retains the existing
behavior of inferring field names from a header row in the source CSV.

Resolves di#88.
  • Loading branch information
jonafato committed Mar 15, 2024
1 parent 48f92e1 commit 1311ab3
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 2 deletions.
7 changes: 7 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,13 @@ Running Vlads Programatically
List of validators. Optional, defaults to the class variable `validators`
if set, otherwise uses `EmptyValidator` for all fields.

:``fieldnames=None``:
Sequence of field names to be passed through to the underlying
`csv.DictReader` instance. If provided, the reader will use these field
names instead of inferring them from the input CSV's first row. Intended
only for use with CSVs that do not have header rows. Optional, defaults
to `None`.

:``delimiter=','``:
The delimiter used within your csv source. Optional, defaults to `,`.

Expand Down
26 changes: 26 additions & 0 deletions tests/test_vlads.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,32 @@ class TestVlad(Vlad):
assert not TestVlad(source=source).validate()


def test_explicit_fieldnames():
    """A CSV without a header row validates when field names are declared."""

    class TestVlad(Vlad):
        # These names stand in for the header row the source does not have.
        fieldnames = ["Name", "Status"]
        validators = {
            "Name": [UniqueValidator()],
            "Status": [SetValidator(["Vampire", "Not A Vampire"])],
        }

    headerless_csv = String("Dracula,Vampire")
    vlad = TestVlad(source=headerless_csv)
    assert vlad.validate()


def test_explicit_fieldnames_conflict_fails():
    """Declaring field names for the example vampires CSV fails validation."""

    class TestVlad(Vlad):
        # NOTE(review): the example file presumably carries its own header
        # row, which would then be read as data and rejected -- confirm
        # against vladiate/examples/vampires.csv.
        fieldnames = ["Name", "Status"]
        validators = {
            "Name": [UniqueValidator()],
            "Status": [SetValidator(["Vampire", "Not A Vampire"])],
        }

    example_csv = LocalFile("vladiate/examples/vampires.csv")
    vlad = TestVlad(source=example_csv)
    assert not vlad.validate()


def test_fails_validation():
source = LocalFile("vladiate/examples/vampires.csv")

Expand Down
10 changes: 8 additions & 2 deletions vladiate/vlad.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ def __init__(
file_validation_failure_threshold=None,
quiet=False,
row_validators=[],
fieldnames=None,
):
self.logger = logs.logger
self.failures = defaultdict(lambda: defaultdict(list))
Expand All @@ -26,6 +27,7 @@ def __init__(
self.source = source
self.validators = validators or getattr(self, "validators", {})
self.row_validators = row_validators or getattr(self, "row_validators", [])
self.fieldnames = fieldnames or getattr(self, "fieldnames", None)
self.delimiter = delimiter or getattr(self, "delimiter", ",")
self.line_count = 0
self.ignore_missing_validators = ignore_missing_validators
Expand Down Expand Up @@ -124,15 +126,19 @@ def _log_missing(self, missing_items):
)

def _get_total_lines(self):
    """Count the rows the CSV reader yields and cache the result.

    Opens ``self.source`` once and reads it with a ``csv.DictReader``
    configured with ``self.delimiter`` and ``self.fieldnames``. When
    ``self.fieldnames`` is ``None`` the reader takes the first row as the
    header, so that row is not counted; when explicit field names are
    supplied the first row is counted as data.

    Returns:
        The number of rows yielded by the reader. The same value is stored
        on ``self.total_lines``.
    """
    # Build exactly one reader. Constructing a second reader without
    # ``fieldnames`` would open the source an extra time and throw the
    # resulting handle away unused.
    reader = csv.DictReader(
        self.source.open(), delimiter=self.delimiter, fieldnames=self.fieldnames
    )
    self.total_lines = sum(1 for _ in reader)
    return self.total_lines

def validate(self):
self.logger.info(
"\nValidating {}(source={})".format(self.__class__.__name__, self.source)
)
reader = csv.DictReader(self.source.open(), delimiter=self.delimiter)
reader = csv.DictReader(
self.source.open(), delimiter=self.delimiter, fieldnames=self.fieldnames
)

if not reader.fieldnames:
self.logger.info(
Expand Down

0 comments on commit 1311ab3

Please sign in to comment.