Skip to content

Commit

Permalink
Handle newlines in the middle of a name
Browse files — browse the repository at this point in the history
  • Loading branch information
VirginiaDooley committed Jun 5, 2023
1 parent bcdb0d0 commit 130924b
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 4 deletions.
13 changes: 12 additions & 1 deletion ynr/apps/sopn_parsing/helpers/parse_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,18 @@ def clean_name(name):
- Build a string to represent the other names by looking for all words not in all caps
- Strip whitespace in case last_names is empty and return string titleized
"""
name = name.replace("\n", " ")

if "\n" in name and "-" not in name:
if name.count("\n") == 1 and name.find("\n") != -1:
name = name.replace("\n", " ")
# If the name contains more than one newline, assume the breaks fall in the middle of a word and replace them with an empty string
elif name.count("\n") > 1 or name.find("\n") == -1:
name = name.replace("\n", "")
else:
name = name.replace("\n", " ")
elif "\n" in name and "-" in name:
name = name.replace("\n", " ")

name = name.replace("`", "'")
name = name.replace("\u2013", "\u002d")
# remove multiple whitespaces
Expand Down
8 changes: 5 additions & 3 deletions ynr/apps/sopn_parsing/tests/test_parse_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,10 +319,12 @@ def test_clean_name_replaces_backticks(self):
assert "'" in name

def test_clean_name_replaces_newlines(self):
name = parse_tables.clean_name(
"A Very Long Name That Splits \nOver Lines"
)
name = parse_tables.clean_name("D\nICKIE \nPaul")
assert "\n" not in name
assert name == "Paul Dickie"
name = parse_tables.clean_name("M\nARSH-PRITCHARD Thomas")
assert "\n" not in name
assert name == "Thomas Marsh-Pritchard"

def test_clean_name_capitalized_last_and_titalized(self):
name = parse_tables.clean_name("SMITH John")
Expand Down

0 comments on commit 130924b

Please sign in to comment.