From 6bd6094f3e1bb1cf662cd009ecf1dc53c6b69e46 Mon Sep 17 00:00:00 2001 From: Yibei Chen Date: Wed, 20 Nov 2024 15:21:27 -0500 Subject: [PATCH 1/2] fix strip() issue for choices --- reproschema/redcap2reproschema.py | 21 ++-- reproschema/tests/test_process_choices.py | 111 ++++++++++++++++++++++ 2 files changed, 126 insertions(+), 6 deletions(-) create mode 100644 reproschema/tests/test_process_choices.py diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py index 174ca72..41be83a 100644 --- a/reproschema/redcap2reproschema.py +++ b/reproschema/redcap2reproschema.py @@ -201,7 +201,8 @@ def process_choices(choices_str, field_name): choices = [] choices_value_type = [] for ii, choice in enumerate(choices_str.split("|")): - parts = choice.split(", ") + choice = choice.strip() # Strip leading/trailing whitespace for each choice + parts = [p.strip() for p in choice.split(",")] # Handle the case where the choice is something like "1," if len(parts) == 1: @@ -213,14 +214,22 @@ def process_choices(choices_str, field_name): ) parts = [ii, parts[0]] - # Try to convert the first part to an integer, if it fails, keep it as a string - try: - value = int(parts[0]) + # Determine if value should be treated as an integer or string + if parts[0] == '0': + # Special case for "0", treat it as an integer + value = 0 choices_value_type.append("xsd:integer") - except ValueError: + elif parts[0].isdigit() and parts[0][0] == '0': + # If it has leading zeros, treat it as a string value = parts[0] choices_value_type.append("xsd:string") - + else: + try: + value = int(parts[0]) + choices_value_type.append("xsd:integer") + except ValueError: + value = parts[0] + choices_value_type.append("xsd:string") choice_obj = { "name": {"en": " ".join(parts[1:]).strip()}, "value": value, diff --git a/reproschema/tests/test_process_choices.py b/reproschema/tests/test_process_choices.py new file mode 100644 index 0000000..1c542ce --- /dev/null +++ b/reproschema/tests/test_process_choices.py @@ -0,0 +1,111 @@ +import os +import shutil + +import pytest +import yaml +from click.testing import CliRunner + +from ..cli import main +from ..redcap2reproschema import process_choices + +def test_process_choices_numeric_codes(): + # Test standard numeric codes with descriptions + choices_str = "1, Male | 2, Female | 3, Other" + choices, value_types = process_choices(choices_str, "gender") + assert choices == [ + {"name": {"en": "Male"}, "value": 1}, + {"name": {"en": "Female"}, "value": 2}, + {"name": {"en": "Other"}, "value": 3}, + ] + assert value_types == ["xsd:integer"] + +def test_process_choices_boolean(): + # Test boolean choices (Yes/No) + choices_str = "1, Yes | 0, No" + choices, value_types = process_choices(choices_str, "boolean_field") + assert choices == [ + {"name": {"en": "Yes"}, "value": 1}, + {"name": {"en": "No"}, "value": 0}, + ] + assert value_types == ["xsd:integer"] + +def test_process_choices_special_characters(): + # Test choices with special characters + choices_str = "1, Option A | 2, \"Option B\" | 3, Option C with 'quotes'" + choices, value_types = process_choices(choices_str, "special_chars") + assert choices == [ + {"name": {"en": "Option A"}, "value": 1}, + {"name": {"en": "\"Option B\""}, "value": 2}, + {"name": {"en": "Option C with 'quotes'"}, "value": 3}, + ] + assert value_types == ["xsd:integer"] + +def test_process_choices_with_missing_values(): + # Test choices with a missing value (commonly used for "Not applicable" or "Prefer not to say") + choices_str = "1, Yes | 2, No | 99, Not applicable" + choices, value_types = process_choices(choices_str, "missing_values") + assert choices == [ + {"name": {"en": "Yes"}, "value": 1}, + {"name": {"en": "No"}, "value": 2}, + {"name": {"en": "Not applicable"}, "value": 99}, + ] + assert value_types == ["xsd:integer"] + +def test_process_choices_with_unicode(): + # Test choices with Unicode characters (e.g., accents, symbols) + choices_str = "1, Café | 2, Niño | 3, Résumé | 4, ☺" + choices, value_types = process_choices(choices_str, "unicode_field") + assert choices == [ + {"name": {"en": "Café"}, "value": 1}, + {"name": {"en": "Niño"}, "value": 2}, + {"name": {"en": "Résumé"}, "value": 3}, + {"name": {"en": "☺"}, "value": 4}, + ] + assert value_types == ["xsd:integer"] + +def test_process_choices_alpha_codes(): + # Test alpha codes (e.g., categorical text codes) + choices_str = "A, Apple | B, Banana | C, Cherry" + choices, value_types = process_choices(choices_str, "alpha_codes") + assert choices == [ + {"name": {"en": "Apple"}, "value": "A"}, + {"name": {"en": "Banana"}, "value": "B"}, + {"name": {"en": "Cherry"}, "value": "C"}, + ] + assert sorted(value_types) == ["xsd:string"] + +def test_process_choices_incomplete_values(): + # Test choices with missing descriptions + choices_str = "1, Yes | 2, | 3, No" + choices, value_types = process_choices(choices_str, "incomplete_values") + assert choices == [ + {"name": {"en": "Yes"}, "value": 1}, + {"name": {"en": ""}, "value": 2}, + {"name": {"en": "No"}, "value": 3}, + ] + assert value_types == ["xsd:integer"] + +def test_process_choices_numeric_strings(): + # Test numeric strings as values (e.g., not converted to integers) + choices_str = "001, Option 001 | 002, Option 002 | 003, Option 003" + choices, value_types = process_choices(choices_str, "numeric_strings") + assert choices == [ + {"name": {"en": "Option 001"}, "value": "001"}, + {"name": {"en": "Option 002"}, "value": "002"}, + {"name": {"en": "Option 003"}, "value": "003"}, + ] + assert sorted(value_types) == ["xsd:string"] + +def test_process_choices_spaces_in_values(): + # Test choices with spaces in values and names + choices_str = "A B, Choice AB | C D, Choice CD" + choices, value_types = process_choices(choices_str, "spaces_in_values") + assert choices == [ + {"name": {"en": "Choice AB"}, "value": "A B"}, + {"name": {"en": "Choice CD"}, "value": "C D"}, + ] + assert sorted(value_types) == ["xsd:string"] + +# Run pytest if script is called directly +if __name__ == "__main__": + pytest.main() From d10601afd5fa631cb9909ea046c0a97c73cbdbfa Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 20 Nov 2024 20:26:38 +0000 Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- reproschema/redcap2reproschema.py | 8 +++++--- reproschema/tests/test_process_choices.py | 12 +++++++++++- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py index 41be83a..2e93a7d 100644 --- a/reproschema/redcap2reproschema.py +++ b/reproschema/redcap2reproschema.py @@ -201,7 +201,9 @@ def process_choices(choices_str, field_name): choices = [] choices_value_type = [] for ii, choice in enumerate(choices_str.split("|")): - choice = choice.strip() # Strip leading/trailing whitespace for each choice + choice = ( + choice.strip() + ) # Strip leading/trailing whitespace for each choice parts = [p.strip() for p in choice.split(",")] # Handle the case where the choice is something like "1," @@ -215,11 +217,11 @@ def process_choices(choices_str, field_name): parts = [ii, parts[0]] # Determine if value should be treated as an integer or string - if parts[0] == '0': + if parts[0] == "0": # Special case for "0", treat it as an integer value = 0 choices_value_type.append("xsd:integer") - elif parts[0].isdigit() and parts[0][0] == '0': + elif parts[0].isdigit() and parts[0][0] == "0": # If it has leading zeros, treat it as a string value = parts[0] choices_value_type.append("xsd:string") diff --git a/reproschema/tests/test_process_choices.py b/reproschema/tests/test_process_choices.py index 1c542ce..147e822 100644 --- a/reproschema/tests/test_process_choices.py +++ b/reproschema/tests/test_process_choices.py @@ -8,6 +8,7 @@ from ..cli import main from ..redcap2reproschema import process_choices + def test_process_choices_numeric_codes(): # Test standard numeric codes with descriptions choices_str = "1, Male | 2, Female | 3, Other" @@ -19,6 +20,7 @@ def test_process_choices_numeric_codes(): ] assert value_types == ["xsd:integer"] + def test_process_choices_boolean(): # Test boolean choices (Yes/No) choices_str = "1, Yes | 0, No" @@ -29,17 +31,19 @@ def test_process_choices_boolean(): ] assert value_types == ["xsd:integer"] + def test_process_choices_special_characters(): # Test choices with special characters choices_str = "1, Option A | 2, \"Option B\" | 3, Option C with 'quotes'" choices, value_types = process_choices(choices_str, "special_chars") assert choices == [ {"name": {"en": "Option A"}, "value": 1}, - {"name": {"en": "\"Option B\""}, "value": 2}, + {"name": {"en": '"Option B"'}, "value": 2}, {"name": {"en": "Option C with 'quotes'"}, "value": 3}, ] assert value_types == ["xsd:integer"] + def test_process_choices_with_missing_values(): # Test choices with a missing value (commonly used for "Not applicable" or "Prefer not to say") choices_str = "1, Yes | 2, No | 99, Not applicable" @@ -51,6 +55,7 @@ def test_process_choices_with_missing_values(): ] assert value_types == ["xsd:integer"] + def test_process_choices_with_unicode(): # Test choices with Unicode characters (e.g., accents, symbols) choices_str = "1, Café | 2, Niño | 3, Résumé | 4, ☺" @@ -63,6 +68,7 @@ def test_process_choices_with_unicode(): ] assert value_types == ["xsd:integer"] + def test_process_choices_alpha_codes(): # Test alpha codes (e.g., categorical text codes) choices_str = "A, Apple | B, Banana | C, Cherry" @@ -74,6 +80,7 @@ def test_process_choices_alpha_codes(): ] assert sorted(value_types) == ["xsd:string"] + def test_process_choices_incomplete_values(): # Test choices with missing descriptions choices_str = "1, Yes | 2, | 3, No" @@ -85,6 +92,7 @@ def test_process_choices_incomplete_values(): ] assert value_types == ["xsd:integer"] + def test_process_choices_numeric_strings(): # Test numeric strings as values (e.g., not converted to integers) choices_str = "001, Option 001 | 002, Option 002 | 003, Option 003" @@ -96,6 +104,7 @@ def test_process_choices_numeric_strings(): ] assert sorted(value_types) == ["xsd:string"] + def test_process_choices_spaces_in_values(): # Test choices with spaces in values and names choices_str = "A B, Choice AB | C D, Choice CD" @@ -106,6 +115,7 @@ def test_process_choices_spaces_in_values(): ] assert sorted(value_types) == ["xsd:string"] + # Run pytest if script is called directly if __name__ == "__main__": pytest.main()