Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix strip() issue for choices #78

Merged
merged 2 commits into from
Nov 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 17 additions & 6 deletions reproschema/redcap2reproschema.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,10 @@ def process_choices(choices_str, field_name):
choices = []
choices_value_type = []
for ii, choice in enumerate(choices_str.split("|")):
parts = choice.split(", ")
choice = (
choice.strip()
) # Strip leading/trailing whitespace for each choice
parts = [p.strip() for p in choice.split(",")]

# Handle the case where the choice is something like "1,"
if len(parts) == 1:
Expand All @@ -213,14 +216,22 @@ def process_choices(choices_str, field_name):
)
parts = [ii, parts[0]]

# Try to convert the first part to an integer, if it fails, keep it as a string
try:
value = int(parts[0])
# Determine if value should be treated as an integer or string
if parts[0] == "0":
# Special case for "0", treat it as an integer
value = 0
choices_value_type.append("xsd:integer")
except ValueError:
elif parts[0].isdigit() and parts[0][0] == "0":
# If it has leading zeros, treat it as a string
value = parts[0]
choices_value_type.append("xsd:string")

else:
try:
value = int(parts[0])
choices_value_type.append("xsd:integer")
except ValueError:
value = parts[0]
choices_value_type.append("xsd:string")
choice_obj = {
"name": {"en": " ".join(parts[1:]).strip()},
"value": value,
Expand Down
121 changes: 121 additions & 0 deletions reproschema/tests/test_process_choices.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
import os
import shutil

import pytest
import yaml
from click.testing import CliRunner

from ..cli import main
from ..redcap2reproschema import process_choices


def test_process_choices_numeric_codes():
    """Check that plain integer codes with labels parse to integer-valued choices."""
    parsed, xsd_types = process_choices(
        "1, Male | 2, Female | 3, Other", "gender"
    )

    expected = [
        {"name": {"en": label}, "value": code}
        for code, label in ((1, "Male"), (2, "Female"), (3, "Other"))
    ]
    assert parsed == expected
    assert xsd_types == ["xsd:integer"]


def test_process_choices_boolean():
    """Check a Yes/No field coded as 1/0; the "0" code must come back as integer 0."""
    raw = "1, Yes | 0, No"
    parsed, xsd_types = process_choices(raw, "boolean_field")

    yes_choice = {"name": {"en": "Yes"}, "value": 1}
    no_choice = {"name": {"en": "No"}, "value": 0}
    assert parsed == [yes_choice, no_choice]
    assert xsd_types == ["xsd:integer"]


def test_process_choices_special_characters():
    """Check that double and single quotes inside labels are kept verbatim."""
    raw = "1, Option A | 2, \"Option B\" | 3, Option C with 'quotes'"
    parsed, xsd_types = process_choices(raw, "special_chars")

    labels = ["Option A", '"Option B"', "Option C with 'quotes'"]
    expected = [
        {"name": {"en": label}, "value": code}
        for code, label in enumerate(labels, start=1)
    ]
    assert parsed == expected
    assert xsd_types == ["xsd:integer"]


def test_process_choices_with_missing_values():
    """Check a non-contiguous sentinel code (99, "Not applicable") next to regular codes."""
    parsed, xsd_types = process_choices(
        "1, Yes | 2, No | 99, Not applicable", "missing_values"
    )

    code_to_label = {1: "Yes", 2: "No", 99: "Not applicable"}
    expected = [
        {"name": {"en": label}, "value": code}
        for code, label in code_to_label.items()
    ]
    assert parsed == expected
    assert xsd_types == ["xsd:integer"]


def test_process_choices_with_unicode():
    """Check that accented letters and symbols in labels pass through unchanged."""
    raw = "1, Café | 2, Niño | 3, Résumé | 4, ☺"
    parsed, xsd_types = process_choices(raw, "unicode_field")

    labels = ("Café", "Niño", "Résumé", "☺")
    expected = [
        {"name": {"en": label}, "value": position}
        for position, label in enumerate(labels, start=1)
    ]
    assert parsed == expected
    assert xsd_types == ["xsd:integer"]


def test_process_choices_alpha_codes():
    """Check that alphabetic codes are kept as string values typed xsd:string."""
    parsed, xsd_types = process_choices(
        "A, Apple | B, Banana | C, Cherry", "alpha_codes"
    )

    pairs = [("A", "Apple"), ("B", "Banana"), ("C", "Cherry")]
    expected = [
        {"name": {"en": label}, "value": code} for code, label in pairs
    ]
    assert parsed == expected
    assert sorted(xsd_types) == ["xsd:string"]


def test_process_choices_incomplete_values():
    """Check that a code with no label ("2,") gives a choice with an empty name."""
    raw = "1, Yes | 2, | 3, No"
    parsed, xsd_types = process_choices(raw, "incomplete_values")

    expected = []
    for code, label in ((1, "Yes"), (2, ""), (3, "No")):
        expected.append({"name": {"en": label}, "value": code})

    assert parsed == expected
    assert xsd_types == ["xsd:integer"]


def test_process_choices_numeric_strings():
    """Check that zero-padded numeric codes stay strings and are not cast to int."""
    codes = ("001", "002", "003")
    raw = "001, Option 001 | 002, Option 002 | 003, Option 003"
    parsed, xsd_types = process_choices(raw, "numeric_strings")

    expected = [
        {"name": {"en": "Option " + code}, "value": code} for code in codes
    ]
    assert parsed == expected
    assert sorted(xsd_types) == ["xsd:string"]


def test_process_choices_spaces_in_values():
    """Check that codes containing internal spaces are kept whole as string values."""
    parsed, xsd_types = process_choices(
        "A B, Choice AB | C D, Choice CD", "spaces_in_values"
    )

    first = {"name": {"en": "Choice AB"}, "value": "A B"}
    second = {"name": {"en": "Choice CD"}, "value": "C D"}
    assert parsed == [first, second]
    assert sorted(xsd_types) == ["xsd:string"]


# Run this file's tests when the script is executed directly.
if __name__ == "__main__":
    import sys

    # Collect only this file (not everything under the working directory),
    # forward any extra command-line arguments to pytest, and exit with
    # pytest's return code so a failing run is visible to the calling shell.
    # NOTE(review): this module uses package-relative imports, so direct
    # execution may need to go through the package (e.g. `python -m`) - confirm.
    sys.exit(pytest.main([__file__, *sys.argv[1:]]))