Skip to content

Commit

Permalink
Merge pull request #37 from yibeichan/master
Browse files Browse the repository at this point in the history
update conversion tools and their tests
  • Loading branch information
yibeichan authored Apr 29, 2024
2 parents 9d3e929 + ab7c051 commit 105489b
Show file tree
Hide file tree
Showing 6 changed files with 126 additions and 81 deletions.
119 changes: 74 additions & 45 deletions reproschema/redcap2reproschema.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,19 +10,43 @@


def clean_header(header):
return {k.lstrip("\ufeff"): v for k, v in header.items()}
cleaned_header = {}
for k, v in header.items():
# Strip BOM, whitespace, and enclosing quotation marks if present
cleaned_key = k.lstrip("\ufeff").strip().strip('"')
cleaned_header[cleaned_key] = v
return cleaned_header


def normalize_condition(condition_str):
# Regular expressions for various pattern replacements
re_parentheses = re.compile(r"\(([0-9]*)\)")
re_non_gt_lt_equal = re.compile(r"([^>|<])=")
re_brackets = re.compile(r"\[([^\]]*)\]")
re_extra_spaces = re.compile(r"\s+")
re_double_quotes = re.compile(r'"')
re_or = re.compile(r"\bor\b") # Match 'or' as whole word

# Apply regex replacements
condition_str = re_parentheses.sub(r"___\1", condition_str)
condition_str = re_non_gt_lt_equal.sub(r"\1 ==", condition_str)
condition_str = condition_str.replace(" and ", " && ").replace(" or ", " || ")
condition_str = re_brackets.sub(r" \1 ", condition_str)
return condition_str

# Replace the whole word 'or' with '||' (the word-boundary match leaves any existing '||' untouched)
condition_str = re_or.sub("||", condition_str)

# Replace 'and' with '&&'
condition_str = condition_str.replace(" and ", " && ")

# Trim extra spaces and replace double quotes with single quotes
condition_str = re_extra_spaces.sub(
" ", condition_str
).strip() # Reduce multiple spaces to a single space
condition_str = re_double_quotes.sub(
"'", condition_str
) # Replace double quotes with single quotes

return condition_str.strip()


def process_visibility(data):
Expand All @@ -42,7 +66,11 @@ def process_visibility(data):

def parse_field_type_and_value(field, input_type_map):
field_type = field.get("Field Type", "")
input_type = input_type_map.get(field_type, field_type)
# Check if field_type is 'yesno' and directly assign 'radio' as the input type
if field_type == "yesno":
input_type = "radio" # Directly set to 'radio' for 'yesno' fields
else:
input_type = input_type_map.get(field_type, field_type) # Original logic

# Initialize the default value type as string
value_type = "xsd:string"
Expand All @@ -55,7 +83,8 @@ def parse_field_type_and_value(field, input_type_map):
"time_": "xsd:time",
"email": "xsd:string",
"phone": "xsd:string",
} # todo: input_type="signature"
# No change needed here for 'yesno', as it's handled above
}

# Get the validation type from the field, if available
validation_type = field.get(
Expand Down Expand Up @@ -91,10 +120,11 @@ def process_choices(field_type, choices_str):
except ValueError:
value = parts[0]

choice_obj = {"name": parts[1], "value": value}
if len(parts) == 3:
# Handle image url
choice_obj["schema:image"] = f"{parts[2]}.png"
choice_obj = {"name": " ".join(parts[1:]), "value": value}
# remove image for now
# if len(parts) == 3:
# # Handle image url
# choice_obj["image"] = f"{parts[2]}.png"
choices.append(choice_obj)
return choices

Expand Down Expand Up @@ -156,7 +186,7 @@ def process_row(

rowData = {
"@context": schema_context_url,
"@type": "reproschema:Field",
"@type": "reproschema:Item",
"@id": item_id,
"prefLabel": item_id,
"description": f"{item_id} of {form_name}",
Expand All @@ -179,10 +209,7 @@ def process_row(
}

for key, value in field.items():
if (
schema_map.get(key) in ["question", "schema:description", "preamble"]
and value
):
if schema_map.get(key) in ["question", "description", "preamble"] and value:
rowData.update({schema_map[key]: parse_html(value)})

elif schema_map.get(key) == "allow" and value:
Expand Down Expand Up @@ -214,21 +241,15 @@ def process_row(
}
)

elif schema_map.get(key) == "visibility" and value:
condition = normalize_condition(value)
rowData.setdefault("visibility", []).append(
{"variableName": field["Variable / Field Name"], "isVis": condition}
)

elif key == "Identifier?" and value:
identifier_val = value.lower() == "y"
rowData.update(
{
schema_map[key]: [
{"legalStandard": "unknown", "isIdentifier": identifier_val}
]
}
)
# elif key == "Identifier?" and value:
# identifier_val = value.lower() == "y"
# rowData.update(
# {
# schema_map[key]: [
# {"legalStandard": "unknown", "isIdentifier": identifier_val}
# ]
# }
# )

elif key in additional_notes_list and value:
notes_obj = {"source": "redcap", "column": key, "value": value}
Expand All @@ -240,6 +261,7 @@ def process_row(
def create_form_schema(
abs_folder_path,
schema_context_url,
redcap_version,
form_name,
activity_display_name,
activity_description,
Expand All @@ -259,16 +281,17 @@ def create_form_schema(
"prefLabel": activity_display_name,
"description": activity_description,
"schemaVersion": "1.0.0-rc4",
"version": "0.0.1",
"version": redcap_version,
"ui": {
"order": unique_order,
"addProperties": bl_list,
"shuffle": False,
},
}

if matrix_list:
json_ld["matrixInfo"] = matrix_list
# remove matrixInfo to pass validation
# if matrix_list:
# json_ld["matrixInfo"] = matrix_list
if scores_list:
json_ld["scoringLogic"] = scores_list

Expand Down Expand Up @@ -296,6 +319,7 @@ def process_activities(activity_name, protocol_visibility_obj, protocol_order):
def create_protocol_schema(
abs_folder_path,
schema_context_url,
redcap_version,
protocol_name,
protocol_display_name,
protocol_description,
Expand All @@ -307,31 +331,33 @@ def create_protocol_schema(
"@context": schema_context_url,
"@type": "reproschema:Protocol",
"@id": f"{protocol_name}_schema",
"skos:prefLabel": protocol_display_name,
"skos:altLabel": f"{protocol_name}_schema",
"schema:description": protocol_description,
"schema:schemaVersion": "1.0.0-rc4",
"schema:version": "0.0.1",
"prefLabel": protocol_display_name,
"altLabel": f"{protocol_name}_schema",
"description": protocol_description,
"schemaVersion": "1.0.0-rc4",
"version": redcap_version,
"ui": {
"addProperties": [],
"order": protocol_order,
"order": [],
"shuffle": False,
},
}

# Populate addProperties list
for activity in protocol_order:
full_path = f"../activities/{activity}/{activity}_schema"
add_property = {
"isAbout": f"../activities/{activity}/{activity}_schema",
"isAbout": full_path,
"variableName": f"{activity}_schema",
# Assuming activity name as prefLabel, update as needed
"prefLabel": activity.replace("_", " ").title(),
"isVis": protocol_visibility_obj.get(
activity, True
), # Default to True if not specified
}
protocol_schema["ui"]["addProperties"].append(add_property)

# Add visibility if needed
if protocol_visibility_obj:
protocol_schema["ui"]["visibility"] = protocol_visibility_obj
# Add the full path to the order list
protocol_schema["ui"]["order"].append(full_path)

protocol_dir = f"{abs_folder_path}/{protocol_name}"
schema_file = f"{protocol_name}_schema"
Expand Down Expand Up @@ -420,6 +446,7 @@ def redcap2reproschema(csv_file, yaml_file, schema_context_url=None):
protocol_name = protocol.get("protocol_name")
protocol_display_name = protocol.get("protocol_display_name")
protocol_description = protocol.get("protocol_description")
redcap_version = protocol.get("redcap_version")

if not protocol_name:
raise ValueError("Protocol name not specified in the YAML file.")
Expand All @@ -434,7 +461,7 @@ def redcap2reproschema(csv_file, yaml_file, schema_context_url=None):
abs_folder_path = os.path.abspath(protocol_name)

if schema_context_url is None:
schema_context_url = "https://raw.githubusercontent.com/ReproNim/reproschema/1.0.0-rc4/contexts/generic"
schema_context_url = "https://raw.githubusercontent.com/ReproNim/reproschema/efb74e155c09e13aa009ea04609ba4f1152fcbc6/contexts/reproschema_new"

# Initialize variables
schema_map = {
Expand All @@ -451,7 +478,7 @@ def redcap2reproschema(csv_file, yaml_file, schema_context_url=None):
"Choices, Calculations, OR Slider Labels": "choices", # column F
"Branching Logic (Show field only if...)": "visibility", # column L
"Custom Alignment": "customAlignment", # column N
"Identifier?": "identifiable", # column K
# "Identifier?": "identifiable", # column K
"multipleChoice": "multipleChoice",
"responseType": "@type",
}
Expand Down Expand Up @@ -515,6 +542,7 @@ def redcap2reproschema(csv_file, yaml_file, schema_context_url=None):
create_form_schema(
abs_folder_path,
schema_context_url,
redcap_version,
form_name,
activity_display_name,
activity_description,
Expand All @@ -530,6 +558,7 @@ def redcap2reproschema(csv_file, yaml_file, schema_context_url=None):
create_protocol_schema(
abs_folder_path,
schema_context_url,
redcap_version,
protocol_name,
protocol_display_name,
protocol_description,
Expand Down
11 changes: 9 additions & 2 deletions reproschema/reproschema2redcap.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ def get_csv_data(dir_path):
if protocol_dir.is_dir():
# Check for a _schema file in each directory
schema_file = next(protocol_dir.glob("*_schema"), None)
print(f"Found schema file: {schema_file}")
if schema_file:
# Process the found _schema file
parsed_protocol_json = read_json_file(schema_file)
Expand All @@ -152,8 +153,14 @@ def get_csv_data(dir_path):
normalized_relative_path = Path(
relative_activity_path.lstrip("../")
)
activity_path = dir_path / normalized_relative_path
print(f"Processing activity {activity_path}")

activity_path = (
dir_path
/ "activities"
/ normalized_relative_path
/ (normalized_relative_path.name + "_schema")
)

parsed_activity_json = read_json_file(activity_path)

if parsed_activity_json:
Expand Down
39 changes: 28 additions & 11 deletions reproschema/tests/test_redcap2reproschema.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import os
import shutil
import pytest
import yaml
from click.testing import CliRunner
from ..cli import main # Import the Click group
from ..cli import main

# Assuming your test files are located in a 'tests' directory
CSV_FILE_NAME = "redcap_dict.csv"
YAML_FILE_NAME = "redcap2rs.yaml"
CSV_TEST_FILE = os.path.join(
Expand All @@ -15,17 +15,34 @@
)


def test_redcap2reproschema_success():
def test_redcap2reproschema(tmpdir):
runner = CliRunner()

with runner.isolated_filesystem():
# Copy the test files to the isolated filesystem
shutil.copy(CSV_TEST_FILE, CSV_FILE_NAME)
shutil.copy(YAML_TEST_FILE, YAML_FILE_NAME)
temp_csv_file = tmpdir.join(CSV_FILE_NAME)
temp_yaml_file = tmpdir.join(YAML_FILE_NAME)

shutil.copy(CSV_TEST_FILE, str(temp_csv_file)) # Convert to string
shutil.copy(YAML_TEST_FILE, str(temp_yaml_file)) # Convert to string

# Change the current working directory to tmpdir
with tmpdir.as_cwd():
# Read YAML to find the expected output directory name
with open(str(temp_yaml_file), "r") as file: # Convert to string
protocol = yaml.safe_load(file)
protocol_name = protocol.get("protocol_name", "").replace(" ", "_")

# Run the command within the isolated filesystem
result = runner.invoke(
main, ["redcap2reproschema", CSV_FILE_NAME, YAML_FILE_NAME]
main,
[
"redcap2reproschema",
str(temp_csv_file),
str(temp_yaml_file),
], # Convert to string
)
print(result.output)
assert result.exit_code == 0

assert (
result.exit_code == 0
), f"The command failed to execute successfully: {result.output}"
assert os.path.isdir(
protocol_name
), f"Expected output directory '{protocol_name}' does not exist"
9 changes: 2 additions & 7 deletions reproschema/tests/test_redcap2rs_data/redcap2rs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,8 @@ protocol_name: "test_redcap2rs" # Example: "My_Protocol"
# This name will be displayed in the application.
protocol_display_name: "redcap protocols"

# GitHub Repository Information:
# Create a GitHub repository named 'reproschema' to store your reproschema protocols.
# Replace 'your_github_username' with your actual GitHub username.
user_name: "yibeichan"
repo_name: "redcap2reproschema" # Recommended name; can be different if preferred.
repo_url: "https://github.com/{{user_name}}/{{repo_name}}"

# Protocol Description:
# Provide a brief description of your protocol.
protocol_description: "testing" # Example: "This protocol is for ..."

redcap_version: "3.0.0"
Loading

0 comments on commit 105489b

Please sign in to comment.