diff --git a/reproschema/redcap2reproschema.py b/reproschema/redcap2reproschema.py index f267a2f..e7f67c7 100644 --- a/reproschema/redcap2reproschema.py +++ b/reproschema/redcap2reproschema.py @@ -10,19 +10,43 @@ def clean_header(header): - return {k.lstrip("\ufeff"): v for k, v in header.items()} + cleaned_header = {} + for k, v in header.items(): + # Strip BOM, whitespace, and enclosing quotation marks if present + cleaned_key = k.lstrip("\ufeff").strip().strip('"') + cleaned_header[cleaned_key] = v + return cleaned_header def normalize_condition(condition_str): + # Regular expressions for various pattern replacements re_parentheses = re.compile(r"\(([0-9]*)\)") re_non_gt_lt_equal = re.compile(r"([^>|<])=") re_brackets = re.compile(r"\[([^\]]*)\]") + re_extra_spaces = re.compile(r"\s+") + re_double_quotes = re.compile(r'"') + re_or = re.compile(r"\bor\b") # Match 'or' as whole word + # Apply regex replacements condition_str = re_parentheses.sub(r"___\1", condition_str) condition_str = re_non_gt_lt_equal.sub(r"\1 ==", condition_str) - condition_str = condition_str.replace(" and ", " && ").replace(" or ", " || ") condition_str = re_brackets.sub(r" \1 ", condition_str) - return condition_str + + # Replace 'or' with '||', ensuring not to replace '||' + condition_str = re_or.sub("||", condition_str) + + # Replace 'and' with '&&' + condition_str = condition_str.replace(" and ", " && ") + + # Trim extra spaces and replace double quotes with single quotes + condition_str = re_extra_spaces.sub( + " ", condition_str + ).strip() # Reduce multiple spaces to a single space + condition_str = re_double_quotes.sub( + "'", condition_str + ) # Replace double quotes with single quotes + + return condition_str.strip() def process_visibility(data): @@ -42,7 +66,11 @@ def process_visibility(data): def parse_field_type_and_value(field, input_type_map): field_type = field.get("Field Type", "") - input_type = input_type_map.get(field_type, field_type) + # Check if field_type is 'yesno' 
and directly assign 'radio' as the input type + if field_type == "yesno": + input_type = "radio" # Directly set to 'radio' for 'yesno' fields + else: + input_type = input_type_map.get(field_type, field_type) # Original logic # Initialize the default value type as string value_type = "xsd:string" @@ -55,7 +83,8 @@ def parse_field_type_and_value(field, input_type_map): "time_": "xsd:time", "email": "xsd:string", "phone": "xsd:string", - } # todo: input_type="signature" + # No change needed here for 'yesno', as it's handled above + } # Get the validation type from the field, if available validation_type = field.get( @@ -91,10 +120,11 @@ def process_choices(field_type, choices_str): except ValueError: value = parts[0] - choice_obj = {"name": parts[1], "value": value} - if len(parts) == 3: - # Handle image url - choice_obj["schema:image"] = f"{parts[2]}.png" + choice_obj = {"name": " ".join(parts[1:]), "value": value} + # remove image for now + # if len(parts) == 3: + # # Handle image url + # choice_obj["image"] = f"{parts[2]}.png" choices.append(choice_obj) return choices @@ -156,7 +186,7 @@ def process_row( rowData = { "@context": schema_context_url, - "@type": "reproschema:Field", + "@type": "reproschema:Item", "@id": item_id, "prefLabel": item_id, "description": f"{item_id} of {form_name}", @@ -179,10 +209,7 @@ def process_row( } for key, value in field.items(): - if ( - schema_map.get(key) in ["question", "schema:description", "preamble"] - and value - ): + if schema_map.get(key) in ["question", "description", "preamble"] and value: rowData.update({schema_map[key]: parse_html(value)}) elif schema_map.get(key) == "allow" and value: @@ -214,21 +241,15 @@ def process_row( } ) - elif schema_map.get(key) == "visibility" and value: - condition = normalize_condition(value) - rowData.setdefault("visibility", []).append( - {"variableName": field["Variable / Field Name"], "isVis": condition} - ) - - elif key == "Identifier?" 
and value: - identifier_val = value.lower() == "y" - rowData.update( - { - schema_map[key]: [ - {"legalStandard": "unknown", "isIdentifier": identifier_val} - ] - } - ) + # elif key == "Identifier?" and value: + # identifier_val = value.lower() == "y" + # rowData.update( + # { + # schema_map[key]: [ + # {"legalStandard": "unknown", "isIdentifier": identifier_val} + # ] + # } + # ) elif key in additional_notes_list and value: notes_obj = {"source": "redcap", "column": key, "value": value} @@ -240,6 +261,7 @@ def process_row( def create_form_schema( abs_folder_path, schema_context_url, + redcap_version, form_name, activity_display_name, activity_description, @@ -259,7 +281,7 @@ def create_form_schema( "prefLabel": activity_display_name, "description": activity_description, "schemaVersion": "1.0.0-rc4", - "version": "0.0.1", + "version": redcap_version, "ui": { "order": unique_order, "addProperties": bl_list, @@ -267,8 +289,9 @@ def create_form_schema( }, } - if matrix_list: - json_ld["matrixInfo"] = matrix_list + # remove matrixInfo to pass validation + # if matrix_list: + # json_ld["matrixInfo"] = matrix_list if scores_list: json_ld["scoringLogic"] = scores_list @@ -296,6 +319,7 @@ def process_activities(activity_name, protocol_visibility_obj, protocol_order): def create_protocol_schema( abs_folder_path, schema_context_url, + redcap_version, protocol_name, protocol_display_name, protocol_description, @@ -307,31 +331,33 @@ def create_protocol_schema( "@context": schema_context_url, "@type": "reproschema:Protocol", "@id": f"{protocol_name}_schema", - "skos:prefLabel": protocol_display_name, - "skos:altLabel": f"{protocol_name}_schema", - "schema:description": protocol_description, - "schema:schemaVersion": "1.0.0-rc4", - "schema:version": "0.0.1", + "prefLabel": protocol_display_name, + "altLabel": f"{protocol_name}_schema", + "description": protocol_description, + "schemaVersion": "1.0.0-rc4", + "version": redcap_version, "ui": { "addProperties": [], - "order": 
protocol_order, + "order": [], "shuffle": False, }, } # Populate addProperties list for activity in protocol_order: + full_path = f"../activities/{activity}/{activity}_schema" add_property = { - "isAbout": f"../activities/{activity}/{activity}_schema", + "isAbout": full_path, "variableName": f"{activity}_schema", # Assuming activity name as prefLabel, update as needed "prefLabel": activity.replace("_", " ").title(), + "isVis": protocol_visibility_obj.get( + activity, True + ), # Default to True if not specified } protocol_schema["ui"]["addProperties"].append(add_property) - - # Add visibility if needed - if protocol_visibility_obj: - protocol_schema["ui"]["visibility"] = protocol_visibility_obj + # Add the full path to the order list + protocol_schema["ui"]["order"].append(full_path) protocol_dir = f"{abs_folder_path}/{protocol_name}" schema_file = f"{protocol_name}_schema" @@ -420,6 +446,7 @@ def redcap2reproschema(csv_file, yaml_file, schema_context_url=None): protocol_name = protocol.get("protocol_name") protocol_display_name = protocol.get("protocol_display_name") protocol_description = protocol.get("protocol_description") + redcap_version = protocol.get("redcap_version") if not protocol_name: raise ValueError("Protocol name not specified in the YAML file.") @@ -434,7 +461,7 @@ def redcap2reproschema(csv_file, yaml_file, schema_context_url=None): abs_folder_path = os.path.abspath(protocol_name) if schema_context_url is None: - schema_context_url = "https://raw.githubusercontent.com/ReproNim/reproschema/1.0.0-rc4/contexts/generic" + schema_context_url = "https://raw.githubusercontent.com/ReproNim/reproschema/efb74e155c09e13aa009ea04609ba4f1152fcbc6/contexts/reproschema_new" # Initialize variables schema_map = { @@ -451,7 +478,7 @@ def redcap2reproschema(csv_file, yaml_file, schema_context_url=None): "Choices, Calculations, OR Slider Labels": "choices", # column F "Branching Logic (Show field only if...)": "visibility", # column L "Custom Alignment": 
"customAlignment", # column N - "Identifier?": "identifiable", # column K + # "Identifier?": "identifiable", # column K "multipleChoice": "multipleChoice", "responseType": "@type", } @@ -515,6 +542,7 @@ def redcap2reproschema(csv_file, yaml_file, schema_context_url=None): create_form_schema( abs_folder_path, schema_context_url, + redcap_version, form_name, activity_display_name, activity_description, @@ -530,6 +558,7 @@ def redcap2reproschema(csv_file, yaml_file, schema_context_url=None): create_protocol_schema( abs_folder_path, schema_context_url, + redcap_version, protocol_name, protocol_display_name, protocol_description, diff --git a/reproschema/reproschema2redcap.py b/reproschema/reproschema2redcap.py index 3d03cf3..298c56e 100644 --- a/reproschema/reproschema2redcap.py +++ b/reproschema/reproschema2redcap.py @@ -142,6 +142,7 @@ def get_csv_data(dir_path): if protocol_dir.is_dir(): # Check for a _schema file in each directory schema_file = next(protocol_dir.glob("*_schema"), None) + print(f"Found schema file: {schema_file}") if schema_file: # Process the found _schema file parsed_protocol_json = read_json_file(schema_file) @@ -152,8 +153,14 @@ def get_csv_data(dir_path): normalized_relative_path = Path( relative_activity_path.lstrip("../") ) - activity_path = dir_path / normalized_relative_path - print(f"Processing activity {activity_path}") + + activity_path = ( + dir_path + / "activities" + / normalized_relative_path + / (normalized_relative_path.name + "_schema") + ) + parsed_activity_json = read_json_file(activity_path) if parsed_activity_json: diff --git a/reproschema/tests/test_redcap2reproschema.py b/reproschema/tests/test_redcap2reproschema.py index 2386a8c..bbf2df7 100644 --- a/reproschema/tests/test_redcap2reproschema.py +++ b/reproschema/tests/test_redcap2reproschema.py @@ -1,10 +1,10 @@ import os import shutil import pytest +import yaml from click.testing import CliRunner -from ..cli import main # Import the Click group +from ..cli import main -# 
Assuming your test files are located in a 'tests' directory CSV_FILE_NAME = "redcap_dict.csv" YAML_FILE_NAME = "redcap2rs.yaml" CSV_TEST_FILE = os.path.join( @@ -15,17 +15,34 @@ ) -def test_redcap2reproschema_success(): +def test_redcap2reproschema(tmpdir): runner = CliRunner() - with runner.isolated_filesystem(): - # Copy the test files to the isolated filesystem - shutil.copy(CSV_TEST_FILE, CSV_FILE_NAME) - shutil.copy(YAML_TEST_FILE, YAML_FILE_NAME) + temp_csv_file = tmpdir.join(CSV_FILE_NAME) + temp_yaml_file = tmpdir.join(YAML_FILE_NAME) + + shutil.copy(CSV_TEST_FILE, str(temp_csv_file)) # Convert to string + shutil.copy(YAML_TEST_FILE, str(temp_yaml_file)) # Convert to string + + # Change the current working directory to tmpdir + with tmpdir.as_cwd(): + # Read YAML to find the expected output directory name + with open(str(temp_yaml_file), "r") as file: # Convert to string + protocol = yaml.safe_load(file) + protocol_name = protocol.get("protocol_name", "").replace(" ", "_") - # Run the command within the isolated filesystem result = runner.invoke( - main, ["redcap2reproschema", CSV_FILE_NAME, YAML_FILE_NAME] + main, + [ + "redcap2reproschema", + str(temp_csv_file), + str(temp_yaml_file), + ], # Convert to string ) - print(result.output) - assert result.exit_code == 0 + + assert ( + result.exit_code == 0 + ), f"The command failed to execute successfully: {result.output}" + assert os.path.isdir( + protocol_name + ), f"Expected output directory '{protocol_name}' does not exist" diff --git a/reproschema/tests/test_redcap2rs_data/redcap2rs.yaml b/reproschema/tests/test_redcap2rs_data/redcap2rs.yaml index 3330f3b..95d4a9c 100644 --- a/reproschema/tests/test_redcap2rs_data/redcap2rs.yaml +++ b/reproschema/tests/test_redcap2rs_data/redcap2rs.yaml @@ -9,13 +9,8 @@ protocol_name: "test_redcap2rs" # Example: "My_Protocol" # This name will be displayed in the application. 
protocol_display_name: "redcap protocols" -# GitHub Repository Information: -# Create a GitHub repository named 'reproschema' to store your reproschema protocols. -# Replace 'your_github_username' with your actual GitHub username. -user_name: "yibeichan" -repo_name: "redcap2reproschema" # Recommended name; can be different if preferred. -repo_url: "https://github.com/{{user_name}}/{{repo_name}}" - # Protocol Description: # Provide a brief description of your protocol. protocol_description: "testing" # Example: "This protocol is for ..." + +redcap_version: "3.0.0" diff --git a/reproschema/tests/test_reproschema2redcap.py b/reproschema/tests/test_reproschema2redcap.py index f0a02ce..eff26b3 100644 --- a/reproschema/tests/test_reproschema2redcap.py +++ b/reproschema/tests/test_reproschema2redcap.py @@ -2,46 +2,41 @@ import pytest from click.testing import CliRunner from ..cli import main -from shutil import copytree +from shutil import copytree, rmtree from pathlib import Path import csv -def test_reproschema2redcap_success(): +def test_reproschema2redcap(tmpdir): runner = CliRunner() with runner.isolated_filesystem(): # Copy necessary test data into the isolated filesystem original_data_dir = os.path.join( - os.path.dirname(__file__), "test_rs2redcap_data" + os.path.dirname(__file__), "test_rs2redcap_data", "test_redcap2rs" ) copytree(original_data_dir, "input_data") - input_path = Path("input_data") # Using Path object - output_csv_path = "output.csv" + input_path = Path("input_data") + output_csv_path = os.path.join(tmpdir, "output.csv") - # Invoke the reproschema2redcap command result = runner.invoke( main, ["reproschema2redcap", str(input_path), output_csv_path] ) - # Print the output for debugging print(result.output) - # Assert the expected outcomes assert result.exit_code == 0 - # Check if the output CSV file has been created assert os.path.exists(output_csv_path) - # Read and print the contents of the CSV file with open(output_csv_path, "r", encoding="utf-8") 
as csv_file: reader = csv.reader(csv_file) csv_contents = list(reader) - print("CSV File Contents:") - for row in csv_contents: - print(row) - # Optionally, assert conditions about the CSV contents - # For example, assert that the file is not empty - assert len(csv_contents) > 0 + assert ( + len(csv_contents) > 1 + ) # More than one row indicates content beyond headers + + # Clean up temporary directory after use (optional) + # rmtree(tmpdir) diff --git a/templates/redcap2rs.yaml b/templates/redcap2rs.yaml index 1e1dbc3..4bbf78f 100644 --- a/templates/redcap2rs.yaml +++ b/templates/redcap2rs.yaml @@ -12,3 +12,5 @@ protocol_display_name: "Your protocol display name" # Protocol Description: # Provide a brief description of your protocol. protocol_description: "Description for your protocol" # Example: "This protocol is for ..." + +redcap_version: "x.y.z" # Example: "3.0.0"