Commit

Implemented suggestions by @ericearl and @perronea on 2019-11-07
GregConan committed Nov 12, 2019
1 parent 7b9fa85 commit f180125
Showing 5 changed files with 43 additions and 41 deletions.
30 changes: 20 additions & 10 deletions abcd2bids.py
@@ -4,7 +4,7 @@
 ABCD to BIDS CLI Wrapper
 Greg Conan: [email protected]
 Created 2019-05-29
-Last Updated 2019-11-06
+Last Updated 2019-11-11
 """

 ##################################
@@ -35,7 +35,12 @@
 STEP_NAMES = ["create_good_and_bad_series_table", "download_nda_data",
               "unpack_and_setup", "correct_jsons", "validate_bids"]

-PWD = os.getcwd()
+# Get path to directory containing abcd2bids.py
+try:
+    PWD = os.path.dirname(os.path.abspath(__file__))
+    assert os.access(os.path.join(PWD, "abcd2bids.py"), os.R_OK)
+except (OSError, AssertionError):
+    PWD = os.getcwd()

 # Constants: Default paths to scripts to call from this wrapper, and default
 # paths to folders in which to manipulate data
Expand All @@ -61,6 +66,9 @@ def main():
cli_args = get_cli_args()

def now():
"""
:return: String with date and time in readable format
"""
return datetime.now().strftime("%H:%M:%S on %b %d, %Y")

started_at = now()
@@ -190,7 +198,7 @@ def get_cli_args():
         default=STEP_NAMES[0],
         help=("Give the name of the step in the wrapper to start "
               "at, then run that step and every step after it. Here are the "
-              "names of each step, in order from first to last: "
+              "names of all of the steps, in order from first to last: "
               + ", ".join(STEP_NAMES))
     )

@@ -279,7 +287,7 @@ def validate_readable_file(param):
     """
     if not os.access(param, os.R_OK):
         raise argparse.ArgumentTypeError("Could not read file at " + param)
-    return param
+    return os.path.abspath(param)


 def try_to_create_and_prep_directory_at(folder_path, default_path, parser):
@@ -461,8 +469,7 @@ def create_good_and_bad_series_table(cli_args):
     with open(cli_args.qc) as qc_file:
         all_qc_data = pd.read_csv(
             qc_file, encoding="utf-8-sig", sep=",|\t", engine="python",
-            index_col=False, header=0, skiprows=[1],  # Skip row 2 (description)
-            usecols=lambda x: x != "ftq_notes"  # Skip unneeded column w/ commas
+            index_col=False, header=0, skiprows=[1]  # Skip row 2 (description)
         )
     qc_data = fix_split_col(all_qc_data.loc[all_qc_data["ftq_usable"] == 1])

@@ -503,6 +510,7 @@ def trim_end_columns(row):
         """
         ix = int(row.name)
         if not pd.isna(qc_df.at[ix, columns[-1]]):
+            qc_df.at[ix, columns[-3]] += " " + qc_df.at[ix, columns[-2]]
             qc_df.at[ix, columns[-2]] = qc_df.at[ix, columns[-1]]

     # Keep checking and dropping the last column of qc_df until it's valid
@@ -526,7 +534,8 @@ def download_nda_data(cli_args):
     with downloaded NDA data.
     :return: N/A
     """
-    subprocess.check_call(("python3", SERIES_TABLE_PARSER, cli_args.download))
+    subprocess.check_call(("python3", SERIES_TABLE_PARSER, cli_args.download,
+                           SPREADSHEET_DOWNLOAD))


 def unpack_and_setup(args):
@@ -584,10 +593,11 @@ def correct_jsons(cli_args):
     # sefm_eval_and_json_editor.py, and the vol*.nii.gz files
     sub_dirs = os.path.join(cli_args.output, "sub*")
     for json_path in iglob(os.path.join(sub_dirs, "*.json")):
-        print("Removing {}".format(json_path))
+        print("Removing .JSON file: {}".format(json_path))
         os.remove(json_path)
-    for vol_file in iglob(os.path.join(sub_dirs, "ses*", "fmap", "vol*.nii.gz")):
-        print("Removing {}".format(vol_file))
+    for vol_file in iglob(os.path.join(sub_dirs, "ses*",
+                                       "fmap", "vol*.nii.gz")):
+        print("Removing 'vol' file: {}".format(vol_file))
         os.remove(vol_file)

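Note: the `PWD` hunk above anchors the wrapper's default paths to the script's own directory instead of the caller's working directory. A minimal sketch of that pattern (the sibling filename below is illustrative only):

```python
# Sketch, not the committed code: resolve files relative to the script
# itself so the wrapper works no matter where the user launches it from.
import os

# __file__ always points at this script; os.getcwd() changes with the
# directory the user happens to run the command from.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))

# Hypothetical sibling file, for illustration:
config_path = os.path.join(SCRIPT_DIR, "abcd_dcm2bids.conf")
```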
2 changes: 1 addition & 1 deletion spreadsheets/README.md
@@ -5,4 +5,4 @@ This is where the spreadsheets belong:
 1. `abcd_fastqc01.txt` (the QC info)
 1. `ABCD_good_bad_series_table.csv` (generated after `data_gatherer` is run)

-The `abcd_fastqc01.txt` is currently available from the NDA. In a future update, this script will be updated to download that file and read from it instead of requiring the user to manually get the `abcd_fastqc01.txt` spreadsheet.
+The `abcd_fastqc01.txt` is currently available from the NDA.
18 changes: 11 additions & 7 deletions src/good_bad_series_parser.py
@@ -31,18 +31,22 @@

 # Get download folder name. Use one entered from command line if it exists;
 # otherwise use "./new_download". Added by Greg Conan 2019-06-06
-if len(sys.argv) is 2:
+if len(sys.argv) > 1:
     new_download_dir = sys.argv[1]
-else:
+    if len(sys.argv) > 2:  # added 2019-11-07
+        series_csv = sys.argv[2]
+    else:
+        series_csv = os.path.join(os.path.dirname(os.path.dirname(
+            os.path.abspath(__file__))), "spreadsheets",
+            "ABCD_good_and_bad_series_table.csv"
+        )
+elif len(sys.argv) < 1:
     new_download_dir = './new_download/'


 with open('abcd_download_log.csv','w') as f:
     writer = csv.writer(f)


 # Read csv as pandas dataframe, drop duplicate entries, sort, and group by subject/visit
-series_csv = "./spreadsheets/ABCD_good_and_bad_series_table.csv"
 series_df = pd.read_csv(series_csv)
 subject_dfs = series_df.drop_duplicates().sort_values(by='SeriesTime', ascending=True).groupby(["pGUID", "EventName"])

@@ -66,8 +70,8 @@
     visit = name[1]
     sub = "sub-" + sub_id.replace("_","")
     #print(sub_id, visit)
-    tgz_dir = './download' + sub + '/' + visit
-    new_tgz_dir = new_download_dir + sub + '/' + visit
+    tgz_dir = os.path.join('./download', sub, visit)
+    new_tgz_dir = os.path.join(new_download_dir, sub, visit)
     if os.path.exists(tgz_dir):
         print("{0} already exists from old download. Updating now.".format(name))
         #continue
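Note: since `sys.argv` always contains at least the script name, the `elif len(sys.argv) < 1:` branch above can never run, so invoking the parser with no arguments leaves `new_download_dir` (and `series_csv`) undefined. A hedged sketch of one way to default both optional arguments (not the committed code):

```python
# Sketch: explicit fallbacks for both optional command-line arguments,
# so neither name can be left unbound when the script gets no arguments.
import os
import sys

# Repository root, assuming this file lives in <repo>/src/ as in the diff.
REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

new_download_dir = sys.argv[1] if len(sys.argv) > 1 else "./new_download/"
series_csv = (sys.argv[2] if len(sys.argv) > 2 else
              os.path.join(REPO_ROOT, "spreadsheets",
                           "ABCD_good_and_bad_series_table.csv"))
```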
23 changes: 6 additions & 17 deletions src/sefm_eval_and_json_editor.py
@@ -7,16 +7,15 @@
 os.environ['FSLOUTPUTTYPE'] = 'NIFTI_GZ'

 # Last modified
-last_modified = "Created by Anders Perrone 3/21/2017. Last modified by Eric Earl 8/29/2018"
+last_modified = "Created by Anders Perrone 3/21/2017. Last modified by Greg Conan 11/11/2019"

 # Program description
 prog_descrip = """%(prog)s: sefm_eval pairs each of the pos/neg sefm and returns the pair that is most representative
 of the average by calculating the eta squared value for each sefm pair to the average sefm.""" + last_modified

-# Path to pwd/src, which contains compiled MATLAB ETA squared function; added
-# by Greg 2019-06-10 & updated 2019-06-13
-ETA_DIR = "./src/"
-
+# Path to abcd2bids/src, which contains compiled MATLAB ETA squared function; added
+# by Greg 2019-06-10 & updated 2019-11-07
+ETA_DIR = os.path.dirname(os.path.abspath(__file__))

 def read_bids_layout(layout, subject_list=None, collect_on_subject=False):
     """
@@ -183,8 +182,8 @@ def seperate_concatenated_fm(bids_layout, subject, session, fsl_dir):
     # Change by Greg 2019-06-10: Replaced hardcoded Exacloud path to
     # FSL_identity_transformation_matrix with relative path to that
     # file in the pwd
-    AP_flirt = [fsl_dir + "/flirt", "-out", AP_filename, "-in", AP_filename, "-ref", func_ref, "-applyxfm", "-init", "./src/FSL_identity_transformation_matrix.mat", "-interp", "spline"]
-    PA_flirt = [fsl_dir + "/flirt", "-out", PA_filename, "-in", PA_filename, "-ref", func_ref, "-applyxfm", "-init", "./src/FSL_identity_transformation_matrix.mat", "-interp", "spline"]
+    AP_flirt = [fsl_dir + "/flirt", "-out", AP_filename, "-in", AP_filename, "-ref", func_ref, "-applyxfm", "-init", os.path.join(ETA_DIR, "FSL_identity_transformation_matrix.mat"), "-interp", "spline"]
+    PA_flirt = [fsl_dir + "/flirt", "-out", PA_filename, "-in", PA_filename, "-ref", func_ref, "-applyxfm", "-init", os.path.join(ETA_DIR, "FSL_identity_transformation_matrix.mat"), "-interp", "spline"]

     subprocess.run(AP_flirt, env=os.environ)
     subprocess.run(PA_flirt, env=os.environ)
@@ -274,12 +273,6 @@ def main(argv=sys.argv):
     # for this script's usage of FSL_DIR...
     fsl_dir = args.fsl_dir + '/bin'

-    # This block was added by Greg Conan 2019-10-25
-    for json_file in os.scandir(args.output_dir):
-        json_path = json_file.path
-        if "json" in json_path:
-            shutil.copy2(json_path, args.bids_dir)
-
     # Load the bids layout
     layout = BIDSLayout(args.bids_dir)
     subsess = read_bids_layout(layout, subject_list=args.subject_list, collect_on_subject=args.collect)
@@ -310,29 +303,25 @@
             TX_metadata = layout.get_metadata(TX)
             #if 'T1' in TX_metadata['SeriesDescription']:

-            """
             if 'Philips' in TX_metadata['Manufacturer']:
                 insert_edit_json(TX_json, 'DwellTime', 0.00062771)
             if 'GE' in TX_metadata['Manufacturer']:
                 insert_edit_json(TX_json, 'DwellTime', 0.000536)
             if 'Siemens' in TX_metadata['Manufacturer']:
                 insert_edit_json(TX_json, 'DwellTime', 0.00051001152626)
-            """

         # add EffectiveEchoSpacing if it doesn't already exist
         fmap = layout.get(subject=subject, session=sessions, modality='fmap', extensions='.nii.gz')
         for sefm in [x.filename for x in fmap]:
             sefm_json = sefm.replace('.nii.gz', '.json')
             sefm_metadata = layout.get_metadata(sefm)

-            """
             if 'Philips' in sefm_metadata['Manufacturer']:
                 insert_edit_json(sefm_json, 'EffectiveEchoSpacing', 0.00062771)
             if 'GE' in sefm_metadata['Manufacturer']:
                 insert_edit_json(sefm_json, 'EffectiveEchoSpacing', 0.000536)
             if 'Siemens' in sefm_metadata['Manufacturer']:
                 insert_edit_json(sefm_json, 'EffectiveEchoSpacing', 0.00051001152626)
-            """

         # PE direction vs axis
         func = layout.get(subject=subject, session=sessions, modality='func', extensions='.nii.gz')
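Note: the `flirt` hunks above replace a cwd-relative `-init` matrix path with one built from `ETA_DIR`, the script's own directory. A sketch of that call pattern factored into a helper (the helper name and signature are illustrative, not from the repository):

```python
# Sketch: resample an image onto a reference grid with FSL flirt, using an
# identity transform stored next to this script so the call is cwd-independent.
import os
import subprocess

SRC_DIR = os.path.dirname(os.path.abspath(__file__))

def apply_identity_flirt(fsl_dir, image, func_ref):
    # Same flags as the diff: overwrite `image` in place, resampled onto
    # func_ref's grid with spline interpolation and an identity matrix.
    matrix = os.path.join(SRC_DIR, "FSL_identity_transformation_matrix.mat")
    subprocess.run([os.path.join(fsl_dir, "flirt"), "-out", image,
                    "-in", image, "-ref", func_ref, "-applyxfm",
                    "-init", matrix, "-interp", "spline"],
                   env=os.environ)
```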
11 changes: 5 additions & 6 deletions src/unpack_and_setup.sh
@@ -45,8 +45,6 @@ TGZDIR=$3 # Path to directory containing all .tgz for this subject's session
 participant=`echo ${SUB} | sed 's|sub-||'`
 session=`echo ${VISIT} | sed 's|ses-||'`

-echo "ScratchSpaceDir=${ScratchSpaceDir}, ROOT_BIDSINPUT=${ROOT_BIDSINPUT}";
-
 date
 hostname
 echo ${SLURM_JOB_ID}
@@ -85,25 +83,26 @@ done
 mkdir ${TempSubjectDir}/BIDS_unprocessed
 echo ${participant}
 echo `date`" :RUNNING dcm2bids"
-dcm2bids -d ${TempSubjectDir}/DCMs/${SUB} -p ${participant} -s ${session} -c ./abcd_dcm2bids.conf -o ${TempSubjectDir}/BIDS_unprocessed --forceDcm2niix --clobber
+ABCD2BIDS_DIR="$(dirname "$ROOT_BIDSINPUT")"
+dcm2bids -d ${TempSubjectDir}/DCMs/${SUB} -p ${participant} -s ${session} -c ${ABCD2BIDS_DIR}/abcd_dcm2bids.conf -o ${TempSubjectDir}/BIDS_unprocessed --forceDcm2niix --clobber

 echo `date`" :CHECKING BIDS ORDERING OF EPIs"
 if [[ -e ${TempSubjectDir}/BIDS_unprocessed/${SUB}/${VISIT}/func ]]; then
-    if [[ `./src/run_order_fix.py ${TempSubjectDir}/BIDS_unprocessed ${TempSubjectDir}/bids_order_error.json ${TempSubjectDir}/bids_order_map.json --all --subject ${SUB}` == ${SUB} ]]; then
+    if [[ `${ABCD2BIDS_DIR}/src/run_order_fix.py ${TempSubjectDir}/BIDS_unprocessed ${TempSubjectDir}/bids_order_error.json ${TempSubjectDir}/bids_order_map.json --all --subject ${SUB}` == ${SUB} ]]; then
         echo BIDS correctly ordered
     else
         echo ERROR: BIDS incorrectly ordered even after running run_order_fix.py
         exit
     fi
 else
-    echo ERROR: No functional images found T1 only processing not yet enabeled
+    echo "No functional images found for subject ${SUB}. Skipping sefm_eval_and_json_editor to copy and rename source data."
     exit
 fi

 # select best fieldmap and update sidecar jsons
 echo `date`" :RUNNING SEFM SELECTION AND EDITING SIDECAR JSONS"
 if [ -d ${TempSubjectDir}/BIDS_unprocessed/${SUB}/${VISIT}/fmap ]; then
-    ./src/sefm_eval_and_json_editor.py ${TempSubjectDir}/BIDS_unprocessed/${SUB} ${FSL_DIR} ${MRE_DIR} --participant-label=${participant} --output_dir $ROOT_BIDSINPUT
+    ${ABCD2BIDS_DIR}/src/sefm_eval_and_json_editor.py ${TempSubjectDir}/BIDS_unprocessed/${SUB} ${FSL_DIR} ${MRE_DIR} --participant-label=${participant} --output_dir $ROOT_BIDSINPUT
 fi

 rm ${TempSubjectDir}/BIDS_unprocessed/${SUB}/ses-baselineYear1Arm1/fmap/*dir-both* 2> /dev/null || true
