diff --git a/DAL_ABCD_merged_pcqcinfo_importer.m b/DAL_ABCD_merged_pcqcinfo_importer.m deleted file mode 100644 index e53ab63..0000000 --- a/DAL_ABCD_merged_pcqcinfo_importer.m +++ /dev/null @@ -1,122 +0,0 @@ -function data = DAL_ABCD_merged_pcqcinfo_importer(filename) -%% Import data from text file. -% Script for importing data from the following text file: -% -% spreadsheets/DAL_ABCD_merged_pcqcinfo.csv -% -% To extend the code to different selected data or a different text file, -% generate a function instead of a script. - -% Auto-generated by MATLAB on 2018/08/14 13:31:43 - -%% Initialize variables. -delimiter = ','; -startRow = 2; - -%% Read columns of data as text: -% For more information, see the TEXTSCAN documentation. -formatSpec = '%s%s%s%s%s%s%s%s%s%s%s%*s%*s%*s%*s%*s%*s%*s%*s%*s%s%s%s%*s%*s%*s%s%s%s%s%s%*s%s%*s%*s%*s%*s%*s%*s%*s%*s%*s%s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%s%s%[^\n\r]'; - -%% Open the text file. -fileID = fopen(filename,'r'); - -%% Read columns of data according to the format. -% This call is based on the structure of the file used to generate this -% code. If an error occurs for a different file, try regenerating the code -% from the Import Tool. -dataArray = textscan(fileID, formatSpec, 'Delimiter', delimiter, 'TextType', 'string', 'HeaderLines' ,startRow-1, 'ReturnOnError', false, 'EndOfLine', '\r\n'); - -%% Close the text file. -fclose(fileID); - -%% Convert the contents of columns containing numeric text to numbers. -% Replace non-numeric text with NaN. -raw = repmat({''},length(dataArray{1}),length(dataArray)-1); -for col=1:length(dataArray)-1 - raw(1:length(dataArray{col}),col) = mat2cell(dataArray{col}, ones(length(dataArray{col}), 1)); -end -numericData = NaN(size(dataArray{1},1),size(dataArray,2)); - -for col=[9,11,12,17,18,19,20,21] - % Converts text in the input cell array to numbers. Replaced non-numeric - % text with NaN. - rawData = dataArray{col}; - for row=1:size(rawData, 1) - % Create a regular expression to detect and remove non-numeric prefixes and - % suffixes. - regexstr = '(?.*?)(?([-]*(\d+[\,]*)+[\.]{0,1}\d*[eEdD]{0,1}[-+]*\d*[i]{0,1})|([-]*(\d+[\,]*)*[\.]{1,1}\d+[eEdD]{0,1}[-+]*\d*[i]{0,1}))(?.*)'; - try - result = regexp(rawData(row), regexstr, 'names'); - numbers = result.numbers; - - % Detected commas in non-thousand locations. - invalidThousandsSeparator = false; - if numbers.contains(',') - thousandsRegExp = '^\d+?(\,\d{3})*\.{0,1}\d*$'; - if isempty(regexp(numbers, thousandsRegExp, 'once')) - numbers = NaN; - invalidThousandsSeparator = true; - end - end - % Convert numeric text to numbers. - if ~invalidThousandsSeparator - numbers = textscan(char(strrep(numbers, ',', '')), '%f'); - numericData(row, col) = numbers{1}; - raw{row, col} = numbers{1}; - end - catch - raw{row, col} = rawData{row}; - end - end -end - - -%% Split data into numeric and string columns. 
-rawNumericColumns = raw(:, [9,11,12,17,18,19,20,21]); -rawStringColumns = string(raw(:, [1,2,3,4,5,6,7,8,10,13,14,15,16,22,23])); - - -%% Replace non-numeric cells with NaN -R = cellfun(@(x) ~isnumeric(x) && ~islogical(x),rawNumericColumns); % Find non-numeric cells -rawNumericColumns(R) = {NaN}; % Replace non-numeric cells - -%% Make sure any text containing <undefined> is properly converted to an <undefined> categorical -for catIdx = [1,3,4,5,6,7,8,9,10,11,12,14,15] - idx = (rawStringColumns(:, catIdx) == ""); - rawStringColumns(idx, catIdx) = ""; -end - -%% Create output variable -DALABCDmergedpcqcinfo = table; -DALABCDmergedpcqcinfo.pGUID = categorical(rawStringColumns(:, 1)); -DALABCDmergedpcqcinfo.VisitID = rawStringColumns(:, 2); -DALABCDmergedpcqcinfo.EventName = categorical(rawStringColumns(:, 3)); -DALABCDmergedpcqcinfo.SessionType = categorical(rawStringColumns(:, 4)); -DALABCDmergedpcqcinfo.SiteName = categorical(rawStringColumns(:, 5)); -DALABCDmergedpcqcinfo.SeriesType = categorical(rawStringColumns(:, 6)); -DALABCDmergedpcqcinfo.ABCD_Compliant = categorical(rawStringColumns(:, 7)); -DALABCDmergedpcqcinfo.SeriesDescription = categorical(rawStringColumns(:, 8)); -DALABCDmergedpcqcinfo.Completed = cell2mat(rawNumericColumns(:, 1)); -DALABCDmergedpcqcinfo.AdditionalInfo = categorical(rawStringColumns(:, 9)); -DALABCDmergedpcqcinfo.NumberOfFiles = cell2mat(rawNumericColumns(:, 2)); -DALABCDmergedpcqcinfo.SeriesNumber = cell2mat(rawNumericColumns(:, 3)); -DALABCDmergedpcqcinfo.Manufacturer = categorical(rawStringColumns(:, 10)); -DALABCDmergedpcqcinfo.SequenceName = categorical(rawStringColumns(:, 11)); -DALABCDmergedpcqcinfo.StudyInstanceUID = categorical(rawStringColumns(:, 12)); -DALABCDmergedpcqcinfo.SeriesInstanceUID = rawStringColumns(:, 13); -DALABCDmergedpcqcinfo.StudyDate = cell2mat(rawNumericColumns(:, 4)); -DALABCDmergedpcqcinfo.StudyTime = cell2mat(rawNumericColumns(:, 5)); -DALABCDmergedpcqcinfo.SeriesTime = cell2mat(rawNumericColumns(:, 6)); -DALABCDmergedpcqcinfo.SiteID = cell2mat(rawNumericColumns(:, 7)); -DALABCDmergedpcqcinfo.QC = cell2mat(rawNumericColumns(:, 8)); -DALABCDmergedpcqcinfo.stype = categorical(rawStringColumns(:, 14)); -DALABCDmergedpcqcinfo.ManufacturersModelName = categorical(rawStringColumns(:, 15)); - -%% Clear temporary variables -clearvars filename delimiter startRow formatSpec fileID dataArray ans raw col numericData rawData row regexstr result numbers invalidThousandsSeparator thousandsRegExp rawNumericColumns rawStringColumns R catIdx idx; - -%% -data = DALABCDmergedpcqcinfo; -QC_flag = data.QC == 1; -cleandata_idx = QC_flag; - diff --git a/FSL_identity_transformation_matrix.mat b/FSL_identity_transformation_matrix.mat deleted file mode 100644 index 00451f0..0000000 --- a/FSL_identity_transformation_matrix.mat +++ /dev/null @@ -1,4 +0,0 @@ -1.000000 0.000000 0.000000 0.000000 -0.000000 1.000000 0.000000 0.000000 -0.000000 0.000000 1.000000 0.000000 -0.000000 0.000000 0.000000 1.000000 diff --git a/correct_jsons.py b/correct_jsons.py deleted file mode 100755 index 958a414..0000000 --- a/correct_jsons.py +++ /dev/null @@ -1,115 +0,0 @@ -#! /usr/bin/env python3 - -import json,os,sys,argparse,re - -__doc__ = \ -""" -This script is meant to correct ABCD BIDS input data to -conform to the Official BIDS Validator. 
-""" -__version__ = "1.0.0" - -def read_json_field(json_path, json_field): - - with open(json_path, 'r') as f: - data = json.load(f) - - if json_field in data: - return data[json_field] - else: - return None - -def remove_json_field(json_path, json_field): - - with open(json_path, 'r+') as f: - data = json.load(f) - - if json_field in data: - del data[json_field] - f.seek(0) - json.dump(data, f, indent=4) - f.truncate() - flag = True - else: - flag = False - - return flag - -def update_json_field(json_path, json_field, value): - - with open(json_path, 'r+') as f: - data = json.load(f) - - if json_field in data: - flag = True - else: - flag = False - - data[json_field] = value - f.seek(0) - json.dump(data, f, indent=4) - f.truncate() - - return flag - -def main(argv=sys.argv): - parser = argparse.ArgumentParser( - prog='correct_jsons.py', - description=__doc__, - usage='%(prog)s BIDS_DIR' - ) - parser.add_argument( - 'BIDS_DIR', - help='Path to the input BIDS dataset root directory. Read more ' - 'about the BIDS standard in the link in the description. It is ' - 'recommended to use Dcm2Bids to convert from participant dicoms ' - 'into BIDS format.' - ) - parser.add_argument( - '--version', '-v', action='version', version='%(prog)s ' + __version__ - ) - - args = parser.parse_args() - - for root, dirs, files in os.walk(args.BIDS_DIR): - for filename in files: - fn, ext = os.path.splitext(filename) - - if ext == '.json': - json_path = os.path.join(root, filename) - - with open(json_path, 'r') as f: - data = json.load(f) - - # If TotalReadoutTime is missing from fmap JSON - if ('fmap' in root or 'func' in root) and 'TotalReadoutTime' not in data: - # Then check for EffectiveEchoSpacing and ReconMatrixPE - if 'EffectiveEchoSpacing' in data and 'ReconMatrixPE' in data: - # If both are present then update the JSON with a calculated TotalReadoutTime - EffectiveEchoSpacing = data['EffectiveEchoSpacing'] - ReconMatrixPE = data['ReconMatrixPE'] - # Calculated TotalReadoutTime = EffectiveEchoSpacing * (ReconMatrixPE - 1) - TotalReadoutTime = EffectiveEchoSpacing * (ReconMatrixPE - 1) - update_json_field(json_path, 'TotalReadoutTime', TotalReadoutTime) - - # If EffectiveEchoSpacing is missing print error - if 'EffectiveEchoSpacing' not in data: - print(json_path + ': No EffectiveEchoSpacing') - - # If ReconMatrixPE is missing print error - if 'ReconMatrixPE' not in data: - print(json_path + ': No ReconMatrixPE') - - # Find the IntendedFor field that is a non-empty list - if 'fmap' in root and 'IntendedFor' in data and len(data['IntendedFor']) > 0: - # Regular expression replace all paths in that list with a relative path to ses-SESSION - intended_list = data['IntendedFor'] - corrected_intended_list = [re.sub(r'.*(ses-.*_ses-.+)','\g<1>',entry) for entry in intended_list] - update_json_field(json_path, 'IntendedFor', corrected_intended_list) - - # Remove SliceTiming field from func JSONs - if 'func' in root and 'SliceTiming' in data: - remove_json_field(json_path, 'SliceTiming') - -if __name__ == "__main__": - sys.exit(main()) diff --git a/data_gatherer.m b/data_gatherer.m deleted file mode 100755 index 84df85f..0000000 --- a/data_gatherer.m +++ /dev/null @@ -1,57 +0,0 @@ -%% variable initialization - -clear variables -load mapping.mat - -QC_file = 'spreadsheets/DAL_ABCD_QC_merged_pcqcinfo.csv'; -image03_file = 'spreadsheets/image03.txt'; -output_csv = 'spreadsheets/ABCD_good_and_bad_series_table.csv'; - -%% QC parsing - -data = DAL_ABCD_merged_pcqcinfo_importer(QC_file); - -for i = 1:height(data) - if 
data.SeriesTime(i) < 100000 - data.timestamp{i} = ['0' num2str(floor(data.SeriesTime(i)))]; - else - data.timestamp{i} = num2str(floor(data.SeriesTime(i))); - end -end -data.CleanFlag = cleandata_idx; - -%% image03 parsing - -image03 = image03_importer(image03_file); - -for i = 1:height(image03) - image03.timestamp{i} = image03.image_file{i}(end-10:end-5); -end - -image03_1 = innerjoin(image03,map_image03_qc); -image03_2 = innerjoin(image03_1,map_image03_descriptor); - -image03_2 = sortrows(image03_2,'image_file','ascend'); -image03_2.Properties.VariableNames{1} = 'pGUID'; -image03_2.Properties.VariableNames{9} = 'EventName'; - -%% table joins - -data_1 = innerjoin(data,map_qc_descriptor); -data_1 = sortrows(data_1,'pGUID','ascend'); - -% Hack to deal with quotations around strings in table -foo = image03_2.SeriesType; -[l,w] = size(foo); -for i=1:l - foo(i) = strjoin(['"' string(foo(i)) '"'],''); -end -image03_2.SeriesType = foo; - -data_2 = innerjoin(data_1,image03_2); - - -%% final output table (path hardcoded) - -writetable(data_2,output_csv); - diff --git a/eta_squared b/eta_squared deleted file mode 100755 index a3a8f1a..0000000 Binary files a/eta_squared and /dev/null differ diff --git a/good_bad_series_parser.py b/good_bad_series_parser.py deleted file mode 100755 index 11f355d..0000000 --- a/good_bad_series_parser.py +++ /dev/null @@ -1,214 +0,0 @@ -#! /usr/bin/env python - - -import pandas as pd -import csv -import subprocess -import os - -####################################### -# Read in ABCD_good_and_bad_series_table.csv that is continually updated -# Create a log of all subjects that have been checked -# If they are not able to be processed report what is wrong with them -# -####################################### - - -# Logging variables -num_sub_visits = 0 -num_siemens = 0 -num_ge = 0 -num_philips = 0 -num_rsfmri = 0 -num_sst = 0 -num_mid = 0 -num_nback = 0 -num_t2 = 0 -num_invalid = 0 -num_valid = 0 -num_subjects_after_checks = 0 -with open('abcd_download_log.csv','w') as f: - writer = csv.writer(f) - - - # Read csv as pandas dataframe, drop duplicate entries, sort, and group by subject/visit - series_csv = "./spreadsheets/ABCD_good_and_bad_series_table.csv" - series_df = pd.read_csv(series_csv) - subject_dfs = series_df.drop_duplicates().sort_values(by='SeriesTime', ascending=True).groupby(["pGUID", "EventName"]) - - for name, group in subject_dfs: - - ### Logging information - # initialize logging variables - has_t1 = 0 - has_t2 = 0 - has_sefm = 0 - has_rsfmri = 0 - has_mid = 0 - has_sst = 0 - has_nback = 0 - - # TODO: Add pGUID and EventName (Subject ID and Visit) to csv for logging information - num_sub_visits += 1 - - scanner = group.iloc[0]['Manufacturer'] - if scanner == 'Philips Medical Systems': - num_philips += 1 - elif scanner == 'GE MEDICAL SYSTEMS': - num_ge += 1 - elif scanner == 'SIEMENS': - num_siemens += 1 - else: - print("Unexpected scanner type: %s" % scanner) - - # TODO: Create tgz directory if it doesn't already exist - sub_id = name[0] - visit = name[1] - sub = "sub-" + sub_id.replace("_","") - #print(sub_id, visit) - tgz_dir = './download' + sub + '/' + visit - new_tgz_dir = './new_download/' + sub + '/' + visit - if os.path.exists(tgz_dir): - print("{0} already exists from old download. Updating now.".format(name)) - #continue - elif os.path.exists(new_tgz_dir): - print("{0} already exists from the most recent download. 
Updating now.".format(name)) - tgz_dir = new_tgz_dir - else: - print("{0} downloading now.".format(name)) - tgz_dir = new_tgz_dir - os.makedirs(tgz_dir) - - ### Get ready to download only good QC'd data that passes all of our criteria ! - - passed_QC_group = group.loc[group['QC'] == 1.0] - - file_paths = [] - - ### Identify valid scans - # Download only T1, T2, fMRI_FM_PA, fMRI_FM_AP, fMRI_FM, rsfMRI, fMRI_MID_task, fMRI_SST_task, fMRI_nBack_task - - ## Check if T1_NORM exists and download that instead of just T1 - # If there is a T1_NORM in the df of good T1s then use it. Else just use good T1 - T1_df = passed_QC_group.loc[passed_QC_group['image_description'] == 'ABCD-T1-NORM'] - if T1_df.empty: - T1_df = passed_QC_group.loc[passed_QC_group['image_description'] == 'ABCD-T1'] - if T1_df.empty: - has_t1 = 0 # No T1s. Invalid subject - else: - for file_path in T1_df['image_file']: - file_paths += [file_path] - has_t1 = T1_df.shape[0] - else: - for file_path in T1_df['image_file']: - file_paths += [file_path] - has_t1 = T1_df.shape[0] - - T2_df = passed_QC_group.loc[passed_QC_group['image_description'] == 'ABCD-T2-NORM'] - if T2_df.empty: - T2_df = passed_QC_group.loc[passed_QC_group['image_description'] == 'ABCD-T2'] - if T2_df.empty: - has_t2 = 0 # No T2s - else: - for file_path in T2_df['image_file']: - file_paths += [file_path] - has_t2 = T2_df.shape[0] - else: - for file_path in T2_df['image_file']: - file_paths += [file_path] - has_t2 = T2_df.shape[0] - - ## Pair SEFMs and only download if both pass QC - # Check first if just the FM exists - FM_df = passed_QC_group.loc[passed_QC_group['image_description'] == 'ABCD-fMRI-FM'] - if FM_df.empty: - FM_AP_df = group.loc[group['image_description'] == 'ABCD-fMRI-FM-AP'] - FM_PA_df = group.loc[group['image_description'] == 'ABCD-fMRI-FM-PA'] - if FM_AP_df.shape[0] != FM_PA_df.shape[0] or FM_AP_df.empty: - has_sefm = 0 # No SEFMs. Invalid subject - else: - for i in range(0, FM_AP_df.shape[0]): - if FM_AP_df.iloc[i]['QC'] == 1.0 and FM_PA_df.iloc[i]['QC'] == 1.0: - FM_df = FM_df.append(FM_AP_df.iloc[i]) - FM_df = FM_df.append(FM_PA_df.iloc[i]) - if FM_df.empty: - has_sefm = 0 # No SEFMs. 
Invalid subject - else: - for file_path in FM_df['image_file']: - file_paths += [file_path] - has_sefm = FM_df.shape[0] - - - ## List all rsfMRI scans that pass QC - RS_df = passed_QC_group.loc[passed_QC_group['image_description'] == 'ABCD-rsfMRI'] - if RS_df.empty: - has_rsfmri = 0 - else: - for file_path in RS_df['image_file']: - file_paths += [file_path] - has_rsfmri = RS_df.shape[0] - - ## List only download task iff their is a pair of scans for the task that passed QC - MID_df = passed_QC_group.loc[passed_QC_group['image_description'] == 'ABCD-MID-fMRI'] - if MID_df.shape[0] != 2: - has_mid = MID_df.shape[0] - else: - for file_path in MID_df['image_file']: - file_paths += [file_path] - has_mid = MID_df.shape[0] - SST_df = passed_QC_group.loc[passed_QC_group['image_description'] == 'ABCD-SST-fMRI'] - if SST_df.shape[0] != 2: - has_sst = SST_df.shape[0] - else: - for file_path in SST_df['image_file']: - file_paths += [file_path] - has_sst = SST_df.shape[0] - nBack_df = passed_QC_group.loc[passed_QC_group['image_description'] == 'ABCD-nBack-fMRI'] - if nBack_df.shape[0] != 2: - has_nback = nBack_df.shape[0] - else: - for file_path in nBack_df['image_file']: - file_paths += [file_path] - has_nback = nBack_df.shape[0] - - # TODO: log subject level information - if has_t1 == 0: - num_invalid += 1 - print('%s: t1=%s, t2=%s, sefm=%s, rsfmri=%s, mid=%s, sst=%s, nback=%s INVALID' % (sub, has_t1, has_t2, has_sefm, has_rsfmri, has_mid, has_sst, has_nback)) - writer.writerow([sub, has_t1, has_t2, has_sefm, has_rsfmri, has_mid, has_sst, has_nback]) - else: - num_valid += 1 - print('%s: t1=%s, t2=%s, sefm=%s, rsfmri=%s, mid=%s, sst=%s, nback=%s' % (sub, has_t1, has_t2, has_sefm, has_rsfmri, has_mid, has_sst, has_nback)) - writer.writerow([sub, has_t1, has_t2, has_sefm, has_rsfmri, has_mid, has_sst, has_nback]) - if has_t2 != 0: - num_t2 += 1 - if has_rsfmri != 0: - num_rsfmri += 1 - if has_mid != 0: - num_mid += 1 - if has_sst != 0: - num_sst += 1 - if has_nback != 0: - num_nback += 1 - subprocess.call(["./nda_aws_token_maker.py", ">", "/dev/null"]) - for i in file_paths: - tgz_name = os.path.basename(i) - tgz_path = tgz_dir + '/' + tgz_name - if os.path.exists(tgz_path): - continue - else: - aws_cmd = ["aws", "s3", "cp", i, tgz_dir + "/", "--profile", "NDA"] - #print(aws_cmd) - subprocess.call(aws_cmd) - - -print("There are %s subject visits" % num_sub_visits) -print("%s are valid. %s are invalid" % (num_valid, num_invalid)) -print("%s Siemens" % num_siemens) -print("%s Philips" % num_philips) -print("%s GE" % num_ge) -print("number of valid subjects with a T2 : %s" % num_t2) -print("number of valid subjects with rest : %s" % num_rsfmri) -print("number of valid subjects with mid : %s" % num_mid) -print("number of valid subjects with sst : %s" % num_sst) -print("number of valid subjects with nBack: %s" % num_nback) diff --git a/image03_importer.m b/image03_importer.m deleted file mode 100644 index b4739bd..0000000 --- a/image03_importer.m +++ /dev/null @@ -1,100 +0,0 @@ -function image03 = image03_importer(filename) -%% Import data from text file. -% Script for importing data from the following text file: -% -% /mnt/max/shared/projects/ABCD/daic_spreadsheet/image03.txt -% -% To extend the code to different selected data or a different text file, -% generate a function instead of a script. - -% Auto-generated by MATLAB on 2018/08/14 10:58:42 - -%% Initialize variables. -delimiter = '\t'; -startRow = 3; - -%% Read columns of data as text: -% For more information, see the TEXTSCAN documentation. 
-formatSpec = '%*s%*s%*s%s%*s%*s%*s%*s%s%s%*s%s%*s%s%*s%*s%*s%*s%*s%s%s%s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%*s%s%[^\n\r]'; - -%% Open the text file. -fileID = fopen(filename,'r'); - -%% Read columns of data according to the format. -% This call is based on the structure of the file used to generate this -% code. If an error occurs for a different file, try regenerating the code -% from the Import Tool. -dataArray = textscan(fileID, formatSpec, 'Delimiter', delimiter, 'TextType', 'string', 'HeaderLines' ,startRow-1, 'ReturnOnError', false, 'EndOfLine', '\r\n'); - -%% Close the text file. -fclose(fileID); - -%% Convert the contents of columns containing numeric text to numbers. -% Replace non-numeric text with NaN. -raw = repmat({''},length(dataArray{1}),length(dataArray)-1); -for col=1:length(dataArray)-1 - raw(1:length(dataArray{col}),col) = mat2cell(dataArray{col}, ones(length(dataArray{col}), 1)); -end -numericData = NaN(size(dataArray{1},1),size(dataArray,2)); - -% Converts text in the input cell array to numbers. Replaced non-numeric -% text with NaN. -rawData = dataArray{8}; -for row=1:size(rawData, 1) - % Create a regular expression to detect and remove non-numeric prefixes and - % suffixes. - regexstr = '(?.*?)(?([-]*(\d+[\,]*)+[\.]{0,1}\d*[eEdD]{0,1}[-+]*\d*[i]{0,1})|([-]*(\d+[\,]*)*[\.]{1,1}\d+[eEdD]{0,1}[-+]*\d*[i]{0,1}))(?.*)'; - try - result = regexp(rawData(row), regexstr, 'names'); - numbers = result.numbers; - - % Detected commas in non-thousand locations. - invalidThousandsSeparator = false; - if numbers.contains(',') - thousandsRegExp = '^\d+?(\,\d{3})*\.{0,1}\d*$'; - if isempty(regexp(numbers, thousandsRegExp, 'once')) - numbers = NaN; - invalidThousandsSeparator = true; - end - end - % Convert numeric text to numbers. - if ~invalidThousandsSeparator - numbers = textscan(char(strrep(numbers, ',', '')), '%f'); - numericData(row, 8) = numbers{1}; - raw{row, 8} = numbers{1}; - end - catch - raw{row, 8} = rawData{row}; - end -end - - -%% Split data into numeric and string columns. 
-rawNumericColumns = raw(:, 8); -rawStringColumns = string(raw(:, [1,2,3,4,5,6,7,9])); - - -%% Replace non-numeric cells with NaN -R = cellfun(@(x) ~isnumeric(x) && ~islogical(x),rawNumericColumns); % Find non-numeric cells -rawNumericColumns(R) = {NaN}; % Replace non-numeric cells - -%% Make sure any text containing is properly converted to an categorical -for catIdx = [1,2,4,5,6,7,8] - idx = (rawStringColumns(:, catIdx) == ""); - rawStringColumns(idx, catIdx) = ""; -end - -%% Create output variable -image03 = table; -image03.subjectkey = categorical(rawStringColumns(:, 1)); -image03.comments_misc = categorical(rawStringColumns(:, 2)); -image03.image_file = rawStringColumns(:, 3); -image03.image_description = categorical(rawStringColumns(:, 4)); -image03.scan_type = categorical(rawStringColumns(:, 5)); -image03.scanner_manufacturer_pd = categorical(rawStringColumns(:, 6)); -image03.scanner_type_pd = categorical(rawStringColumns(:, 7)); -image03.scanner_software_versions_pd = cell2mat(rawNumericColumns(:, 1)); -image03.visit = categorical(rawStringColumns(:, 8)); - -%% Clear temporary variables -clearvars filename delimiter startRow formatSpec fileID dataArray ans raw col numericData rawData row regexstr result numbers invalidThousandsSeparator thousandsRegExp rawNumericColumns rawStringColumns R catIdx idx; diff --git a/mapping.mat b/mapping.mat deleted file mode 100644 index 0adaa77..0000000 Binary files a/mapping.mat and /dev/null differ diff --git a/nda_aws_token_maker.py b/nda_aws_token_maker.py deleted file mode 100755 index 7ec9635..0000000 --- a/nda_aws_token_maker.py +++ /dev/null @@ -1,39 +0,0 @@ -#! /usr/bin/env python - -from nda_aws_token_generator import * -import getpass - -web_service_url = 'https://ndar.nih.gov/DataManager/dataManager' -#username = raw_input('Enter your NIMH Data Archives username:') -#password = getpass.getpass('Enter your NIMH Data Archives password:') -username = "FILL_IN_YOUR_NDA_USERNAME" -password = "FILL_IN_YOUR_NDA_PASSWORD" - -generator = NDATokenGenerator(web_service_url) - -token = generator.generate_token(username,password) - -print('aws_access_key_id=%s\n' - 'aws_secret_access_key=%s\n' - 'security_token=%s\n' - 'expiration=%s\n' - %(token.access_key, - token.secret_key, - token.session, - token.expiration) - ) - -import os -from configparser import ConfigParser -parser = ConfigParser() -parser.read(os.path.expanduser('~/.aws/credentials')) - -if not parser.has_section('NDA'): - parser.add_section('NDA') - -parser.set('NDA', 'aws_access_key_id', token.access_key) -parser.set('NDA', 'aws_secret_access_key', token.secret_key) -parser.set('NDA', 'aws_session_token', token.session) - -with open (os.path.expanduser('~/.aws/credentials'), 'w') as configfile: - parser.write(configfile) diff --git a/run_eta_squared.sh b/run_eta_squared.sh deleted file mode 100755 index 88c1920..0000000 --- a/run_eta_squared.sh +++ /dev/null @@ -1,37 +0,0 @@ -#!/bin/sh -# script for execution of deployed applications -# -# Sets up the MATLAB Runtime environment for the current $ARCH and executes -# the specified command. -# -exe_name=$0 -exe_dir=`dirname "$0"` -if [ ! 
-d $TMPDIR/$USER ]; then - mkdir $TMPDIR/$USER -fi -export MCR_CACHE_ROOT=$TMPDIR/$USER -echo "------------------------------------------" -if [ "x$1" = "x" ]; then - echo Usage: - echo $0 \ args -else - echo Setting up environment variables - MCRROOT="$1" - echo --- - LD_LIBRARY_PATH=.:${MCRROOT}/runtime/glnxa64 ; - LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${MCRROOT}/bin/glnxa64 ; - LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${MCRROOT}/sys/os/glnxa64; - LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${MCRROOT}/sys/opengl/lib/glnxa64; - export LD_LIBRARY_PATH; - echo LD_LIBRARY_PATH is ${LD_LIBRARY_PATH}; - shift 1 - args= - while [ $# -gt 0 ]; do - token=$1 - args="${args} \"${token}\"" - shift - done - eval "\"${exe_dir}/eta_squared\"" $args -fi -exit - diff --git a/run_order_fix.py b/run_order_fix.py deleted file mode 100755 index 37fc2ff..0000000 --- a/run_order_fix.py +++ /dev/null @@ -1,266 +0,0 @@ -#!/usr/bin/env python3 -import argparse -import datetime -import json -import os -import re -import shutil -import tempfile -from collections import OrderedDict - -taskmatch = re.compile('^.*task-([A-z0-9]+)_run-(\d+).*.nii.gz$') - - -def _cli(): - parser = generate_parser() - args = parser.parse_args() - if args.all: - args.get_bids_errors = True - args.generate_map = True - args.execute_swap = True - - assert args.get_bids_errors or args.generate_map or args.execute_swap, \ - 'INPUT: no mode selected!' - - # run stages - if args.get_bids_errors: - bids_input = args.bids_input - output_json = args.error_json - subject_list = args.subject - - get_bids_errors(bids_input, output_json, subject_list) - - if args.generate_map: - bids_input = args.bids_input - input_json = args.error_json - output_map = args.file_map - - get_bids_errors_correction_map(input_json, output_map, bids_input) - - if args.execute_swap: - input_map = args.file_map - - swap_files(input_map) - - -def generate_parser(): - - parser = argparse.ArgumentParser( - description='can be chained together. For full pipe: ' - 'run_order_fix.py --get-bids-errors --generate-map ' - '--execute-swap ... inputs', - epilog='Example call: ./run_order_fix.py ' - '/mnt/max/shared/projects/ABCD/example_BIDS error.json map.json ' - '--all --subject [NDARINVXXXXXX|sub-NDARINVXXXXXX]' - ) - modes = parser.add_argument_group( - title='modes', - description='flags to execute each of three stages.') - modes.add_argument( - '--get-bids-errors', action='store_true', - help='records all fmri mismatches in the json file.' - ) - modes.add_argument( - '--generate-map', action='store_true', - help='creates a map between errors and true files to swap in the swap ' - 'folder.' - ) - modes.add_argument( - '--execute-swap', action='store_true', - help='uses output map file' - ) - modes.add_argument( - '--all', action='store_true', - help='runs start to finish.' - ) - parser.add_argument( - '--subject', nargs='+', - help='optional subject list to narrow down bids inputs. ONLY USED ' - 'DURING GET BIDS ERRORS' - ) - parser.add_argument( - 'bids_input', default=None, - help='path to bids input folder to detect errors. Will also fix if ' - '--fix-bids-errors flag is turned on.' - ) - parser.add_argument( - 'error_json', default='bids_errors.json', - help='path to bids error json file (output/input depending on mode). 
' - 'Generated by the --get-bids-errors flag' - ) - parser.add_argument( - 'file_map', default='file_map.json', - help='map of files to be swapped around, in format {before: after}, ' - 'generated by the --generate-map flag' - ) - # parser.add_argument( - # '--bids-output', - # help='path to bids output folder to fix. Should not be input if the ' - # 'intent is to fix the bids-input folder.' - # ) - # parser.add_argument( - # '--short-json', - # help='optional path to an output file which will have no filenames.' - # ) - - return parser - - -def get_bids_errors(bids_input, output_json, subject_list=None, detailed=False): - - if subject_list: - subject_list = ['sub-%s' % x if not x.startswith('sub-') else x for x - in subject_list] - - func_folders = get_func_folders(bids_input) - - submatch = re.compile('^.*(sub-[A-z0-9]*).*$') - structured_output = {} - - for folder in func_folders: - subject = submatch.match(folder).group(1) - if subject_list and subject not in subject_list: - continue - print(subject) - contents = os.listdir(folder) - # sort contents - tasks = task_splitter(contents) - # filenames - new = True - for name, task_set in tasks.items(): - task_set = sorted(task_set) - run_nums = [int(taskmatch.match(t).group(2)) for t in task_set] - files = [os.path.join(folder, t) for t in task_set] - acq_times = [acquisition_time(f) for f in files] - order = sorted(range(0, len(acq_times)), - key=acq_times.__getitem__) - order = [1 + n for n in order] - if run_nums == order: - if detailed: - structured_output[subject][name] = 'correct' - continue - if new: - structured_output[subject] = {} - new = False - structured_output[subject][name] = {} - structured_output[subject][name]['current_order'] = run_nums - structured_output[subject][name]['actual_order'] = order - - if output_json: - if os.path.exists(output_json): - os.remove(output_json) - with open(output_json, 'w') as fd: - json.dump(structured_output, fd) - - -def get_bids_errors_correction_map(input_json, output_map, bids_input=None): - with open(input_json) as fd: - jso = json.load(fd) - - mapping = OrderedDict() - - for subject, tasks in jso.items(): - subject_folder = bids_input + '/{subject}/{session}'.format( - subject=subject, session='ses-baselineYear1Arm1') - for name, map_data in tasks.items(): - if map_data == 'correct': - continue - else: - if name != 'rest': - print('subject %s on %s has bad task data!' 
% (subject, - name)) - - current_order = map_data['current_order'] - actual_order = map_data['actual_order'] - if bids_input: - current_names = ['task-%s_run-0%s' % (name, i) for i in - current_order] - actual_names = ['task-%s_run-0%s' % (name, i) for i in - actual_order] - else: - current_names = ['task-%s0%s' % (name, i) for i in - current_order] - actual_names = ['task-%s0%s' % (name, i) for i in - actual_order] - for (current, end) in zip(current_names, actual_names): - mapping.update( - generate_file_map(subject_folder, current, end)) - - if os.path.exists(output_map): - os.remove(output_map) - with open(output_map, 'w') as fd: - json.dump(mapping, fd, indent=4) - - -def swap_files(json_file): - - with open(json_file) as fd: - file_mapper = json.load(fd) - file_mapper = OrderedDict(sorted(file_mapper.items())) - swapped = [] - _, tmp = tempfile.mkstemp() - for before, after in file_mapper.items(): - if before in swapped: - continue - try: - shutil.move(before, tmp) - except: - print('failed to move %s' % before) - raise - try: - shutil.move(after, before) - except: - print('failed to move %s' % after) - shutil.move(tmp, before) - raise - shutil.move(tmp, after) - print('swapped %s with %s' % (before, after)) - swapped.append(after) - - -def generate_file_map(subject_directory, current_task, end_task, - tempspace=None): - file_map = {} - task_directory = subject_directory - - for pathspec in os.walk(task_directory): - for filepath in (os.path.join(pathspec[0], f) for f in pathspec[2]): - end_filepath = filepath.replace(current_task, end_task) - if filepath != end_filepath: - file_map[filepath] = end_filepath - - return file_map - - -def get_func_folders(bids_input): - full_paths = os.walk(bids_input) - func_paths = filter(lambda x: os.path.basename(x[0]) == 'func', full_paths) - func_paths = (i[0] for i in func_paths) - # func_paths = itertools.islice(func_paths, 10) - return func_paths - - -def task_splitter(filenames): - tasks = filter(lambda x: x, (taskmatch.match(x) for x in filenames)) - task_dict = {} - for t in tasks: - name = t.group(1) - if name in task_dict.keys(): - task_dict[name] += [t.string] - else: - task_dict[name] = [t.string] - - return task_dict - - -def acquisition_time(filename): - sidecar = filename[:-7] + '.json' - with open(sidecar) as fd: - jso = json.load(fd) - time = datetime.datetime.strptime(jso['AcquisitionTime'], - '%H:%M:%S.%f').time() - return time - - -if __name__ == '__main__': - _cli() diff --git a/sefm_eval_and_json_editor.py b/sefm_eval_and_json_editor.py deleted file mode 100755 index 8d611be..0000000 --- a/sefm_eval_and_json_editor.py +++ /dev/null @@ -1,329 +0,0 @@ -#! /home/exacloud/lustre1/fnl_lab/code/external/utilities/exahead1-anaconda3/bin/python3 - -import os, sys, glob, argparse, subprocess, socket, operator, shutil, json -from bids.grabbids import BIDSLayout -from itertools import product - -os.environ['FSLOUTPUTTYPE'] = 'NIFTI_GZ' - -# Last modified -last_modified = "Created by Anders Perrone 3/21/2017. 
Last modified by Eric Earl 8/29/2018" - -# Program description -prog_descrip = """%(prog)s: sefm_eval pairs each of the pos/neg sefm and returns the pair that is most representative - of the average by calculating the eta squared value for each sefm pair to the average sefm.""" + last_modified - -# Get path to the FSL directory, compiled Matlab eta squared function, and Matlab Runtime Environment -try: - if socket.gethostname() == 'rushmore': - FSL_DIR = '/usr/share/fsl/5.0' - ETA_DIR = '/mnt/max/shared/utilities/compiled_Matlab_code' - MRE = '/mnt/max/shared/code/external/utilities/Matlab2016bRuntime/v91' - elif 'exa' in socket.gethostname(): - FSL_DIR = '/opt/installed/fsl-5.0.10' - ETA_DIR = '/home/exacloud/tempwork/fnl_lab/code/internal/utilities/ABCD' - MRE = '/home/exacloud/tempwork/fnl_lab/code/external/utilities/matlab_runtime/v91' - else: - print('This script has not yet been configured to run on ' + socket.gethostname() + ', sorry.') -except: - print("Error: Host not recognized") -finally: - os.environ['FSL_DIR'] = FSL_DIR - os.environ['FSLDIR'] = FSL_DIR - # for this script's usage of FSL_DIR... - FSL_DIR = FSL_DIR + '/bin/' - - -def read_bids_layout(layout, subject_list=None, collect_on_subject=False): - """ - :param bids_input: path to input bids folder - :param subject_list: a list of subject ids to filter on - :param collect_on_subject: collapses all sessions, for cases with - non-longitudinal data spread across scan sessions. - """ - - subjects = layout.get_subjects() - - # filter subject list - if isinstance(subject_list, list): - subjects = [s for s in subjects if s in subject_list] - elif isinstance(subject_list, dict): - subjects = [s for s in subjects if s in subject_list.keys()] - - subsess = [] - # filter session list - for s in subjects: - sessions = layout.get_sessions(subject=s) - if not sessions: - subsess += [(s, 'session')] - elif collect_on_subject: - subsess += [(s, sessions)] - else: - subsess += list(product([s], sessions)) - - assert len(subsess), 'bids data not found for participants. If labels ' \ - 'were provided, check the participant labels for errors. ' \ - 'Otherwise check that the bids folder provided is correct.' - - return subsess - - -def sefm_select(layout, subject, sessions, base_temp_dir, debug=False): - pos = 'PA' - neg = 'AP' - - # Make a temporary working directory - temp_dir = os.path.join(base_temp_dir, subject + '_eta_temp') - try: - os.mkdir(temp_dir) - except: - print(temp_dir + " already exists") - pass - - print("Pairing for subject " + subject + ":") - print(subject, sessions) - fmap = layout.get(subject=subject, session=sessions, modality='fmap', extensions='.nii.gz') - print(fmap) - if len(fmap): - list_pos = [x.filename for i, x in enumerate(fmap) if 'dir-PA' in x.filename] - list_neg = [x.filename for i, x in enumerate(fmap) if 'dir-AP' in x.filename] - - try: - len(list_pos) == len(list_neg) - except: - print("Error: There are a mismatched number of SEFMs. 
This should never happen!") - - pairs = [] - for pair in zip(list_pos, list_neg): - pairs.append(pair) - - pos_ref = pairs[0][0] - neg_ref = pairs[0][1] - - print("Aligning SEFMs and creating template") - for i, pair in enumerate(pairs): - pos_input = pair[0] - neg_input = pair[1] - for pedir,ref,flirt_in in [(pos,pos_ref,pos_input),(neg,neg_ref,neg_input)]: - out = os.path.join(temp_dir,'init_' + pedir + '_reg_' + str(i) + '.nii.gz') - cmd = [FSL_DIR + 'flirt', '-in', flirt_in, '-ref', ref, '-dof', str(6), '-out', out] - subprocess.run(cmd, stdout=subprocess.DEVNULL, env=os.environ) - - # Average the pos/neg SEFMs after alignment - - # First sum all of the images together - for pedir in [pos,neg]: - sum_cmd = [os.path.join(FSL_DIR,'fslmaths'), os.path.join(temp_dir,'init_' + pedir + '_reg_0.nii.gz')] - for i in range(1,len(pairs)): - sum_cmd += ['-add', os.path.join(temp_dir,'init_' + pedir + '_reg_' + str(i) + '.nii.gz')] - sum_cmd += [os.path.join(temp_dir, pedir + '_sum.nii.gz')] - subprocess.run(sum_cmd, env=os.environ) - - # Divide the sum by the number of pos/neg SEFMs to get the average - num_sefm = len(pairs) - for pedir in [pos,neg]: - avg_cmd = [os.path.join(FSL_DIR, 'fslmaths'), os.path.join(temp_dir, pedir + '_sum.nii.gz'), '-div', str(num_sefm), os.path.join(temp_dir,pedir + '_mean.nii.gz')] - subprocess.run(avg_cmd, env=os.environ) - - print("Computing ETA squared value for each image to the template") - - # Calculate the eta squared value of each aligned image to the average and return the pair with the highest average - #avg_eta_dict = {} - min_eta_dict = {} - for i, pair in enumerate(pairs): - eta_list = [] - for pedir,image in [(pos,pair[0]),(neg,pair[1])]: - mat_cmd = [os.path.join(ETA_DIR,'run_eta_squared.sh'), MRE, os.path.join(temp_dir,'init_' + pedir + '_reg_' + str(i) + '.nii.gz'), os.path.join(temp_dir,pedir + '_mean.nii.gz')] - mat_stdout = subprocess.check_output(mat_cmd) - eta = float(mat_stdout.split()[-1]) - print(image + " eta value = " + str(eta)) - eta_list.append(eta) - # instead of finding the average between eta values between pairs. Take the pair with the highest lowest eta value. - min_eta = min(eta_list) - min_eta_dict[pair] = min_eta - best_pos, best_neg = max(min_eta_dict, key=min_eta_dict.get) - print(best_pos) - print(best_neg) - - # Add metadata - func_list = [x.filename for x in layout.get(subject=subject, session=sessions, modality='func', extensions='.nii.gz')] - anat_list = [x.filename for x in layout.get(subject=subject, session=sessions, modality='anat', extensions='.nii.gz')] - for pair in pairs: - pos_nifti = pair[0] - neg_nifti = pair[1] - pos_json = pos_nifti.replace(".nii.gz", ".json") - neg_json = neg_nifti.replace(".nii.gz", ".json") - insert_edit_json(pos_json, "PhaseEncodingDirection", "j") - insert_edit_json(neg_json, "PhaseEncodingDirection", "j-") - - if pair == (best_pos, best_neg): - insert_edit_json(pos_json, "IntendedFor", anat_list + func_list) - insert_edit_json(neg_json, "IntendedFor", anat_list + func_list) - else: - insert_edit_json(pos_json, "IntendedFor", []) - insert_edit_json(neg_json, "IntendedFor", []) - - - # Delete the temp directory containing all the intermediate images - if not debug: - rm_cmd = ['rm', '-rf', temp_dir] - subprocess.run(rm_cmd, env=os.environ) - - print("Success! 
Best SEFM pair has been chosen and linked in " + subject + "'s nifti directory.") - - return best_pos, best_neg - -def seperate_concatenated_fm(bids_layout, subject, session): - print("actually running") - fmap = bids_layout.get(subject=subject, session=session, modality='fmap', extensions='.nii.gz') - # use the first functional image as the reference for the nifti header after fslswapdim - func_ref = bids_layout.get(subject=subject, session=session, modality='func', extensions='.nii.gz')[0].filename - print("functional reference: {}".format(func_ref)) - for FM in [x.filename for x in fmap]: - subject_dir = os.path.dirname(FM) - if "-both_" in FM: - print("Splitting up {}".format(FM)) - AP_filename = FM.replace("-both_", "-AP_") - PA_filename = FM.replace("-both_", "-PA_") - split = [FSL_DIR + "/fslsplit", FM, subject_dir + "/vol" ,"-t"] - subprocess.run(split, env=os.environ) - swap_dim = [FSL_DIR + "/fslswapdim", subject_dir + "/vol0000.nii.gz" ,"x", "-y", "z", subject_dir + "/vol0000.nii.gz"] - subprocess.run(swap_dim, env=os.environ) - os.rename(subject_dir + "/vol0000.nii.gz",AP_filename) - os.rename(subject_dir + "/vol0001.nii.gz",PA_filename) - AP_flirt = [FSL_DIR + "/flirt", "-out", AP_filename, "-in", AP_filename, "-ref", func_ref, "-applyxfm", "-init", "/home/exacloud/tempwork/fnl_lab/code/internal/utilities/ABCD/FSL_identity_transformation_matrix.mat", "-interp", "spline"] - PA_flirt = [FSL_DIR + "/flirt", "-out", PA_filename, "-in", PA_filename, "-ref", func_ref, "-applyxfm", "-init", "/home/exacloud/tempwork/fnl_lab/code/internal/utilities/ABCD/FSL_identity_transformation_matrix.mat", "-interp", "spline"] - subprocess.run(AP_flirt, env=os.environ) - subprocess.run(PA_flirt, env=os.environ) - - # create the side car jsons for the new pair - orig_json = FM.replace(".nii.gz", ".json") - AP_json = AP_filename.replace(".nii.gz", ".json") - PA_json = PA_filename.replace(".nii.gz", ".json") - shutil.copyfile(orig_json, AP_json) - shutil.copyfile(orig_json, PA_json) - insert_edit_json(orig_json, 'PhaseEncodingDirection', 'NA') - insert_edit_json(AP_json, 'PhaseEncodingDirection', 'j-') - insert_edit_json(PA_json, 'PhaseEncodingDirection', 'j') - # add required fields to the orig json as well - insert_edit_json(orig_json, 'IntendedFor', []) - return - -def insert_edit_json(json_path, json_field, value): - with open(json_path, 'r+') as f: - data = json.load(f) - data[json_field] = value - f.seek(0) - json.dump(data, f, indent=4) - f.truncate - return - - -def generate_parser(parser=None): - """ - Generates the command line parser for this program. - :param parser: optional subparser for wrapping this program as a submodule. - :return: ArgumentParser for this script/module - """ - if not parser: - parser = argparse.ArgumentParser( - description=prog_descrip - ) - parser.add_argument( - 'bids_dir', - help='path to the input bids dataset root directory. It is recommended to use ' - 'the dcan bids gui or Dcm2Bids to convert from participant dicoms.' - ) - parser.add_argument( - '--participant-label', dest='subject_list', metavar='ID', nargs='+', - help='optional list of participant ids to run. Default is all ids ' - 'found under the bids input directory. A participant label ' - 'does not include "sub-"' - ) - parser.add_argument( - '-a','--all-sessions', dest='collect', action='store_true', - help='collapses all sessions into one when running a subject.' 
- ) - parser.add_argument( - '-d', '--debug', dest='debug', action='store_true', default=False, - help='debug mode, leaves behind the "eta_temp" directory.' - ) - parser.add_argument( - '-v', '--version', action='version', version=last_modified, - help="Return script's last modified date." - ) - - return parser - - -def main(argv=sys.argv): - parser = generate_parser() - args = parser.parse_args() - - # Load the bids layout - layout = BIDSLayout(args.bids_dir) - subsess = read_bids_layout(layout, subject_list=args.subject_list, collect_on_subject=args.collect) - print(subsess) - - for subject,sessions in subsess: - # fmap directory = base dir - fmap = layout.get(subject=subject, session=sessions, modality='fmap', extensions='.nii.gz') - base_temp_dir = os.path.dirname(fmap[0].filename) - - # Check if fieldmaps are concatenated - print(fmap[0].filename) - print("-both_" in fmap[0].filename) - if "-both_" in fmap[0].filename: - print("Running seperate_concatenate_fm") - seperate_concatenated_fm(layout, subject, sessions) - # recreate layout with the additional SEFMS - layout = BIDSLayout(args.bids_dir) - - # Return a list of each SEFM pos/neg pair - bes_pos, best_neg = sefm_select(layout, subject, sessions, base_temp_dir, args.debug) - - # Additional edits to the anat json sidecar - anat = layout.get(subject=subject, session=sessions, modality='anat', extensions='.nii.gz') - for TX in [x.filename for x in anat]: - TX_json = TX.replace('.nii.gz', '.json') - TX_metadata = layout.get_metadata(TX) - #if 'T1' in TX_metadata['SeriesDescription']: - if 'Philips' in TX_metadata['Manufacturer']: - insert_edit_json(TX_json, 'DwellTime', 0.00062771) - if 'GE' in TX_metadata['Manufacturer']: - insert_edit_json(TX_json, 'DwellTime', 0.000536) - if 'Siemens' in TX_metadata['Manufacturer']: - insert_edit_json(TX_json, 'DwellTime', 0.00051001152626) - - # add EffectiveEchoSpacing if it doesn't already exist - fmap = layout.get(subject=subject, session=sessions, modality='fmap', extensions='.nii.gz') - for sefm in [x.filename for x in fmap]: - sefm_json = sefm.replace('.nii.gz', '.json') - sefm_metadata = layout.get_metadata(sefm) - if 'Philips' in sefm_metadata['Manufacturer']: - insert_edit_json(sefm_json, 'EffectiveEchoSpacing', 0.00062771) - if 'GE' in sefm_metadata['Manufacturer']: - insert_edit_json(sefm_json, 'EffectiveEchoSpacing', 0.000536) - if 'Siemens' in sefm_metadata['Manufacturer']: - insert_edit_json(sefm_json, 'EffectiveEchoSpacing', 0.00051001152626) - - # PE direction vs axis - func = layout.get(subject=subject, session=sessions, modality='func', extensions='.nii.gz') - for task in [x.filename for x in func]: - task_json = task.replace('.nii.gz', '.json') - task_metadata = layout.get_metadata(task) - print('Inserting PE into func') - if "PhaseEncodingAxis" in task_metadata: - print('Adding PEDirection') - print(task_json) - print('PhaseEncodingDirection') - print(task_metadata['PhaseEncodingAxis']) - insert_edit_json(task_json, 'PhaseEncodingDirection', task_metadata['PhaseEncodingAxis']) - elif "PhaseEncodingDirection" in task_metadata: - insert_edit_json(task_json, 'PhaseEncodingAxis', task_metadata['PhaseEncodingDirection']) - - -if __name__ == "__main__": - sys.exit(main()) - - diff --git a/unpack_and_setup.sh b/unpack_and_setup.sh deleted file mode 100755 index d19fe44..0000000 --- a/unpack_and_setup.sh +++ /dev/null @@ -1,142 +0,0 @@ -#! 
/bin/bash - -set -e - -# Given a subject ID, session, and tgz directory: -# 1) Copy all tgzs to compute node's disk -# 2) Unpack tgzs -# 3) Convert dcms to niftis in BIDS -# 4) Select the best SEFM -# 5) Rename and move Eprime files -# 6) Copy back to Lustre - -## Necessary dependencies -# dcm2bids (https://github.com/DCAN-Labs/Dcm2Bids) -# microgl_lx (https://github.com/rordenlab/dcm2niix) -# pigz-2.4 (https://zlib.net/pigz) -# run_order_fix.py (in this repo) -# sefm_eval_and_json_editor.py (in this repo) - -ScratchSpaceDir=/tmp/abcd-dicom2bids -ROOT_BIDSINPUT=./ABCD-HCP -SUB=$1 # Full BIDS formatted subject ID (sub-SUBJECTID) -VISIT=$2 # Full BIDS formatted session ID (ses-SESSIONID) -TGZDIR=$3 # Path to directory containing all .tgz for this subject's session - -participant=`echo ${SUB} | sed 's|sub-||'` -session=`echo ${VISIT} | sed 's|ses-||'` - - -date -hostname -echo ${SLURM_JOB_ID} - -# Setup scratch space directory -if [ ! -d ${ScratchSpaceDir} ]; then - mkdir -p ${ScratchSpaceDir} - # chown :fnl_lab ${ScratchSpaceDir} || true - chmod 770 ${ScratchSpaceDir} || true -fi -RandomHash=`cat /dev/urandom | tr -cd 'a-f0-9' | head -c 16` -TempSubjectDir=${ScratchSpaceDir}/${RandomHash} -mkdir -p ${TempSubjectDir} -# chown :fnl_lab ${TempSubjectDir} || true -echo "TempSubjectDir = ${TempSubjectDir}" - -# copy all tgz to the scratch space dir -echo `date`" :COPYING TGZs TO SCRATCH: ${TempSubjectDir}" -cp ${TGZDIR}/* ${TempSubjectDir} - -# unpack tgz to ABCD_DCMs directory -mkdir ${TempSubjectDir}/DCMs -echo `date`" :UNPACKING DCMs: ${TempSubjectDir}/DCMs" -for tgz in ${TempSubjectDir}/*.tgz; do - echo $tgz - tar -xzf ${tgz} -C ${TempSubjectDir}/DCMs -done - - -# # IMPORTANT PATH DEPENDENCY VARIABLES AT OHSU IN SLURM CLUSTER -# export PATH=.../anaconda2/bin:${PATH} # relevant Python path with dcm2bids -# export PATH=.../mricrogl_lx/:${PATH} # relevant dcm2niix path -# export PATH=.../pigz-2.4/:${PATH} # relevant pigz path for improved (de)compression - - -# convert DCM to BIDS and move to ABCD directory -mkdir ${TempSubjectDir}/BIDS_unprocessed -echo ${participant} -echo `date`" :RUNNING dcm2bids" -dcm2bids -d ${TempSubjectDir}/DCMs/${SUB} -p ${participant} -s ${session} -c ./abcd_dcm2bids.conf -o ${TempSubjectDir}/BIDS_unprocessed --forceDcm2niix --clobber - -echo `date`" :CHECKING BIDS ORDERING OF EPIs" -if [ -e ${TempSubjectDir}/BIDS_unprocessed/${SUB}/${VISIT}/func ]; then - if [ `./run_order_fix.py ${TempSubjectDir}/BIDS_unprocessed ${TempSubjectDir}/bids_order_error.json ${TempSubjectDir}/bids_order_map.json --all --subject ${SUB}` == ${SUB} ]; then - echo BIDS correctly ordered - else - echo ERROR: BIDS incorrectly ordered even after running run_order_fix.py - exit - fi -else - echo ERROR: No functional images found. T1-only processing is not yet enabled - exit -fi - -# select best fieldmap and update sidecar jsons -echo `date`" :RUNNING SEFM SELECTION AND EDITING SIDECAR JSONS" -./sefm_eval_and_json_editor.py ${TempSubjectDir}/BIDS_unprocessed/${SUB} --participant-label=${participant} - -rm ${TempSubjectDir}/BIDS_unprocessed/${SUB}/ses-baselineYear1Arm1/fmap/*dir-both* 2> /dev/null || true - -# rename EventRelatedInformation -echo `date`" :COPY AND RENAME SOURCE DATA" -srcdata_dir=${TempSubjectDir}/BIDS_unprocessed/sourcedata/${SUB}/ses-baselineYear1Arm1/func -if ls ${TempSubjectDir}/DCMs/${SUB}/ses-baselineYear1Arm1/func/*EventRelatedInformation.txt > /dev/null 2>&1; then - mkdir -p ${srcdata_dir} -fi -MID_evs=`ls 
${TempSubjectDir}/DCMs/${SUB}/ses-baselineYear1Arm1/func/*MID*EventRelatedInformation.txt 2>/dev/null` -SST_evs=`ls ${TempSubjectDir}/DCMs/${SUB}/ses-baselineYear1Arm1/func/*SST*EventRelatedInformation.txt 2>/dev/null` -nBack_evs=`ls ${TempSubjectDir}/DCMs/${SUB}/ses-baselineYear1Arm1/func/*nBack*EventRelatedInformation.txt 2>/dev/null` -if [ `echo ${MID_evs} | wc -w` -eq 2 ]; then - i=1 - for ev in ${MID_evs}; do - cp ${ev} ${srcdata_dir}/${SUB}_ses-baselineYear1Arm1_task-MID_run-0${i}_bold_EventRelatedInformation.txt - ((i++)) - done -fi -if [ `echo ${SST_evs} | wc -w` -eq 2 ]; then - i=1 - for ev in ${SST_evs}; do - cp ${ev} ${srcdata_dir}/${SUB}_ses-baselineYear1Arm1_task-SST_run-0${i}_bold_EventRelatedInformation.txt - ((i++)) - done -fi -if [ `echo ${nBack_evs} | wc -w` -eq 2 ]; then - i=1 - for ev in ${nBack_evs}; do - cp ${ev} ${srcdata_dir}/${SUB}_ses-baselineYear1Arm1_task-nback_run-0${i}_bold_EventRelatedInformation.txt - ((i++)) - done -fi - -echo `date`" :COPYING SOURCE AND SORTED DATA BACK: ${ROOT_BIDSINPUT}" - -TEMPBIDSINPUT=${TempSubjectDir}/BIDS_unprocessed/${SUB} -if [ -d ${TEMPBIDSINPUT} ] ; then - echo `date`" :CHMOD BIDS INPUT" - chmod g+rw -R ${TEMPBIDSINPUT} || true - echo `date`" :COPY BIDS INPUT" - mkdir -p ${ROOT_BIDSINPUT} - cp -r ${TEMPBIDSINPUT} ${ROOT_BIDSINPUT}/ -fi - -ROOT_SRCDATA=${ROOT_BIDSINPUT}/sourcedata -TEMPSRCDATA=${TempSubjectDir}/BIDS_unprocessed/sourcedata/${SUB} -if [ -d ${TEMPSRCDATA} ] ; then - echo `date`" :CHMOD SOURCEDATA" - chmod g+rw -R ${TEMPSRCDATA} || true - echo `date`" :COPY SOURCEDATA" - mkdir -p ${ROOT_SRCDATA} - cp -r ${TEMPSRCDATA} ${ROOT_SRCDATA}/ -fi - -echo `date`" :UNPACKING AND SETUP COMPLETE: ${SUB}/${VISIT}"
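
A note on the SEFM selection logic removed above: sefm_eval_and_json_editor.py delegates the eta-squared computation to the compiled MATLAB binary eta_squared (wrapped by run_eta_squared.sh), then keeps the fieldmap pair whose lower eta-squared value against the mean images is highest. The sketch below only illustrates that selection rule, assuming the standard eta-squared similarity formula and NumPy; the function names are hypothetical and are not part of the removed code.

import numpy as np

def eta_squared(a, b):
    # Standard eta^2 similarity between two images: 1 - SS_within / SS_total
    # (assumed formula; the removed repo computed this in a compiled MATLAB binary).
    a = np.asarray(a, dtype=float).ravel()
    b = np.asarray(b, dtype=float).ravel()
    m_i = (a + b) / 2.0           # voxelwise mean of the two images
    grand_mean = m_i.mean()       # grand mean over both images
    ss_within = np.sum((a - m_i) ** 2 + (b - m_i) ** 2)
    ss_total = np.sum((a - grand_mean) ** 2 + (b - grand_mean) ** 2)
    return 1.0 - ss_within / ss_total

def pick_best_sefm_pair(pairs, pos_mean, neg_mean):
    # pairs: list of (pos_img, neg_img) arrays already aligned to the mean images.
    # Mirrors the "highest lowest eta" rule in sefm_select(): score each pair by
    # the smaller of its two eta^2 values, then keep the best-scoring pair.
    min_eta = {i: min(eta_squared(p, pos_mean), eta_squared(n, neg_mean))
               for i, (p, n) in enumerate(pairs)}
    return pairs[max(min_eta, key=min_eta.get)]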