diff --git a/abcd_basic_proc.m b/abcd_basic_proc.m
new file mode 100755
index 0000000..90298d7
--- /dev/null
+++ b/abcd_basic_proc.m
@@ -0,0 +1,137 @@
+% abcd_basic_proc.m
+% Written by Nikhil Goyal, National Institute of Mental Health, 2019-2020
+% Created: June 2020
+% Modified: 7/1/2020
+
+% CCA processing script for the ABCD connectome and SM data
+% Required inputs are the NET.txt and VARS.txt files.
+
+function abcd_basic_proc(N_perm, N_dim, abcd_cca_dir, n_subs)
+    if nargin<4
+        sprintf("ERROR, not enough arguments.")
+        sprintf("Example: abcd_basic_proc(100000, 70, '/data/ABCD_MBDU/goyaln2/abcd_cca_replication/', 5013)")
+        return
+	end
+	
+	%% Add the dependencies folders to the PATH, and read in necessary data
+	if ~isdeployed
+		addpath(genpath(sprintf('%s/dependencies/', abcd_cca_dir)));
+		addpath(genpath(sprintf('%s/data/', abcd_cca_dir)));
+	end
+
+	% Read in data, set some variables, create confounds matrix
+	% VARS_0 = Subjects X SMs text file
+	VARS_0=strcsvread(sprintf('%s/data/%d/VARS.txt', abcd_cca_dir, n_subs));
+
+	% Load the Subjects X Nodes matrix (should be size Nx19900)
+	N0=load(sprintf('%s/data/%d/NET.txt', abcd_cca_dir, n_subs));   
+	
+	% Load list of SMs to be used in ICA (this list is made manually)
+	ica_sms_0=fileread(sprintf('%s/data/ica_subject_measures.txt', abcd_cca_dir));
+	ica_sms = strsplit(ica_sms_0);
+
+	% Load list of names of colums used to encode scanner ID
+	scanner_col_names_0=fileread(sprintf('%s/data/%d/scanner_confounds.txt', abcd_cca_dir, n_subs));
+	scanner_col_names = strsplit(scanner_col_names_0);
+
+	% Drop subject col and device serial number col (they are strings)
+	egid_col    = find(strcmpi(VARS_0(1,:),'subjectid'));
+	serial_col  = find(strcmpi(VARS_0(1,:),'mri_info_device.serial.number'));
+	VARS_0(:,[egid_col serial_col])=[];
+
+	% Get column indices of our confound variables
+	[sharedvals,scanner_cols_idx]=intersect(VARS_0(1,:),scanner_col_names);
+	site_col        = find(strcmpi(VARS_0(1,:),'abcd_site'));
+	mri_man_col     = find(strcmpi(VARS_0(1,:),'mri_info_manufacturer'));
+	mean_fd_col     = find(strcmpi(VARS_0(1,:),'mean_fd'));
+	bmi_col         = find(strcmpi(VARS_0(1,:),'anthro_bmi_calc'));
+	weight_col      = find(strcmpi(VARS_0(1,:),'anthro_weight_calc'));
+	wholebrain_col  = find(strcmpi(VARS_0(1,:),'smri_vol_subcort.aseg_wholebrain'));
+	intracran_col   = find(strcmpi(VARS_0(1,:),'smri_vol_subcort.aseg_intracranialvolume'));
+
+	% Get column indices of the ICA SMs
+	[sharedvals,ica_sms_idx]=intersect(VARS_0(1,:),ica_sms);
+	
+	% VARS without column names
+	VARS=cell2mat(VARS_0(2:end,:));
+
+	% --- CREATE CONFOUND MATRIX ---
+	% Set up confounds matrix. Confounds are demeaned, any missing data is set to 0
+	% Confounds are:
+	% 1. Individual scanner IDs (subsitute for abcd_site and mri_info_manufacturer)
+	% 2. mean_fd
+	% 3. anthro_bmi_calc
+	% 4. anthro_weight_calc
+	% 5. smri_vol_subcort.aseg_wholebrain
+	% 6. smri_vol_subcort.aseg_intracranialvolume 
+	% Additional confounds from demeaning and squaring all confounds 
+	conf  = palm_inormal([ VARS(:,scanner_cols_idx) VARS(:,[mean_fd_col bmi_col weight_col]) VARS(:,[wholebrain_col intracran_col]).^(1/3) ]);  % Gaussianise
+	conf(isnan(conf)|isinf(conf)) = 0;                % impute missing data as zeros
+	conf  = nets_normalise([conf conf(:,length(scanner_cols_idx):end).^2]);  % add on squared terms and renormalise (all cols other than those for scanner IDs)
+	conf(isnan(conf)|isinf(conf)) = 0;                % again convert NaN/inf to 0 (above line makes them reappear for some reason)
+	
+	% --- SM PROCESSING ---
+	% Matrix S1 (only ICA sms)
+	S1=[VARS(:,ica_sms_idx)];
+
+	% Now, prepare the final SM matrix and run the PCA
+	% S2, formed by gaussianizing the SMs we keep
+	S2=palm_inormal(S1); % Gaussianise
+
+ 	% Now generate S3, formed by deconfounding the 17 confounds out of S2
+	S3=S2;
+	for i=1:size(S3,2) % deconfound ignoring missing data
+		grot=(isnan(S3(:,i))==0);
+		grotconf=nets_demean(conf(grot,:));
+		S3(grot,i)=normalize(S3(grot,i)-grotconf*(pinv(grotconf)*S3(grot,i)));
+	end
+
+	% Determine how much data is missing:
+	sum(sum(isnan(S3)))/(size(S3,1)*size(S3,2))*100
+
+	% Next, we need to generate S4
+	% First, estimate the SubjectsXSubjects covariance matrix (where for any two subjects, SMs missing for either subject are ignored)
+	% The approximate covariance matrix (varsdCOV) is then projected onto the nearest valid covariance matrix using nearestSPD toolbox.
+	% This method avoids imputation, and S4 can be fed into PCA.
+	S3Cov=zeros(size(S3,1));
+	for i=1:size(S3,1) % estimate "pairwise" covariance, ignoring missing data
+		for j=1:size(S3,1)
+			grot=S3([i j],:);
+			grot=cov(grot(:,sum(isnan(grot))==0)');
+			S3Cov(i,j)=grot(1,2);
+		end
+	end
+	S4=nearestSPD(S3Cov); % project onto the nearest valid covariance matrix. This method avoids imputation (we can't have any missing values before running the PCA)
+
+	% Check the before and after correlation:
+	corrcoef(S4,S3Cov)  %0.9999 for the 5013 subject sample
+
+	% Generate S5, the top eigenvectors for SMs, to avoid overfitting and reduce dimensionality
+	[uu,dd]=eigs(S4,N_dim);       % SVD (eigs actually)
+	S5=uu-conf*(pinv(conf)*uu);   % deconfound again just to be safe
+
+	% --- NETMAT PROCESSING ---
+	fprintf("Calculating netmat matrices N1 through N5\n")
+	% N1, formed by 1. demean, 2. globally variance normalize
+	N1=nets_demean(N0);   % 1. Demean
+	N1=N1/std(N1(:));     % 2. variance normalize
+	% N2, formed by 1. Demean, 2. remove columns that are badly conditions due to low z (z<0.1) mean value, 3. global variance normalize the matrix
+	abs_mean_NET=abs(mean(N0));                             % get mean, take abs val
+	N2=nets_demean(N0./repmat(abs_mean_NET,size(N0,1),1));  % 1. demean
+	N2(:,abs_mean_NET<0.1)=[];                              % 2. remove columns with mean value <0.1                          
+	N2=N2/std(N2(:));                                       % 3. variance normalize
+	% N3, formed by horizontally concat N1 and N2
+	N3=[N1 N2]; % Concat horizontally
+	% N4, formed by regressing the confounds matrix out of N3
+	N4=nets_demean(N3-conf*(pinv(conf)*N3));
+	% N5
+	[N5,ss1,vv1]=nets_svds(N4,N_dim); % PCA of netmat via SVD reduction
+
+	% --- Save matrices ---
+	writematrix(conf, sprintf('%s/data/%d/conf.txt', abcd_cca_dir, n_subs));
+	writematrix(N0, sprintf('%s/data/%d/N0.txt', abcd_cca_dir, n_subs));
+	writematrix(N5, sprintf('%s/data/%d/N5.txt', abcd_cca_dir, n_subs));
+	writematrix(S1, sprintf('%s/data/%d/S1.txt', abcd_cca_dir, n_subs));
+	writematrix(S5, sprintf('%s/data/%d/S5.txt', abcd_cca_dir, n_subs));
+
+end
\ No newline at end of file
diff --git a/abcd_cca_80-20.m b/abcd_cca_80-20.m
new file mode 100755
index 0000000..628cc41
--- /dev/null
+++ b/abcd_cca_80-20.m
@@ -0,0 +1,236 @@
+%% Set up
+
+perms_per_batch_in = 2000;
+N_perm_in = 100000;
+N_dim_in = 70;
+n_subs_in = 5013;
+abcd_cca_dir    =   '/data/NIMH_scratch/abcd_cca/abcd_cca_replication/';
+
+if ~isdeployed
+    addpath(genpath(sprintf('%s/dependencies/', abcd_cca_dir)));
+    addpath(genpath(sprintf('%s/data/', abcd_cca_dir)));
+    perms_per_batch =   perms_per_batch_in;
+    N_perm          =   N_perm_in;
+    N_dim           =   N_dim_in;
+    n_subs          =   n_subs_in;
+elseif isdeployed
+    % When compiled matlab, it reads the command line args all as strings so we need to convert
+    perms_per_batch =   str2num(perms_per_batch_in);
+    N_perm          =   str2num(N_perm_in);
+    N_dim           =   str2num(N_dim_in);
+    n_subs          =   str2num(n_subs_in);
+end
+
+melodic_folder  =   sprintf('%s/data_prep/data/stage_3/%d.gica', abcd_cca_dir, n_subs);
+SUMPICS         =   sprintf('%s/melodic_IC_thin.sum', melodic_folder);
+SUMPICS_THICK   =   sprintf('%s/melodic_IC_thick.sum', melodic_folder);
+
+% read in VARS and NET and do some preprocessing
+
+% Load the Subjects X Nodes matrix (should be size Nx19900)
+NET = load(sprintf('%s/data/%d/NET.txt', abcd_cca_dir, n_subs));
+
+% VARS_0 = Subjects X SMs text file
+VARS_0 = strcsvread(sprintf('%s/data/%d/VARS.txt', abcd_cca_dir, n_subs));
+
+% Load list of SMs to be used in ICA (this list is made manually)
+ica_sms_0=fileread(sprintf('%s/data/ica_subject_measures.txt', abcd_cca_dir));
+ica_sms = strsplit(ica_sms_0);
+
+% Load list of names of colums used to encode scanner ID
+scanner_col_names_0=fileread(sprintf('%s/data/%d/scanner_confounds.txt', abcd_cca_dir, n_subs));
+scanner_col_names = strsplit(scanner_col_names_0);
+
+% Drop subject col and device serial number col (they are strings)
+egid_col    = find(strcmpi(VARS_0(1,:),'subjectid'));
+serial_col  = find(strcmpi(VARS_0(1,:),'mri_info_device.serial.number'));
+VARS_0(:,[egid_col serial_col])=[];
+
+% Get column indices of our confound variables
+[sharedvals,scanner_cols_idx]=intersect(VARS_0(1,:),scanner_col_names);
+site_col        = find(strcmpi(VARS_0(1,:),'abcd_site'));
+mri_man_col     = find(strcmpi(VARS_0(1,:),'mri_info_manufacturer'));
+mean_fd_col     = find(strcmpi(VARS_0(1,:),'mean_fd'));
+bmi_col         = find(strcmpi(VARS_0(1,:),'anthro_bmi_calc'));
+weight_col      = find(strcmpi(VARS_0(1,:),'anthro_weight_calc'));
+wholebrain_col  = find(strcmpi(VARS_0(1,:),'smri_vol_subcort.aseg_wholebrain'));
+intracran_col   = find(strcmpi(VARS_0(1,:),'smri_vol_subcort.aseg_intracranialvolume'));
+
+% Now get column indices of the ICA SMs
+[sharedvals,ica_sms_idx]=intersect(VARS_0(1,:),ica_sms);
+
+% Store the original order of the ICA SMs, used later to make our pos-neg axis
+sms_original_order = VARS_0(1,:);
+
+% VARS without column names
+VARS=cell2mat(VARS_0(2:end,:));
+
+% Create confounds matrix
+% NOTE, since we use the same nuisance variable matrix conf (aka Z), this is a PARTIAL CCA where the same nuisance variable matrix (Z) is used for both the SM and connectome matrices
+% (see Winkler Et al. Permutation inference in CCA, Neuroimage 2020)
+
+% Per conversation with Anderson, we might want to consider NOT doing this double normalization thing, instead just add an intercept column to our confounds matrix
+conf  = palm_inormal([ VARS(:,scanner_cols_idx) VARS(:,[mean_fd_col bmi_col weight_col]) VARS(:,[wholebrain_col intracran_col]).^(1/3) ]);  % Gaussianise
+conf(isnan(conf)|isinf(conf)) = 0;                % impute missing data as zeros
+conf  = nets_normalise([conf conf(:,length(scanner_cols_idx):end).^2]);  % add on squared terms and renormalise (all cols other than those for scanner IDs)
+conf(isnan(conf)|isinf(conf)) = 0;                % again convert NaN/inf to 0 (above line makes them reappear for some reason)
+
+%% 10 fold 80-20 validation
+for II=1:10
+    % Sort the sample into train and test groups
+    % get the family IDs
+    fam = VARS(:,37)';
+
+    % G1 will have 80% G2 will have everyone not in G1
+    G1 = [];
+    % families without replacement so same subject isn't drawn twice
+    fam_norepl = fam;
+    idx = 1:5013;
+    G1_logi = logical(zeros(1,length(fam)));
+    G2_logi = logical(zeros(1,length(fam)));
+    n_subs = length(fam);
+
+    % 80% of sample size
+    target_80 = round(n_subs*0.8);
+
+    % while less than 80% of sample size in G1
+    while (length(G1)<target_80)
+        % randomly draw a family from the without replacement list
+        rand = randsample(fam_norepl,1);
+        % find the indicies of all siblings that match family ID
+        sibs = find(fam==rand);
+        % append to G1
+        G1 = [G1 sibs];
+        % create a logical for those 
+        G1_logi(sibs) = 1;
+        % remove family from further drawings
+        fam_norepl(fam_norepl==rand) = [];
+    end
+
+    % G2 is everyone who hasn't been assigned to G1
+    G2_logi(G1_logi==0) = 1;
+    G2 = idx(G2_logi);
+
+    % preprocess the new NET and VARS from G1
+    % for svd
+	Nkeep1=100; 
+	Nkeep2=100;
+
+	% conf for group 1
+	tmpconf=conf(G1,:);
+
+	% NET for group 1, standardize and take svd
+	tmpNET=NET(G1,:);
+	NET1=nets_demean(tmpNET);
+	NET1=NET1/std(NET1(:));
+	amNET=abs(mean(NET));
+	NET3=nets_demean(tmpNET./repmat(amNET,size(tmpNET,1),1));
+	NET3(:,amNET<0.1)=[];
+	NET3=NET3/std(NET3(:));
+	grot=[NET1 NET3]; 
+	NETd=nets_demean(grot-tmpconf*(pinv(tmpconf)*grot)); 
+	[uu1G1,ss1G1,vv1G1]=nets_svds(NETd,Nkeep1); 
+
+	% VARS for group 1, standardize each column
+	varsd=palm_inormal(VARS(G1,:));
+	for i=1:size(varsd,2)
+		grot=(isnan(varsd(:,i))==0); 
+		grotconf=nets_demean(tmpconf(grot,:)); 
+		varsd(grot,i)=normalize(varsd(grot,i)-grotconf*(pinv(grotconf)*varsd(grot,i))); 
+	end
+	varsdCOV=zeros(size(varsd,1));
+	for i=1:size(varsd,1)
+		for j=1:size(varsd,1)
+			grot=varsd([i j],:); 
+			grot=cov(grot(:,sum(isnan(grot))==0)'); 
+			varsdCOV(i,j)=grot(1,2);  
+		end; 
+	end
+	varsdCOV2=nearestSPD(varsdCOV); % scatter(varsdCOV(:),varsdCOV2(:));
+	[uu,dd]=eigs(varsdCOV2,Nkeep2); 
+	uu2G1=uu-tmpconf*(pinv(tmpconf)*uu);   % deconfound again just to be safe
+	ss2G1=sqrt(dd); 
+	grot=uu2G1 * inv(ss2G1); 
+	vv2G1=zeros(size(varsd',1),size(grot,2));    %  vv2G1 = varsd' * grot; % try to get the other eigenvectors
+	for i=1:size(varsd',1)
+	    for j=1:size(grot,2)
+	      groti=isnan(varsd(:,i))==0;
+	      vv2G1(i,j) = varsd(groti,i)' * grot(groti,j) * length(groti) / sum(groti);
+	    end
+    end
+      
+    % Do the CCA on the 80%
+    [grotAG1,grotBG1,grotRG1,grotUG1,grotVG1,grotstatsG1]=canoncorr(uu1G1,uu2G1); 
+
+    % conf
+	tmpconf=conf(G2,:);   % now multiply the CCA outputs into the test dataset
+
+	% NET for group 2, standardize
+	tmpNET=NET(G2,:);
+	NET1=nets_demean(tmpNET);
+	NET1=NET1/std(NET1(:));
+	amNET=abs(mean(NET));
+	NET3=nets_demean(tmpNET./repmat(amNET,size(tmpNET,1),1));
+	NET3(:,amNET<0.1)=[];
+	NET3=NET3/std(NET3(:));
+	grot=[NET1 NET3]; 
+	NETd=nets_demean(grot-tmpconf*(pinv(tmpconf)*grot));
+
+	% VARS for group 2, standardize
+	varsd=palm_inormal(VARS(G2,:));
+	for i=1:size(varsd,2)
+		grot=(isnan(varsd(:,i))==0); 
+		grotconf=nets_demean(tmpconf(grot,:)); 
+		varsd(grot,i)=normalize(varsd(grot,i)-grotconf*(pinv(grotconf)*varsd(grot,i))); 
+    end
+
+    % compare to previous CCA on 80%
+	grot_U2 = NETd  * vv1G1 * ss1G1 * grotAG1;
+	grot=vv2G1 * ss2G1 * grotBG1; 
+	grot_V2=zeros(size(varsd,1),size(grot,2));        % grot_V2 = varsd * vv2G1 * ss2G1 * grotBG1   =  varsd * grot
+	for i=1:size(varsd,1)
+		for j=1:size(grot,2)
+			groti=isnan(varsd(i,:))'==0; 
+			grot_V2(i,j) = varsd(i,groti) * grot(groti,j) * length(groti) / sum(groti); 
+		end; 
+    end
+
+    grotRRR(II) = corr(grot_U2(:,2),grot_V2(:,2))   % correlate the test-data U and V then permute to check p-values
+
+    Nperm=1000;
+    
+    % read in unaltered VARS
+    VARS_forG2 = strcsvread(sprintf('%s/data/%d/VARS.txt', abcd_cca_dir, n_subs));
+   
+    % subset VARS for G2, put header are write to csv
+    G2_vars = VARS_forG2(2:end,:);
+    G2_vars = G2_vars(G2,:);
+    T = cell2table(G2_vars);
+    T.Properties.VariableNames = VARS_forG2(1,:);
+    writetable(T,'G2_vars.csv')
+    
+    % run abcd2blocks
+    [EB,tab] = abcd2blocks('G2_vars.csv', 'G2_blocks.csv', [100 10]);
+    
+    % run palm
+    Pset=palm_quickperms([ ], EB, Nperm, true, false, true, true);
+
+    % run through the permutations
+    for j=1:Nperm
+        grotRRRnull(j)=corr(grot_U2(:,2),grot_V2(Pset(:,j),2)); 
+    end
+	
+    
+    grotRRRp(II)=(1+sum(grotRRRnull(2:end,1)>=grotRRR(II)))/Nperm
+	grotRRRm(II)=mean(grotRRRnull); 
+    grotRRRs(II)=std(grotRRRnull);
+end
+
+save('80-20-mode2.mat','grotRRR','grotRRRp','grotRRRm','grotRRRs') 
+
+[grotRRR' grotRRRp' grotRRRm' grotRRRs']
+mean([grotRRR' grotRRRp' grotRRRm' grotRRRs'])
+    
+
+
+
diff --git a/abcd_cca_batch.m b/abcd_cca_batch.m
new file mode 100755
index 0000000..fc72809
--- /dev/null
+++ b/abcd_cca_batch.m
@@ -0,0 +1,149 @@
+% abcd_cca_batch.m
+% Written by Nikhil Goyal, National Institute of Mental Health, 2019-2020
+% Created: 7/9/20
+% Modified: 7/10/20 (modified to do some calcuations on the set of perms (percentiles, means))
+
+% Script is used in batch processing to calculate CCA for each of the 100,000 permutations we generate
+% This script is given a start index and the number of permutations to calculate
+% NOTE: this script is COMPILED, and it expects input args to be strings (from cmd line), so we type cast the args
+
+% How to compile this script on NIH biowulf using the mcc2 utility:
+% mcc2 -m abcd_cca_batch.m -d compiled_scripts/ -nojvm -a ./dependencies/palm-alpha116/palm_inormal.m
+
+% And to run a SINGLE INSTANCE of it (this is NOT how it is called on slurm):
+%{
+export XAPPLRESDIR=MR/v98/X11/app-defaults
+export LD_LIBRARY_PATH=MR/v98/runtime/glnxa64:MR/v98/bin/glnxa64:MR/v98/sys/os/glnxa64:MR/v98/sys/opengl/lib/glnxa64
+./run_abcd_cca_batch.sh /usr/local/matlab-compiler/v98 1 1000 70 /data/ABCD_MBDU/goyaln2/abcd_cca_replication/ 500
+%}
+
+% To see the command structure for HPC slurm for this script, see gen_batch_permutation_ica_swarm.py
+
+% The output of this script is a .mat file that contains data necessary for null distribution permutation testing
+% For the sake of aggregating this data, some computations are done here (finding percentiles and means) on the num_perms calculated (reduces memory and computational overhead later in analysis when we need to use this data)
+
+function abcd_cca_batch(start_idx_in, num_perms_in, N_dim_in, abcd_cca_dir, n_subs_in)
+    if nargin<5
+        sprintf("ERROR, not enough arguments.")
+        sprintf("Example: abcd_cca_batch(1, 1000, 70, '/data/ABCD_MBDU/goyaln2/abcd_cca_replication/', 1000)")
+        return
+	end
+
+    if ~isdeployed
+        addpath(genpath(sprintf('%s/dependencies/', abcd_cca_dir)));
+        addpath(genpath(sprintf('%s/data/', abcd_cca_dir)));
+        start_idx   =   start_idx_in;
+        num_perms   =   num_perms_in;
+        N_dim   =   N_dim_in;
+        n_subs  =   n_subs_in;
+    elseif isdeployed
+        % When compiled matlab, it reads the command line args all as strings so we need to convert
+        start_idx   =   str2num(start_idx_in);
+        num_perms   =   str2num(num_perms_in);
+        N_dim   =   str2num(N_dim_in);
+        n_subs  =   str2num(n_subs_in);
+    end
+        
+    % Load data
+    % Matrix S1 (only ICA sms)
+    s1  =   sprintf('%s/data/%d/S1.txt', abcd_cca_dir, n_subs);
+    % Matrix S5 (post-PCA SM matrix)
+    s5  =   sprintf('%s/data/%d/S5.txt', abcd_cca_dir, n_subs);
+    % Matrix N0 (raw connectome data)
+    n0  =   sprintf('%s/data/%d/N0.txt', abcd_cca_dir, n_subs);
+    % Matrix N5 (post-PCA connectome matrix)
+    n5  =   sprintf('%s/data/%d/N5.txt', abcd_cca_dir, n_subs);
+
+    S1  =   load(s1);
+    S5  =   load(s5);
+    N0  =   load(n0);
+    N5  =   load(n5);
+  
+    % Permutation matrix
+    pset=sprintf('%s/data/%d/Pset.txt', abcd_cca_dir, n_subs);
+    Pset=load(pset);
+
+    % Temporary version of VARS used for permutation calcs
+    grotvars=palm_inormal(S1);
+    grotvars(:,std(grotvars)<1e-10)=[];
+    grotvars(:,sum(isnan(grotvars)==0)<20)=[];
+
+
+    % Varibles we calculate for each permutation:
+    % r = the vector r for the permutation
+    % nullNETr  =   the permutation's mode 1 connectome weights correlation to the RAW connectome matrix
+    % nullNETv  =   for this permutation, the summation of the correlation values between each mode's connectome weights and the RAW connectome matrix
+    % nullSMr   =   for this permutation, CCA mode 1 SM weights correalated against the S1 matrix
+    % nullSMv   =   for this permutation, the summation of the correlation values between each mode's SM weights and the S1 matrix
+    
+    % Aggregation variables
+    r_agg           =   [];
+    nullNETr_agg    =   [];
+    nullNETv_agg    =   [];
+    nullSMr_agg     =   [];
+    nullSMv_agg     =   [];
+
+    r=zeros(N_dim+1, 1);
+    for perm = start_idx:(start_idx+num_perms-1)
+        % Note, need to range from start_idx:(start_idx+num_perms-1) because without the -1 on the final iteration perm will exceed 100,000 becoming 100,001 and causing an error
+        % Also, need the -1 for only num_perms permutations to be ran
+        [A, B, r(1:end-1), U, V, stats] = canoncorr(N5,S5(Pset(:,perm),:));
+        r(end)=mean(r(1:end-1));
+    
+        nullNETr    =   corr(U(:,1),N0)';
+        nullNETv    =   sum(corr(U,N0).^2,2);
+
+        nullSMr     =   corr(V(:,1),grotvars(Pset(:,perm),:),'rows','pairwise')';
+        nullSMv     =   sum(corr(V,grotvars(Pset(:,perm),:),'rows','pairwise').^2,2);
+
+        r_agg           =   [r_agg;         r' ];
+        nullNETr_agg    =   [nullNETr_agg;  nullNETr'];
+        nullNETv_agg    =   [nullNETv_agg;  nullNETv'];
+        nullSMr_agg     =   [nullSMr_agg;   nullSMr'];
+        nullSMv_agg     =   [nullSMv_agg;   nullSMv'];
+
+    end
+
+    % Now find the summary variables for these 1000 permutatations
+    % These are what we need to compute (taken from Steve Smith's code):
+    % NOTE - these are percentiles of the columns - the result should be a 1xN_dim vector (where the N_dim columns are CCA modes)
+    % prctile(nullNETv,5,1) mean(nullNETv,1) prctile(nullNETv,95,1)
+    % prctile(nullSMv,5,1) mean(nullSMv,1) prctile(nullSMv,95,1)
+    % prctile( max(abs(nullSMr)) ,95)
+    % prctile( max(abs(nullNETr)) ,95)
+
+    s = struct( 'start_idx',                {}, ...
+                'num_perms',                {}, ...
+                'nullNETv_prctile_95',      {}, ...
+                'nullNETv_prctile_5',       {}, ...
+                'nullNETv_mean',            {}, ...
+                'nullSMv_prctile_95',       {}, ...
+                'nullSMv_prctile_5',        {}, ...
+                'nullSMv_mean',             {}, ...
+                'nullNETr_prctile_95',      {}, ...
+                'nullSMr_prctile_95',       {}, ...
+                'r',                        {}  );
+
+    s(1).start_idx             =   start_idx;
+    s(1).num_perms             =   num_perms;
+    
+    s(1).nullNETv_prctile_95   =   prctile( nullNETv_agg,   95,1);
+    s(1).nullNETv_prctile_5    =   prctile( nullNETv_agg,   5,1);
+    s(1).nullNETv_mean         =   mean(    nullNETv_agg,   1);
+    s(1).nullNETv_std	       =   std(nullNETv_agg, 1);
+
+    s(1).nullSMv_prctile_95    =   prctile( nullSMv_agg,    95, 1);
+    s(1).nullSMv_prctile_5     =   prctile( nullSMv_agg,    5, 1);
+    s(1).nullSMv_mean          =   mean(    nullSMv_agg,    1);
+    s(1).nullSMv_std	       =   std(nullSMv_agg, 1);
+
+    s(1).nullNETr_prctile_95   =   prctile( max(abs(nullNETr_agg)), 95);
+    s(1).nullSMr_prctile_95    =   prctile( max(abs(nullSMr_agg)),  95);
+    s(1).r                     =   r_agg;
+    s(1).nullNETr_std	       =	std(nullNETr_agg, 1);
+    s(1).nullSMr_std	       =	std(nullSMr_agg, 1);
+
+    % Save .mat file with the permutations
+    save(sprintf('%s/data/%d/permutations/permutations_%d.mat', abcd_cca_dir, n_subs, start_idx), 's');
+
+end
\ No newline at end of file
diff --git a/abcd_cca_interactive.m b/abcd_cca_interactive.m
new file mode 100755
index 0000000..3e94906
--- /dev/null
+++ b/abcd_cca_interactive.m
@@ -0,0 +1,261 @@
+% NOTE: need the following loaded on path
+%       fsl, connectome-workbench
+
+perms_per_batch_in = 2000
+N_perm_in = 100000
+N_dim_in = 70
+n_subs_in = 5013
+abcd_cca_dir    =   '/data/NIMH_scratch/abcd_cca/abcd_cca_replication/';
+
+if ~isdeployed
+    addpath(genpath(sprintf('%s/dependencies/', abcd_cca_dir)));
+    addpath(genpath(sprintf('%s/data/', abcd_cca_dir)));
+    perms_per_batch =   perms_per_batch_in;
+    N_perm          =   N_perm_in;
+    N_dim           =   N_dim_in;
+    n_subs          =   n_subs_in;
+elseif isdeployed
+    % When compiled matlab, it reads the command line args all as strings so we need to convert
+    perms_per_batch =   str2num(perms_per_batch_in);
+    N_perm          =   str2num(N_perm_in);
+    N_dim           =   str2num(N_dim_in);
+    n_subs          =   str2num(n_subs_in);
+end
+
+melodic_folder  =   sprintf('%s/data_prep/data/stage_3/%d.gica', abcd_cca_dir, n_subs);
+SUMPICS         =   sprintf('%s/melodic_IC_thin.sum', melodic_folder);
+SUMPICS_THICK   =   sprintf('%s/melodic_IC_thick.sum', melodic_folder);
+
+%% --- Read in data, set some variables, create confounds matrix ---
+% load in data from FSLNets calculations
+fslnets_mat     =   load(sprintf('%s/data_prep/data/stage_4/%d/fslnets.mat', abcd_cca_dir, n_subs));
+
+% Load the Subjects X Nodes matrix (should be size Nx19900)
+N0 = load(sprintf('%s/data/%d/NET.txt', abcd_cca_dir, n_subs));
+
+% VARS_0 = Subjects X SMs text file
+VARS_0 = strcsvread(sprintf('%s/data/%d/VARS.txt', abcd_cca_dir, n_subs));
+
+% Load list of SMs to be used in ICA (this list is made manually)
+ica_sms_0=fileread(sprintf('%s/data/ica_subject_measures.txt', abcd_cca_dir));
+ica_sms = strsplit(ica_sms_0);
+
+% Load list of names of colums used to encode scanner ID
+scanner_col_names_0=fileread(sprintf('%s/data/%d/scanner_confounds.txt', abcd_cca_dir, n_subs));
+scanner_col_names = strsplit(scanner_col_names_0);
+
+% Drop subject col and device serial number col (they are strings)
+egid_col    = find(strcmpi(VARS_0(1,:),'subjectid'));
+serial_col  = find(strcmpi(VARS_0(1,:),'mri_info_device.serial.number'));
+VARS_0(:,[egid_col serial_col])=[];
+
+% Get column indices of our confound variables
+[sharedvals,scanner_cols_idx]=intersect(VARS_0(1,:),scanner_col_names);
+site_col        = find(strcmpi(VARS_0(1,:),'abcd_site'));
+mri_man_col     = find(strcmpi(VARS_0(1,:),'mri_info_manufacturer'));
+mean_fd_col     = find(strcmpi(VARS_0(1,:),'mean_fd'));
+bmi_col         = find(strcmpi(VARS_0(1,:),'anthro_bmi_calc'));
+weight_col      = find(strcmpi(VARS_0(1,:),'anthro_weight_calc'));
+wholebrain_col  = find(strcmpi(VARS_0(1,:),'smri_vol_subcort.aseg_wholebrain'));
+intracran_col   = find(strcmpi(VARS_0(1,:),'smri_vol_subcort.aseg_intracranialvolume'));
+
+% Now get column indices of the ICA SMs
+[sharedvals,ica_sms_idx]=intersect(VARS_0(1,:),ica_sms);
+
+% Store the original order of the ICA SMs, used later to make our pos-neg axis
+sms_original_order = VARS_0(1,:);
+
+% VARS without column names
+VARS=cell2mat(VARS_0(2:end,:));
+
+% Create confounds matrix
+% NOTE, since we use the same nuisance variable matrix conf (aka Z), this is a PARTIAL CCA where the same nuisance variable matrix (Z) is used for both the SM and connectome matrices
+% (see Winkler Et al. Permutation inference in CCA, Neuroimage 2020)
+
+% Per conversation with Anderson, we might want to consider NOT doing this double normalization thing, instead just add an intercept column to our confounds matrix
+conf  = palm_inormal([ VARS(:,scanner_cols_idx) VARS(:,[mean_fd_col bmi_col weight_col]) VARS(:,[wholebrain_col intracran_col]).^(1/3) ]);  % Gaussianise
+conf(isnan(conf)|isinf(conf)) = 0;                % impute missing data as zeros
+conf  = nets_normalise([conf conf(:,length(scanner_cols_idx):end).^2]);  % add on squared terms and renormalise (all cols other than those for scanner IDs)
+conf(isnan(conf)|isinf(conf)) = 0;                % again convert NaN/inf to 0 (above line makes them reappear for some reason)
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% --- SM PROCESSING ---
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Matrix S1 (only ICA sms)
+S1=[VARS(:,ica_sms_idx)];
+
+% Now, prepare the final SM matrix and run the PCA
+% S2, formed by gaussianizing the SMs we keep
+S2=palm_inormal(S1); % Gaussianise
+
+ % Now generate S3, formed by deconfounding the 17 confounds out of S2
+S3=S2;
+for i=1:size(S3,2) % deconfound ignoring missing data
+    grot=(isnan(S3(:,i))==0);
+    grotconf=nets_demean(conf(grot,:));
+    S3(grot,i)=normalize(S3(grot,i)-grotconf*(pinv(grotconf)*S3(grot,i)));
+end
+
+% Determine how much data is missing:
+sum(sum(isnan(S3)))/(size(S3,1)*size(S3,2))*100
+
+% Next, we need to generate S4
+% First, estimate the SubjectsXSubjects covariance matrix (where for any two subjects, SMs missing for either subject are ignored)
+% The approximate covariance matrix (varsdCOV) is then projected onto the nearest valid covariance matrix using nearestSPD toolbox.
+% This method avoids imputation, and S4 can be fed into PCA.
+S3Cov=zeros(size(S3,1));
+for i=1:size(S3,1) % estimate "pairwise" covariance, ignoring missing data
+    for j=1:size(S3,1)
+        grot=S3([i j],:);
+        grot=cov(grot(:,sum(isnan(grot))==0)');
+        S3Cov(i,j)=grot(1,2);
+    end
+end
+S4=nearestSPD(S3Cov); % project onto the nearest valid covariance matrix. This method avoids imputation (we can't have any missing values before running the PCA)
+
+% Check the before and after correlation:
+corrcoef(S4,S3Cov)  %0.9999 for the 5013 subject sample
+
+% Generate S5, the top eigenvectors for SMs, to avoid overfitting and reduce dimensionality
+[uu,dd]=eigs(S4,N_dim);       % SVD (eigs actually)
+S5=uu-conf*(pinv(conf)*uu);   % deconfound again just to be safe 
+
+
+% --- NETMAT PROCESSING ---
+fprintf("Calculating netmat matrices N1 through N5\n")
+% N1, formed by 1. demean, 2. globally variance normalize
+N1=nets_demean(N0);   % 1. Demean
+N1=N1/std(N1(:));     % 2. variance normalize
+% N2, formed by 1. Demean, 2. remove columns that are badly conditions due to low z (z<0.1) mean value, 3. global variance normalize the matrix
+abs_mean_NET=abs(mean(N0));                             % get mean, take abs val
+N2=nets_demean(N0./repmat(abs_mean_NET,size(N0,1),1));  % 1. demean
+N2(:,abs_mean_NET<0.1)=[];                              % 2. remove columns with mean value <0.1                          
+N2=N2/std(N2(:));                                       % 3. variance normalize
+% N3, formed by horizontally concat N1 and N2
+N3=[N1 N2]; % Concat horizontally
+% N4, formed by regressing the confounds matrix out of N3
+N4=nets_demean(N3-conf*(pinv(conf)*N3));
+% N5
+[N5,ss1,vv1]=nets_svds(N4,N_dim); % PCA of netmat via SVD reduction
+
+%% --- CCA with Qz---
+fprintf("Running CCA on matrices S5 and N5\n")
+[A, B, R, U, V, initial_stats] = canoncorr(N5,S5);
+
+% Plot
+figure;
+scatter(U(:,1), V(:,1))
+% Plot with regression line
+figure;
+plotregression(U(:,1), V(:,1))
+
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% --- POSITIVE NEGATIVE AXIS
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% post hoc columns not included in CCA
+bmi_col         = find(strcmpi(VARS_0(1,:),'anthro_bmi_calc'));
+educ_col         = find(strcmpi(VARS_0(1,:),'high.educ'));
+age_col         = find(strcmpi(VARS_0(1,:),'age'));
+income_col         = find(strcmpi(VARS_0(1,:),'household.income.bl'));
+
+% SM indices
+pneg_idx = [ica_sms_idx' bmi_col educ_col age_col income_col];
+% include / exclude vector
+inc_exc = [ones(1,length(ica_sms_idx)) zeros(1,4)]
+% SM names
+pneg_names = VARS_0(1,pneg_idx)
+% SM measures
+pneg_vars = [VARS(:,ica_sms_idx) VARS(:,bmi_col) VARS(:,educ_col) VARS(:,age_col) VARS(:,income_col)];
+
+% specify CCA mode
+I = 2;
+% run the correlations, the sign is arbitrary, so lets flip it to be
+% consistent with Smith
+CorCCA = -1*corr(V(:,I), palm_inormal(pneg_vars), 'rows','pairwise');
+
+% open the figure
+figure;
+plot(CorCCA);   % correlate CCA component 1 with original set of SMs
+% Z scores
+ZCCA          = 12*0.5*log((1+CorCCA)./(1-CorCCA));    % r2z  -  factor x12 gets close to "real" zstats.
+[tmp_B,tmp_I] = sort(CorCCA,'ascend'); % sort by corralation value
+toplist       = [];
+strlist       = {};
+
+% Iterate over the SMs
+for i = 1:length(tmp_I)
+    % Since we sort the values, we use ii to index into the unsorted list and get the proper index of the column from VARS
+    ii = tmp_I(i);
+    % Pull the values for the column associated with the SM being looked at
+    Y                 = pneg_vars(:,ii);
+    Y_no_nan_idx      = ~isnan(Y);
+    Y                 = nets_demean(Y(Y_no_nan_idx));
+    X                 = nets_demean(V(Y_no_nan_idx,I));
+    VarExplained(ii)  = var(X*(pinv(X)*Y)) / var(Y);
+
+    if abs(tmp_B(i))>0.2
+        BVname = string(pneg_names(ii));
+        toplist=[toplist ii];
+        str=sprintf('%.2f %.2f %.2f %s %s',CorCCA(ii),ZCCA(ii),VarExplained(ii),BVname);
+        strlist{end+1} = str;
+    end
+end
+
+x   = ones([length(toplist),1]);
+dx  = 0.1;
+y   = 1:length(toplist);
+dy  = 0.1;
+set(0,'DefaultTextInterpreter','none')
+scatter(x,y);
+text(x+dx, y+dy, strlist);
+
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% --- VARIANCE/PERMUTATION ANALYSIS ---
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+perm_data  = load(sprintf('%s/data/%d/permutation_data.mat', abcd_cca_dir, n_subs)); 
+tmp_VARS                                = palm_inormal(S1);
+tmp_VARS(:,std(tmp_VARS)<1e-10)         = [];
+tmp_VARS(:,sum(isnan(tmp_VARS)==0)<20)  = [];
+
+for i = 1:N_dim;  % show corrected pvalues
+  Rpval(i) = (1 + sum(perm_data.s_agg(1).r(2:end,1) >= R(i))) / N_perm;
+end
+Rpval;
+Ncca=sum(Rpval<0.05)  % number of significant CCA components
+Rpval(1:Ncca)
+
+variance_data_NET   = [ sum(corr(U, N0).^2,2)'; perm_data.s_agg(1).nullNETv_prctile_5; perm_data.s_agg(1).nullNETv_mean; perm_data.s_agg(1).nullNETv_prctile_95; sum(corr(N5,N0).^2,2)' ] * 100 / size(N0,2);
+variance_data_VARS  = [ sum(corr(V,tmp_VARS,'rows','pairwise').^2,2)'; perm_data.s_agg(1).nullSMv_prctile_5; perm_data.s_agg(1).nullSMv_mean; perm_data.s_agg(1).nullSMv_prctile_95; sum(corr(S5,tmp_VARS,'rows','pairwise').^2,2)' ] * 100 / size(tmp_VARS,2);
+
+z_scores_NET = [ (sum(corr(U,N0).^2,2)-  perm_data.s_agg(1).nullNETv_mean') ./ perm_data.s_agg(1).nullNETv_std' ] ;
+z_scores_SM = [ (sum(corr(V,tmp_VARS,'rows','pairwise').^2,2)- perm_data.s_agg(1).nullSMv_mean') ./ perm_data.s_agg(1).nullSMv_std' ];
+
+% Look at first 20 modes
+I=1:20;
+
+% Connectomes variance
+figure;
+subplot(2,1,1); 
+hold on;
+% Draw the rectangles for null distributions per mode
+for i=1:length(I)
+    rectangle('Position',[i-0.5 variance_data_NET(2,i) 1 variance_data_NET(4,i)-variance_data_NET(2,i)],'FaceColor',[0.8 0.8 0.8],'EdgeColor',[0.8 0.8 0.8]);
+end
+plot(variance_data_NET(3,I),'k');
+plot(variance_data_NET(1,I),'b');
+plot(variance_data_NET(1,I),'b.');
+
+
+% Subject measures variance
+subplot(2,1,2); 
+hold on;
+for i=1:length(I)
+    rectangle('Position',[i-0.5 variance_data_VARS(2,i) 1 variance_data_VARS(4,i)-variance_data_VARS(2,i)],'FaceColor',[0.8 0.8 0.8],'EdgeColor',[0.8 0.8 0.8]);
+end
+plot(variance_data_VARS(3,I),'k');
+plot(variance_data_VARS(1,I),'b');
+plot(variance_data_VARS(1,I),'b.');
diff --git a/abcd_perm_agg.m b/abcd_perm_agg.m
new file mode 100755
index 0000000..52e9e78
--- /dev/null
+++ b/abcd_perm_agg.m
@@ -0,0 +1,113 @@
+% abcd_perm_agg.m
+% Written by Nikhil Goyal, National Institute of Mental Health, 2019-2020
+% Created: 7/1/2020
+% Modified: 7/10/20 (modified to accomodate the outputs of the updated abcd_cca_batch.m which reduces the amount of data in each .mat file)
+
+% Script is used in batch processing to calculate CCA for each of the 100,000 permutations we generate
+
+function abcd_perm_agg(perms_per_batch_in, N_perm_in, N_dim_in, abcd_cca_dir, n_subs_in)
+    if nargin<5
+        sprintf("ERROR, not enough arguments.")
+        sprintf("Example: abcd_perm_agg(2000, 100000, 70, '/data/ABCD_MBDU/goyaln2/abcd_cca_replication/', 5013)")
+        return
+    end
+    
+    if ~isdeployed
+        addpath(genpath(sprintf('%s/dependencies/', abcd_cca_dir)));
+        addpath(genpath(sprintf('%s/data/', abcd_cca_dir)));
+        perms_per_batch =   perms_per_batch_in;
+        N_perm          =   N_perm_in;
+        N_dim           =   N_dim_in;
+        n_subs          =   n_subs_in;
+    elseif isdeployed
+        % When compiled matlab, it reads the command line args all as strings so we need to convert
+        perms_per_batch =   str2num(perms_per_batch_in);
+        N_perm          =   str2num(N_perm_in);
+        N_dim           =   str2num(N_dim_in);
+        n_subs          =   str2num(n_subs_in);
+    end
+
+    r_agg                       =   [];
+    
+    nullNETv_prctile_95_agg     =   [];
+    nullNETv_prctile_5_agg      =   [];
+    nullNETv_mean_agg           =   [];
+    nullNETv_std_agg            =   [];
+
+    nullSMv_prctile_95_agg      =   [];
+    nullSMv_prctile_5_agg       =   [];
+    nullSMv_mean_agg            =   [];
+    nullSMv_std_agg             =   [];
+    
+
+    nullNETr_prctile_95_agg     =   [];
+    nullSMr_prctile_95_agg      =   [];
+    nullNETr_std_agg            =   [];
+    nullSMr_std_agg             =   [];
+
+    for perm = 1:perms_per_batch:N_perm
+        % Iterate over 1 = 1, 1001, 2001, .... 99001
+        % filename of .mat files we need to load are of form permutations_<#>.mat
+
+        f_name = sprintf('%s/data/%d/permutations/permutations_%d.mat',abcd_cca_dir, n_subs, perm);
+
+        % Load the .mat file
+        mat_file = load(f_name);
+
+        r_agg                       =   [r_agg;                     mat_file.s(1).r                     ];
+
+        nullNETv_prctile_95_agg     =   [nullNETv_prctile_95_agg;   mat_file.s(1).nullNETv_prctile_95   ];
+        nullNETv_prctile_5_agg      =   [nullNETv_prctile_5_agg;    mat_file.s(1).nullNETv_prctile_5    ];
+        nullNETv_mean_agg           =   [nullNETv_mean_agg;         mat_file.s(1).nullNETv_mean         ];
+        nullNETv_std_agg	        =	[nullNETv_std_agg;         mat_file.s(1).nullNETv_std];
+        
+        nullSMv_prctile_95_agg      =   [nullSMv_prctile_95_agg;    mat_file.s(1).nullSMv_prctile_95    ];
+        nullSMv_prctile_5_agg       =   [nullSMv_prctile_5_agg;     mat_file.s(1).nullSMv_prctile_5     ];
+        nullSMv_mean_agg            =   [nullSMv_mean_agg;          mat_file.s(1).nullSMv_mean          ];
+        nullSMv_std_agg		        =	[nullSMv_std_agg;         mat_file.s(1).nullSMv_std];
+        
+        nullNETr_prctile_95_agg     =   [nullNETr_prctile_95_agg;   mat_file.s(1).nullNETr_prctile_95   ];
+        nullSMr_prctile_95_agg      =   [nullSMr_prctile_95_agg;    mat_file.s(1).nullSMr_prctile_95    ];
+        nullNETr_std_agg	        =	[nullNETr_std_agg;         mat_file.s(1).nullNETr_std];
+        nullSMr_std_agg		        =	[nullSMr_std_agg;         mat_file.s(1).nullSMr_std];
+
+    end
+
+    s_agg = struct( 'perms_per_batch',          {}, ...
+                    'tot_perms',                {}, ...
+                    'nullNETv_prctile_95',      {}, ...
+                    'nullNETv_prctile_5',       {}, ...
+                    'nullNETv_mean',            {}, ...
+                    'nullSMv_prctile_95',       {}, ...
+                    'nullSMv_prctile_5',        {}, ...
+                    'nullSMv_mean',             {}, ...
+                    'nullNETr_prctile_95',      {}, ...
+                    'nullSMr_prctile_95',       {}, ...
+                    'r',                        {}  );
+
+    s_agg(1).perms_per_batch        =   perms_per_batch;
+    s_agg(1).tot_perms              =   N_perm;
+    
+    s_agg(1).nullNETv_prctile_95    =   mean(    nullNETv_prctile_95_agg,1);
+    s_agg(1).nullNETv_prctile_5     =   mean(    nullNETv_prctile_5_agg,1);
+    
+    s_agg(1).nullNETv_mean          =   mean(       nullNETv_mean_agg,          1);
+    s_agg(1).nullNETv_std           =   mean(nullNETv_std_agg, 1);
+    
+    s_agg(1).nullSMv_prctile_95     =   mean(    nullSMv_prctile_95_agg, 1);
+    s_agg(1).nullSMv_prctile_5      =   mean(    nullSMv_prctile_5_agg, 1);
+    
+    s_agg(1).nullSMv_mean           =   mean(       nullSMv_mean_agg,           1);
+    s_agg(1).nullSMv_std            =   mean(nullSMv_std_agg, 1);
+
+    s_agg(1).nullNETr_prctile_95    =   prctile( max(abs(nullNETr_prctile_95_agg)), 95);
+    s_agg(1).nullSMr_prctile_95     =   prctile( max(abs(nullSMr_prctile_95_agg)),  95);
+    s_agg(1).r                      =   r_agg;
+    s_agg(1).nullNETr_std           =   mean(nullNETr_std_agg, 1);
+    s_agg(1).nullSMr_std            =   mean(nullSMr_std_agg, 1);
+
+
+    % Now save
+    save(sprintf('%s/data/%d/permutation_data.mat', abcd_cca_dir, n_subs), 's_agg');
+
+end
\ No newline at end of file
diff --git a/abcd_perm_gen.m b/abcd_perm_gen.m
new file mode 100755
index 0000000..1410940
--- /dev/null
+++ b/abcd_perm_gen.m
@@ -0,0 +1,44 @@
+% abcd_perm_gen.m
+% Written by Nikhil Goyal, National Institute of Mental Health, 2019-2020
+% Created: 7/1/2020
+% Modified: 7/6/20 (made the script into a function)
+% 7/7/20 (updated the palm_quickperms call to make it faster)
+
+% n_subs = name of subset folder (e.x. 1000) where the data is/should be saved; value is equal to the number of subjects under analysis
+
+% Example call:
+% abcd_perm_gen(100000, "/data/ABCD_MBDU/goyaln2/abcd_cca_replication/", 1000)
+
+
+function abcd_perm_gen(N_perm, abcd_cca_dir, n_subs)
+    if nargin<3
+        sprintf("ERROR, not enough arguments.")
+        sprintf("Example: abcd_perm_gen(100000, '/data/ABCD_MBDU/goyaln2/abcd_cca_replication/', 1000)")
+        return
+    end
+
+    if ~isdeployed
+        addpath(genpath(sprintf('%s/dependencies/', abcd_cca_dir)));
+        addpath(genpath(sprintf('%s/data/', abcd_cca_dir)));
+    end
+
+    % Generate permutations using the hcp2blocks package
+    in_VARS = sprintf('%s/data/%d/VARS.txt', abcd_cca_dir, n_subs);
+    blocksfile = sprintf('%s/data/%d/blocksfile.csv', abcd_cca_dir, n_subs);
+    [EB,tab] = abcd2blocks(in_VARS, blocksfile, [100 10]);
+
+    % NOTE, call palm_quickperms with the following options (for fastest calculations while also finding permutations proper)
+    % [Pset,VG] = palm_quickperms(M,EB,P,EE,ISE,CMCx,CMCp)
+    % M = [] (no design matrix
+    % EB = exchangability blocks from abcd2blocks calc
+    % N_perm = number of permutations
+    % EE = true (so we do permutations proper)
+    % ISE = false (so we don't do sign flips)
+    % CMCx = true (this only applies for design matix, so we set to true)
+    % CMCp = true (allow duplicate permutations, for speed and since we have nearly infinite possible permutations)
+    Pset=palm_quickperms([ ], EB, N_perm, true, false, true, true);
+    % Note, Pset is the final matrix of permuations (one permutation per column)
+
+    % Now save Pset to file
+    writematrix(Pset, sprintf('%s/data/%d/Pset.txt', abcd_cca_dir, n_subs));
+end
\ No newline at end of file