From 2837ae558e98a3984cd86c1889b602df4544ea7e Mon Sep 17 00:00:00 2001 From: Anusri Pampari Date: Tue, 27 Aug 2024 06:59:01 -0700 Subject: [PATCH] upload json scripts --- .../chrombpnet/atac_prepare.py | 116 +++++++ .../chrombpnet/dnase_prepare.py | 115 +++++++ .../READMES/counts.deepshap.README | 0 .../counts_contrib_upload/atac_tar.py | 215 +++++++++++++ .../atac_tar_k5_and_hep.py | 193 +++++++++++ .../counts_contrib_upload/dnase_tar.py | 222 +++++++++++++ .../dnase_tar_k5_and_hep.py | 209 ++++++++++++ .../bias_models/READMEs/bias.training.README | 63 ++++ .../atac_bias_model_chrombpnet.csv | 6 + .../bias_models/atac_bias_model_upload.py | 10 +- .../bias_models/atac_bias_upload_utils.py | 32 +- .../chrombpnet/READMEs/bias.models.README | 92 ++++++ .../chrombpnet/READMEs/bias.training.README | 63 ++++ .../chrombpnet/READMEs/models.README | 98 ++++++ .../chrombpnet/READMEs/training.README | 66 ++++ .../chrombpnet/atac_bias_model_chrombpnet.csv | 26 ++ .../chrombpnet/atac_bias_model_upload.py | 260 +++++++++++++++ .../chrombpnet/atac_bias_upload_utils.py | 174 ++++++++++ .../dnase_bias_model_chrombpnet.csv | 0 .../chrombpnet/dnase_bias_model_upload.py | 283 ++++++++++++++++ .../chrombpnet/dnase_bias_upload_utils.py | 301 ++++++++++++++++++ .../get_gc_matched_negatives_test.py | 175 ++++++++++ .../make_test_negatives/run_script.py | 26 ++ .../make_test_negatives/run_script_dnase.py | 36 +++ .../chrombpnet/make_test_negatives/script.sh | 15 + .../make_test_negatives/script_dnase.sh | 16 + .../chrombpnet/model_upload_utils.py | 235 ++++++++++++++ .../bias_models/chrombpnet/temp.sh | 2 + .../chrombpnet/READMEs/bias.models.README | 92 ++++++ .../chrombpnet/READMEs/bias.training.README | 63 ++++ .../chrombpnet/atac_bias_model_chrombpnet.csv | 26 ++ .../atac_prepare_file_for_upload_models.py | 159 +++++++++ .../dnase_prepare_file_for_upload_models.py | 204 ++++++++++++ .../chrombpnet/upload_utils.py | 281 ++++++++++++++++ .../dnase_prepare_file_for_upload_models.py | 2 +- 
.../chrombpnet_models/upload_utils.py | 6 +- .../READMEs/modisco.report.README | 0 .../modisco_uploads/atac_prepare.py | 75 +++++ .../modisco_uploads/dnase_prepare.py | 75 +++++ .../chrombpnet/READMEs/bc.predicted.README | 71 +++++ .../chrombpnet/READMEs/predicted.README | 71 +++++ .../chrombpnet/atac_prepare.py | 32 ++ .../chrombpnet/atac_prepare_tar.py | 139 ++++++++ .../chrombpnet/atac_prepare_tar_w_bias.py | 139 ++++++++ .../chrombpnet/dnase_prepare.py | 31 ++ .../chrombpnet/dnase_prepare_tar.py | 146 +++++++++ .../chrombpnet/dnase_prepare_tar_w_bias.py | 144 +++++++++ .../dnase_prepare_tar.py | 0 .../READMES/profile.deepshap.README | 0 .../profile_contrib_upload/atac_tar.py | 215 +++++++++++++ .../atac_tar_k5_and_hep.py | 193 +++++++++++ .../profile_contrib_upload/dnase_tar.py | 221 +++++++++++++ .../dnase_tar_k5_and_hep.py | 212 ++++++++++++ 53 files changed, 5627 insertions(+), 19 deletions(-) create mode 100644 upload_jsons/upload_jsons_scripts/counts_bigwig_uploads/chrombpnet/atac_prepare.py create mode 100644 upload_jsons/upload_jsons_scripts/counts_bigwig_uploads/chrombpnet/dnase_prepare.py create mode 100644 upload_jsons/upload_jsons_scripts/counts_contrib_upload/READMES/counts.deepshap.README create mode 100644 upload_jsons/upload_jsons_scripts/counts_contrib_upload/atac_tar.py create mode 100644 upload_jsons/upload_jsons_scripts/counts_contrib_upload/atac_tar_k5_and_hep.py create mode 100644 upload_jsons/upload_jsons_scripts/counts_contrib_upload/dnase_tar.py create mode 100644 upload_jsons/upload_jsons_scripts/counts_contrib_upload/dnase_tar_k5_and_hep.py create mode 100644 upload_jsons/upload_jsons_scripts/model_uploads/bias_models/READMEs/bias.training.README create mode 100644 upload_jsons/upload_jsons_scripts/model_uploads/bias_models/atac_bias_model_chrombpnet.csv create mode 100644 upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/READMEs/bias.models.README create mode 100644 
upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/READMEs/bias.training.README create mode 100644 upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/READMEs/models.README create mode 100644 upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/READMEs/training.README create mode 100644 upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/atac_bias_model_chrombpnet.csv create mode 100644 upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/atac_bias_model_upload.py create mode 100644 upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/atac_bias_upload_utils.py create mode 100644 upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/dnase_bias_model_chrombpnet.csv create mode 100644 upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/dnase_bias_model_upload.py create mode 100644 upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/dnase_bias_upload_utils.py create mode 100644 upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/make_test_negatives/get_gc_matched_negatives_test.py create mode 100644 upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/make_test_negatives/run_script.py create mode 100644 upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/make_test_negatives/run_script_dnase.py create mode 100644 upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/make_test_negatives/script.sh create mode 100644 upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/make_test_negatives/script_dnase.sh create mode 100644 upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/model_upload_utils.py create mode 100644 upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/temp.sh create mode 100644 
upload_jsons/upload_jsons_scripts/model_uploads/chrombpnet_models/chrombpnet/READMEs/bias.models.README create mode 100644 upload_jsons/upload_jsons_scripts/model_uploads/chrombpnet_models/chrombpnet/READMEs/bias.training.README create mode 100644 upload_jsons/upload_jsons_scripts/model_uploads/chrombpnet_models/chrombpnet/atac_bias_model_chrombpnet.csv create mode 100644 upload_jsons/upload_jsons_scripts/model_uploads/chrombpnet_models/chrombpnet/atac_prepare_file_for_upload_models.py create mode 100644 upload_jsons/upload_jsons_scripts/model_uploads/chrombpnet_models/chrombpnet/dnase_prepare_file_for_upload_models.py create mode 100644 upload_jsons/upload_jsons_scripts/model_uploads/chrombpnet_models/chrombpnet/upload_utils.py create mode 100644 upload_jsons/upload_jsons_scripts/modisco_uploads/READMEs/modisco.report.README create mode 100644 upload_jsons/upload_jsons_scripts/modisco_uploads/atac_prepare.py create mode 100644 upload_jsons/upload_jsons_scripts/modisco_uploads/dnase_prepare.py create mode 100644 upload_jsons/upload_jsons_scripts/predction_uploads/chrombpnet/READMEs/bc.predicted.README create mode 100644 upload_jsons/upload_jsons_scripts/predction_uploads/chrombpnet/READMEs/predicted.README create mode 100644 upload_jsons/upload_jsons_scripts/predction_uploads/chrombpnet/atac_prepare.py create mode 100644 upload_jsons/upload_jsons_scripts/predction_uploads/chrombpnet/atac_prepare_tar.py create mode 100644 upload_jsons/upload_jsons_scripts/predction_uploads/chrombpnet/atac_prepare_tar_w_bias.py create mode 100644 upload_jsons/upload_jsons_scripts/predction_uploads/chrombpnet/dnase_prepare.py create mode 100644 upload_jsons/upload_jsons_scripts/predction_uploads/chrombpnet/dnase_prepare_tar.py create mode 100644 upload_jsons/upload_jsons_scripts/predction_uploads/chrombpnet/dnase_prepare_tar_w_bias.py create mode 100644 upload_jsons/upload_jsons_scripts/profile_bigwigs_uploads/dnase_prepare_tar.py create mode 100644 
upload_jsons/upload_jsons_scripts/profile_contrib_upload/READMES/profile.deepshap.README create mode 100644 upload_jsons/upload_jsons_scripts/profile_contrib_upload/atac_tar.py create mode 100644 upload_jsons/upload_jsons_scripts/profile_contrib_upload/atac_tar_k5_and_hep.py create mode 100644 upload_jsons/upload_jsons_scripts/profile_contrib_upload/dnase_tar.py create mode 100644 upload_jsons/upload_jsons_scripts/profile_contrib_upload/dnase_tar_k5_and_hep.py diff --git a/upload_jsons/upload_jsons_scripts/counts_bigwig_uploads/chrombpnet/atac_prepare.py b/upload_jsons/upload_jsons_scripts/counts_bigwig_uploads/chrombpnet/atac_prepare.py new file mode 100644 index 00000000..25898df8 --- /dev/null +++ b/upload_jsons/upload_jsons_scripts/counts_bigwig_uploads/chrombpnet/atac_prepare.py @@ -0,0 +1,116 @@ +import os +import json +import pandas as pd +import pybedtools + +encids = ["K562", "HEPG2", "IMR90", "H1ESC", "GM12878"] +#encids = ["K562", "HEPG2"] +#encids = [ "IMR90", "H1ESC", "GM12878"] + +def make_bb_file(in_bed, out_bb): + assert(os.path.isfile("atac_temp.bed")==False) + command = "zcat "+in_bed+" | LC_COLLATE=C sort -k1,1 -k2,2n > atac_temp.bed" + print(command) + os.system(command) + + command = "bedToBigBed atac_temp.bed /oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/reference/chrom.sizes "+out_bb + print(command) + os.system(command) + + + command = "rm atac_temp.bed" + print(command) + os.system(command) + +chrs = list(map(str,list(range(1,23)))) +chrs = ['chr'+i for i in chrs] +chrs = chrs + ['chrX', 'chrY'] + +def make_sel_bedfile(in_bed, out_bed): + input_bed = pd.read_csv(in_bed, sep="\t", header=None) + print(input_bed.shape) + input_bed = input_bed[input_bed[0].isin(chrs)] + print(input_bed.shape) + + input_bed[1] = input_bed[1]+input_bed[9]-500 + input_bed[2] = input_bed[1] + 1000 + print(input_bed.head()) + x = pybedtools.BedTool.from_dataframe(input_bed[[0,1,2]]) + x = x.sort().merge() + output_bed = x.to_dataframe() + 
print(output_bed.shape) + print(output_bed.head()) + output_bed.to_csv(out_bed, sep='\t', header=False, index=False) + +encode_id = {"K562": "ENCSR868FGK", +"GM12878": "ENCSR637XSC", +"HEPG2": "ENCSR291GJU", +"IMR90": "ENCSR200OML", +"H1ESC": "GSE267154"} + +odir='atac/' +for encid in encids: + print(encid) + ofile = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+encid+"/interpret_upload/average_preds/"+encid+"_folds_merged.counts_scores_new_compressed.stats" + if os.path.isfile(ofile): + counts_bw = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+encid+"/interpret_upload/average_preds/"+encid+"_folds_merged.counts_scores_new_compressed.bw" + else: + print(ofile) + ofile = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+encid+"/interpret_upload/average_preds/"+encid+"_folds_merged.counts_scores.stats" + if os.path.isfile(ofile): + counts_bw = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+encid+"/interpret_upload/average_preds/"+encid+"_folds_merged.counts_scores.bw" + else: + counts_bw = None + print(ofile) + continue + + + ofile = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+encid+"/interpret_upload/average_preds/"+encid+"_folds_merged.profile_scores_new_compressed.stats" + if os.path.isfile(ofile): + profile_bw = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+encid+"/interpret_upload/average_preds/"+encid+"_folds_merged.profile_scores_new_compressed.bw" + else: + print(ofile) + ofile = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+encid+"/interpret_upload/average_preds/"+encid+"_folds_merged.profile_scores.stats" + if os.path.isfile(ofile): + profile_bw = 
"/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+encid+"/interpret_upload/average_preds/"+encid+"_folds_merged.profile_scores.bw" + else: + profile_bw = None + print(ofile) + + continue + + ofile = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+encid+"/interpret_upload/average_preds/per_folds.inputs.bed.gz" + if os.path.isfile(ofile): + sel_path = os.path.join("/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/", encid+"/interpret_upload/average_preds/selected.regions.valid.merged.bed.gz" ) + sel_path_bb = os.path.join("/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/", encid+"/interpret_upload/average_preds/selected.regions.valid.merged.bigBed" ) + if not os.path.isfile(sel_path): + make_sel_bedfile(ofile, sel_path) + + if os.path.isfile(sel_path) and (not os.path.isfile(sel_path_bb)): + make_bb_file(sel_path, sel_path_bb) + + else: + sel_path=None + sel_path_bb=None + print(ofile) + continue + + assert(os.path.isfile(counts_bw)==True) + assert(os.path.isfile(profile_bw)==True) + + + output_json = {} + output_json["experiment"] = encode_id[encid] + output_json["counts sequence contribution scores bigWig"] = counts_bw + output_json["profile sequence contribution scores bigWig"] = profile_bw + + if os.path.isfile(sel_path_bb): + output_json["selected regions for predicted signal and sequence contribution scores bigBed"] = sel_path_bb + + if os.path.isfile(sel_path): + output_json["selected regions for predicted signal and sequence contribution scores bed"] = sel_path + + if not os.path.isfile(odir+encode_id[encid]+".json"): + f = open(odir+encode_id[encid]+".json", "w") + json.dump(output_json, f, indent=4) + f.close() diff --git a/upload_jsons/upload_jsons_scripts/counts_bigwig_uploads/chrombpnet/dnase_prepare.py b/upload_jsons/upload_jsons_scripts/counts_bigwig_uploads/chrombpnet/dnase_prepare.py new file mode 100644 index 
00000000..f49d31c2 --- /dev/null +++ b/upload_jsons/upload_jsons_scripts/counts_bigwig_uploads/chrombpnet/dnase_prepare.py @@ -0,0 +1,115 @@ +import os +import json +import pandas as pd +import pybedtools + +#encids = ["K562", "HEPG2", "IMR90", "H1ESC", "GM12878"] +#encids = ["K562", "HEPG2"] +encids = ["K562", "HEPG2", "IMR90_new", "H1ESC_new", "GM12878_new"] + +encode_id = {"HEPG2": "ENCSR149XIL", + "K562": "ENCSR000EOT", + "IMR90_new": "ENCSR477RTP", + "GM12878_new": "ENCSR000EMT", + "H1ESC_new": "ENCSR000EMU"} +odir='dnase/' +def make_bb_file(in_bed, out_bb): + assert(os.path.isfile("atac_temp.bed")==False) + command = "zcat "+in_bed+" | LC_COLLATE=C sort -k1,1 -k2,2n > atac_temp.bed" + print(command) + os.system(command) + + command = "bedToBigBed atac_temp.bed /oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/reference/chrom.sizes "+out_bb + print(command) + os.system(command) + + + command = "rm atac_temp.bed" + print(command) + os.system(command) + +chrs = list(map(str,list(range(1,23)))) +chrs = ['chr'+i for i in chrs] +chrs = chrs + ['chrX', 'chrY'] + +def make_sel_bedfile(in_bed, out_bed): + input_bed = pd.read_csv(in_bed, sep="\t", header=None) + print(input_bed.shape) + input_bed = input_bed[input_bed[0].isin(chrs)] + print(input_bed.shape) + + input_bed[1] = input_bed[1]+input_bed[9]-500 + input_bed[2] = input_bed[1] + 1000 + print(input_bed.head()) + x = pybedtools.BedTool.from_dataframe(input_bed[[0,1,2]]) + x = x.sort().merge() + output_bed = x.to_dataframe() + print(output_bed.shape) + print(output_bed.head()) + output_bed.to_csv(out_bed, sep='\t', header=False, index=False) + +for encid in encids: + print(encid) + ofile = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/DNASE/"+encid+"/interpret_upload/average_preds/"+encid+"_folds_merged.counts_scores_new_compressed.stats" + if os.path.isfile(ofile): + counts_bw = 
"/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/DNASE/"+encid+"/interpret_upload/average_preds/"+encid+"_folds_merged.counts_scores_new_compressed.bw" + else: + print(ofile) + ofile = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/DNASE/"+encid+"/interpret_upload/average_preds/"+encid+"_folds_merged.counts_scores.stats" + if os.path.isfile(ofile): + counts_bw = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/DNASE/"+encid+"/interpret_upload/average_preds/"+encid+"_folds_merged.counts_scores.bw" + else: + counts_bw = None + print(ofile) + continue + + + ofile = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/DNASE/"+encid+"/interpret_upload/average_preds/"+encid+"_folds_merged.profile_scores_new_compressed.stats" + if os.path.isfile(ofile): + profile_bw = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/DNASE/"+encid+"/interpret_upload/average_preds/"+encid+"_folds_merged.profile_scores_new_compressed.bw" + else: + print(ofile) + ofile = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/DNASE/"+encid+"/interpret_upload/average_preds/"+encid+"_folds_merged.profile_scores.stats" + if os.path.isfile(ofile): + profile_bw = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/DNASE/"+encid+"/interpret_upload/average_preds/"+encid+"_folds_merged.profile_scores.bw" + else: + profile_bw = None + print(ofile) + + continue + + ofile = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/DNASE/"+encid+"/interpret_upload/average_preds/per_folds.inputs.bed.gz" + if os.path.isfile(ofile): + sel_path = os.path.join("/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/DNASE/", encid+"/interpret_upload/average_preds/selected.regions.valid.merged.bed.gz" ) + sel_path_bb = 
os.path.join("/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/DNASE/", encid+"/interpret_upload/average_preds/selected.regions.valid.merged.bigBed" ) + if not os.path.isfile(sel_path): + make_sel_bedfile(ofile, sel_path) + + if os.path.isfile(sel_path) and (not os.path.isfile(sel_path_bb)): + make_bb_file(sel_path, sel_path_bb) + + else: + sel_path=None + sel_path_bb=None + print(ofile) + continue + + assert(os.path.isfile(counts_bw)==True) + assert(os.path.isfile(profile_bw)==True) + + + output_json = {} + output_json["experiment"] = encode_id[encid] + output_json["counts sequence contribution scores bigWig"] = counts_bw + output_json["profile sequence contribution scores bigWig"] = profile_bw + + if os.path.isfile(sel_path_bb): + output_json["selected regions for predicted signal and sequence contribution scores bigBed"] = sel_path_bb + + if os.path.isfile(sel_path): + output_json["selected regions for predicted signal and sequence contribution scores bed"] = sel_path + + if not os.path.isfile(odir+encode_id[encid]+".json"): + f = open(odir+encode_id[encid]+".json", "w") + json.dump(output_json, f, indent=4) + f.close() diff --git a/upload_jsons/upload_jsons_scripts/counts_contrib_upload/READMES/counts.deepshap.README b/upload_jsons/upload_jsons_scripts/counts_contrib_upload/READMES/counts.deepshap.README new file mode 100644 index 00000000..e69de29b diff --git a/upload_jsons/upload_jsons_scripts/counts_contrib_upload/atac_tar.py b/upload_jsons/upload_jsons_scripts/counts_contrib_upload/atac_tar.py new file mode 100644 index 00000000..5196b748 --- /dev/null +++ b/upload_jsons/upload_jsons_scripts/counts_contrib_upload/atac_tar.py @@ -0,0 +1,215 @@ +import os +import json +import pandas as pd + +#encids = ["K562", "HEPG2", "IMR90_new", "H1ESC_new", "GM12878_new"] +encids = ["IMR90", "H1ESC", "GM12878"] + +encode_id = {"K562": "ENCSR868FGK", +"GM12878": "ENCSR637XSC", +"HEPG2": "ENCSR291GJU", +"IMR90": "ENCSR200OML", +"H1ESC": 
"GSE267154"} +odir='atac/' + +model_atac = pd.read_csv("/mnt/lab_data2/anusri/chrombpnet/logs/checkpoint/JAN_02_2023/model_dir_atac.csv",sep=",", header=None) + +def fetch_per_fold_counts(odir,model_path, encid, i, name): + + model_path_orig=model_path + model_path="/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+name+"/"+model_path.split("/")[-1] + data_paths = [] + log_paths = [] + log_paths_opt = [] + + odir="/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+name+"/interpret_upload/fold_"+str(i)+"/" + input_h5 = os.path.join(odir, name+"_counts_attribs_reformatted.h5") + data_paths.append((input_h5, "seq_contrib.counts.fold_"+str(i)+"."+encid+".h5")) + + #model_path="/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+name+"/ATAC_SE_04.27.2024//chrombpnet_model" + + # ATAC regions logs + + model_path=model_path+"/chrombpnet_model" + input_log=model_path+"/interpret_dnase/full_"+name+".interpret.args.json" + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.seq_contrib.counts.dnase_regions.fold_"+str(i)+"."+encid+".args.json")) + else: + + print(input_log) + input_log=model_path+"/interpret_dnase/full_"+name+".interpet.log" + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.seq_contrib.counts.dnase_regions.fold_"+str(i)+"."+encid+".log")) + else: + print(input_log) + + input_log=model_path+"/interpret_dnase/ATAC_peaks_full.counts.interpret.log1.e" + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.seq_contrib.counts.dnase_regions.fold_"+str(i)+"."+encid+".stderr.txt")) + else: + print(input_log) + + input_log=model_path+"/interpret_dnase/ATAC_peaks_full.counts.interpret.log1.o" + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.seq_contrib.counts.dnase_regions.fold_"+str(i)+"."+encid+".stdout.txt")) + else: + print(input_log) + + # atac regions logs + + 
input_log=model_path_orig+"/interpret/merged."+name+".interpret.args.json" + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.seq_contrib.counts.atac_regions.fold_"+str(i)+"."+encid+".args.json")) + else: + print(input_log) + + input_log=model_path_orig+"/interpret/merged."+name+".interpet.log" + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.seq_contrib.counts.atac_regions.fold_"+str(i)+"."+encid+".log")) + else: + print(input_log) + + # atac regions logs + + + input_log=model_path+"/interpret/full_"+name+".interpret.args.json" + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.seq_contrib.counts.atac_regions.fold_"+str(i)+"."+encid+".args.json")) + else: + print(input_log) + + input_log=model_path+"/interpret/full_"+name+".interpet.log" + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.seq_contrib.counts.atac_regions.fold_"+str(i)+"."+encid+".log")) + else: + print(input_log) + + input_log=model_path+"/interpret/full.counts.interpret.log.e" + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.seq_contrib.counts.atac_regions.fold_"+str(i)+"."+encid+".stderr.txt")) + else: + print(input_log) + + input_log=model_path+"/interpret/full.counts.interpret.log.o" + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.seq_contrib.counts.atac_regions.fold_"+str(i)+"."+encid+".stdout.txt")) + else: + print(input_log) + + + return data_paths, log_paths, log_paths_opt + +def fetch_counts_tar(encid, args_json, model_paths, name): + success = False + args_json["counts sequence contribution scores tar"] = {} + readme_file = "READMES/counts.deepshap.README" + assert(os.path.isfile(readme_file)) + args_json["counts sequence contribution scores tar"]["file.paths"] = [(readme_file, "README.md")] + args_json["counts sequence contribution scores tar"]["logs.seq_contrib.counts."+encid] = {"file.paths": []} + + ## full h5 path + + 
odir="/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+name+"/interpret_upload/average_preds/" + + input_h5 = os.path.join(odir, name+"_counts_attribs_reformatted.h5") + if os.path.isfile(input_h5): + args_json["counts sequence contribution scores tar"]["file.paths"].append((input_h5,"seq_contrib.counts.fold_mean."+encid+".h5")) + else: + success = False + return success, args_json + + ## modisoc h5 path + + modisco_input = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+name+"/merge_folds_new_may_05_24/in_peaks.counts_scores_new_compressed.h5" + if os.path.isfile(modisco_input): + args_json["counts sequence contribution scores tar"]["file.paths"].append((modisco_input,"seq_contrib.counts.fold_mean.modisco_input."+encid+".h5")) + else: + success = False + return success, args_json + + # log files + + + input_file=model_paths[0]+"/chrombpnet_model/interpret_all/full_"+name+".interpreted_regions_counts.bed" + newf="/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+name+"/interpret_upload/average_preds/per_folds.inputs.bed.gz" + input_bed = pd.read_csv(input_file, compression='gzip', sep='\t', header=None) + if os.path.isfile(input_file): + if not os.path.isfile(newf): + input_bed.to_csv(newf, sep='\t', header=False, index=False, compression='gzip') + args_json["counts sequence contribution scores tar"]["logs.seq_contrib.counts."+encid]["file.paths"].append((newf,"logs.seq_contrib.counts.input_regions.per_fold."+encid+".bed.gz")) + + + input_file="/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+name+"/merge_folds_new_may_05_24/in_peaks.counts.interpreted_regions.bed" + newf="/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+name+"/interpret_upload/average_preds/modisco.inputs.bed.gz" + input_bed = pd.read_csv(input_file, sep='\t', header=None) + if os.path.isfile(input_file): + if not 
os.path.isfile(newf): + input_bed.to_csv(newf, sep='\t', header=False, index=False, compression='gzip') + args_json["counts sequence contribution scores tar"]["logs.seq_contrib.counts."+encid]["file.paths"].append((newf,"logs.seq_contrib.counts.input_regions."+encid+".bed.gz")) + + odir="/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+name+"/interpret_upload/average_preds/" + + input_log = os.path.join(odir, "reformat.log.e") + if os.path.isfile(input_log): + args_json["counts sequence contribution scores tar"]["logs.seq_contrib.counts."+encid]["file.paths"].append((input_log, "logs.seq_contrib.counts.fold_mean.reformat"+encid+".stderr.txt")) + + input_log = os.path.join(odir, "reformat.log.e") + if os.path.isfile(input_log): + args_json["counts sequence contribution scores tar"]["logs.seq_contrib.counts."+encid]["file.paths"].append((input_log, "logs.seq_contrib.counts.fold_mean.reformat"+encid+".stdout.txt")) + + assert(len(args_json["counts sequence contribution scores tar"]["logs.seq_contrib.counts."+encid]["file.paths"])==4) + + for i in range(5): + data_paths, log_paths, log_paths_opt = fetch_per_fold_counts(odir,model_paths[i], encid, i, name) + + if data_paths is None: + success = False + return success, args_json + + args_json["counts sequence contribution scores tar"]["fold_"+str(i)] = {} + args_json["counts sequence contribution scores tar"]["fold_"+str(i)]["file.paths"] = data_paths + args_json["counts sequence contribution scores tar"]["fold_"+str(i)]["logs.seq_contrib.counts.fold_"+str(i)+"."+encid] = {"file.paths": log_paths+log_paths_opt} + assert(len(data_paths) == 1) + print(len(log_paths)) + assert(len(log_paths) >= 4) + + success=True + return success, args_json + +for encid in encids: + print(encid) + + + ofile = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+encid+"/interpret_upload/average_preds/"+encid+"_folds_merged.counts_scores_new_compressed.stats" + if 
os.path.isfile(ofile): + counts_bw = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+encid+"/interpret_upload/average_preds/"+encid+"_folds_merged.counts_scores_new_compressed.bw" + else: + counts_bw = None + print(ofile) + + ofile = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+encid+"/interpret_upload/average_preds/"+encid+"_folds_merged.profile_scores_new_compressed.stats" + if os.path.isfile(ofile): + profile_bw = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+encid+"/interpret_upload/average_preds/"+encid+"_folds_merged.profile_scores_new_compressed.bw" + else: + profile_bw = None + print(ofile) + continue + + assert(os.path.isfile(counts_bw)==True) + assert(os.path.isfile(profile_bw)==True) + + model_paths = model_atac[model_atac[1]==encid.replace("_new","")][2].values + print(model_paths) + args_json = {} + args_json["experiment"] = encode_id[encid] + + + success, args_json = fetch_counts_tar(encode_id[encid], args_json, model_paths, encid) + if not success: + print("ERR counts tar") + continue + + if not os.path.isfile(odir+encode_id[encid]+".json"): + f = open(odir+encode_id[encid]+".json", "w") + json.dump(args_json, f, indent=4) + f.close() diff --git a/upload_jsons/upload_jsons_scripts/counts_contrib_upload/atac_tar_k5_and_hep.py b/upload_jsons/upload_jsons_scripts/counts_contrib_upload/atac_tar_k5_and_hep.py new file mode 100644 index 00000000..30d5f8da --- /dev/null +++ b/upload_jsons/upload_jsons_scripts/counts_contrib_upload/atac_tar_k5_and_hep.py @@ -0,0 +1,193 @@ +import os +import json +import pandas as pd + +#encids = ["K562", "HEPG2", "IMR90_new", "H1ESC_new", "GM12878_new"] +encids = ["K562", "HEPG2"] + +encode_id = {"K562": "ENCSR868FGK", +"GM12878": "ENCSR637XSC", +"HEPG2": "ENCSR291GJU", +"IMR90": "ENCSR200OML", +"H1ESC": "GSE267154"} +odir='atac/' + +model_atac = 
pd.read_csv("/mnt/lab_data2/anusri/chrombpnet/logs/checkpoint/JAN_02_2023/model_dir_atac.csv",sep=",", header=None) + +def fetch_per_fold_counts(odir,model_path, encid, i, name): + + model_path_orig=model_path + model_path="/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+name+"/"+model_path.split("/")[-1] + data_paths = [] + log_paths = [] + log_paths_opt = [] + + odir="/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+name+"/interpret_upload/fold_"+str(i)+"/" + input_h5 = os.path.join(odir, name+"_counts_attribs_reformatted.h5") + data_paths.append((input_h5, "seq_contrib.counts.fold_"+str(i)+"."+encid+".h5")) + + #model_path="/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+name+"/ATAC_SE_04.27.2024//chrombpnet_model" + + + model_path = model_path+"/chrombpnet_model" + + # all regs logs + + input_log=model_path_orig+"/interpret/merged."+name+".interpret.args.json" + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.seq_contrib.counts.atacs_regs.fold_"+str(i)+"."+encid+".args.json")) + else: + print(input_log) + + input_log=model_path_orig+"/interpret/merged."+name+".interpet.log" + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.seq_contrib.counts.atacs_regs.fold_"+str(i)+"."+encid+".log")) + else: + print(input_log) + + # atac regs logs + + + input_log=model_path+"/interpret/full_"+name+".interpret.args.json" + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.seq_contrib.counts.atacs_regs.fold_"+str(i)+"."+encid+".args.json")) + else: + print(input_log) + + input_log=model_path+"/interpret/full_"+name+".interpet.log" + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.seq_contrib.counts.atacs_regs.fold_"+str(i)+"."+encid+".log")) + else: + print(input_log) + + input_log=model_path+"/interpret/full.counts.interpret.log.e" + if os.path.isfile(input_log): + log_paths.append((input_log, 
def fetch_counts_tar(encid, args_json, model_paths, name):
    """Populate the "counts sequence contribution scores tar" manifest.

    Adds the fold-mean h5, the modisco-input h5, the input-region beds and
    the per-fold h5/log entries for `name` under args_json.
    Returns (success, args_json); success is False when a required h5 is
    missing.  Requires READMES/counts.deepshap.README relative to the cwd.
    """
    key = "counts sequence contribution scores tar"
    log_key = "logs.seq_contrib.counts." + encid
    args_json[key] = {}
    readme_file = "READMES/counts.deepshap.README"
    assert(os.path.isfile(readme_file))
    args_json[key]["file.paths"] = [(readme_file, "README.md")]
    args_json[key][log_key] = {"file.paths": []}

    base = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/" + name
    odir = base + "/interpret_upload/average_preds/"

    # Fold-mean attribution h5 (required).
    input_h5 = os.path.join(odir, name + "_counts_attribs_reformatted.h5")
    if os.path.isfile(input_h5):
        args_json[key]["file.paths"].append((input_h5, "seq_contrib.counts.fold_mean." + encid + ".h5"))
    else:
        print(input_h5)
        return False, args_json

    # Modisco input h5 (required).
    modisco_input = base + "/merge_folds_new_may_05_24/in_peaks.counts_scores_new_compressed.h5"
    if os.path.isfile(modisco_input):
        args_json[key]["file.paths"].append((modisco_input, "seq_contrib.counts.fold_mean.modisco_input." + encid + ".h5"))
    else:
        print(modisco_input)
        return False, args_json

    # Log files: per-fold input regions bed.
    input_file = model_paths[1] + "/chrombpnet_model/interpret/full_" + name + ".interpreted_regions_counts.bed"
    newf = odir + "per_folds.inputs.bed.gz"
    # Fixed: the bed is now read only after confirming it exists and only
    # when the gzipped copy still needs to be written (previously
    # pd.read_csv ran unconditionally and crashed on a missing file).
    if os.path.isfile(input_file):
        if not os.path.isfile(newf):
            input_bed = pd.read_csv(input_file, sep='\t', header=None)
            input_bed.to_csv(newf, sep='\t', header=False, index=False, compression='gzip')
        args_json[key][log_key]["file.paths"].append((newf, "logs.seq_contrib.counts.input_regions.per_fold." + encid + ".bed.gz"))

    # Log files: modisco input regions bed.
    input_file = base + "/merge_folds_new_may_05_24/in_peaks.counts.interpreted_regions.bed"
    newf = odir + "modisco.inputs.bed.gz"
    if os.path.isfile(input_file):
        if not os.path.isfile(newf):
            input_bed = pd.read_csv(input_file, sep='\t', header=None)
            input_bed.to_csv(newf, sep='\t', header=False, index=False, compression='gzip')
        args_json[key][log_key]["file.paths"].append((newf, "logs.seq_contrib.counts.input_regions." + encid + ".bed.gz"))

    # Reformat logs.  NOTE(review): destination names use "reformat"+encid
    # with no separating dot; kept as-is to match already-uploaded names.
    input_log = os.path.join(odir, "reformat.log.e")
    if os.path.isfile(input_log):
        args_json[key][log_key]["file.paths"].append((input_log, "logs.seq_contrib.counts.fold_mean.reformat" + encid + ".stderr.txt"))

    # Fixed: the stdout entry previously re-read reformat.log.e (copy-paste bug).
    input_log = os.path.join(odir, "reformat.log.o")
    if os.path.isfile(input_log):
        args_json[key][log_key]["file.paths"].append((input_log, "logs.seq_contrib.counts.fold_mean.reformat" + encid + ".stdout.txt"))

    assert(len(args_json[key][log_key]["file.paths"]) == 4)

    # Per-fold h5 + logs.
    for i in range(5):
        data_paths, log_paths, log_paths_opt = fetch_per_fold_counts(odir, model_paths[i], encid, i, name)

        if data_paths is None:
            return False, args_json

        fold_key = "fold_" + str(i)
        args_json[key][fold_key] = {}
        args_json[key][fold_key]["file.paths"] = data_paths
        args_json[key][fold_key]["logs.seq_contrib.counts." + fold_key + "." + encid] = {"file.paths": log_paths + log_paths_opt}
        assert(len(data_paths) == 1)
        print(len(log_paths))
        assert(len(log_paths) >= 1)

    return True, args_json
# Build and write one upload JSON per ATAC experiment.
for encid in encids:
    print(encid)

    prefix = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/" + encid + "/interpret_upload/average_preds/" + encid + "_folds_merged."

    # The merged bigwigs are only trusted when their .stats sidecar exists.
    ofile = prefix + "counts_scores.stats"
    if os.path.isfile(ofile):
        counts_bw = prefix + "counts_scores.bw"
    else:
        counts_bw = None
        print(ofile)

    ofile = prefix + "profile_scores.stats"
    if os.path.isfile(ofile):
        profile_bw = prefix + "profile_scores.bw"
    else:
        profile_bw = None
        print(ofile)

    # Fixed: previously only a missing profile .stats skipped the sample; a
    # missing counts .stats fell through to os.path.isfile(None) (TypeError).
    if counts_bw is None or profile_bw is None:
        continue

    assert(os.path.isfile(counts_bw))
    assert(os.path.isfile(profile_bw))

    # Column 1 holds the cell-line name, column 2 the per-fold model dirs.
    model_paths = model_atac[model_atac[1] == encid.replace("_new", "")][2].values
    print(model_paths)
    args_json = {}
    args_json["experiment"] = encode_id[encid]

    success, args_json = fetch_counts_tar(encode_id[encid], args_json, model_paths, encid)
    if not success:
        print("ERR counts tar")
        continue

    # Never overwrite a JSON that was already generated.
    out_json = odir + encode_id[encid] + ".json"
    if not os.path.isfile(out_json):
        with open(out_json, "w") as f:
            json.dump(args_json, f, indent=4)
def fetch_per_fold_counts(odir, model_path, encid, i, name):
    """Collect the fold-level counts deepSHAP h5 and interpret log files.

    Gathers logs from three interpretation runs (DNase peaks, ATAC peaks,
    cCREs) under the fold's chrombpnet_model directory.  Returns
    (data_paths, log_paths, log_paths_opt); missing log files are printed
    and skipped.  The `odir` argument is immediately overridden (kept only
    for signature compatibility with the caller).
    """
    fold_tag = "fold_" + str(i)
    upload_dir = ("/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/"
                  "chrombpnet/folds/DNASE/" + name + "/interpret_upload/" + fold_tag + "/")
    data_paths = [(os.path.join(upload_dir, name + "_counts_attribs_reformatted.h5"),
                   "seq_contrib.counts." + fold_tag + "." + encid + ".h5")]

    model_dir = model_path + "/chrombpnet_model"

    def dest(region, suffix):
        # Destination name of a log file inside the tar.
        return "logs.seq_contrib.counts." + region + "." + fold_tag + "." + encid + suffix

    # The ".interpet.log" spelling matches the filenames produced upstream.
    candidates = [
        # DNase-peak interpretation logs (interpret_orig)
        (model_dir + "/interpret_orig/full_" + name + ".interpret.args.json", dest("dnase_regions", ".args.json")),
        (model_dir + "/interpret_orig/full_" + name + ".interpet.log", dest("dnase_regions", ".log")),
        (model_dir + "/interpret_orig/ATAC_peaks_full.counts.interpret.log1.e", dest("dnase_regions", ".stderr.txt")),
        (model_dir + "/interpret_orig/ATAC_peaks_full.counts.interpret.log1.o", dest("dnase_regions", ".stdout.txt")),
        # ATAC-peak interpretation logs (interpret)
        (model_dir + "/interpret/full_" + name + ".interpret.args.json", dest("atac_regions", ".args.json")),
        (model_dir + "/interpret/full_" + name + ".interpet.log", dest("atac_regions", ".log")),
        (model_dir + "/interpret/ATAC_peaks_full.counts.interpret.log1.e", dest("atac_regions", ".stderr.txt")),
        (model_dir + "/interpret/ATAC_peaks_full.counts.interpret.log1.o", dest("atac_regions", ".stdout.txt")),
        # cCRE interpretation logs (interpret_ccre)
        (model_dir + "/interpret_ccre/full_" + name + ".interpret.args.json", dest("ccre_regions", ".args.json")),
        (model_dir + "/interpret_ccre/full_" + name + ".interpet.log", dest("ccre_regions", ".log")),
        (model_dir + "/interpret_ccre/full.counts.interpret.log1.e", dest("ccre_regions", ".stderr.txt")),
        (model_dir + "/interpret_ccre/full.counts.interpret.log1.o", dest("ccre_regions", ".stdout.txt")),
    ]

    log_paths = []
    for src, tar_name in candidates:
        if os.path.isfile(src):
            log_paths.append((src, tar_name))
        else:
            print(src)

    return data_paths, log_paths, []
def fetch_counts_tar(encid, args_json, model_paths, name):
    """Populate the "counts sequence contribution scores tar" manifest (DNase).

    Adds the fold-mean h5, the modisco-input h5, the input-region beds and
    the per-fold h5/log entries for `name` under args_json.
    Returns (success, args_json); success is False when a required h5 is
    missing.  Requires READMES/counts.deepshap.README relative to the cwd.
    """
    key = "counts sequence contribution scores tar"
    log_key = "logs.seq_contrib.counts." + encid
    args_json[key] = {}
    readme_file = "READMES/counts.deepshap.README"
    assert(os.path.isfile(readme_file))
    args_json[key]["file.paths"] = [(readme_file, "README.md")]
    args_json[key][log_key] = {"file.paths": []}

    base = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/DNASE/" + name
    odir = base + "/interpret_upload/average_preds/"

    # Fold-mean attribution h5 (required).
    input_h5 = os.path.join(odir, name + "_counts_attribs_reformatted.h5")
    if os.path.isfile(input_h5):
        args_json[key]["file.paths"].append((input_h5, "seq_contrib.counts.fold_mean." + encid + ".h5"))
    else:
        return False, args_json

    # Modisco input h5 (required).
    modisco_input = base + "/merge_folds_new_may_05_24/in_peaks.counts_scores_new_compressed.h5"
    if os.path.isfile(modisco_input):
        args_json[key]["file.paths"].append((modisco_input, "seq_contrib.counts.fold_mean.modisco_input." + encid + ".h5"))
    else:
        return False, args_json

    # Log files: per-fold input regions bed.
    # NOTE(review): the source beds are read with compression='gzip' despite
    # the plain .bed extension -- confirm they really are gzipped on disk.
    input_file = model_paths[0] + "/chrombpnet_model/interpret_all_with_ccre/full_" + name + ".interpreted_regions_counts.bed"
    newf = odir + "per_folds.inputs.bed.gz"
    # Fixed: the bed is now read only after confirming it exists and only
    # when the gzipped copy still needs to be written (previously
    # pd.read_csv ran unconditionally and crashed on a missing file).
    if os.path.isfile(input_file):
        if not os.path.isfile(newf):
            input_bed = pd.read_csv(input_file, compression='gzip', sep='\t', header=None)
            input_bed.to_csv(newf, sep='\t', header=False, index=False, compression='gzip')
        args_json[key][log_key]["file.paths"].append((newf, "logs.seq_contrib.counts.input_regions.per_fold." + encid + ".bed.gz"))

    # Log files: modisco input regions bed (chrM rows dropped).
    input_file = base + "/merge_folds_new_may_05_24/in_peaks.counts_scores_new_compressed.bed"
    newf = odir + "modisco.inputs.bed.gz"
    if os.path.isfile(input_file):
        if not os.path.isfile(newf):
            input_bed = pd.read_csv(input_file, compression='gzip', sep='\t', header=None)
            input_bed = input_bed[~(input_bed[0] == "chrM")]
            input_bed.to_csv(newf, sep='\t', header=False, index=False, compression='gzip')
        args_json[key][log_key]["file.paths"].append((newf, "logs.seq_contrib.counts.input_regions." + encid + ".bed.gz"))

    # Reformat logs.  NOTE(review): destination names use "reformat"+encid
    # with no separating dot; kept as-is to match already-uploaded names.
    input_log = os.path.join(odir, "reformat.log.e")
    if os.path.isfile(input_log):
        args_json[key][log_key]["file.paths"].append((input_log, "logs.seq_contrib.counts.fold_mean.reformat" + encid + ".stderr.txt"))

    # Fixed: the stdout entry previously re-read reformat.log.e (copy-paste bug).
    input_log = os.path.join(odir, "reformat.log.o")
    if os.path.isfile(input_log):
        args_json[key][log_key]["file.paths"].append((input_log, "logs.seq_contrib.counts.fold_mean.reformat" + encid + ".stdout.txt"))

    assert(len(args_json[key][log_key]["file.paths"]) == 4)

    # Per-fold h5 + logs (12 logs expected: 4 per interpretation run).
    for i in range(5):
        data_paths, log_paths, log_paths_opt = fetch_per_fold_counts(odir, model_paths[i], encid, i, name)

        if data_paths is None:
            return False, args_json

        fold_key = "fold_" + str(i)
        args_json[key][fold_key] = {}
        args_json[key][fold_key]["file.paths"] = data_paths
        args_json[key][fold_key]["logs.seq_contrib.counts." + fold_key + "." + encid] = {"file.paths": log_paths + log_paths_opt}
        assert(len(data_paths) == 1)
        print(len(log_paths))
        assert(len(log_paths) == 12)

    return True, args_json
# Build and write one upload JSON per DNase experiment.
for encid in encids:
    print(encid)

    prefix = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/DNASE/" + encid + "/interpret_upload/average_preds/" + encid + "_folds_merged."

    # The merged bigwigs are only trusted when their .stats sidecar exists.
    ofile = prefix + "counts_scores_new_compressed.stats"
    if os.path.isfile(ofile):
        counts_bw = prefix + "counts_scores_new_compressed.bw"
    else:
        counts_bw = None
        print(ofile)

    ofile = prefix + "profile_scores_new_compressed.stats"
    if os.path.isfile(ofile):
        profile_bw = prefix + "profile_scores_new_compressed.bw"
    else:
        profile_bw = None
        print(ofile)

    # Fixed: previously only a missing profile .stats skipped the sample; a
    # missing counts .stats fell through to os.path.isfile(None) (TypeError).
    if counts_bw is None or profile_bw is None:
        continue

    assert(os.path.isfile(counts_bw))
    assert(os.path.isfile(profile_bw))

    # Column 1 holds the cell-line name, column 2 the per-fold model dirs.
    model_paths = model_atac[model_atac[1] == encid.replace("_new", "")][2].values
    print(model_paths)
    args_json = {}
    args_json["experiment"] = encode_id[encid]

    success, args_json = fetch_counts_tar(encode_id[encid], args_json, model_paths, encid)
    if not success:
        print("ERR counts tar")
        continue

    # Never overwrite a JSON that was already generated.
    out_json = odir + encode_id[encid] + ".json"
    if not os.path.isfile(out_json):
        with open(out_json, "w") as f:
            json.dump(args_json, f, indent=4)
def fetch_per_fold_counts(odir, model_path, encid, i, name):
    """Collect the fold-level counts deepSHAP h5 and interpret log files
    for the K562/HEPG2 DNase runs.

    `model_path` is the original model directory (used for the merged-region
    logs); the fold model under the /oak DNASE tree is derived from its
    basename.  Returns (data_paths, log_paths, log_paths_opt); missing log
    files are printed and skipped.  The `odir` argument is immediately
    overridden (kept only for signature compatibility with the caller).
    """
    fold_tag = "fold_" + str(i)
    model_path_orig = model_path
    base = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/DNASE/" + name
    model_path = base + "/" + model_path.split("/")[-1] + "/chrombpnet_model"

    upload_dir = base + "/interpret_upload/" + fold_tag + "/"
    input_h5 = os.path.join(upload_dir, name + "_counts_attribs_reformatted.h5")
    data_paths = [(input_h5, "seq_contrib.counts." + fold_tag + "." + encid + ".h5")]

    def dest(region, suffix):
        # Destination name of a log file inside the tar.
        return "logs.seq_contrib.counts." + region + "." + fold_tag + "." + encid + suffix

    # NOTE(review): the merged.* and full_* pairs below both map to
    # "all_regions" destination names, so those entries collide inside the
    # tar -- confirm whether distinct labels were intended.
    # The ".interpet.log" spelling matches the filenames produced upstream.
    candidates = [
        # cCRE interpretation logs (interpret_ccre)
        (model_path + "/interpret_ccre/full_" + name + ".interpret.args.json", dest("ccre_regions", ".args.json")),
        (model_path + "/interpret_ccre/full_" + name + ".interpet.log", dest("ccre_regions", ".log")),
        (model_path + "/interpret_ccre/full.counts.interpret.log1.e", dest("ccre_regions", ".stderr.txt")),
        # Fixed: the ccre stdout entry was mislabeled "all_regions"
        # (copy-paste), colliding with the all-regions stdout entry below.
        (model_path + "/interpret_ccre/full.counts.interpret.log1.o", dest("ccre_regions", ".stdout.txt")),
        # all-regions logs (merged, from the original model dir)
        (model_path_orig + "/interpret/merged." + name + ".interpret.args.json", dest("all_regions", ".args.json")),
        (model_path_orig + "/interpret/merged." + name + ".interpet.log", dest("all_regions", ".log")),
        # all-regions logs (full, from the fold model dir)
        (model_path + "/interpret/full_" + name + ".interpret.args.json", dest("all_regions", ".args.json")),
        (model_path + "/interpret/full_" + name + ".interpet.log", dest("all_regions", ".log")),
        (model_path + "/interpret/full.counts.interpret.log.e", dest("all_regions", ".stderr.txt")),
        (model_path + "/interpret/full.counts.interpret.log.o", dest("all_regions", ".stdout.txt")),
    ]

    log_paths = []
    for src, tar_name in candidates:
        if os.path.isfile(src):
            log_paths.append((src, tar_name))
        else:
            print(src)

    return data_paths, log_paths, []
def fetch_counts_tar(encid, args_json, model_paths, name):
    """Populate the "counts sequence contribution scores tar" manifest
    (K562/HEPG2 DNase).

    Adds the fold-mean h5, the modisco-input h5, the input-region beds and
    the per-fold h5/log entries for `name` under args_json.
    Returns (success, args_json); success is False when a required h5 is
    missing.  Requires READMES/counts.deepshap.README relative to the cwd.
    """
    key = "counts sequence contribution scores tar"
    log_key = "logs.seq_contrib.counts." + encid
    args_json[key] = {}
    readme_file = "READMES/counts.deepshap.README"
    assert(os.path.isfile(readme_file))
    args_json[key]["file.paths"] = [(readme_file, "README.md")]
    args_json[key][log_key] = {"file.paths": []}

    base = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/DNASE/" + name
    odir = base + "/interpret_upload/average_preds/"

    # Fold-mean attribution h5 (required).
    input_h5 = os.path.join(odir, name + "_counts_attribs_reformatted.h5")
    if os.path.isfile(input_h5):
        args_json[key]["file.paths"].append((input_h5, "seq_contrib.counts.fold_mean." + encid + ".h5"))
    else:
        print(input_h5)
        return False, args_json

    # Modisco input h5 (required).
    modisco_input = base + "/merge_folds_new_may_05_24/in_peaks.counts_scores_new_compressed.h5"
    if os.path.isfile(modisco_input):
        args_json[key]["file.paths"].append((modisco_input, "seq_contrib.counts.fold_mean.modisco_input." + encid + ".h5"))
    else:
        print(modisco_input)
        return False, args_json

    # Log files: per-fold input regions bed.
    # NOTE(review): this bed is read with compression='gzip' despite the
    # plain .bed extension -- confirm it really is gzipped on disk.
    input_file = model_paths[1] + "/chrombpnet_model/interpret_all_with_ccre/full_" + name + ".interpreted_regions_counts.bed"
    newf = odir + "per_folds.inputs.bed.gz"
    # Fixed: the bed is now read only after confirming it exists and only
    # when the gzipped copy still needs to be written (previously
    # pd.read_csv ran unconditionally and crashed on a missing file).
    if os.path.isfile(input_file):
        if not os.path.isfile(newf):
            input_bed = pd.read_csv(input_file, compression='gzip', sep='\t', header=None)
            input_bed.to_csv(newf, sep='\t', header=False, index=False, compression='gzip')
        args_json[key][log_key]["file.paths"].append((newf, "logs.seq_contrib.counts.input_regions.per_fold." + encid + ".bed.gz"))

    # Log files: modisco input regions bed.
    input_file = base + "/merge_folds_new_may_05_24/in_peaks.counts.interpreted_regions.bed"
    newf = odir + "modisco.inputs.bed.gz"
    if os.path.isfile(input_file):
        if not os.path.isfile(newf):
            input_bed = pd.read_csv(input_file, sep='\t', header=None)
            input_bed.to_csv(newf, sep='\t', header=False, index=False, compression='gzip')
        args_json[key][log_key]["file.paths"].append((newf, "logs.seq_contrib.counts.input_regions." + encid + ".bed.gz"))

    # Reformat logs.  NOTE(review): destination names use "reformat"+encid
    # with no separating dot; kept as-is to match already-uploaded names.
    input_log = os.path.join(odir, "reformat.log.e")
    if os.path.isfile(input_log):
        args_json[key][log_key]["file.paths"].append((input_log, "logs.seq_contrib.counts.fold_mean.reformat" + encid + ".stderr.txt"))

    # Fixed: the stdout entry previously re-read reformat.log.e (copy-paste bug).
    input_log = os.path.join(odir, "reformat.log.o")
    if os.path.isfile(input_log):
        args_json[key][log_key]["file.paths"].append((input_log, "logs.seq_contrib.counts.fold_mean.reformat" + encid + ".stdout.txt"))

    assert(len(args_json[key][log_key]["file.paths"]) == 4)

    # Per-fold h5 + logs.
    for i in range(5):
        data_paths, log_paths, log_paths_opt = fetch_per_fold_counts(odir, model_paths[i], encid, i, name)

        if data_paths is None:
            return False, args_json

        fold_key = "fold_" + str(i)
        args_json[key][fold_key] = {}
        args_json[key][fold_key]["file.paths"] = data_paths
        args_json[key][fold_key]["logs.seq_contrib.counts." + fold_key + "." + encid] = {"file.paths": log_paths + log_paths_opt}
        assert(len(data_paths) == 1)
        print(len(log_paths))
        assert(len(log_paths) >= 5)

    return True, args_json
# Build and write one upload JSON per K562/HEPG2 DNase experiment.
for encid in encids:
    print(encid)

    prefix = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/DNASE/" + encid + "/interpret_upload/average_preds/" + encid + "_folds_merged."

    # The merged counts bigwig is only trusted when its .stats sidecar exists.
    ofile = prefix + "counts_scores.stats"
    if os.path.isfile(ofile):
        counts_bw = prefix + "counts_scores.bw"
    else:
        counts_bw = None
        print(ofile)

    # Fixed: previously fell straight into os.path.isfile(counts_bw), which
    # raises TypeError when the .stats file (and hence counts_bw) is missing.
    if counts_bw is None:
        continue

    assert(os.path.isfile(counts_bw))

    # Column 1 holds the cell-line name, column 2 the per-fold model dirs.
    model_paths = model_atac[model_atac[1] == encid.replace("_new", "")][2].values
    print(model_paths)
    args_json = {}
    args_json["experiment"] = encode_id[encid]

    success, args_json = fetch_counts_tar(encode_id[encid], args_json, model_paths, encid)
    if not success:
        print("ERR counts tar")
        continue

    # Never overwrite a JSON that was already generated.
    out_json = odir + encode_id[encid] + ".json"
    if not os.path.isfile(out_json):
        with open(out_json, "w") as f:
            json.dump(args_json, f, indent=4)
a/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/READMEs/bias.training.README b/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/READMEs/bias.training.README new file mode 100644 index 00000000..8faa0ea2 --- /dev/null +++ b/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/READMEs/bias.training.README @@ -0,0 +1,63 @@ +# Directory Structure Format +. +├── peaks.all_input_regions.encid.bed.gz # Peaks input to the bias training script +├── logs.bias.training_test_regions.encid # folder containing log files for peak and nonpeak generation scripts +│ +├── fold_0 +│ ├── cv_params.fold_0.json # training, validation and test chromosomes used in fold 0 +│ ├── nonpeaks.all_input_regions.fold_0.encid.bed.gz # Non peaks input to the bias training script +│ ├── nonpeaks.trainingset.fold_0.encid.bed.gz # nonpeaks used in training set of fold 0 bias model +│ ├── nonpeaks.validationset.fold_0.encid.bed.gz # nonpeaks used in validation set of fold 0 bias model +│ ├── nonpeaks.testset.fold_0.encid.bed.gz # nonpeaks used in test set of fold 0 bias model +│ └── logs.bias.training_test_regions.fold_0.encid # folder containing log files for training bias model on fold 0 +│ +├── fold_1 +│ └── ... # similar directory structure as fold_0 directory above +│ +├── fold_2 +│ └── ... # similar directory structure as fold_0 directory above +│ +├── fold_3 +│ └── ... # similar directory structure as fold_0 directory above +│ +└── fold_4 + └── ... # similar directory structure as fold_0 directory above + +# Bed File Format for Peaks + +* All the bed files are in narrowpeak format with 10 columns. + +1) chrom - Name of the chromosome (or contig, scaffold, etc.). +2) chromStart - The starting position of the feature in the chromosome or scaffold. The first base in a chromosome is numbered 0. +3) chromEnd - The ending position of the feature in the chromosome or scaffold. The chromEnd base is not included in the display of the feature. 
For example, the first 100 bases of a chromosome are defined as chromStart=0, chromEnd=100, and span the bases numbered 0-99. +4) name - Name given to a region (preferably unique). Use "." if no name is assigned. +5) score - Indicates how dark the peak will be displayed in the browser (0-1000). If all scores were "'0"' when the data were submitted to the DCC, the DCC assigned scores 1-1000 based on signal value. Ideally the average signalValue per base spread is between 100-1000. +6) strand - +/- to denote strand or orientation (whenever applicable). Use "." if no orientation is assigned. +7) signalValue - Measurement of overall (usually, average) enrichment for the region. +8) pValue - Measurement of statistical significance (-log10). Use -1 if no pValue is assigned. +9) qValue - Measurement of statistical significance using false discovery rate (-log10). Use -1 if no qValue is assigned. +10) peak - Point-source called for this peak; 0-based offset from chromStart. Use -1 if no point-source called. + +# Bed File Format for Nonpeaks + +* All the bed files are in narrowpeak format with 10 columns. + +1) chrom - Name of the chromosome (or contig, scaffold, etc.). +2) chromStart - The starting position of the feature in the chromosome or scaffold. The first base in a chromosome is numbered 0. +3) chromEnd - The ending position of the feature in the chromosome or scaffold. The chromEnd base is not included in the display of the feature. For example, the first 100 bases of a chromosome are defined as chromStart=0, chromEnd=100, and span the bases numbered 0-99. +4) empty character - "." +5) empty character - "." +6) empty character - "." +7) empty character - "." +8) empty character - "." +9) empty character - "." 
+10) (chromEnd-chromStart)/2 + +# Format of file `cv_params.fold_0.json` + +A dictionary with following (key,value) pairs, + +1) ("CV_type", "chr_holdout") +2) ("train", list_of_chrs_trainingset) +3) ("valid", list_of_chrs_validationset) +4) ("test", list_of_chrs_testset) diff --git a/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/atac_bias_model_chrombpnet.csv b/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/atac_bias_model_chrombpnet.csv new file mode 100644 index 00000000..da180a8d --- /dev/null +++ b/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/atac_bias_model_chrombpnet.csv @@ -0,0 +1,6 @@ +fold_0,K562,/mnt/lab_data2/anusri/chrombpnet/results/chrombpnet/ATAC_PE/K562/nautilus_runs/K562_02.17.2022_bias_128_4_1234_0.5_fold_0 +fold_1,K562,/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/K562/K562_07.07.2022_bias_128_4_2356_0.5_fold_1_data_type_ATAC_PE +fold_2,K562,/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/K562/K562_07.07.2022_bias_128_4_2356_0.5_fold_2_data_type_ATAC_PE +fold_3,K562,/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/K562/K562_07.07.2022_bias_128_4_2356_0.5_fold_3_data_type_ATAC_PE +fold_4,K562,/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/K562/K562_07.07.2022_bias_128_4_2356_0.5_fold_4_data_type_ATAC_PE + diff --git a/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/atac_bias_model_upload.py b/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/atac_bias_model_upload.py index 46c7f0e6..8b534c68 100755 --- a/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/atac_bias_model_upload.py +++ b/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/atac_bias_model_upload.py @@ -12,7 +12,7 @@ #encids = open("../chromatin_atlas_atac/test_encid.txt").readlines() #encids = [line.strip() for line in encids] -model_atac = 
pd.read_csv("atac_bias_model.csv",sep=",", header=None) +model_atac = pd.read_csv("atac_bias_model_chrombpnet.csv",sep=",", header=None) encode_id = {"K562": "ENCSR868FGK"} data_to_bam = {"K562": ["ENCFF077FBI", "ENCFF128WZG", "ENCFF534DCE"]} def main_fetch_preprocessing_files(encid, args_json, bam_ids, name): @@ -40,7 +40,7 @@ def main_fetch_preprocessing_files(encid, args_json, bam_ids, name): def main_fetch_bias_model_files(encid, args_json, models_path): success = False args_json["bias models tar"] = {} - readme_file = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/READMES/bias.models.README" + readme_file = "READMEs/bias.training.README" assert(os.path.isfile(readme_file)) args_json["bias models tar"]["file.paths"] = [(readme_file, "README.md")] #args_json["bias models tar"]["logs.bias.models."+encid] = {"file.paths": None} @@ -68,7 +68,7 @@ def main_fetch_bias_training_files(encid, args_json, models_path, name): # find the training test regions args_json["bias training and test regions tar"] = {} - readme_file = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/READMES/bias.training_test_regions.README" + readme_file = "READMEs/bias.training.README" assert(os.path.isfile(readme_file)) args_json["bias training and test regions tar"]["file.paths"] = [(readme_file, "README.md")] @@ -82,7 +82,7 @@ def main_fetch_bias_training_files(encid, args_json, models_path, name): log_paths = upload_utils.bias_fetch_preprocessing_log_files(odir, encid, main_dir, name) args_json["bias training and test regions tar"]["logs.bias.training_test_regions."+encid] = {"file.paths": log_paths} - assert(len(log_paths) == 4) + assert(len(log_paths) == 3) for i in range(5): data_paths, log_paths = upload_utils.fetch_per_fold_training_data_bias(odir, models_path[i], encid, i, main_dir, name) @@ -90,6 +90,8 @@ def main_fetch_bias_training_files(encid, args_json, models_path, name): args_json["bias training and test regions tar"]["fold_"+str(i)] = {} args_json["bias 
training and test regions tar"]["fold_"+str(i)]["file.paths"] = data_paths args_json["bias training and test regions tar"]["fold_"+str(i)]["logs.bias.training_test_regions.fold_"+str(i)+"."+encid] = {"file.paths": log_paths} + #print(len(data_paths)) + #print(data_paths) assert(len(data_paths) == 5) assert(len(log_paths) == 2) diff --git a/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/atac_bias_upload_utils.py b/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/atac_bias_upload_utils.py index 1b908615..0c79edb4 100755 --- a/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/atac_bias_upload_utils.py +++ b/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/atac_bias_upload_utils.py @@ -13,10 +13,10 @@ def bias_fetch_preprocessing_log_files(odir, encid, main_dir, name): # preprocessing, peak-calling # preprocessing log files - temp_dir="/oak/stanford/groups/akundaje/projects/chrombpnet/model_inputs/ENCODE_ATAC_downloads/" - preprocessing_log = os.path.join(temp_dir, name + "/script.sh") - if os.stat(preprocessing_log).st_size != 0: - log_paths.append((preprocessing_log,"logfile.preprocessing."+encid+".script_v1.sh")) +# temp_dir="/oak/stanford/groups/akundaje/projects/chrombpnet/model_inputs/ENCODE_ATAC_downloads/" +# preprocessing_log = os.path.join(temp_dir, name + "/script.sh") +# if os.stat(preprocessing_log).st_size != 0: +# log_paths.append((preprocessing_log,"logfile.preprocessing."+encid+".script_v1.sh")) preprocessing_log = os.path.join(main_dir, name + "/data/"+name+"_preprocessing.log") if os.stat(preprocessing_log).st_size != 0: @@ -43,9 +43,10 @@ def fetch_per_fold_training_data_bias(odir, model_dir, encid, fold_num, main_dir if os.path.isfile(filtered_regions_bed): input_paths.append((filtered_regions_bed,"cv_params.fold_"+str(fold_num)+".json")) - temp_dir="/mnt/lab_data2/anusri/chrombpnet/results/chrombpnet/ATAC_PE/atlas_model_k562_fold_0/" + 
#temp_dir="/mnt/lab_data2/anusri/chrombpnet/results/chrombpnet/ATAC_PE/atlas_model_k562_fold_0/" if fold_num==0: - filtered_regions_bed = os.path.join(temp_dir, "negatives_data/negatives_with_summit.bed.gz") + filtered_regions_bed = os.path.join(main_dir, name+"/negatives_data/negatives_with_summit.bed.gz") + #print(filtered_regions_bed) if os.path.isfile(filtered_regions_bed): input_paths.append((filtered_regions_bed,"nonpeaks.all_input_regions.fold_"+str(fold_num)+"."+encid+".bed.gz")) else: @@ -59,15 +60,15 @@ def fetch_per_fold_training_data_bias(odir, model_dir, encid, fold_num, main_dir # if os.path.isfile(filtered_regions_bed): # input_paths.append((filtered_regions_bed,"peaks.testset.fold_"+str(fold_num)+"."+encid+".bed.gz")) - filtered_regions_bed = os.path.join(model_dir, "train_test_regions_bias/nonpeaks.trainingset.bed.gz") + filtered_regions_bed = os.path.join(model_dir, "train_test_regions_bias_may_7_2024/nonpeaks.trainingset.bed.gz") if os.path.isfile(filtered_regions_bed): input_paths.append((filtered_regions_bed,"nonpeaks.trainingset.fold_"+str(fold_num)+"."+encid+".bed.gz")) - filtered_regions_bed = os.path.join(model_dir, "train_test_regions_bias/nonpeaks.validationset.bed.gz") + filtered_regions_bed = os.path.join(model_dir, "train_test_regions_bias_may_7_2024/nonpeaks.validationset.bed.gz") if os.path.isfile(filtered_regions_bed): input_paths.append((filtered_regions_bed,"nonpeaks.validationset.fold_"+str(fold_num)+"."+encid+".bed.gz")) - filtered_regions_bed = os.path.join(model_dir, "train_test_regions_bias/nonpeaks.testset.bed.gz") + filtered_regions_bed = os.path.join(model_dir, "train_test_regions_bias_may_7_2024/nonpeaks.testset.bed.gz") if os.path.isfile(filtered_regions_bed): input_paths.append((filtered_regions_bed,"nonpeaks.testset.fold_"+str(fold_num)+"."+encid+".bed.gz")) @@ -75,7 +76,9 @@ def fetch_per_fold_training_data_bias(odir, model_dir, encid, fold_num, main_dir #print(filtered_regions_bed) if fold_num==0: + #negatives_log = 
os.path.join(temp_dir, name+"/negatives_data/make_background_regions.log") negatives_log = os.path.join(main_dir, name+"/negatives_data/make_background_regions.log") + if os.stat(negatives_log).st_size != 0: log_paths.append((negatives_log,"logfile.gc_matching.fold_"+str(fold_num)+"."+encid+".stdout.txt")) else: @@ -85,7 +88,8 @@ def fetch_per_fold_training_data_bias(odir, model_dir, encid, fold_num, main_dir if fold_num==0: - negatives_log = os.path.join(temp_dir, "negatives_data/negatives_compared_with_foreground.png") +# negatives_log = os.path.join(temp_dir, "negatives_data/negatives_compared_with_foreground.png") + negatives_log = os.path.join(main_dir, name+"/negatives_data/negatives_compared_with_foreground.png") if os.stat(negatives_log).st_size != 0: log_paths.append((negatives_log,"logfile.gc_matching.fold_"+str(fold_num)+"."+encid+".stdout.png")) else: @@ -125,9 +129,11 @@ def fetch_per_fold_bias_models(odir, model_dir, encid, fold_num): #### fetch model training log files ######## modelling_log = os.path.join(model_dir, "bias_model/train_bias_model.log") - if os.stat(modelling_log).st_size != 0: - log_paths.append((modelling_log,"logfile.modelling.fold_"+str(fold_num)+"."+encid+".stdout.txt")) - + if os.path.exists(modelling_log): + if os.stat(modelling_log).st_size != 0: + log_paths.append((modelling_log,"logfile.modelling.fold_"+str(fold_num)+"."+encid+".stdout.txt")) + else: + print(modelling_log) modelling_log = os.path.join(model_dir, "bias_model/bias.args.json") if os.stat(modelling_log).st_size != 0: log_paths.append((modelling_log,"logfile.modelling.fold_"+str(fold_num)+"."+encid+".args.json")) diff --git a/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/READMEs/bias.models.README b/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/READMEs/bias.models.README new file mode 100644 index 00000000..315b971b --- /dev/null +++ 
b/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/READMEs/bias.models.README @@ -0,0 +1,92 @@ +# Directory Structure Format +. +├── fold_0 +│ ├── model.bias.fold_0.encid.h5 # bias model in .h5 format +│ ├── model.bias.fold_0.encid.tar # bias model in SavedModel format +│ │ after being untarred, it results in a directory named "bias" +│ └── logs.bias.models.fold_0.encid # folder containing log files for training models +│ +├── fold_1 +│ └── ... # similar directory structure as fold_0 directory above +│ +├── fold_2 +│ └── ... # similar directory structure as fold_0 directory above +│ +├── fold_3 +│ └── ... # similar directory structure as fold_0 directory above +│ +└── fold_4 + └── ... # similar directory structure as fold_0 directory above + + +# Pseudocode for loading models in .h5 format + +(1) Use the code in python after appropriately defining `model_in_h5_format` and `inputs`. +(2) `inputs` is a one hot encoded sequence of shape (N,2114,4). Here N corresponds to the +number of tested sequences, 2114 is the input sequence length and 4 corresponds to [A,C,G,T]. + +``` +import tensorflow as tf +from tensorflow.keras.utils import get_custom_objects +from tensorflow.keras.models import load_model + +custom_objects={"tf": tf} +get_custom_objects().update(custom_objects) + +model=load_model(model_in_h5_format,compile=False) +outputs = model(inputs) +``` + +The list `outputs` consists of two elements. The first element has a shape of (N, 1000) and +contains logit predictions for a 1000-base-pair output. The second element, with a shape of +(N, 1), contains logcount predictions. To transform these predictions into per-base signals, +follow the provided pseudo code lines below. 
+ +``` +import numpy as np + +def softmax(x, temp=1): + norm_x = x - np.mean(x,axis=1, keepdims=True) + return np.exp(temp*norm_x)/np.sum(np.exp(temp*norm_x), axis=1, keepdims=True) + +predictions = softmax(outputs[0]) * (np.exp(outputs[1])-1) +``` + +# Pseudocode for loading models in .tar format + +(1) First untar the directory as follows `tar -xvf model.tar` +(2) Use the code below in python after appropriately defining `model_dir_untared` and `inputs` +(3) `inputs` is a one hot encoded sequence of shape (N,2114,4). Here N corresponds to the number +of tested sequences, 2114 is the input sequence length and 4 corresponds to ACGT. + +Reference: https://www.tensorflow.org/api_docs/python/tf/saved_model/load + +``` +import tensorflow as tf + +model = tf.saved_model.load('model_dir_untared') +outputs = model.signatures['serving_default'](**{'sequence':inputs.astype('float32')}) +``` + +The variable `outputs` represents a dictionary containing two key-value pairs. The first key +is `logits_profile_predictions`, holding a value with a shape of (N, 1000). This value corresponds +to logit predictions for a 1000-base-pair output. The second key, named `logcount_predictions`, +is associated with a value of shape (N, 1), representing logcount predictions. To transform these +predictions into per-base signals, utilize the provided pseudo code lines mentioned below. 
+ +``` +import numpy as np +def softmax(x, temp=1): + norm_x = x - np.mean(x,axis=1, keepdims=True) + return np.exp(temp*norm_x)/np.sum(np.exp(temp*norm_x), axis=1, keepdims=True) + +predictions = softmax(outputs["logits_profile_predictions"]) * (np.exp(outputs["logcount_predictions"])-1) +``` + +# Docker image to load and use the models + +https://hub.docker.com/r/kundajelab/chrombpnet-atlas/ (tag:v1) + +# Tool box to do downstream analysis with the models + +https://github.com/kundajelab/chrombpnet/wiki diff --git a/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/READMEs/bias.training.README b/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/READMEs/bias.training.README new file mode 100644 index 00000000..8faa0ea2 --- /dev/null +++ b/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/READMEs/bias.training.README @@ -0,0 +1,63 @@ +# Directory Structure Format +. +├── peaks.all_input_regions.encid.bed.gz # Peaks input to the bias training script +├── logs.bias.training_test_regions.encid # folder containing log files for peak and nonpeak generation scripts +│ +├── fold_0 +│ ├── cv_params.fold_0.json # training, validation and test chromosomes used in fold 0 +│ ├── nonpeaks.all_input_regions.fold_0.encid.bed.gz # Non peaks input to the bias training script +│ ├── nonpeaks.trainingset.fold_0.encid.bed.gz # nonpeaks used in training set of fold 0 bias model +│ ├── nonpeaks.validationset.fold_0.encid.bed.gz # nonpeaks used in validation set of fold 0 bias model +│ ├── nonpeaks.testset.fold_0.encid.bed.gz # nonpeaks used in test set of fold 0 bias model +│ └── logs.bias.training_test_regions.fold_0.encid # folder containing log files for training bias model on fold 0 +│ +├── fold_1 +│ └── ... # similar directory structure as fold_0 directory above +│ +├── fold_2 +│ └── ... # similar directory structure as fold_0 directory above +│ +├── fold_3 +│ └── ... 
# similar directory structure as fold_0 directory above +│ +└── fold_4 + └── ... # similar directory structure as fold_0 directory above + +# Bed File Format for Peaks + +* All the bed files are in narrowpeak format with 10 columns. + +1) chrom - Name of the chromosome (or contig, scaffold, etc.). +2) chromStart - The starting position of the feature in the chromosome or scaffold. The first base in a chromosome is numbered 0. +3) chromEnd - The ending position of the feature in the chromosome or scaffold. The chromEnd base is not included in the display of the feature. For example, the first 100 bases of a chromosome are defined as chromStart=0, chromEnd=100, and span the bases numbered 0-99. +4) name - Name given to a region (preferably unique). Use "." if no name is assigned. +5) score - Indicates how dark the peak will be displayed in the browser (0-1000). If all scores were "'0"' when the data were submitted to the DCC, the DCC assigned scores 1-1000 based on signal value. Ideally the average signalValue per base spread is between 100-1000. +6) strand - +/- to denote strand or orientation (whenever applicable). Use "." if no orientation is assigned. +7) signalValue - Measurement of overall (usually, average) enrichment for the region. +8) pValue - Measurement of statistical significance (-log10). Use -1 if no pValue is assigned. +9) qValue - Measurement of statistical significance using false discovery rate (-log10). Use -1 if no qValue is assigned. +10) peak - Point-source called for this peak; 0-based offset from chromStart. Use -1 if no point-source called. + +# Bed File Format for Nonpeaks + +* All the bed files are in narrowpeak format with 10 columns. + +1) chrom - Name of the chromosome (or contig, scaffold, etc.). +2) chromStart - The starting position of the feature in the chromosome or scaffold. The first base in a chromosome is numbered 0. +3) chromEnd - The ending position of the feature in the chromosome or scaffold. 
The chromEnd base is not included in the display of the feature. For example, the first 100 bases of a chromosome are defined as chromStart=0, chromEnd=100, and span the bases numbered 0-99. +4) empty character - "." +5) empty character - "." +6) empty character - "." +7) empty character - "." +8) empty character - "." +9) empty character - "." +10) (chromEnd-chromStart)/2 + +# Format of file `cv_params.fold_0.json` + +A dictionary with following (key,value) pairs, + +1) ("CV_type", "chr_holdout") +2) ("train", list_of_chrs_trainingset) +3) ("valid", list_of_chrs_validationset) +4) ("test", list_of_chrs_testset) diff --git a/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/READMEs/models.README b/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/READMEs/models.README new file mode 100644 index 00000000..90a59aa1 --- /dev/null +++ b/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/READMEs/models.README @@ -0,0 +1,98 @@ +# Directory Structure Format +. +├── fold_0 +│ ├── model.chrombpnet.fold_0.encid.h5 # full chrombpnet model that combines both bias and corrected model in .h5 format +│ ├── model.chrombpnet_nobias.fold_0.encid.h5 # bias-corrected accessibility model in .h5 format (Use for all biological discovery) +│ ├── model.bias_scaled.fold_0.encid.h5 # bias model in .h5 format +│ ├── model.chrombpnet.fold_0.encid.tar # full chrombpnet model that combines both bias and corrected model in SavedModel format. +│ │ after being untarred, it results in a directory named "chrombpnet". +│ ├── model.chrombpnet_nobias.fold_0.encid.tar # bias-corrected accessibility model in SavedModel format (Use for all biological discovery). +│ │ after being untarred, it results in a directory named "chrombpnet_wo_bias". +│ ├── model.bias_scaled.fold_0.encid.tar # bias model in SavedModel format +│ │ after being untarred, it results in a directory named "bias_model_scaled". 
+│ └── logs.models.fold_0.encid # folder containing log files for training models +│ +├── fold_1 +│ └── ... # similar directory structure as fold_0 directory above +│ +├── fold_2 +│ └── ... # similar directory structure as fold_0 directory above +│ +├── fold_3 +│ └── ... # similar directory structure as fold_0 directory above +│ +└── fold_4 + └── ... # similar directory structure as fold_0 directory above + + +# Pseudocode for loading models in .h5 format + +(1) Use the code in python after appropriately defining `model_in_h5_format` and `inputs`. +(2) `inputs` is a one hot encoded sequence of shape (N,2114,4). Here N corresponds to the +number of tested sequences, 2114 is the input sequence length and 4 corresponds to [A,C,G,T]. + +``` +import tensorflow as tf +from tensorflow.keras.utils import get_custom_objects +from tensorflow.keras.models import load_model + +custom_objects={"tf": tf} +get_custom_objects().update(custom_objects) + +model=load_model(model_in_h5_format,compile=False) +outputs = model(inputs) +``` + +The list `outputs` consists of two elements. The first element has a shape of (N, 1000) and +contains logit predictions for a 1000-base-pair output. The second element, with a shape of +(N, 1), contains logcount predictions. To transform these predictions into per-base signals, +follow the provided pseudo code lines below. + +``` +import numpy as np + +def softmax(x, temp=1): + norm_x = x - np.mean(x,axis=1, keepdims=True) + return np.exp(temp*norm_x)/np.sum(np.exp(temp*norm_x), axis=1, keepdims=True) + +predictions = softmax(outputs[0]) * (np.exp(outputs[1])-1) +``` + +# Pseudocode for loading models in .tar format + +(1) First untar the directory as follows `tar -xvf model.tar` +(2) Use the code below in python after appropriately defining `model_dir_untared` and `inputs` +(3) `inputs` is a one hot encoded sequence of shape (N,2114,4). 
Here N corresponds to the number +of tested sequences, 2114 is the input sequence length and 4 corresponds to ACGT. + +Reference: https://www.tensorflow.org/api_docs/python/tf/saved_model/load + +``` +import tensorflow as tf + +model = tf.saved_model.load('model_dir_untared') +outputs = model.signatures['serving_default'](**{'sequence':inputs.astype('float32')}) +``` + +The variable `outputs` represents a dictionary containing two key-value pairs. The first key +is `logits_profile_predictions`, holding a value with a shape of (N, 1000). This value corresponds +to logit predictions for a 1000-base-pair output. The second key, named `logcount_predictions`, +is associated with a value of shape (N, 1), representing logcount predictions. To transform these +predictions into per-base signals, utilize the provided pseudo code lines mentioned below. + +``` +import numpy as np +def softmax(x, temp=1): + norm_x = x - np.mean(x,axis=1, keepdims=True) + return np.exp(temp*norm_x)/np.sum(np.exp(temp*norm_x), axis=1, keepdims=True) + +predictions = softmax(outputs["logits_profile_predictions"]) * (np.exp(outputs["logcount_predictions"])-1) +``` + +# Docker image to load and use the models + +https://hub.docker.com/r/kundajelab/chrombpnet-atlas/ (tag:v1) + +# Tool box to do downstream analysis with the models + +https://github.com/kundajelab/chrombpnet/wiki diff --git a/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/READMEs/training.README b/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/READMEs/training.README new file mode 100644 index 00000000..56f8d835 --- /dev/null +++ b/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/READMEs/training.README @@ -0,0 +1,66 @@ +# Directory Structure Format +. 
+├── peaks.all_input_regions.encid.bed.gz # Peaks input to the chrombpnet training script +├── nonpeaks.all_input_regions.encid.bed.gz # Non peaks input to the chrombpnet training script +├── logs.training_test_regions.encid # folder containing log files for peak and nonpeak generation scripts +│ +├── fold_0 +│ ├── cv_params.fold_0.json # training, validation and test chromosomes used in fold 0 +│ ├── peaks.trainingset.fold_0.encid.bed.gz # peaks used in training set of fold 0 model +│ ├── nonpeaks.trainingset.fold_0.encid.bed.gz # nonpeaks used in training set of fold 0 model +│ ├── peaks.validationset.fold_0.encid.bed.gz # peaks used in validation set of fold 0 model +│ ├── nonpeaks.validationset.fold_0.encid.bed.gz # nonpeaks used in validation set of fold 0 model +│ ├── peaks.testset.fold_0.encid.bed.gz # peaks used in test set of fold 0 model +│ ├── nonpeaks.testset.fold_0.encid.bed.gz # nonpeaks used in test set of fold 0 model +│ └── logs.training_test_regions.fold_0.encid # folder containing log files for training chrombpnet model on fold 0 +│ +├── fold_1 +│ └── ... # similar directory structure as fold_0 directory above +│ +├── fold_2 +│ └── ... # similar directory structure as fold_0 directory above +│ +├── fold_3 +│ └── ... # similar directory structure as fold_0 directory above +│ +└── fold_4 + └── ... # similar directory structure as fold_0 directory above + +# Bed File Format for Peaks + +* All the bed files are in narrowpeak format with 10 columns. + +1) chrom - Name of the chromosome (or contig, scaffold, etc.). +2) chromStart - The starting position of the feature in the chromosome or scaffold. The first base in a chromosome is numbered 0. +3) chromEnd - The ending position of the feature in the chromosome or scaffold. The chromEnd base is not included in the display of the feature. For example, the first 100 bases of a chromosome are defined as chromStart=0, chromEnd=100, and span the bases numbered 0-99. 
+4) name - Name given to a region (preferably unique). Use "." if no name is assigned. +5) score - Indicates how dark the peak will be displayed in the browser (0-1000). If all scores were "'0"' when the data were submitted to the DCC, the DCC assigned scores 1-1000 based on signal value. Ideally the average signalValue per base spread is between 100-1000. +6) strand - +/- to denote strand or orientation (whenever applicable). Use "." if no orientation is assigned. +7) signalValue - Measurement of overall (usually, average) enrichment for the region. +8) pValue - Measurement of statistical significance (-log10). Use -1 if no pValue is assigned. +9) qValue - Measurement of statistical significance using false discovery rate (-log10). Use -1 if no qValue is assigned. +10) peak - Point-source called for this peak; 0-based offset from chromStart. Use -1 if no point-source called. + +# Bed File Format for Nonpeaks + +* All the bed files are in narrowpeak format with 10 columns. + +1) chrom - Name of the chromosome (or contig, scaffold, etc.). +2) chromStart - The starting position of the feature in the chromosome or scaffold. The first base in a chromosome is numbered 0. +3) chromEnd - The ending position of the feature in the chromosome or scaffold. The chromEnd base is not included in the display of the feature. For example, the first 100 bases of a chromosome are defined as chromStart=0, chromEnd=100, and span the bases numbered 0-99. +4) empty character - "." +5) empty character - "." +6) empty character - "." +7) empty character - "." +8) empty character - "." +9) empty character - "." 
+10) midpoint - (chromEnd-chromStart)/2 + +# Format of file `cv_params.fold_0.json` + +A dictionary with following (key,value) pairs, + +1) ("CV_type", "chr_holdout") +2) ("train", list_of_chrs_trainingset) +3) ("valid", list_of_chrs_validationset) +4) ("test", list_of_chrs_testset) diff --git a/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/atac_bias_model_chrombpnet.csv b/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/atac_bias_model_chrombpnet.csv new file mode 100644 index 00000000..15190cf2 --- /dev/null +++ b/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/atac_bias_model_chrombpnet.csv @@ -0,0 +1,26 @@ +fold_0,GM12878,/mnt/lab_data2/anusri/chrombpnet/results/chrombpnet/ATAC_PE/GM12878/nautilus_runs/GM12878_03.01.2022_bias_128_4_1234_0.4_fold_0 +fold_1,GM12878,/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/GM12878/GM12878_07.08.2022_bias_128_4_1234_0.4_fold_1_data_type_ATAC_PE +fold_2,GM12878,/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/GM12878/GM12878_07.08.2022_bias_128_4_1234_0.4_fold_2_data_type_ATAC_PE +fold_3,GM12878,/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/GM12878/GM12878_07.14.2022_bias_128_4_1234_0.4_fold_3_data_type_ATAC_PE +fold_4,GM12878,/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/GM12878/GM12878_07.07.2022_bias_128_4_1234_0.4_fold_4_data_type_ATAC_PE +fold_0,K562,/mnt/lab_data2/anusri/chrombpnet/results/chrombpnet/ATAC_PE/K562/nautilus_runs/K562_02.17.2022_bias_128_4_1234_0.5_fold_0 +fold_1,K562,/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/K562/K562_07.07.2022_bias_128_4_2356_0.5_fold_1_data_type_ATAC_PE +fold_2,K562,/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/K562/K562_07.07.2022_bias_128_4_2356_0.5_fold_2_data_type_ATAC_PE 
+fold_3,K562,/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/K562/K562_07.07.2022_bias_128_4_2356_0.5_fold_3_data_type_ATAC_PE +fold_4,K562,/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/K562/K562_07.07.2022_bias_128_4_2356_0.5_fold_4_data_type_ATAC_PE +fold_0,HEPG2,/mnt/lab_data2/anusri/chrombpnet/results/chrombpnet/ATAC_PE/HEPG2/nautilus_runs_jun16/HEPG2_05.09.2022_bias_128_4_1234_0.8_fold_0 +fold_1,HEPG2,/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/HEPG2/HEPG2_06.07.2022_bias_128_4_1234_0.8_fold_1 +fold_2,HEPG2,/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/HEPG2/HEPG2_05.24.2022_bias_128_4_1234_0.8_fold_2 +fold_3,HEPG2,/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/HEPG2/HEPG2_05.22.2022_bias_128_4_1234_0.8_fold_3 +fold_4,HEPG2,/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/HEPG2/HEPG2_05.22.2022_bias_128_4_1234_0.8_fold_4 +fold_0,IMR90,/mnt/lab_data2/anusri/chrombpnet/results/chrombpnet/ATAC_PE/IMR90/nautilus_runs_apr12/IMR90_04.09.2022_bias_128_4_1234_0.4_fold_0 +fold_1,IMR90,/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/IMR90/IMR90_07.17.2022_bias_128_4_1234_0.3_fold_1_data_type_ATAC_PE +fold_2,IMR90,/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/IMR90/IMR90_07.17.2022_bias_128_4_1234_0.3_fold_2_data_type_ATAC_PE +fold_3,IMR90,/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/IMR90/IMR90_07.08.2022_bias_128_4_1234_0.4_fold_3_data_type_ATAC_PE +fold_4,IMR90,/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/IMR90/IMR90_07.07.2022_bias_128_4_1234_0.4_fold_4_data_type_ATAC_PE +fold_0,H1ESC,/mnt/lab_data2/anusri/chrombpnet/results/chrombpnet/ATAC_PE/H1ESC/nautilus_runs_jun16/H1ESC_05.09.2022_bias_128_4_1234_0.8_fold_0 
+fold_1,H1ESC,/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/H1ESC/H1ESC_07.23.2022_bias_128_4_1234_0.7_fold_1_data_type_ATAC_PE +fold_2,H1ESC,/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/H1ESC/H1ESC_07.17.2022_bias_128_4_1234_0.8_fold_2_data_type_ATAC_PE +fold_3,H1ESC,/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/H1ESC/H1ESC_07.17.2022_bias_128_4_1234_0.8_fold_3_data_type_ATAC_PE +fold_4,H1ESC,/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/H1ESC/H1ESC_07.17.2022_bias_128_4_1234_0.8_fold_4_data_type_ATAC_PE + diff --git a/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/atac_bias_model_upload.py b/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/atac_bias_model_upload.py new file mode 100644 index 00000000..d2e9a145 --- /dev/null +++ b/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/atac_bias_model_upload.py @@ -0,0 +1,260 @@ +import os +import atac_bias_upload_utils as upload_utils +import json +import pandas as pd +import model_upload_utils + +odir = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/ATAC/" +#output_dir = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022-uploads/jsons/ATAC/stage1/jul_17_2023/" +main_dir="/mnt/lab_data2/anusri/chrombpnet/results/chrombpnet/ATAC_PE/" +output_dir = "atac_production_uploads/" + +encids = os.listdir(odir) +#encids = open("../chromatin_atlas_atac/test_encid.txt").readlines() +#encids = [line.strip() for line in encids] + +model_atac = pd.read_csv("atac_bias_model_chrombpnet.csv",sep=",", header=None) +encode_id = {"K562": "ENCSR868FGK", +"GM12878": "ENCSR637XSC", +"HEPG2": "ENCSR291GJU", +"IMR90": "ENCSR200OML", +"H1ESC": "GSE267154"} + +data_to_bam = {"K562": ["ENCFF077FBI", "ENCFF128WZG", "ENCFF534DCE"], +"GM12878": ["ENCFF440GRZ", "ENCFF981FXV", "ENCFF962FMH"], +"HEPG2": ["ENCFF624SON", 
"ENCFF926KFU", "ENCFF990VCP"], +"IMR90": ["ENCFF848XMR", "ENCFF715NAV"], +"H1ESC": ["GSM8260976", "GSM8260977"] +} + +def main_fetch_training_files(encid, args_json, model_paths, name): + success = False + + # find the training test regions + args_json["training and test regions tar"] = {} + readme_file = "READMEs/training.README" + assert(os.path.isfile(readme_file)) + args_json["training and test regions tar"]["file.paths"] = [(readme_file, "README.md")] + + input_peaks = os.path.join(main_dir, name + "/data/peaks_no_blacklist.bed.gz") + if os.path.isfile(input_peaks): + args_json["training and test regions tar"]["file.paths"].append((input_peaks,"peaks.all_input_regions."+encid+".bed.gz")) + else: + success = False + return success, args_json + + log_paths = model_upload_utils.fetch_preprocessing_log_files(odir,encid,main_dir, name) + args_json["training and test regions tar"]["logs.training_test_regions."+encid] = {"file.paths": log_paths} + assert(len(log_paths) == 3) + + for i in range(5): + data_paths, log_paths = model_upload_utils.fetch_per_fold_training_data(odir,model_paths[i], encid, i, main_dir, name) + + args_json["training and test regions tar"]["fold_"+str(i)] = {} + args_json["training and test regions tar"]["fold_"+str(i)]["file.paths"] = data_paths + args_json["training and test regions tar"]["fold_"+str(i)]["logs.training_test_regions.fold_"+str(i)+"."+encid] = {"file.paths": log_paths} + print(len(data_paths)) + assert(len(data_paths) == 8) + assert(len(log_paths) == 2) + + if len(data_paths) != 8: + success = False + return success, args_json + + success = True + return success, args_json + +def main_fetch_preprocessing_files_for_k562(encid, args_json, bam_ids, name): + # define bam_ids, name + + success_flag = False + + args_json["upload bias"] = False + #args_json["bias model encid"] = encid + + # find the bams input + preprocessing_path = "/mnt/lab_data2/anusri/chrombpnet/results/chrombpnet/ATAC_PE/"+name+"/data/"+name+"_unstranded.bw" + + 
if os.path.isfile(preprocessing_path): + args_json["experiment"] = encid + args_json["bam files"] = bam_ids + args_json["assay"] = "ATAC-seq" + success = True + else: + success = False + + return success, args_json + +def main_fetch_preprocessing_files(encid, args_json, bam_ids, name): + # define bam_ids, name + + success_flag = False + + args_json["upload bias"] = True + #args_json["bias model encid"] = encid + + # find the bams input + preprocessing_path = "/mnt/lab_data2/anusri/chrombpnet/results/chrombpnet/ATAC_PE/"+name+"/data/"+name+"_unstranded.bw" + + if os.path.isfile(preprocessing_path): + args_json["experiment"] = encid + args_json["bam files"] = bam_ids + args_json["assay"] = "ATAC-seq" + args_json["observed signal profile bigWig"] = preprocessing_path + success = True + else: + success = False + + return success, args_json + +def main_fetch_model_files(encid, args_json, model_paths, name): + success = False + args_json["models tar"] = {} + readme_file = "READMEs/models.README" + assert(os.path.isfile(readme_file)) + args_json["models tar"]["file.paths"] = [(readme_file, "README.md")] + args_json["models tar"]["logs.models."+encid] = {"file.paths": None} + + for i in range(5): + data_paths, log_paths, log_paths_opt = model_upload_utils.fetch_per_fold_models(odir,model_paths[i], encid, i) + + if data_paths is None: + success = False + return success, args_json + + args_json["models tar"]["fold_"+str(i)] = {} + args_json["models tar"]["fold_"+str(i)]["file.paths"] = data_paths + args_json["models tar"]["fold_"+str(i)]["logs.models.fold_"+str(i)+"."+encid] = {"file.paths": log_paths+log_paths_opt} + assert(len(data_paths) == 6) + print(len(log_paths)) + assert(len(log_paths) >= 6) + + success=True + return success, args_json + +def main_fetch_bias_model_files(encid, args_json, models_path): + success = False + args_json["bias models tar"] = {} + readme_file = "READMEs/bias.models.README" + assert(os.path.isfile(readme_file)) + args_json["bias models 
tar"]["file.paths"] = [(readme_file, "README.md")] + #args_json["bias models tar"]["logs.bias.models."+encid] = {"file.paths": None} + + for i in range(5): + data_paths, log_paths = upload_utils.fetch_per_fold_bias_models(odir, models_path[i], encid, i) + + if data_paths is None: + success = False + return success, args_json + + args_json["bias models tar"]["fold_"+str(i)] = {} + args_json["bias models tar"]["fold_"+str(i)]["file.paths"] = data_paths + args_json["bias models tar"]["fold_"+str(i)]["logs.bias.models.fold_"+str(i)+"."+encid] = {"file.paths": log_paths} + # 9 log file expected per model + print(len(log_paths)) + assert(len(log_paths) >= 2) + assert(len(data_paths) == 2) + success=True + return success, args_json + +def main_fetch_bias_training_files(encid, args_json, models_path, name): + success = False + + # find the training test regions + args_json["bias training and test regions tar"] = {} + readme_file = "READMEs/bias.training.README" + assert(os.path.isfile(readme_file)) + args_json["bias training and test regions tar"]["file.paths"] = [(readme_file, "README.md")] + + input_peaks = os.path.join(main_dir, name + "/data/peaks_no_blacklist.bed.gz") + print(input_peaks) + if os.path.isfile(input_peaks): + args_json["bias training and test regions tar"]["file.paths"].append((input_peaks,"peaks.all_input_regions."+encid+".bed.gz")) + else: + success = False + return success, args_json + + log_paths = upload_utils.bias_fetch_preprocessing_log_files(odir, encid, main_dir, name) + args_json["bias training and test regions tar"]["logs.bias.training_test_regions."+encid] = {"file.paths": log_paths} + assert(len(log_paths) == 3) + + for i in range(5): + data_paths, log_paths = upload_utils.fetch_per_fold_training_data_bias(odir, models_path[i], encid, i, main_dir, name) + + args_json["bias training and test regions tar"]["fold_"+str(i)] = {} + args_json["bias training and test regions tar"]["fold_"+str(i)]["file.paths"] = data_paths + args_json["bias 
training and test regions tar"]["fold_"+str(i)]["logs.bias.training_test_regions.fold_"+str(i)+"."+encid] = {"file.paths": log_paths} + #print(len(data_paths)) + #print(data_paths) + assert(len(data_paths) == 5) + assert(len(log_paths) == 2) + + #if len(data_paths) != 3: + # success = False + # return success, args_json + + success = True + return success, args_json + + + +if __name__ == "__main__": + + for name in ["K562", "GM12878", "HEPG2", "IMR90", "H1ESC"]: + + encid=encode_id[name] + model_paths = model_atac[model_atac[1]==name][2].values + print(model_paths) + + if os.path.isfile(output_dir+"/"+encid+".json"): + continue + + print(encid) + + args_json = {} + + success, args_json = main_fetch_preprocessing_files(encid, args_json, data_to_bam[name], name) + if not success: + print("ERR prep") + continue + + success, args_json = main_fetch_bias_training_files(encid, args_json, model_paths, name) + if not success: + print("ERR bias prep") + continue + + success, args_json = main_fetch_bias_model_files(encid, args_json, model_paths) + if not success: + print("ERR bias models") + continue + + if name == "K562": + with open(output_dir+"/"+encid+"_bias.json", "w") as outfile: + json.dump(args_json, outfile, indent=4) + + args_json = {} + main_fetch_preprocessing_files_for_k562(encid, args_json, data_to_bam[name], name) + if not success: + print("ERR prep") + continue + + success, args_json = main_fetch_model_files(encid, args_json, model_paths, name) + if not success: + print("fail model") + continue + + success, args_json = main_fetch_training_files(encid, args_json, model_paths, name) + if not success: + print("fail train prep") + continue + + + with open(output_dir+"/"+encid+".json", "w") as outfile: + json.dump(args_json, outfile, indent=4) + + #print(args_json) + + + + + diff --git a/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/atac_bias_upload_utils.py 
b/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/atac_bias_upload_utils.py new file mode 100644 index 00000000..5f7d236d --- /dev/null +++ b/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/atac_bias_upload_utils.py @@ -0,0 +1,174 @@ +import os +import json +import numpy as np + +### utils for preprocessing + + +### utils for training and testing regions + +def bias_fetch_preprocessing_log_files(odir, encid, main_dir, name): + # do bed file checks + log_paths = [] + # preprocessing, peak-calling + + # preprocessing log files +# temp_dir="/oak/stanford/groups/akundaje/projects/chrombpnet/model_inputs/ENCODE_ATAC_downloads/" +# preprocessing_log = os.path.join(temp_dir, name + "/script.sh") +# if os.stat(preprocessing_log).st_size != 0: +# log_paths.append((preprocessing_log,"logfile.preprocessing."+encid+".script_v1.sh")) + + preprocessing_log = os.path.join(main_dir, name + "/data/"+name+"_preprocessing.log") + if os.stat(preprocessing_log).st_size != 0: + log_paths.append((preprocessing_log,"logfile.preprocessing."+encid+".stdout.txt")) + + preprocessing_log = os.path.join(main_dir, name + "/data/"+name.lower()+"_atac_fold_0.sh") + if os.stat(preprocessing_log).st_size != 0: + log_paths.append((preprocessing_log,"logfile.preprocessing."+encid+".script.sh")) + + preprocessing_log = os.path.join(main_dir, name + "/data/"+name+"_bias_pwm.png") + if os.stat(preprocessing_log).st_size != 0: + log_paths.append((preprocessing_log,"logfile.preprocessing."+encid+".bias_pwm.png")) + + return log_paths + + +def fetch_per_fold_training_data_bias(odir, model_dir, encid, fold_num, main_dir, name): + input_paths = [] + log_paths = [] + + #print(model_dir) + opath = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/splits_format/" + filtered_regions_bed = os.path.join(opath + "/fold_"+str(fold_num)+".json") + if os.path.isfile(filtered_regions_bed): + 
input_paths.append((filtered_regions_bed,"cv_params.fold_"+str(fold_num)+".json")) + + #temp_dir="/mnt/lab_data2/anusri/chrombpnet/results/chrombpnet/ATAC_PE/atlas_model_k562_fold_0/" + if fold_num==0: + filtered_regions_bed = os.path.join(main_dir, name+"/negatives_data/negatives_with_summit.bed.gz") + #print(filtered_regions_bed) + if os.path.isfile(filtered_regions_bed): + input_paths.append((filtered_regions_bed,"nonpeaks.all_input_regions.fold_"+str(fold_num)+"."+encid+".bed.gz")) + else: + filtered_regions_bed = os.path.join(main_dir, name+"/negatives_data_"+str(fold_num)+"/negatives_with_summit.bed.gz") + if os.path.isfile(filtered_regions_bed): + input_paths.append((filtered_regions_bed,"nonpeaks.all_input_regions.fold_"+str(fold_num)+"."+encid+".bed.gz")) + + +# filtered_regions_bed = os.path.join(model_dir, "bias_model/train_test_regions/peaks.testset.bed.gz") +# #print(filtered_regions_bed) +# if os.path.isfile(filtered_regions_bed): +# input_paths.append((filtered_regions_bed,"peaks.testset.fold_"+str(fold_num)+"."+encid+".bed.gz")) + + filtered_regions_bed = os.path.join(model_dir, "train_test_regions_bias_may_7_2024/nonpeaks.trainingset.bed.gz") + if os.path.isfile(filtered_regions_bed): + input_paths.append((filtered_regions_bed,"nonpeaks.trainingset.fold_"+str(fold_num)+"."+encid+".bed.gz")) + + filtered_regions_bed = os.path.join(model_dir, "train_test_regions_bias_may_7_2024/nonpeaks.validationset.bed.gz") + if os.path.isfile(filtered_regions_bed): + input_paths.append((filtered_regions_bed,"nonpeaks.validationset.fold_"+str(fold_num)+"."+encid+".bed.gz")) + + filtered_regions_bed = os.path.join(model_dir, "train_test_regions_bias_may_7_2024/nonpeaks.testset.bed.gz") + if os.path.isfile(filtered_regions_bed): + input_paths.append((filtered_regions_bed,"nonpeaks.testset.fold_"+str(fold_num)+"."+encid+".bed.gz")) + + #print(input_paths) + #print(filtered_regions_bed) + + if fold_num==0: + #negatives_log = os.path.join(temp_dir, 
name+"/negatives_data/make_background_regions.log") + negatives_log = os.path.join(main_dir, name+"/negatives_data/make_background_regions.log") + + if os.stat(negatives_log).st_size != 0: + log_paths.append((negatives_log,"logfile.gc_matching.fold_"+str(fold_num)+"."+encid+".stdout.txt")) + else: + negatives_log = os.path.join(main_dir, name+"/negatives_data_"+str(fold_num)+"/make_background_regions.log") + if os.stat(negatives_log).st_size != 0: + log_paths.append((negatives_log,"logfile.gc_matching.fold_"+str(fold_num)+"."+encid+".stdout.txt")) + + + if fold_num==0: +# negatives_log = os.path.join(temp_dir, "negatives_data/negatives_compared_with_foreground.png") + negatives_log = os.path.join(main_dir, name+"/negatives_data/negatives_compared_with_foreground.png") + if os.stat(negatives_log).st_size != 0: + log_paths.append((negatives_log,"logfile.gc_matching.fold_"+str(fold_num)+"."+encid+".stdout.png")) + else: + negatives_log = os.path.join(main_dir, name+"/negatives_data_"+str(fold_num)+"/negatives_compared_with_foreground.png") + if os.stat(negatives_log).st_size != 0: + log_paths.append((negatives_log,"logfile.gc_matching.fold_"+str(fold_num)+"."+encid+".stdout.png")) + +# negatives_log = os.path.join(odir, encid + "/negatives_data/test/fold_"+str(fold_num)+"."+encid+"_test.log") +# if os.stat(negatives_log).st_size != 0: +# log_paths.append((negatives_log,"logfile.gc_matching.fold_"+str(fold_num)+"."+encid+".stdout.txt")) +# + # add preprocessing data main_dir + + return input_paths, log_paths + + +### utils for model uploads + +#just need to add log files + +def fetch_per_fold_bias_models(odir, model_dir, encid, fold_num): + input_paths = [] + log_paths = [] + + bm_model = os.path.join(model_dir, "bias_model/bias.h5") + if os.path.isfile(bm_model): + input_paths.append((bm_model,"model.bias.fold_"+str(fold_num)+"."+encid+".h5")) + else: + return None, None + + bm_model = os.path.join(model_dir, "bias_model/new_model_formats_vf/bias.tar") + if 
os.path.isfile(bm_model): + input_paths.append((bm_model,"model.bias.fold_"+str(fold_num)+"."+encid+".tar")) + else: + return None, None + + #### fetch model training log files ######## + + modelling_log = os.path.join(model_dir, "bias_model/train_bias_model.log") + if os.path.exists(modelling_log): + if os.stat(modelling_log).st_size != 0: + log_paths.append((modelling_log,"logfile.modelling.fold_"+str(fold_num)+"."+encid+".stdout.txt")) + #else: + # print(modelling_log) + modelling_log = os.path.join(model_dir, "bias_model/bias.args.json") + if os.path.isfile(modelling_log): + log_paths.append((modelling_log,"logfile.modelling.fold_"+str(fold_num)+"."+encid+".args.json")) + + modelling_log = os.path.join(model_dir, "bias_model/bias_data_params.tsv") + if os.path.isfile(modelling_log): + log_paths.append((modelling_log,"logfile.modelling.fold_"+str(fold_num)+"."+encid+".bias_data_params.tsv")) + else: + modelling_log = os.path.join(model_dir, "bias_model/newgen/bias_data_params.tsv") + if os.path.isfile(modelling_log): + log_paths.append((modelling_log,"logfile.modelling.fold_"+str(fold_num)+"."+encid+".bias_data_params.tsv")) + + + modelling_log = os.path.join(model_dir, "bias_model/bias_model_params.tsv") + if os.path.isfile(modelling_log): + log_paths.append((modelling_log,"logfile.modelling.fold_"+str(fold_num)+"."+encid+".bias_train_params.tsv")) + else: + modelling_log = os.path.join(model_dir, "bias_model/newgen/bias_model_params.tsv") + if os.path.isfile(modelling_log): + log_paths.append((modelling_log,"logfile.modelling.fold_"+str(fold_num)+"."+encid+".bias_train_params.tsv")) + + modelling_log = os.path.join(model_dir, "bias_model/bias.params.json") + if os.path.isfile(modelling_log): + log_paths.append((modelling_log,"logfile.modelling.fold_"+str(fold_num)+"."+encid+".bias_train_params.json")) + + modelling_log = os.path.join(model_dir, "bias_model/bias.log") + if os.path.isfile(modelling_log): + 
log_paths.append((modelling_log,"logfile.modelling.fold_"+str(fold_num)+"."+encid+".epoch_loss.csv")) + + modelling_log = os.path.join(model_dir, "bias_model/bias.log.batch") + if os.path.isfile(modelling_log): + log_paths.append((modelling_log,"logfile.modelling.fold_"+str(fold_num)+"."+encid+".batch_loss.tsv")) + + return input_paths, log_paths + + + + diff --git a/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/dnase_bias_model_chrombpnet.csv b/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/dnase_bias_model_chrombpnet.csv new file mode 100644 index 00000000..e69de29b diff --git a/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/dnase_bias_model_upload.py b/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/dnase_bias_model_upload.py new file mode 100644 index 00000000..74940191 --- /dev/null +++ b/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/dnase_bias_model_upload.py @@ -0,0 +1,283 @@ +import os +import dnase_bias_upload_utils as upload_utils +import json +import pandas as pd +import model_upload_utils + +odir = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/DNASE/" +#output_dir = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022-uploads/jsons/DNASE/stage1/jul_17_2023/" +output_dir = "dnase_production_uploads/" + +encids = os.listdir(odir) +#encids = open("../chromatin_atlas_atac/test_encid.txt").readlines() +#encids = [line.strip() for line in encids] + +model_atac = pd.read_csv("/mnt/lab_data2/anusri/chrombpnet/upload_jsons/upload_scripts/model_dir_dnase_v2.1_bias.csv",sep=",", header=None) +model_atac_new = pd.read_csv("/mnt/lab_data2/anusri/chrombpnet/logs/checkpoint/JAN_02_2023/v1/model_dir_dnase_v2.1.csv",sep=",", header=None) + + +encode_id = {"HEPG2": "ENCSR149XIL", + "K562": "ENCSR000EOT", + "IMR90": "ENCSR477RTP", + "GM12878": "ENCSR000EMT", + "H1ESC": "ENCSR000EMU"} + +data_to_bam = {"HEPG2": ["ENCFF474LSZ", 
"ENCFF839SPF"], + "K562": ["ENCFF205FNC"], + "IMR90": ["ENCFF618FFB"], + "GM12878": ["ENCFF467CXY", "ENCFF940NSD"], + "H1ESC": ["ENCFF733TCL"]} + +def main_fetch_training_files(encid, args_json, model_paths, name): + success = False + + # find the training test regions + args_json["training and test regions tar"] = {} + readme_file = "READMEs/bias.models.README" + assert(os.path.isfile(readme_file)) + args_json["training and test regions tar"]["file.paths"] = [(readme_file, "README.md")] + + if name in ["HEPG2", "K562"]: + main_dir="/mnt/lab_data2/anusri/chrombpnet/results/chrombpnet/DNASE_PE/" + input_peaks = os.path.join(main_dir, name + "/data/peaks_no_blacklist.bed.gz") + else: + main_dir="/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/DNASE/" + input_peaks = os.path.join(odir, encid + "/preprocessing/downloads/peaks.bed.gz") + + if os.path.isfile(input_peaks): + args_json["training and test regions tar"]["file.paths"].append((input_peaks,"peaks.all_input_regions."+encid+".bed.gz")) + else: + success = False + return success, args_json + + if name in ["H1ESC"]: + main_dir="/mnt/lab_data2/anusri/chrombpnet/results/chrombpnet/DNASE_SE/" + + log_paths = model_upload_utils.fetch_preprocessing_log_files(odir,encid,main_dir, name) + args_json["training and test regions tar"]["logs.training_test_regions."+encid] = {"file.paths": log_paths} + assert(len(log_paths) == 3) + + for i in range(5): + data_paths, log_paths = model_upload_utils.fetch_per_fold_training_data(odir,model_paths[i], encid, i, main_dir, name) + + args_json["training and test regions tar"]["fold_"+str(i)] = {} + args_json["training and test regions tar"]["fold_"+str(i)]["file.paths"] = data_paths + args_json["training and test regions tar"]["fold_"+str(i)]["logs.training_test_regions.fold_"+str(i)+"."+encid] = {"file.paths": log_paths} + print(len(data_paths)) + assert(len(data_paths) == 8) + assert(len(log_paths) == 2) + + if len(data_paths) != 8: + success = False + return success, 
args_json + + success = True + return success, args_json + + +def main_fetch_preprocessing_files(encid, args_json, bam_ids, name): + + success_flag = False + + if name == "HEPG2": + args_json["upload bias"] = False + else: + args_json["upload bias"] = True + + args_json["bias model encid"] = encid + + # find the bams input + preprocessing_path = "/mnt/lab_data2/anusri/chrombpnet/results/chrombpnet/DNASE_PE/"+name+"/data/"+name+"_unstranded.bw" + preprocessing_path_oak = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/DNASE/"+encid+"/preprocessing/bigWigs/"+encid+".bigWig" + if os.path.isfile(preprocessing_path): + args_json["experiment"] = encid + args_json["bam files"] = bam_ids + args_json["assay"] = "DNase-seq" + args_json["observed signal profile bigWig"] = preprocessing_path + success = True + elif os.path.isfile(preprocessing_path_oak): + args_json["experiment"] = encid + args_json["bam files"] = bam_ids + args_json["assay"] = "DNase-seq" + args_json["observed signal profile bigWig"] = preprocessing_path_oak + success = True + else: + success = False + + return success, args_json + +def main_fetch_model_files(encid, args_json, model_paths, name): + success = False + args_json["models tar"] = {} + readme_file = "READMEs/models.README" + assert(os.path.isfile(readme_file)) + args_json["models tar"]["file.paths"] = [(readme_file, "README.md")] + args_json["models tar"]["logs.models."+encid] = {"file.paths": None} + + for i in range(5): + data_paths, log_paths, log_paths_opt = model_upload_utils.fetch_per_fold_models(odir,model_paths[i], encid, i) + + if data_paths is None: + success = False + return success, args_json + + args_json["models tar"]["fold_"+str(i)] = {} + args_json["models tar"]["fold_"+str(i)]["file.paths"] = data_paths + args_json["models tar"]["fold_"+str(i)]["logs.models.fold_"+str(i)+"."+encid] = {"file.paths": log_paths+log_paths_opt} + assert(len(data_paths) == 6) + print(len(log_paths)) + assert(len(log_paths) >= 6) + + 
success=True + return success, args_json + +def main_fetch_bias_model_files(encid, args_json, models_path): + success = False + args_json["bias models tar"] = {} + readme_file = "READMEs/bias.models.README" + assert(os.path.isfile(readme_file)) + args_json["bias models tar"]["file.paths"] = [(readme_file, "README.md")] + #args_json["bias models tar"]["logs.bias.models."+encid] = {"file.paths": None} + + for i in range(5): + data_paths, log_paths = upload_utils.fetch_per_fold_bias_models(odir, models_path[i], encid, i) + + if data_paths is None: + success = False + return success, args_json + + args_json["bias models tar"]["fold_"+str(i)] = {} + args_json["bias models tar"]["fold_"+str(i)]["file.paths"] = data_paths + args_json["bias models tar"]["fold_"+str(i)]["logs.bias.models.fold_"+str(i)+"."+encid] = {"file.paths": log_paths} + #print(log_paths) + # 9 log file expected per model + #print(len(log_paths)) + assert(len(log_paths) >= 2) + assert(len(data_paths) == 2) + success=True + return success, args_json + + +def main_fetch_bias_training_files(encid, args_json, models_path, name): + success = False + + # find the training test regions + args_json["bias training and test regions tar"] = {} + readme_file = "READMEs/bias.training.README" + assert(os.path.isfile(readme_file)) + args_json["bias training and test regions tar"]["file.paths"] = [(readme_file, "README.md")] + + if name in ["HEPG2", "K562"]: + main_dir="/mnt/lab_data2/anusri/chrombpnet/results/chrombpnet/DNASE_PE/" + input_peaks = os.path.join(main_dir, name + "/data/peaks_no_blacklist.bed.gz") + else: + main_dir="/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/DNASE/" + input_peaks = os.path.join(odir, encid + "/preprocessing/downloads/peaks.bed.gz") + + #print(input_peaks) + if os.path.isfile(input_peaks): + args_json["bias training and test regions tar"]["file.paths"].append((input_peaks,"peaks.all_input_regions."+encid+".bed.gz")) + else: + success = False + return success, args_json + 
+ # log files preprocessing and peak-calling + if name in ["HEPG2", "K562"]: + log_paths = upload_utils.bias_fetch_preprocessing_log_files_set_1(odir, encid, main_dir, name) + #print(len(log_paths)) + assert(len(log_paths) == 3) + elif name in ["H1ESC"]: + main_dir="/mnt/lab_data2/anusri/chrombpnet/results/chrombpnet/DNASE_SE/" + log_paths = upload_utils.bias_fetch_preprocessing_log_files_set_1(odir, encid, main_dir, name) + #print(len(log_paths)) + assert(len(log_paths) == 3) + + else: + log_paths = upload_utils.bias_fetch_preprocessing_log_files_set_2(odir, encid, main_dir, name) + assert(len(log_paths) == 8) + + + args_json["bias training and test regions tar"]["logs.bias.training_test_regions."+encid] = {"file.paths": log_paths} + + + for i in range(5): + data_paths, log_paths = upload_utils.fetch_per_fold_training_data_bias(odir, models_path[i], encid, i, main_dir, name) + #print(data_paths) + args_json["bias training and test regions tar"]["fold_"+str(i)] = {} + args_json["bias training and test regions tar"]["fold_"+str(i)]["file.paths"] = data_paths + args_json["bias training and test regions tar"]["fold_"+str(i)]["logs.bias.training_test_regions.fold_"+str(i)+"."+encid] = {"file.paths": log_paths} + #print(log_paths) + #print(log_paths) + #print(data_paths) + assert(len(data_paths) == 5) + assert(len(log_paths) == 2) + + #if len(data_paths) != 3: + # success = False + # return success, args_json + + success = True + return success, args_json + + + +if __name__ == "__main__": + + # define readmes specfic to bias model + #for name in ["HEPG2", "GM12878", "K562", "IMR90", "H1ESC"]: + for name in ["HEPG2", "K562", "H1ESC"]: + + encid=encode_id[name] + model_paths = model_atac[model_atac[1]==name][2].values + + model_paths_new = model_atac_new[model_atac_new[1]==name][2].values + + print(model_paths) + + if os.path.isfile(output_dir+"/"+encid+".json"): + continue + + print(encid) + + args_json = {} + + success, args_json = main_fetch_preprocessing_files(encid, 
args_json, data_to_bam[name], name) + if not success: + print("ERR prep") + continue + + if name != "HEPG2": + + success, args_json = main_fetch_bias_training_files(encid, args_json, model_paths, name) + if not success: + print("ERR bias prep") + continue + + success, args_json = main_fetch_bias_model_files(encid, args_json, model_paths) + if not success: + print("ERR bias models") + continue + + if name == "H1ESC": + model_paths = model_paths_new + + success, args_json = main_fetch_model_files(encid, args_json, model_paths, name) + if not success: + print("fail model") + continue + + success, args_json = main_fetch_training_files(encid, args_json, model_paths, name) + if not success: + print("fail train prep") + continue + + + with open(output_dir+"/"+encid+".json", "w") as outfile: + json.dump(args_json, outfile, indent=4) + + #print(args_json) + + + + + diff --git a/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/dnase_bias_upload_utils.py b/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/dnase_bias_upload_utils.py new file mode 100644 index 00000000..053b8008 --- /dev/null +++ b/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/dnase_bias_upload_utils.py @@ -0,0 +1,301 @@ +import os +import json +import numpy as np + +### utils for preprocessing + + +### utils for training and testing regions + + +def bias_fetch_preprocessing_log_files_set_1(odir, encid, main_dir, name): + # do bed file checks + log_paths = [] + + + # preprocessing log files + preprocessing_log = os.path.join(main_dir, name + "/data/"+name+"_preprocessing.log") + if os.stat(preprocessing_log).st_size != 0: + log_paths.append((preprocessing_log,"logfile.preprocessing."+encid+".stdout.txt")) + + preprocessing_log = os.path.join(main_dir, name + "/data/"+name+"_DNASE_PE.sh") + if os.path.isfile(preprocessing_log): + log_paths.append((preprocessing_log,"logfile.preprocessing."+encid+".script.sh")) + + preprocessing_log = 
os.path.join(main_dir, name + "/data/h1_dnase_fold_0.sh") + if os.path.isfile(preprocessing_log): + log_paths.append((preprocessing_log,"logfile.preprocessing."+encid+".script.sh")) + + preprocessing_log = os.path.join(main_dir, name + "/data/"+name+"_bias_pwm.png") + if os.stat(preprocessing_log).st_size != 0: + log_paths.append((preprocessing_log,"logfile.preprocessing."+encid+".bias_pwm.png")) + + # peak-calling-log-files +# tmpdir = "/oak/stanford/groups/akundaje/projects/chrombpnet/model_inputs/DNASE/caper/" +# +# peaks_log = os.path.join(tmpdir, name + "/metadata.json") +# if os.path.isfile(peaks_log): +# log_paths.append((peaks_log,"logfile.peak_calling."+encid+".metadata.json")) +# +# peaks_log = os.path.join(tmpdir, name + "/call-reproducibility_overlap/stdout") +# if os.path.isfile(peaks_log): +# log_paths.append((peaks_log,"logfile.peak_calling."+encid+".stdout.txt")) +# +# peaks_log = os.path.join(tmpdir, name + "/call-reproducibility_overlap/stderr") +# if os.path.isfile(peaks_log): +# log_paths.append((peaks_log,"logfile.peak_calling."+encid+".stderr.txt")) + + return log_paths + +def bias_fetch_preprocessing_log_files_set_2(odir, encid, main_dir, name): + # do bed file checks + log_paths = [] + + # preprocessing log files + preprocessing_log = os.path.join(odir, encid + "/preprocessing/preprocessing.log.e") + if os.stat(preprocessing_log).st_size != 0: + log_paths.append((preprocessing_log,"logfile.preprocessing."+encid+".stderr.txt")) + + preprocessing_log = os.path.join(odir, encid + "/preprocessing/preprocessing.log.o") + if os.stat(preprocessing_log).st_size != 0: + log_paths.append((preprocessing_log,"logfile.preprocessing."+encid+".stdout.txt")) + + preprocessing_log = os.path.join(odir, encid + "/preprocessing/"+encid+".log") + if os.stat(preprocessing_log).st_size != 0: + log_paths.append((preprocessing_log,"logfile.preprocessing."+encid+".stdout_v1.txt")) + + preprocessing_log = os.path.join(odir, encid + 
"/preprocessing/preprocess_"+encid+".log") + if os.stat(preprocessing_log).st_size != 0: + log_paths.append((preprocessing_log,"logfile.preprocessing."+encid+".stdout_v2.txt")) + + preprocessing_log = os.path.join(odir, encid + "/preprocessing/params_file.json") + if os.stat(preprocessing_log).st_size != 0: + log_paths.append((preprocessing_log,"logfile.preprocessing."+encid+".params_file.json")) + + preprocessing_log = os.path.join(odir, encid + "/preprocessing/bigWigs/"+encid+".png") + if os.stat(preprocessing_log).st_size != 0: + log_paths.append((preprocessing_log,"logfile.preprocessing."+encid+".bias_pwm.png")) + + # peak-calling-log-files + peaks_log = os.path.join(odir, encid + "/peak_calling/log.e") + if os.path.isfile(peaks_log): + log_paths.append((peaks_log,"logfile.peak_calling."+encid+".stdout_v1.txt")) + + peaks_log = os.path.join(odir, encid + "/peak_calling/log.o") + if os.path.isfile(peaks_log): + log_paths.append((peaks_log,"logfile.peak_calling."+encid+".stdout_v2.txt")) + + return log_paths + +def fetch_per_fold_training_data_bias(odir, model_dir, encid, fold_num, main_dir, name): + input_paths = [] + log_paths = [] + + #print(model_dir) + opath = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/splits_format/" + filtered_regions_bed = os.path.join(opath + "/fold_"+str(fold_num)+".json") + if os.path.isfile(filtered_regions_bed): + input_paths.append((filtered_regions_bed,"cv_params.fold_"+str(fold_num)+".json")) + + if fold_num==0: + print(name) + if name in ["HEPG2", "K562", "H1ESC"]: + filtered_regions_bed = os.path.join(main_dir, name+"/negatives_data/negatives_with_summit.bed.gz") + print(filtered_regions_bed) + if os.path.isfile(filtered_regions_bed): + input_paths.append((filtered_regions_bed,"nonpeaks.all_input_regions.fold_"+str(fold_num)+"."+encid+".bed.gz")) + else: + + filtered_regions_bed = os.path.join(main_dir, name+"/data/negatives_data/negatives_with_summit.bed.gz") + if os.path.isfile(filtered_regions_bed): + 
input_paths.append((filtered_regions_bed,"nonpeaks.all_input_regions.fold_"+str(fold_num)+"."+encid+".bed.gz")) + + filtered_regions_bed = os.path.join(odir, encid+"/negatives_data/negatives_with_summit.bed.gz") + if os.path.isfile(filtered_regions_bed): + input_paths.append((filtered_regions_bed,"nonpeaks.all_input_regions.fold_"+str(fold_num)+"."+encid+".bed.gz")) + + else: + if name in ["HEPG2", "K562", "H1ESC"]: + filtered_regions_bed = os.path.join(main_dir, name+"/negatives_data_"+str(fold_num)+"/negatives_with_summit.bed.gz") + print(filtered_regions_bed) + if os.path.isfile(filtered_regions_bed): + input_paths.append((filtered_regions_bed,"nonpeaks.all_input_regions.fold_"+str(fold_num)+"."+encid+".bed.gz")) + + else: + filtered_regions_bed = os.path.join(main_dir, name+"/data/negatives_data_"+str(fold_num)+"/negatives_with_summit.bed.gz") + print(filtered_regions_bed) + if os.path.isfile(filtered_regions_bed): + input_paths.append((filtered_regions_bed,"nonpeaks.all_input_regions.fold_"+str(fold_num)+"."+encid+".bed.gz")) + + + filtered_regions_bed = os.path.join(odir, encid+"/negatives_data_"+str(fold_num)+"/negatives_with_summit.bed.gz") + if os.path.isfile(filtered_regions_bed): + input_paths.append((filtered_regions_bed,"nonpeaks.all_input_regions.fold_"+str(fold_num)+"."+encid+".bed.gz")) + + #filtered_regions_bed = os.path.join(model_dir, "bias_model/train_test_regions/peaks.testset.bed.gz") + #print(filtered_regions_bed) + #if os.path.isfile(filtered_regions_bed): + # input_paths.append((filtered_regions_bed,"peaks.testset.fold_"+str(fold_num)+"."+encid+".bed.gz")) + + filtered_regions_bed = os.path.join(model_dir, "train_test_regions_bias_may_7_2024/nonpeaks.trainingset.bed.gz") + if os.path.isfile(filtered_regions_bed): + input_paths.append((filtered_regions_bed,"nonpeaks.trainingset.fold_"+str(fold_num)+"."+encid+".bed.gz")) + + filtered_regions_bed = os.path.join(model_dir, "train_test_regions_bias_may_7_2024/nonpeaks.validationset.bed.gz") + if 
os.path.isfile(filtered_regions_bed): + input_paths.append((filtered_regions_bed,"nonpeaks.validationset.fold_"+str(fold_num)+"."+encid+".bed.gz")) + + filtered_regions_bed = os.path.join(model_dir, "train_test_regions_bias_may_7_2024/nonpeaks.testset.bed.gz") + if os.path.isfile(filtered_regions_bed): + input_paths.append((filtered_regions_bed,"nonpeaks.testset.fold_"+str(fold_num)+"."+encid+".bed.gz")) + + #print(input_paths) + #print(filtered_regions_bed) + + if fold_num==0: + if name in ["HEPG2", "K562", "H1ESC"]: + negatives_log = os.path.join(main_dir, name+"/negatives_data/make_background_regions.log") + if os.path.isfile(negatives_log): + if os.stat(negatives_log).st_size != 0: + log_paths.append((negatives_log,"logfile.gc_matching.fold_"+str(fold_num)+"."+encid+".stdout_v1.txt")) + else: + + negatives_log = os.path.join(main_dir, name+"/data/negatives_data/make_background_regions.log") + if os.path.isfile(negatives_log): + if os.stat(negatives_log).st_size != 0: + log_paths.append((negatives_log,"logfile.gc_matching.fold_"+str(fold_num)+"."+encid+".stdout_v1.txt")) + + negatives_log = os.path.join(odir, encid+"/negatives_data/make_background_regions.log") + if os.path.isfile(negatives_log): + if os.stat(negatives_log).st_size != 0: + log_paths.append((negatives_log,"logfile.gc_matching.fold_"+str(fold_num)+"."+encid+".stdout_v1.txt")) + + negatives_log = os.path.join(odir, encid+"/negatives_data/gc_matching.log.o") + if os.path.isfile(negatives_log): + if os.stat(negatives_log).st_size != 0: + log_paths.append((negatives_log,"logfile.gc_matching.fold_"+str(fold_num)+"."+encid+".stdout_v2.txt")) + + else: + if name in ["HEPG2", "K562", "H1ESC"]: + negatives_log = os.path.join(main_dir, name+"/negatives_data_"+str(fold_num)+"/make_background_regions.log") + if os.path.isfile(negatives_log): + if os.stat(negatives_log).st_size != 0: + log_paths.append((negatives_log,"logfile.gc_matching.fold_"+str(fold_num)+"."+encid+".stdout_v1.txt")) + else: + negatives_log 
= os.path.join(main_dir, name+"/data/negatives_data_"+str(fold_num)+"/make_background_regions.log") + if os.path.isfile(negatives_log): + if os.stat(negatives_log).st_size != 0: + log_paths.append((negatives_log,"logfile.gc_matching.fold_"+str(fold_num)+"."+encid+".stdout_v1.txt")) + + negatives_log = os.path.join(odir, encid+"/negatives_data_"+str(fold_num)+"/make_background_regions.log") + if os.path.isfile(negatives_log): + if os.stat(negatives_log).st_size != 0: + log_paths.append((negatives_log,"logfile.gc_matching.fold_"+str(fold_num)+"."+encid+".stdout_v1.txt")) + + + if fold_num==0: + if name in ["HEPG2", "K562", "H1ESC"]: + negatives_log = os.path.join(main_dir, name+"/negatives_data/negatives_compared_with_foreground.png") + #print(negatives_log) + if os.path.isfile(negatives_log): + if os.stat(negatives_log).st_size != 0: + log_paths.append((negatives_log,"logfile.gc_matching.fold_"+str(fold_num)+"."+encid+".stdout.png")) + else: + negatives_log = os.path.join(main_dir, name+"/data/negatives_data/negatives_compared_with_foreground.png") + if os.path.isfile(negatives_log): + if os.stat(negatives_log).st_size != 0: + log_paths.append((negatives_log,"logfile.gc_matching.fold_"+str(fold_num)+"."+encid+".stdout.png")) + + negatives_log = os.path.join(odir, encid+"/negatives_data/negatives_compared_with_foreground.png") + if os.path.isfile(negatives_log): + if os.stat(negatives_log).st_size != 0: + log_paths.append((negatives_log,"logfile.gc_matching.fold_"+str(fold_num)+"."+encid+".stdout.png")) + + else: + if name in ["HEPG2", "K562", "H1ESC"]: + negatives_log = os.path.join(main_dir, name+"/negatives_data_"+str(fold_num)+"/negatives_compared_with_foreground.png") + if os.path.isfile(negatives_log): + if os.stat(negatives_log).st_size != 0: + log_paths.append((negatives_log,"logfile.gc_matching.fold_"+str(fold_num)+"."+encid+".stdout.png")) + else: + negatives_log = os.path.join(main_dir, 
name+"/data/negatives_data_"+str(fold_num)+"/negatives_compared_with_foreground.png") + if os.path.isfile(negatives_log): + if os.stat(negatives_log).st_size != 0: + log_paths.append((negatives_log,"logfile.gc_matching.fold_"+str(fold_num)+"."+encid+".stdout.png")) + + negatives_log = os.path.join(odir, encid+"/negatives_data_"+str(fold_num)+"/negatives_compared_with_foreground.png") + if os.path.isfile(negatives_log): + if os.stat(negatives_log).st_size != 0: + log_paths.append((negatives_log,"logfile.gc_matching.fold_"+str(fold_num)+"."+encid+".stdout.png")) + +# negatives_log = os.path.join(odir, encid + "/negatives_data/test/fold_"+str(fold_num)+"."+encid+"_test.log") +# if os.stat(negatives_log).st_size != 0: +# log_paths.append((negatives_log,"logfile.gc_matching.fold_"+str(fold_num)+"."+encid+".stdout.txt")) + + # add preprocessing data main_dir + + #print(input_paths) + return input_paths, log_paths + + +def fetch_per_fold_bias_models(odir, model_dir, encid, fold_num): + input_paths = [] + log_paths = [] + + bm_model = os.path.join(model_dir, "bias_model/bias.h5") + if os.path.isfile(bm_model): + input_paths.append((bm_model,"model.bias.fold_"+str(fold_num)+"."+encid+".h5")) + else: + return None, None + + bm_model = os.path.join(model_dir, "bias_model/new_model_formats_vf/bias.tar") + if os.path.isfile(bm_model): + input_paths.append((bm_model,"model.bias.fold_"+str(fold_num)+"."+encid+".tar")) + else: + return None, None + + #### fetch model training log files ######## + + modelling_log = os.path.join(model_dir, "bias_model/train_bias_model.log") + if os.path.isfile(modelling_log): + log_paths.append((modelling_log,"logfile.modelling.fold_"+str(fold_num)+"."+encid+".stdout.txt")) + + modelling_log = os.path.join(model_dir, "bias_model/bias.args.json") + if os.path.isfile(modelling_log): + log_paths.append((modelling_log,"logfile.modelling.fold_"+str(fold_num)+"."+encid+".args.json")) + + modelling_log = os.path.join(model_dir, 
"bias_model/bias_data_params.tsv") + if os.path.isfile(modelling_log): + log_paths.append((modelling_log,"logfile.modelling.fold_"+str(fold_num)+"."+encid+".bias_data_params.tsv")) + else: + modelling_log = os.path.join(model_dir, "bias_model/newgen/bias_data_params.tsv") + if os.path.isfile(modelling_log): + log_paths.append((modelling_log,"logfile.modelling.fold_"+str(fold_num)+"."+encid+".bias_data_params.tsv")) + + + modelling_log = os.path.join(model_dir, "bias_model/bias_model_params.tsv") + if os.path.isfile(modelling_log): + log_paths.append((modelling_log,"logfile.modelling.fold_"+str(fold_num)+"."+encid+".bias_train_params.tsv")) + else: + modelling_log = os.path.join(model_dir, "bias_model/newgen/bias_model_params.tsv") + if os.path.isfile(modelling_log): + log_paths.append((modelling_log,"logfile.modelling.fold_"+str(fold_num)+"."+encid+".bias_train_params.tsv")) + + modelling_log = os.path.join(model_dir, "bias_model/bias.params.json") + if os.path.isfile(modelling_log): + log_paths.append((modelling_log,"logfile.modelling.fold_"+str(fold_num)+"."+encid+".bias_train_params.json")) + + modelling_log = os.path.join(model_dir, "bias_model/bias.log") + if os.path.isfile(modelling_log): + log_paths.append((modelling_log,"logfile.modelling.fold_"+str(fold_num)+"."+encid+".epoch_loss.csv")) + + modelling_log = os.path.join(model_dir, "bias_model/bias.log.batch") + if os.path.isfile(modelling_log): + log_paths.append((modelling_log,"logfile.modelling.fold_"+str(fold_num)+"."+encid+".batch_loss.tsv")) + + return input_paths, log_paths + + + + + diff --git a/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/make_test_negatives/get_gc_matched_negatives_test.py b/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/make_test_negatives/get_gc_matched_negatives_test.py new file mode 100644 index 00000000..f0024950 --- /dev/null +++ 
b/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/make_test_negatives/get_gc_matched_negatives_test.py @@ -0,0 +1,175 @@ +import argparse +import pandas as pd +import numpy as np +from tqdm import tqdm +import matplotlib.pyplot as plt +import random +import csv +import json +import sys + +def parse_args(): + parser=argparse.ArgumentParser(description="generate a bed file of non-peak regions that are gc-matched with foreground") + parser.add_argument("-c","--candidate_negatives",help="candidate negatives bed file with gc content in 4th column rounded to 2 decimals") + parser.add_argument("-f","--foreground_gc_bed", help="regions with their corresponding gc fractions for matching, 4th column has gc content value rounded to 2 decimals") + parser.add_argument("-o","--output_prefix", help="gc-matched non-peaks output file name") + parser.add_argument("-fl", "--chr_fold_path", type=str, required=True, help="Fold information - dictionary with test,valid and train keys and values with corresponding chromosomes") + parser.add_argument("-npr", "--neg_to_pos_ratio_train", type=int, default=1, help="Ratio of negatives to positives to sample for training") + return parser.parse_args() + +def remap_chrom(chrom, splits_dict): + ''' + Remapping chromosome names - we will not differentiate between the train/valid/test chromosomes + when sampling negatives. + ''' + if chrom in splits_dict["train"]: + chrom_mod = "chrom_train" + elif chrom in splits_dict["valid"]: + chrom_mod = "chrom_valid" + elif chrom in splits_dict["test"]: + chrom_mod = "chrom_test" + else: + chrom_mod = "ignore" + return chrom_mod + + +def make_gc_dict(candidate_negatives, splits_dict): + """ + Imports the candidate negatives into a dictionary structure. + The `key` is the gc content fraction, and the `values` are a list + containing the (chrom,start,end) of a region with the corresponding + gc content fraction. 
+ """ + data=open(candidate_negatives,'r').readlines() + gc_dict={} + index=0 + ignored_chroms = [] + for line in tqdm(list(data)): + line=line.strip('\n') + index+=1 + tokens=line.split('\t') + chrom=tokens[0] + gc=float(tokens[-1]) + start=tokens[1] + end=tokens[2] + chrom_real=chrom + chrom = remap_chrom(chrom, splits_dict) + if chrom == "ignore": + ignored_chroms.append(chrom_real) + continue + if chrom not in gc_dict: + gc_dict[chrom]={} + if gc not in gc_dict[chrom]: + gc_dict[chrom][gc]=[(chrom,start,end,chrom_real)] + else: + gc_dict[chrom][gc].append((chrom,start,end,chrom_real)) + + print("Following background chromosomes {} were ignored since they are not present in the given fold".format(",".join(list(set(ignored_chroms))))) + return gc_dict + +def scale_gc(cur_gc): + """ + Randomly increase/decrease the gc-fraction value by 0.01 + """ + if random.random()>0.5: + cur_gc+=0.01 + else: + cur_gc-=0.01 + cur_gc=round(cur_gc,2) + if cur_gc<=0: + cur_gc+=0.01 + if cur_gc>=1: + cur_gc-=0.01 + assert cur_gc >=0 + assert cur_gc <=1 + return cur_gc + +def adjust_gc(chrom,cur_gc,negatives,used_negatives): + """ + Function that checks if (1) the given gc fraction value is available + in the negative candidates or (2) if the given gc fraction value has + candidates not already sampled. If either of the conditions fails we + sample the neighbouring gc_fraction value by randomly scaling with 0.01. 
+ """ + if chrom not in used_negatives: + used_negatives[chrom]={} + + if cur_gc not in used_negatives[chrom]: + used_negatives[chrom][cur_gc]=[] + + while (cur_gc not in negatives[chrom]) or (len(used_negatives[chrom][cur_gc])>=len(negatives[chrom][cur_gc])): + cur_gc=scale_gc(cur_gc) + if cur_gc not in used_negatives[chrom]: + used_negatives[chrom][cur_gc]=[] + return cur_gc,used_negatives + + + +if __name__=="__main__": + + args=parse_args() + + splits_dict=json.load(open(args.chr_fold_path)) + + negatives=make_gc_dict(args.candidate_negatives, splits_dict) + used_negatives=dict() + cur_peaks=pd.read_csv(args.foreground_gc_bed,header=None,sep='\t') + negatives_bed = [] + print(len(list(cur_peaks.iterrows()))) + + foreground_gc_vals = [] + output_gc_vals = [] + ignored_chroms = [] + for index,row in tqdm(list(cur_peaks.iterrows())): + + chrom=row[0] + start=row[1] + end=row[2] + gc_value=row[3] + + chrom_real=chrom + chrom = remap_chrom(chrom, splits_dict) + if chrom == "ignore": + ignored_chroms.append(chrom_real) + continue + + if chrom=="chrom_train" or chrom=="chrom_valid": + #neg_to_pos_ratio = args.neg_to_pos_ratio_train + continue + else: + neg_to_pos_ratio = 4 + + # for every gc value in positive how many negatives to find + # we will keep the ratio of positives to negatives in the test set same + for rep in range(neg_to_pos_ratio): + cur_gc,used_negatives=adjust_gc(chrom,gc_value,negatives,used_negatives) + num_candidates=len(negatives[chrom][cur_gc]) + rand_neg_index=random.randint(0,num_candidates-1) + while rand_neg_index in used_negatives[chrom][cur_gc]: + cur_gc,used_negatives=adjust_gc(chrom,cur_gc,negatives,used_negatives) + num_candidates=len(negatives[chrom][cur_gc]) + rand_neg_index=random.randint(0,num_candidates-1) + + used_negatives[chrom][cur_gc].append(rand_neg_index) + neg_tuple=negatives[chrom][cur_gc][rand_neg_index] + neg_chrom=neg_tuple[0] + neg_start=neg_tuple[1] + neg_end=neg_tuple[2] + neg_chrom_real=neg_tuple[3] + 
negatives_bed.append([neg_chrom_real,int(neg_start),int(neg_end), cur_gc]) + output_gc_vals.append(cur_gc) + foreground_gc_vals.append(gc_value) + + print("Following foreground chromosomes {} were ignored since they are not present in the given fold".format(",".join(list(set(ignored_chroms))))) + negatives_bed = pd.DataFrame(negatives_bed) + negatives_bed.to_csv(args.output_prefix+".bed", sep='\t', index=False, header=False, quoting=csv.QUOTE_NONE) + + # checking how far the true distribution of foreground is compared to the backgrounds generated + bins = np.linspace(0, 1, 100) + plt.hist([output_gc_vals,foreground_gc_vals], bins, density=True, label=['negatives gc distribution', "foreground gc distribution"]) + plt.xlabel("GC content") + plt.ylabel("Density") + plt.legend(loc='upper right') + plt.savefig(args.output_prefix+"_compared_with_foreground.png") + + diff --git a/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/make_test_negatives/run_script.py b/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/make_test_negatives/run_script.py new file mode 100644 index 00000000..763f2c12 --- /dev/null +++ b/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/make_test_negatives/run_script.py @@ -0,0 +1,26 @@ +import pandas as pd +import os + +model_atac = pd.read_csv("../atac_bias_model_chrombpnet.csv",sep=",", header=None) + + +print(model_atac.head()) + +for i,r in model_atac.iterrows(): + + print(r) + if os.path.isfile(os.path.join(r[2], "train_test_regions_may_7_2024/nonpeaks.testset.bed.gz")): + try: + tdata = pd.read_csv(os.path.join(r[2], "train_test_regions_may_7_2024/nonpeaks.testset.bed.gz")) + continue + except: + pass + + print(os.path.join(r[2], "train_test_regions_may_7_2024/nonpeaks.testset.bed.gz")) + if r[0].split("_")[-1] == "0": + command = "bash script.sh "+r[2]+" "+r[1]+" "+r[0] + else: + command = "bash script.sh "+r[2]+" "+r[1]+" "+r[0]+" "+"_"+str(r[0].split("_")[-1]) + + 
print(command) + os.system(command) diff --git a/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/make_test_negatives/run_script_dnase.py b/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/make_test_negatives/run_script_dnase.py new file mode 100644 index 00000000..8d03cd81 --- /dev/null +++ b/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/make_test_negatives/run_script_dnase.py @@ -0,0 +1,36 @@ +import pandas as pd +import os + +#model_atac = pd.read_csv("/mnt/lab_data2/anusri/chrombpnet/upload_jsons/upload_scripts/model_dir_dnase_v2.1_bias.csv",sep=",", header=None) +model_atac = pd.read_csv("/mnt/lab_data2/anusri/chrombpnet/logs/checkpoint/JAN_02_2023/v1/model_dir_dnase_v2.1.csv",sep=",", header=None) + + +print(model_atac.head()) + +for i,r in model_atac.iterrows(): + + if r[1] != "H1ESC": + continue + if r[1] in ["HEPG2", "K562"]: + tag="DNASE_PE" + mdir=r[1] + else: + tag="DNASE_SE" + #print(r) + if os.path.isfile(os.path.join(r[2], "train_test_regions_may_7_2024/nonpeaks.testset.bed.gz")): + try: + tdata = pd.read_csv(os.path.join(r[2], "train_test_regions_may_7_2024/nonpeaks.testset.bed.gz")) + continue + except: + pass + + print(os.path.join(r[2], "train_test_regions_may_7_2024/nonpeaks.testset.bed.gz")) + if r[0].split("_")[-1] == "0": + command = "bash script_dnase.sh "+r[2]+" "+r[1]+" "+r[0]+" "+tag + else: + command = "bash script_dnase.sh "+r[2]+" "+r[1]+" "+r[0]+" "+tag+" "+"_"+str(r[0].split("_")[-1]) + + print(command) + os.system(command) + +#/mnt/lab_data2/anusri/chrombpnet/results/chrombpnet/DNASE_SE/H1ESC/n diff --git a/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/make_test_negatives/script.sh b/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/make_test_negatives/script.sh new file mode 100644 index 00000000..66b6e7d4 --- /dev/null +++ 
b/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/make_test_negatives/script.sh @@ -0,0 +1,15 @@ +modeldir=$1 +celll=$2 +foldn=/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/splits_format/$3.json +fold=$4 + +python get_gc_matched_negatives_test.py \ + -c /mnt/lab_data2/anusri/chrombpnet/results/chrombpnet/ATAC_PE/$celll/negatives_data$fold/candidate.negatives.bed \ + -f /mnt/lab_data2/anusri/chrombpnet/results/chrombpnet/ATAC_PE/$celll/negatives_data$fold/foreground.gc.bed \ + -o $modeldir/train_test_regions_may_7_2024/negatives \ + -fl $foldn + +awk -v OFS="\t" '{print $1, $2, $3, ".", ".", ".", ".", ".", ".", "1057"}' $modeldir/train_test_regions_may_7_2024/negatives.bed > $modeldir/train_test_regions_may_7_2024/negatives_with_summit.bed + +gzip -c $modeldir/train_test_regions_may_7_2024/negatives_with_summit.bed > $modeldir/train_test_regions_may_7_2024/nonpeaks.testset.bed.gz + diff --git a/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/make_test_negatives/script_dnase.sh b/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/make_test_negatives/script_dnase.sh new file mode 100644 index 00000000..d8df8a0e --- /dev/null +++ b/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/make_test_negatives/script_dnase.sh @@ -0,0 +1,16 @@ +modeldir=$1 +celll=$2 +foldn=/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/splits_format/$3.json +tag=$4 +fold=$5 + +python get_gc_matched_negatives_test.py \ + -c /mnt/lab_data2/anusri/chrombpnet/results/chrombpnet/$tag/$celll/negatives_data$fold/candidate.negatives.bed \ + -f /mnt/lab_data2/anusri/chrombpnet/results/chrombpnet/$tag/$celll/negatives_data$fold/foreground.gc.bed \ + -o $modeldir/train_test_regions_may_7_2024/negatives \ + -fl $foldn + +awk -v OFS="\t" '{print $1, $2, $3, ".", ".", ".", ".", ".", ".", "1057"}' $modeldir/train_test_regions_may_7_2024/negatives.bed > 
$modeldir/train_test_regions_may_7_2024/negatives_with_summit.bed + +gzip -c $modeldir/train_test_regions_may_7_2024/negatives_with_summit.bed > $modeldir/train_test_regions_may_7_2024/nonpeaks.testset.bed.gz + diff --git a/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/model_upload_utils.py b/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/model_upload_utils.py new file mode 100644 index 00000000..6b5d3447 --- /dev/null +++ b/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/model_upload_utils.py @@ -0,0 +1,235 @@ +import os +import json +import numpy as np + + +### utils for model uploads + +def fetch_per_fold_models(odir, model_dir, encid, fold_num): + input_paths = [] + log_paths = [] + log_paths_opt = [] + + cmb = os.path.join(model_dir, "chrombpnet_model/chrombpnet_wo_bias.h5") + if os.path.isfile(cmb): + input_paths.append((cmb,"model.chrombpnet_nobias.fold_"+str(fold_num)+"."+encid+".h5")) + else: + print(cmb) + return None, None, None + + cmb = os.path.join(model_dir, "chrombpnet_model/chrombpnet.h5") + if os.path.isfile(cmb): + input_paths.append((cmb,"model.chrombpnet.fold_"+str(fold_num)+"."+encid+".h5")) + else: + print(cmb) + return None, None, None + +# checks_file = os.path.join(model_dir, "new_chrombpnet_model/check_passed.txt") +# if os.path.isfile(checks_file): +# cm_model = os.path.join(odir, encid + "/" + model_dir + "/chrombpnet.h5") +# if os.path.isfile(cm_model): +# input_paths.append((cm_model,"model.chrombpnet.fold_"+str(fold_num)+"."+encid+".h5")) +# else: +# print(cm_model) +# return None, None, None +# +# cm_model = os.path.join(odir, encid + "/" + model_dir + "/new_model_formats/chrombpnet.tar") +# if os.path.isfile(cm_model): +# input_paths.append((cm_model,"model.chrombpnet.fold_"+str(fold_num)+"."+encid+".tar")) +# else: +# print(cm_model) +# return None, None, None +# +# +# else: +# cm_model = os.path.join(odir, encid + "/" + model_dir + 
"/new_chrombpnet_model/chrombpnet_new.h5") +# if os.path.isfile(cm_model): +# input_paths.append((cm_model,"model.chrombpnet.fold_"+str(fold_num)+"."+encid+".h5")) +# else: +# print(cm_model) +# return None, None, None +# +# cm_model = os.path.join(odir, encid + "/" + model_dir + "/new_chrombpnet_model/chrombpnet.tar") +# if os.path.isfile(cm_model): +# input_paths.append((cm_model,"model.chrombpnet.fold_"+str(fold_num)+"."+encid+".tar")) +# else: +# print(cm_model) +# return None, None, None + + + bm_model = os.path.join(model_dir, "chrombpnet_model/bias_model_scaled.h5") + if os.path.isfile(bm_model): + input_paths.append((bm_model,"model.bias_scaled.fold_"+str(fold_num)+"."+encid+".h5")) + else: + print(cmb) + return None, None, None + + cmb = os.path.join(model_dir, "new_model_formats_may_7_24_vf/chrombpnet.tar") + if os.path.isfile(cmb): + input_paths.append((cmb,"model.chrombpnet.fold_"+str(fold_num)+"."+encid+".tar")) + else: + print(cmb) + + return None, None, None + + cmb = os.path.join(model_dir, "new_model_formats_may_7_24_vf/chrombpnet_wo_bias.tar") + if os.path.isfile(cmb): + input_paths.append((cmb,"model.chrombpnet_nobias.fold_"+str(fold_num)+"."+encid+".tar")) + else: + print(cmb) + + return None, None, None + + + bm_model = os.path.join(model_dir, "new_model_formats_may_7_24_vf/bias_model_scaled.tar") + if os.path.isfile(bm_model): + input_paths.append((bm_model,"model.bias_scaled.fold_"+str(fold_num)+"."+encid+".tar")) + else: + return None, None, None + + ### fetch main logs + + modelling_log = os.path.join(model_dir, "chrombpnet_model/chrombpnet.args.json") + if os.path.isfile(modelling_log): + log_paths.append((modelling_log,"logfile.modelling.fold_"+str(fold_num)+"."+encid+".args.json")) + else: + print(modelling_log) + + modelling_log = os.path.join(model_dir, "chrombpnet_model/chrombpnet_data_params.tsv") + if os.stat(modelling_log).st_size != 0: + 
log_paths.append((modelling_log,"logfile.modelling.fold_"+str(fold_num)+"."+encid+".chrombpnet_data_params.tsv")) + else: + print(modelling_log) + + modelling_log = os.path.join(model_dir, "chrombpnet_model/chrombpnet_model_params.tsv") + if os.stat(modelling_log).st_size != 0: + log_paths.append((modelling_log,"logfile.modelling.fold_"+str(fold_num)+"."+encid+".chrombpnet_model_params.tsv")) + else: + print(modelling_log) + + modelling_log = os.path.join(model_dir, "chrombpnet_model/chrombpnet.params.json") + if os.stat(modelling_log).st_size != 0: + log_paths.append((modelling_log,"logfile.modelling.fold_"+str(fold_num)+"."+encid+".chrombpnet.params.json")) + else: + print(modelling_log) + + modelling_log = os.path.join(model_dir, "chrombpnet_model/chrombpnet.log") + if os.stat(modelling_log).st_size != 0: + log_paths.append((modelling_log,"logfile.modelling.fold_"+str(fold_num)+"."+encid+".epoch_loss.csv")) + else: + print(modelling_log) + + modelling_log = os.path.join(model_dir, "chrombpnet_model/chrombpnet.log.batch") + if os.stat(modelling_log).st_size != 0: + log_paths.append((modelling_log,"logfile.modelling.fold_"+str(fold_num)+"."+encid+".batch_loss.tsv")) + else: + print(modelling_log) + + modelling_log = os.path.join(model_dir, "chrombpnet_model/train_chrombpnet_model.log") + if os.stat(modelling_log).st_size != 0: + log_paths.append((modelling_log,"logfile.modelling.fold_"+str(fold_num)+"."+encid+".stdout_v1.txt")) + else: + print(modelling_log) + + + return input_paths, log_paths, log_paths_opt + + +### utils for training and testing regions + +def fetch_preprocessing_log_files(odir, encid, main_dir, name): + # do bed file checks + log_paths = [] + + preprocessing_log = os.path.join(main_dir, name + "/data/"+name+"_preprocessing.log") + if os.stat(preprocessing_log).st_size != 0: + log_paths.append((preprocessing_log,"logfile.preprocessing."+encid+".stdout.txt")) + + try: + preprocessing_log = os.path.join(main_dir, name + 
"/data/"+name.lower()+"_atac_fold_0.sh") + if os.stat(preprocessing_log).st_size != 0: + log_paths.append((preprocessing_log,"logfile.preprocessing."+encid+".script_v2.sh")) + except: + try: + preprocessing_log = os.path.join(main_dir, name + "/data/"+name+"_DNASE_PE.sh") + if os.stat(preprocessing_log).st_size != 0: + log_paths.append((preprocessing_log,"logfile.preprocessing."+encid+".script_v2.sh")) + except: + preprocessing_log = os.path.join(main_dir, name + "/data/"+"h1_dnase_fold_0.sh") + if os.stat(preprocessing_log).st_size != 0: + log_paths.append((preprocessing_log,"logfile.preprocessing."+encid+".script_v2.sh")) + + preprocessing_log = os.path.join(main_dir, name + "/data/"+name+"_bias_pwm.png") + if os.stat(preprocessing_log).st_size != 0: + log_paths.append((preprocessing_log,"logfile.preprocessing."+encid+".bias_pwm.png")) + + return log_paths + +def fetch_per_fold_training_data(odir,model_dir,encid, fold_num, main_dir, name): + input_paths = [] + log_paths = [] + + opath = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/splits_format/" + filtered_regions_bed = os.path.join(opath + "/fold_"+str(fold_num)+".json") + if os.path.isfile(filtered_regions_bed): + input_paths.append((filtered_regions_bed,"cv_params.fold_"+str(fold_num)+".json")) + + if fold_num==0: + filtered_regions_bed = os.path.join(main_dir, name+"/negatives_data/negatives_with_summit.bed.gz") + if os.path.isfile(filtered_regions_bed): + input_paths.append((filtered_regions_bed,"nonpeaks.all_input_regions.fold_"+str(fold_num)+"."+encid+".bed.gz")) + else: + filtered_regions_bed = os.path.join(main_dir, name+"/negatives_data_"+str(fold_num)+"/negatives_with_summit.bed.gz") + if os.path.isfile(filtered_regions_bed): + input_paths.append((filtered_regions_bed,"nonpeaks.all_input_regions.fold_"+str(fold_num)+"."+encid+".bed.gz")) + + + filtered_regions_bed = os.path.join(model_dir, "train_test_regions_may_7_2024/peaks.trainingset.bed.gz") + if 
os.path.isfile(filtered_regions_bed): + input_paths.append((filtered_regions_bed,"peaks.trainingset.fold_"+str(fold_num)+"."+encid+".bed.gz")) + + filtered_regions_bed = os.path.join(model_dir, "train_test_regions_may_7_2024/peaks.validationset.bed.gz") + if os.path.isfile(filtered_regions_bed): + input_paths.append((filtered_regions_bed,"peaks.validationset.fold_"+str(fold_num)+"."+encid+".bed.gz")) + + filtered_regions_bed = os.path.join(model_dir, "train_test_regions_may_7_2024/peaks.testset.bed.gz") + if os.path.isfile(filtered_regions_bed): + input_paths.append((filtered_regions_bed,"peaks.testset.fold_"+str(fold_num)+"."+encid+".bed.gz")) + + filtered_regions_bed = os.path.join(model_dir, "train_test_regions_may_7_2024/nonpeaks.trainingset.bed.gz") + if os.path.isfile(filtered_regions_bed): + input_paths.append((filtered_regions_bed,"nonpeaks.trainingset.fold_"+str(fold_num)+"."+encid+".bed.gz")) + + filtered_regions_bed = os.path.join(model_dir, "train_test_regions_may_7_2024/nonpeaks.validationset.bed.gz") + if os.path.isfile(filtered_regions_bed): + input_paths.append((filtered_regions_bed,"nonpeaks.validationset.fold_"+str(fold_num)+"."+encid+".bed.gz")) + + filtered_regions_bed = os.path.join(model_dir, "train_test_regions_may_7_2024/nonpeaks.testset.bed.gz") + if os.path.isfile(filtered_regions_bed): + input_paths.append((filtered_regions_bed,"nonpeaks.testset.fold_"+str(fold_num)+"."+encid+".bed.gz")) + + # preprocessing logs to include + + if fold_num==0: + #negatives_log = os.path.join(temp_dir, name+"/negatives_data/make_background_regions.log") + negatives_log = os.path.join(main_dir, name+"/negatives_data/make_background_regions.log") + + if os.stat(negatives_log).st_size != 0: + log_paths.append((negatives_log,"logfile.gc_matching.fold_"+str(fold_num)+"."+encid+".stdout.txt")) + else: + negatives_log = os.path.join(main_dir, name+"/negatives_data_"+str(fold_num)+"/make_background_regions.log") + if os.stat(negatives_log).st_size != 0: + 
log_paths.append((negatives_log,"logfile.gc_matching.fold_"+str(fold_num)+"."+encid+".stdout.txt")) + + + if fold_num==0: +# negatives_log = os.path.join(temp_dir, "negatives_data/negatives_compared_with_foreground.png") + negatives_log = os.path.join(main_dir, name+"/negatives_data/negatives_compared_with_foreground.png") + if os.stat(negatives_log).st_size != 0: + log_paths.append((negatives_log,"logfile.gc_matching.fold_"+str(fold_num)+"."+encid+".stdout.png")) + else: + negatives_log = os.path.join(main_dir, name+"/negatives_data_"+str(fold_num)+"/negatives_compared_with_foreground.png") + if os.stat(negatives_log).st_size != 0: + log_paths.append((negatives_log,"logfile.gc_matching.fold_"+str(fold_num)+"."+encid+".stdout.png")) + + return input_paths, log_paths diff --git a/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/temp.sh b/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/temp.sh new file mode 100644 index 00000000..128fc01a --- /dev/null +++ b/upload_jsons/upload_jsons_scripts/model_uploads/bias_models/chrombpnet/temp.sh @@ -0,0 +1,2 @@ +gzip -c /mnt/lab_data2/anusri/chrombpnet/results/chrombpnet/DNASE_SE/H1ESC/negatives_data_4/negatives_with_summit.bed > /mnt/lab_data2/anusri/chrombpnet/results/chrombpnet/DNASE_SE/H1ESC/negatives_data_4/negatives_with_summit.bed.gz + diff --git a/upload_jsons/upload_jsons_scripts/model_uploads/chrombpnet_models/chrombpnet/READMEs/bias.models.README b/upload_jsons/upload_jsons_scripts/model_uploads/chrombpnet_models/chrombpnet/READMEs/bias.models.README new file mode 100644 index 00000000..315b971b --- /dev/null +++ b/upload_jsons/upload_jsons_scripts/model_uploads/chrombpnet_models/chrombpnet/READMEs/bias.models.README @@ -0,0 +1,92 @@ +# Directory Structure Format +. 
+├── fold_0 +│ ├── model.bias.fold_0.encid.h5 # bias model in .h5 format +│ ├── model.bias.fold_0.encid.tar # bias model in SavedModel format +│ │ after being untarred, it results in a directory named "bias" +│ └── logs.bias.models.fold_0.encid # folder containing log files for training models +│ +├── fold_1 +│ └── ... # similar directory structure as fold_0 directory above +│ +├── fold_2 +│ └── ... # similar directory structure as fold_0 directory above +│ +├── fold_3 +│ └── ... # similar directory structure as fold_0 directory above +│ +└── fold_4 + └── ... # similar directory structure as fold_0 directory above + + +# Pseudocode for loading models in .h5 format + +(1) Use the code in python after appropriately defining `model_in_h5_format` and `inputs`. +(2) `inputs` is a one hot encoded sequence of shape (N,2114,4). Here N corresponds to the +number of tested sequences, 2114 is the input sequence length and 4 corresponds to [A,C,G,T]. + +``` +import tensorflow as tf +from tensorflow.keras.utils import get_custom_objects +from tensorflow.keras.models import load_model + +custom_objects={"tf": tf} +get_custom_objects().update(custom_objects) + +model=load_model(model_in_h5_format,compile=False) +outputs = model(inputs) +``` + +The list `outputs` consists of two elements. The first element has a shape of (N, 1000) and +contains logit predictions for a 1000-base-pair output. The second element, with a shape of +(N, 1), contains logcount predictions. To transform these predictions into per-base signals, +follow the provided pseudo code lines below. 
+ +``` +import numpy as np + +def softmax(x, temp=1): + norm_x = x - np.mean(x,axis=1, keepdims=True) + return np.exp(temp*norm_x)/np.sum(np.exp(temp*norm_x), axis=1, keepdims=True) + +predictions = softmax(outputs[0]) * (np.exp(outputs[1])-1) +``` + +# Pseudocode for loading models in .tar format + +(1) First untar the directory as follows `tar -xvf model.tar` +(2) Use the code below in python after appropriately defining `model_dir_untared` and `inputs` +(3) `inputs` is a one hot encoded sequence of shape (N,2114,4). Here N corresponds to the number +of tested sequences, 2114 is the input sequence length and 4 corresponds to ACGT. + +Reference: https://www.tensorflow.org/api_docs/python/tf/saved_model/load + +``` +import tensorflow as tf + +model = tf.saved_model.load('model_dir_untared') +outputs = model.signatures['serving_default'](**{'sequence':inputs.astype('float32')}) +``` + +The variable `outputs` represents a dictionary containing two key-value pairs. The first key +is `logits_profile_predictions`, holding a value with a shape of (N, 1000). This value corresponds +to logit predictions for a 1000-base-pair output. The second key, named `logcount_predictions`, +is associated with a value of shape (N, 1), representing logcount predictions. To transform these +predictions into per-base signals, utilize the provided pseudo code lines mentioned below. 
+ +``` +import numpy as np +def softmax(x, temp=1): + norm_x = x - np.mean(x,axis=1, keepdims=True) + return np.exp(temp*norm_x)/np.sum(np.exp(temp*norm_x), axis=1, keepdims=True) + +predictions = softmax(outputs["logits_profile_predictions"]) * (np.exp(outputs["logcount_predictions"])-1) +``` + +# Docker image to load and use the models + +https://hub.docker.com/r/kundajelab/chrombpnet-atlas/ (tag:v1) + +# Tool box to do downstream analysis with the models + +https://github.com/kundajelab/chrombpnet/wiki diff --git a/upload_jsons/upload_jsons_scripts/model_uploads/chrombpnet_models/chrombpnet/READMEs/bias.training.README b/upload_jsons/upload_jsons_scripts/model_uploads/chrombpnet_models/chrombpnet/READMEs/bias.training.README new file mode 100644 index 00000000..8faa0ea2 --- /dev/null +++ b/upload_jsons/upload_jsons_scripts/model_uploads/chrombpnet_models/chrombpnet/READMEs/bias.training.README @@ -0,0 +1,63 @@ +# Directory Structure Format +. +├── peaks.all_input_regions.encid.bed.gz # Peaks input to the bias training script +├── logs.bias.training_test_regions.encid # folder containing log files for peak and nonpeak generation scripts +│ +├── fold_0 +│ ├── cv_params.fold_0.json # training, validation and test chromosomes used in fold 0 +│ ├── nonpeaks.all_input_regions.fold_0.encid.bed.gz # Non peaks input to the bias training script +│ ├── nonpeaks.trainingset.fold_0.encid.bed.gz # nonpeaks used in training set of fold 0 bias model +│ ├── nonpeaks.validationset.fold_0.encid.bed.gz # nonpeaks used in validation set of fold 0 bias model +│ ├── nonpeaks.testset.fold_0.encid.bed.gz # nonpeaks used in test set of fold 0 bias model +│ └── logs.bias.training_test_regions.fold_0.encid # folder containing log files for training bias model on fold 0 +│ +├── fold_1 +│ └── ... # similar directory structure as fold_0 directory above +│ +├── fold_2 +│ └── ... # similar directory structure as fold_0 directory above +│ +├── fold_3 +│ └── ... 
# similar directory structure as fold_0 directory above +│ +└── fold_4 + └── ... # similar directory structure as fold_0 directory above + +# Bed File Format for Peaks + +* All the bed files are in narrowpeak format with 10 columns. + +1) chrom - Name of the chromosome (or contig, scaffold, etc.). +2) chromStart - The starting position of the feature in the chromosome or scaffold. The first base in a chromosome is numbered 0. +3) chromEnd - The ending position of the feature in the chromosome or scaffold. The chromEnd base is not included in the display of the feature. For example, the first 100 bases of a chromosome are defined as chromStart=0, chromEnd=100, and span the bases numbered 0-99. +4) name - Name given to a region (preferably unique). Use "." if no name is assigned. +5) score - Indicates how dark the peak will be displayed in the browser (0-1000). If all scores were "'0"' when the data were submitted to the DCC, the DCC assigned scores 1-1000 based on signal value. Ideally the average signalValue per base spread is between 100-1000. +6) strand - +/- to denote strand or orientation (whenever applicable). Use "." if no orientation is assigned. +7) signalValue - Measurement of overall (usually, average) enrichment for the region. +8) pValue - Measurement of statistical significance (-log10). Use -1 if no pValue is assigned. +9) qValue - Measurement of statistical significance using false discovery rate (-log10). Use -1 if no qValue is assigned. +10) peak - Point-source called for this peak; 0-based offset from chromStart. Use -1 if no point-source called. + +# Bed File Format for Nonpeaks + +* All the bed files are in narrowpeak format with 10 columns. + +1) chrom - Name of the chromosome (or contig, scaffold, etc.). +2) chromStart - The starting position of the feature in the chromosome or scaffold. The first base in a chromosome is numbered 0. +3) chromEnd - The ending position of the feature in the chromosome or scaffold. 
The chromEnd base is not included in the display of the feature. For example, the first 100 bases of a chromosome are defined as chromStart=0, chromEnd=100, and span the bases numbered 0-99. +4) empty character - "." +5) empty character - "." +6) empty character - "." +7) empty character - "." +8) empty character - "." +9) empty character - "." +10) (chromEnd-chromStart)/2 + +# Format of file `cv_params.fold_0.json` + +A dictionary with following (key,value) pairs, + +1) ("CV_type", "chr_holdout") +2) ("train", list_of_chrs_trainingset) +3) ("valid", list_of_chrs_validationset) +4) ("test", list_of_chrs_testset) diff --git a/upload_jsons/upload_jsons_scripts/model_uploads/chrombpnet_models/chrombpnet/atac_bias_model_chrombpnet.csv b/upload_jsons/upload_jsons_scripts/model_uploads/chrombpnet_models/chrombpnet/atac_bias_model_chrombpnet.csv new file mode 100644 index 00000000..15190cf2 --- /dev/null +++ b/upload_jsons/upload_jsons_scripts/model_uploads/chrombpnet_models/chrombpnet/atac_bias_model_chrombpnet.csv @@ -0,0 +1,26 @@ +fold_0,GM12878,/mnt/lab_data2/anusri/chrombpnet/results/chrombpnet/ATAC_PE/GM12878/nautilus_runs/GM12878_03.01.2022_bias_128_4_1234_0.4_fold_0 +fold_1,GM12878,/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/GM12878/GM12878_07.08.2022_bias_128_4_1234_0.4_fold_1_data_type_ATAC_PE +fold_2,GM12878,/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/GM12878/GM12878_07.08.2022_bias_128_4_1234_0.4_fold_2_data_type_ATAC_PE +fold_3,GM12878,/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/GM12878/GM12878_07.14.2022_bias_128_4_1234_0.4_fold_3_data_type_ATAC_PE +fold_4,GM12878,/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/GM12878/GM12878_07.07.2022_bias_128_4_1234_0.4_fold_4_data_type_ATAC_PE 
+fold_0,K562,/mnt/lab_data2/anusri/chrombpnet/results/chrombpnet/ATAC_PE/K562/nautilus_runs/K562_02.17.2022_bias_128_4_1234_0.5_fold_0 +fold_1,K562,/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/K562/K562_07.07.2022_bias_128_4_2356_0.5_fold_1_data_type_ATAC_PE +fold_2,K562,/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/K562/K562_07.07.2022_bias_128_4_2356_0.5_fold_2_data_type_ATAC_PE +fold_3,K562,/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/K562/K562_07.07.2022_bias_128_4_2356_0.5_fold_3_data_type_ATAC_PE +fold_4,K562,/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/K562/K562_07.07.2022_bias_128_4_2356_0.5_fold_4_data_type_ATAC_PE +fold_0,HEPG2,/mnt/lab_data2/anusri/chrombpnet/results/chrombpnet/ATAC_PE/HEPG2/nautilus_runs_jun16/HEPG2_05.09.2022_bias_128_4_1234_0.8_fold_0 +fold_1,HEPG2,/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/HEPG2/HEPG2_06.07.2022_bias_128_4_1234_0.8_fold_1 +fold_2,HEPG2,/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/HEPG2/HEPG2_05.24.2022_bias_128_4_1234_0.8_fold_2 +fold_3,HEPG2,/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/HEPG2/HEPG2_05.22.2022_bias_128_4_1234_0.8_fold_3 +fold_4,HEPG2,/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/HEPG2/HEPG2_05.22.2022_bias_128_4_1234_0.8_fold_4 +fold_0,IMR90,/mnt/lab_data2/anusri/chrombpnet/results/chrombpnet/ATAC_PE/IMR90/nautilus_runs_apr12/IMR90_04.09.2022_bias_128_4_1234_0.4_fold_0 +fold_1,IMR90,/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/IMR90/IMR90_07.17.2022_bias_128_4_1234_0.3_fold_1_data_type_ATAC_PE +fold_2,IMR90,/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/IMR90/IMR90_07.17.2022_bias_128_4_1234_0.3_fold_2_data_type_ATAC_PE 
import os
import json

import upload_utils

# Root of the per-experiment ATAC processing outputs on oak.
odir = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/ATAC/"
bw_odir = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/full_deepshaps/bigwigs/ATAC/"
# Per-fold chrombpnet model directory names, indexed by fold number 0..4.
models_path = ["chrombpnet_model_feb15", "chrombpnet_model_feb15_fold_1",
               "chrombpnet_model_feb15_fold_2", "chrombpnet_model_feb15_fold_3",
               "chrombpnet_model_feb15_fold_4"]
output_dir = "atac_production_uploads/"
# Experiments that passed QC.
encids = [line.strip() for line in open("data/atac_passed.txt").readlines()]

# Cell line -> ENCODE (or GEO) experiment accession.
encode_id = {"K562": "ENCSR868FGK",
             "GM12878": "ENCSR637XSC",
             "HEPG2": "ENCSR291GJU",
             "IMR90": "ENCSR200OML",
             "H1ESC": "GSE267154"}


def main_fetch_preprocessing_files(encid, args_json):
    """Populate args_json with experiment, input bam and bigWig metadata.

    Returns (success, args_json); success is False when the merged bigWig or
    the input bam accessions cannot be located.
    """
    args_json["upload bias"] = False
    args_json["bias model encid"] = encid

    preprocessing_path = os.path.join(odir, encid + "/preprocessing/bigWigs/" + encid + ".bigWig")
    if not os.path.isfile(preprocessing_path):
        return False, args_json

    bam_ids = upload_utils.fetch_input_bam_ids(odir, encid)
    # FIX: was `bam_ids == None` plus a confused success_flag/success pair of
    # return variables; every failure path now clearly returns False.
    if bam_ids is None:
        return False, args_json

    args_json["experiment"] = encid
    args_json["bam files"] = bam_ids
    args_json["assay"] = "ATAC-seq"
    args_json["observed signal profile bigWig"] = preprocessing_path
    return True, args_json


def main_fetch_model_files(encid, args_json):
    """Populate args_json["models tar"] with per-fold model files and logs.

    Returns (success, args_json); False when any fold is missing a required
    model file.
    """
    args_json["models tar"] = {}
    readme_file = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/READMES/models.README"
    assert os.path.isfile(readme_file)
    args_json["models tar"]["file.paths"] = [(readme_file, "README.md")]
    args_json["models tar"]["logs.models." + encid] = {"file.paths": None}

    for i in range(5):
        data_paths, log_paths, log_paths_opt = upload_utils.fetch_per_fold_models(
            odir, models_path[i], encid, i)
        if data_paths is None:
            return False, args_json

        fold_key = "fold_" + str(i)
        args_json["models tar"][fold_key] = {}
        args_json["models tar"][fold_key]["file.paths"] = data_paths
        args_json["models tar"][fold_key]["logs.models." + fold_key + "." + encid] = {
            "file.paths": log_paths + log_paths_opt}
        # 6 model files and 13 required logs expected per fold.
        assert len(data_paths) == 6
        assert len(log_paths) == 13

    return True, args_json


def main_fetch_training_files(encid, args_json):
    """Populate args_json with input peak/nonpeak beds and per-fold
    train/validation/test region files.

    Returns (success, args_json).
    """
    args_json["training and test regions tar"] = {}
    readme_file = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/READMES/training_test_regions.README"
    assert os.path.isfile(readme_file)
    args_json["training and test regions tar"]["file.paths"] = [(readme_file, "README.md")]

    input_peaks = os.path.join(odir, encid + "/preprocessing/downloads/peaks.bed.gz")
    if not os.path.isfile(input_peaks):
        return False, args_json
    args_json["training and test regions tar"]["file.paths"].append(
        (input_peaks, "peaks.all_input_regions." + encid + ".bed.gz"))

    # Compress the negatives bed on demand.
    # FIX: only regenerate the .gz when it does not already exist (the dnase
    # variant of this script already guarded this; the atac one re-wrote it
    # on every run).
    nonpeaks_bed = os.path.join(odir, encid + "/negatives_data/negatives_with_summit.bed")
    nonpeaks_gz = nonpeaks_bed + ".gz"
    if not os.path.isfile(nonpeaks_gz) and os.path.isfile(nonpeaks_bed):
        import pandas as pd  # local import: only needed for this one-off conversion
        nonpeaks_data = pd.read_csv(nonpeaks_bed, sep="\t", header=None)
        nonpeaks_data.to_csv(nonpeaks_gz, sep="\t", header=False, index=False,
                             compression="gzip")

    if not os.path.isfile(nonpeaks_gz):
        return False, args_json
    args_json["training and test regions tar"]["file.paths"].append(
        (nonpeaks_gz, "nonpeaks.all_input_regions." + encid + ".bed.gz"))

    log_paths = upload_utils.fetch_preprocessing_log_files(odir, encid)
    args_json["training and test regions tar"]["logs.training_test_regions." + encid] = {
        "file.paths": log_paths}
    assert len(log_paths) == 12

    for i in range(5):
        data_paths, fold_logs = upload_utils.fetch_per_fold_training_data(
            odir, models_path[i], encid, i)

        fold_key = "fold_" + str(i)
        args_json["training and test regions tar"][fold_key] = {}
        args_json["training and test regions tar"][fold_key]["file.paths"] = data_paths
        args_json["training and test regions tar"][fold_key][
            "logs.training_test_regions." + fold_key + "." + encid] = {"file.paths": fold_logs}
        # 7 region beds and 4 logs expected per fold (the old redundant
        # `if len(data_paths) != 7` after the assert was dead code).
        assert len(data_paths) == 7
        assert len(fold_logs) == 4

    return True, args_json


if __name__ == "__main__":
    for name in ["K562", "GM12878", "HEPG2", "IMR90", "H1ESC"]:
        encid = encode_id[name]
        out_json = output_dir + "/" + encid + ".json"
        if os.path.isfile(out_json):
            continue  # already generated

        print(encid)
        args_json = {}

        success, args_json = main_fetch_preprocessing_files(encid, args_json)
        if not success:
            print("fail prep")
            continue

        success, args_json = main_fetch_model_files(encid, args_json)
        if not success:
            print("fail model")
            continue

        success, args_json = main_fetch_training_files(encid, args_json)
        if not success:
            print("fail train prep")
            continue

        with open(out_json, "w") as outfile:
            json.dump(args_json, outfile, indent=4)
import os
import json

import upload_utils

# Root of the per-experiment DNase processing outputs on oak.
odir = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/DNASE/"
bw_odir = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/full_deepshaps/bigwigs/DNASE/"
output_dir = "dnase_production_uploads/"

# Experiments that passed QC, grouped by sample type.
tissue_encids = [line.strip() for line in open("../data/tissue_passed.txt").readlines()]
primary_encids = [line.strip() for line in open("../data/primary_passed.txt").readlines()]
celline_encids = [line.strip() for line in open("../data/cellline_passed.txt").readlines()]
invitro_encids = [line.strip() for line in open("../data/invitro_passed.txt").readlines()]

# Per-fold model directory names for each sample-type-specific bias model.
# NOTE: the "chrombppnet" spellings are the literal on-disk directory names.
# FIX: this first list was previously named `ary_models_path`, so the
# `models_path = primary_models_path` lookup in __main__ raised a NameError.
primary_models_path = ["chrombppnet_model_encsr283tme_bias", "chrombppnet_model_encsr283tme_bias_fold_1", "chrombppnet_model_encsr283tme_bias_fold_2", "chrombppnet_model_encsr283tme_bias_fold_3", "chrombppnet_model_encsr283tme_bias_fold_4"]
celline_models_path = ["chrombpnet_model_feb15_fold_0", "chrombpnet_model_feb15_fold_1", "chrombpnet_model_feb15_fold_2", "chrombpnet_model_feb15_fold_3", "chrombpnet_model_feb15_fold_4"]
tissue_models_path = ["chrombpnet_model_encsr880cub_bias", "chrombppnet_model_encsr880cub_bias_fold_1", "chrombppnet_model_encsr880cub_bias_fold_2", "chrombppnet_model_encsr880cub_bias_fold_3", "chrombppnet_model_encsr880cub_bias_fold_4"]
invitro_models_path = ["chrombpnet_model_encsr146kfx_bias", "chrombpnet_model_encsr146kfx_bias_fold_1", "chrombpnet_model_encsr146kfx_bias_fold_2", "chrombpnet_model_encsr146kfx_bias_fold_3", "chrombpnet_model_encsr146kfx_bias_fold_4"]

encids = tissue_encids + primary_encids + celline_encids + invitro_encids


def main_fetch_preprocessing_files(encid, args_json, bias_encid):
    """Populate args_json with experiment, input bam and bigWig metadata.

    bias_encid is the accession of the shared bias model for this sample type.
    Returns (success, args_json).
    """
    args_json["upload bias"] = False
    args_json["bias model encid"] = bias_encid

    preprocessing_path = os.path.join(odir, encid + "/preprocessing/bigWigs/" + encid + ".bigWig")
    if not os.path.isfile(preprocessing_path):
        return False, args_json

    bam_ids = upload_utils.fetch_input_bam_ids(odir, encid)
    if bam_ids is None:  # FIX: was `== None` with a stale success_flag variable
        return False, args_json

    args_json["experiment"] = encid
    args_json["bam files"] = bam_ids
    args_json["assay"] = "DNase-seq"
    args_json["observed signal profile bigWig"] = preprocessing_path
    return True, args_json


def main_fetch_model_files(encid, args_json):
    """Populate args_json["models tar"] with per-fold model files and logs.

    Reads the module-global `models_path` selected in __main__ for this
    sample type. Returns (success, args_json).
    """
    args_json["models tar"] = {}
    readme_file = "READMEs/bias.models.README"
    assert os.path.isfile(readme_file)
    args_json["models tar"]["file.paths"] = [(readme_file, "README.md")]

    for i in range(5):
        data_paths, log_paths, log_paths_opt = upload_utils.fetch_per_fold_models(
            odir, models_path[i], encid, i)
        if data_paths is None:
            return False, args_json

        fold_key = "fold_" + str(i)
        args_json["models tar"][fold_key] = {}
        args_json["models tar"][fold_key]["file.paths"] = data_paths
        args_json["models tar"][fold_key]["logs.models." + fold_key + "." + encid] = {
            "file.paths": log_paths + log_paths_opt}
        # 6 model files and 7 required logs expected per fold.
        assert len(data_paths) == 6
        assert len(log_paths) == 7

    return True, args_json


def main_fetch_training_files(encid, args_json):
    """Populate args_json with input peak/nonpeak beds and per-fold
    train/validation/test region files.

    Returns (success, args_json).
    """
    args_json["training and test regions tar"] = {}
    readme_file = "READMEs/bias.models.README"
    assert os.path.isfile(readme_file)
    args_json["training and test regions tar"]["file.paths"] = [(readme_file, "README.md")]

    input_peaks = os.path.join(odir, encid + "/preprocessing/downloads/peaks.bed.gz")
    if not os.path.isfile(input_peaks):
        return False, args_json
    args_json["training and test regions tar"]["file.paths"].append(
        (input_peaks, "peaks.all_input_regions." + encid + ".bed.gz"))

    # Compress the negatives bed on demand; skip when the .gz already exists.
    nonpeaks_bed = os.path.join(odir, encid + "/negatives_data/negatives_with_summit.bed")
    nonpeaks_gz = nonpeaks_bed + ".gz"
    if not os.path.isfile(nonpeaks_gz) and os.path.isfile(nonpeaks_bed):
        import pandas as pd  # local import: only needed for this one-off conversion
        nonpeaks_data = pd.read_csv(nonpeaks_bed, sep="\t", header=None)
        nonpeaks_data.to_csv(nonpeaks_gz, sep="\t", header=False, index=False,
                             compression="gzip")

    if not os.path.isfile(nonpeaks_gz):
        return False, args_json
    args_json["training and test regions tar"]["file.paths"].append(
        (nonpeaks_gz, "nonpeaks.all_input_regions." + encid + ".bed.gz"))

    log_paths = upload_utils.fetch_preprocessing_log_files(odir, encid)
    args_json["training and test regions tar"]["logs.training_test_regions." + encid] = {
        "file.paths": log_paths}
    assert len(log_paths) == 12

    for i in range(5):
        data_paths, fold_logs = upload_utils.fetch_per_fold_training_data(
            odir, models_path[i], encid, i)

        fold_key = "fold_" + str(i)
        args_json["training and test regions tar"][fold_key] = {}
        args_json["training and test regions tar"][fold_key]["file.paths"] = data_paths
        args_json["training and test regions tar"][fold_key][
            "logs.training_test_regions." + fold_key + "." + encid] = {"file.paths": fold_logs}
        # 7 region beds and no per-fold logs expected (the old redundant
        # `if len(data_paths) != 7` after the assert was dead code).
        assert len(data_paths) == 7
        assert len(fold_logs) == 0

    return True, args_json


if __name__ == "__main__":
    for encid in ["ENCSR000EMT", "ENCSR477RTP"]:
        # Pick the model directories and shared bias model for this sample type.
        if encid in primary_encids:
            models_path = primary_models_path
            bias_encid = "ENCSR283TME"
        elif encid in tissue_encids:
            models_path = tissue_models_path
            bias_encid = "ENCSR880CUB"
        elif encid in invitro_encids:
            models_path = invitro_models_path
            bias_encid = "ENCSR146KFX"
        elif encid in celline_encids:
            models_path = celline_models_path
            bias_encid = "ENCSR149XIL"
        else:
            print(encid)
            print("type not found")
            continue

        if os.path.isfile(output_dir + "/" + encid + ".json"):
            continue  # already generated

        print(encid)
        args_json = {}

        success, args_json = main_fetch_preprocessing_files(encid, args_json, bias_encid)
        if not success:
            print(encid)
            print("exit preprocessing")
            continue

        success, args_json = main_fetch_model_files(encid, args_json)
        if not success:
            print(encid)
            print("exit models")
            continue

        success, args_json = main_fetch_training_files(encid, args_json)
        if not success:
            print(encid)
            print("exit train test regions")
            continue

        with open(output_dir + "/" + encid + ".json", "w") as outfile:
            json.dump(args_json, outfile, indent=4)
def fetch_input_bam_ids(odir, encid):
    """Recover the input BAM accessions of an experiment from its
    preprocessing log.

    The log either records a single `cp` of one source bam or a
    `samtools merge` of several; the bam accessions are taken from the
    file names. Returns a list of accessions, or None when the log line
    following the trigger message does not have the expected shape.
    """
    log_path = os.path.join(odir, encid + "/preprocessing/preprocess_" + encid + ".log")
    logd = open(log_path).readlines()
    set_cflag = False
    set_bflag = False

    bams_ids = []

    for line in logd:
        if set_cflag:
            # Single-source case: the next line is "<ts> cp <src.bam> <dst>".
            words = line.strip().split()
            if words[1] == "cp" and words[2].split("/")[-1].endswith("bam"):
                bams_ids.append(words[2].split("/")[-1].replace(".bam", ""))
                return bams_ids
            print(encid, "error")
            return None

        if set_bflag:
            # Multi-source case: "<ts> samtools merge <opts> <out> <in1.bam> ...";
            # inputs start at token 6.
            words = line.strip().split()
            if words[1] == "samtools" and words[2] == "merge":
                # FIX: the loop variable used to be named `encid`, shadowing
                # the function parameter (and corrupting the error message).
                for source in words[6:]:
                    if source.split("/")[-1].endswith(".bam"):
                        bams_ids.append(source.split("/")[-1].replace(".bam", ""))
                    else:
                        print(encid, "error")
                        return None
                return bams_ids
            print(encid, "error")
            return None

        if "Only one source bam file found. Copying over as merged file." in line:
            set_cflag = True
        if "Merging bam files" in line:
            set_bflag = True

    # Neither trigger message found in the log.
    return None
def fetch_preprocessing_log_files(odir, encid):
    """Collect (source_path, archive_name) pairs for the preprocessing,
    peak-calling and GC-matching (negatives) logs of one experiment.

    The six core preprocessing logs and the gc_matching logs are stat()ed
    directly, so a missing one raises FileNotFoundError; the peak-calling
    and per-run negatives logs are optional. Empty files are skipped.
    """
    log_paths = []

    # preprocessing (6 files, required)
    preprocessing_log = os.path.join(odir, encid + "/preprocessing/preprocessing.log.e")
    if os.stat(preprocessing_log).st_size != 0:
        log_paths.append((preprocessing_log, "logfile.preprocessing." + encid + ".stderr.txt"))

    preprocessing_log = os.path.join(odir, encid + "/preprocessing/preprocessing.log.o")
    if os.stat(preprocessing_log).st_size != 0:
        log_paths.append((preprocessing_log, "logfile.preprocessing." + encid + ".stdout.txt"))

    preprocessing_log = os.path.join(odir, encid + "/preprocessing/" + encid + ".log")
    if os.stat(preprocessing_log).st_size != 0:
        log_paths.append((preprocessing_log, "logfile.preprocessing." + encid + ".stdout_v1.txt"))

    preprocessing_log = os.path.join(odir, encid + "/preprocessing/preprocess_" + encid + ".log")
    if os.stat(preprocessing_log).st_size != 0:
        log_paths.append((preprocessing_log, "logfile.preprocessing." + encid + ".stdout_v2.txt"))

    preprocessing_log = os.path.join(odir, encid + "/preprocessing/params_file.json")
    if os.stat(preprocessing_log).st_size != 0:
        log_paths.append((preprocessing_log, "logfile.preprocessing." + encid + ".params_file.json"))

    preprocessing_log = os.path.join(odir, encid + "/preprocessing/bigWigs/" + encid + ".png")
    if os.stat(preprocessing_log).st_size != 0:
        log_paths.append((preprocessing_log, "logfile.preprocessing." + encid + ".bias_pwm.png"))

    # peak_calling logs (2 files, optional; no size check in the original tooling)
    negatives_log = os.path.join(odir, encid + "/peak_calling/log.e")
    if os.path.isfile(negatives_log):
        log_paths.append((negatives_log, "logfile.peak_calling." + encid + ".stdout_v1.txt"))

    negatives_log = os.path.join(odir, encid + "/peak_calling/log.o")
    if os.path.isfile(negatives_log):
        log_paths.append((negatives_log, "logfile.peak_calling." + encid + ".stdout_v2.txt"))

    # negatives / gc-matching logs (up to 5 files; first two optional)
    negatives_log = os.path.join(odir, encid + "/negatives_data/make_background_regions.log")
    if os.path.isfile(negatives_log):
        if os.stat(negatives_log).st_size != 0:
            log_paths.append((negatives_log, "logfile.gc_matching." + encid + ".stdout_v1.txt"))

    negatives_log = os.path.join(odir, encid + "/negatives_data/" + encid + ".log")
    if os.path.isfile(negatives_log):
        if os.stat(negatives_log).st_size != 0:
            # FIX: this archive name used to duplicate the stdout_v1 name above,
            # producing two identically named files in the upload tar when both
            # logs exist.
            log_paths.append((negatives_log, "logfile.gc_matching." + encid + ".stdout_v2.txt"))

    negatives_log = os.path.join(odir, encid + "/negatives_data/gc_matching.log.o")
    if os.stat(negatives_log).st_size != 0:
        log_paths.append((negatives_log, "logfile.gc_matching." + encid + ".stdout.txt"))

    negatives_log = os.path.join(odir, encid + "/negatives_data/gc_matching.log.e")
    if os.stat(negatives_log).st_size != 0:
        log_paths.append((negatives_log, "logfile.gc_matching." + encid + ".stderr.txt"))

    negatives_log = os.path.join(odir, encid + "/negatives_data/negatives_compared_with_foreground.png")
    if os.stat(negatives_log).st_size != 0:
        log_paths.append((negatives_log, "logfile.gc_matching." + encid + ".stdout.png"))

    return log_paths
def fetch_per_fold_training_data(odir, model_dir, encid, fold_num):
    """Gather (source_path, archive_name) pairs for one fold's training,
    validation and test region files.

    Returns a tuple (input_paths, log_paths); log_paths is always empty
    here. Files that do not exist on disk are silently skipped.
    """
    fold = str(fold_num)
    input_paths = []
    log_paths = []

    # Fold split definition, shared across experiments.
    opath = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/splits_format/"
    split_json = os.path.join(opath + "/fold_" + fold + ".json")
    if os.path.isfile(split_json):
        input_paths.append((split_json, "cv_params.fold_" + fold + ".json"))

    # Per-fold filtered peak/nonpeak region beds produced during training.
    region_root = os.path.join(odir, encid, model_dir, "train_test_regions_may_7_2024")
    for stem in ("peaks.trainingset", "peaks.validationset", "peaks.testset",
                 "nonpeaks.trainingset", "nonpeaks.validationset"):
        src = os.path.join(region_root, stem + ".bed.gz")
        if os.path.isfile(src):
            input_paths.append((src, stem + ".fold_" + fold + "." + encid + ".bed.gz"))

    # The nonpeak test set comes from the GC-matched negatives directory.
    neg_test = os.path.join(
        odir, encid, "negatives_data", "test",
        "test.fold_" + fold + ".filtered.negatives_with_summit.bed.gz")
    if os.path.isfile(neg_test):
        input_paths.append((neg_test, "nonpeaks.testset.fold_" + fold + "." + encid + ".bed.gz"))

    return input_paths, log_paths
def fetch_per_fold_models(odir, model_dir, encid, fold_num):
    """Collect model artifacts and training logs for a single fold.

    Returns (input_paths, log_paths, log_paths_opt) as lists of
    (source_path, archive_name) tuples, or (None, None, None) when any of
    the six required model files is absent. The seven main logs are expected
    to exist (os.stat raises on a missing one); empty main logs and
    empty/missing optional logs are only reported via print.
    """
    fold = str(fold_num)
    run_dir = os.path.join(odir, encid, model_dir)
    tar_dir = "new_model_formats_may_7_24_vf"

    required_models = [
        ("chrombpnet_wo_bias.h5",
         "model.chrombpnet_nobias.fold_" + fold + "." + encid + ".h5"),
        ("chrombpnet.h5",
         "model.chrombpnet.fold_" + fold + "." + encid + ".h5"),
        ("bias_model_scaled.h5",
         "model.bias_scaled.fold_" + fold + "." + encid + ".h5"),
        (tar_dir + "/chrombpnet_wo_bias.tar",
         "model.chrombpnet_nobias.fold_" + fold + "." + encid + ".tar"),
        (tar_dir + "/chrombpnet.tar",
         "model.chrombpnet.fold_" + fold + "." + encid + ".tar"),
        (tar_dir + "/bias_model_scaled.tar",
         "model.bias_scaled.fold_" + fold + "." + encid + ".tar"),
    ]

    # The original tooling echoes the first probed model path.
    print(os.path.join(run_dir, "chrombpnet_wo_bias.h5"))

    input_paths = []
    for rel, archive_name in required_models:
        src = os.path.join(run_dir, rel)
        if not os.path.isfile(src):
            return None, None, None
        input_paths.append((src, archive_name))

    prefix = "logfile.modelling.fold_" + fold + "." + encid
    main_logs = [
        ("chrombpnet.args.json", prefix + ".args.json"),
        ("chrombpnet_data_params.tsv", prefix + ".chrombpnet_data_params.tsv"),
        ("chrombpnet_model_params.tsv", prefix + ".chrombpnet_model_params.tsv"),
        ("chrombpnet.params.json", prefix + ".chrombpnet.params.json"),
        ("chrombpnet.log", prefix + ".epoch_loss.csv"),
        ("chrombpnet.log.batch", prefix + ".batch_loss.tsv"),
        ("train_chrombpnet_model.log", prefix + ".stdout_v1.txt"),
    ]
    log_paths = []
    for rel, archive_name in main_logs:
        src = os.path.join(run_dir, rel)
        if os.stat(src).st_size != 0:
            log_paths.append((src, archive_name))
        else:
            print(src)

    optional_logs = [
        ("modelling.log.e", prefix + ".stderr.txt"),
        ("modelling.log.o", prefix + ".stdout.txt"),
    ]
    log_paths_opt = []
    for rel, archive_name in optional_logs:
        src = os.path.join(run_dir, rel)
        if os.path.isfile(src) and os.stat(src).st_size != 0:
            log_paths_opt.append((src, archive_name))
        else:
            print(src)

    return input_paths, log_paths, log_paths_opt
import os
import json

# Cell lines to package, and their ENCODE/GEO experiment accessions.
encids = ["IMR90", "H1ESC", "GM12878", "HEPG2", "K562"]

encode_id = {"K562": "ENCSR868FGK",
             "GM12878": "ENCSR637XSC",
             "HEPG2": "ENCSR291GJU",
             "IMR90": "ENCSR200OML",
             "H1ESC": "GSE267154"}

ooutdir = 'atac/'

# Build one upload json per cell line describing the tfmodisco outputs
# (raw h5s plus meme motif files, for both the counts and profile heads).
for name in encids:
    encid = encode_id[name]
    args_json = {}
    args_json["experiment"] = encid
    args_json["sequence motifs tar"] = {}

    success = True
    readme_file = "READMEs/modisco.report.README"
    if os.path.isfile(readme_file):
        args_json["sequence motifs tar"]["file.paths"] = [(readme_file, "README.md")]

    args_json["sequence motifs tar"]["counts"] = {"file.paths": []}
    args_json["sequence motifs tar"]["profile"] = {"file.paths": []}

    odir = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"
    counts_modisco = odir + name + "/merge_folds_new_may_05_24/counts/modisco_counts.h5"
    if os.path.isfile(counts_modisco):
        args_json["sequence motifs tar"]["counts"]["file.paths"].append(
            (counts_modisco, "tfmodisco.raw_output.counts." + encid + ".hd5"))
    else:
        print(counts_modisco)
        continue

    profile_modisco = odir + name + "/merge_folds_new_may_05_24/profile/modisco_profile.h5"
    if os.path.isfile(profile_modisco):
        args_json["sequence motifs tar"]["profile"]["file.paths"].append(
            (profile_modisco, "tfmodisco.raw_output.profile." + encid + ".hd5"))
    else:
        print(profile_modisco)
        continue

    counts_key = "tfmodisco.seq_contrib.counts.meme." + encid
    args_json["sequence motifs tar"]["counts"][counts_key] = {"file.paths": []}
    for fmt in ["CWM", "CWM-PFM", "hCWM", "hCWM-PFM", "PFM"]:
        meme_file = odir + name + "/merge_folds_new_may_05_24/counts/" + fmt
        if os.path.isfile(meme_file):
            args_json["sequence motifs tar"]["counts"][counts_key]["file.paths"].append(
                (meme_file, "tfmodisco.seq_contrib.counts." + fmt + ".meme." + encid))
        else:
            print(meme_file)
            # FIX: this loop used `continue`, silently dropping a missing counts
            # meme file; fail the experiment like the profile loop below does.
            success = False
            break
    if not success:
        continue

    profile_key = "tfmodisco.seq_contrib.profile.meme." + encid
    args_json["sequence motifs tar"]["profile"][profile_key] = {"file.paths": []}
    for fmt in ["CWM", "CWM-PFM", "hCWM", "hCWM-PFM", "PFM"]:
        meme_file = odir + name + "/merge_folds_new_may_05_24/profile/" + fmt
        if os.path.isfile(meme_file):
            args_json["sequence motifs tar"]["profile"][profile_key]["file.paths"].append(
                (meme_file, "tfmodisco.seq_contrib.profile." + fmt + ".meme." + encid))
        else:
            print(meme_file)
            success = False
            break
    if not success:
        continue

    out_json = ooutdir + encode_id[name] + ".json"
    if not os.path.isfile(out_json):  # never overwrite an existing upload json
        with open(out_json, "w") as f:
            json.dump(args_json, f, indent=4)
"GM12878_new": "ENCSR000EMT", + "H1ESC_new": "ENCSR000EMU"} + +ooutdir='dnase/' + + + + + +for name in encids: + + encid = encode_id[name] + args_json = {} + args_json["experiment"] = encode_id[name] + args_json["sequence motifs tar"] = {} + + success=True + readme_file="READMEs/modisco.report.README" + if os.path.isfile(readme_file): + args_json["sequence motifs tar"]["file.paths"] = [(readme_file, "README.md")] + + args_json["sequence motifs tar"]["counts"] = {"file.paths": []} + args_json["sequence motifs tar"]["profile"] = {"file.paths": []} + + odir="/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/DNASE/" + counts_modisco=odir+name+"/merge_folds_new_may_05_24/counts/modisco_counts.h5" + if os.path.isfile(counts_modisco): + args_json["sequence motifs tar"]["counts"]["file.paths"].append((counts_modisco, "tfmodisco.raw_output.counts."+encid+".hd5")) + else: + print(counts_modisco) + continue + + profile_modisco=odir+name+"/merge_folds_new_may_05_24/profile/modisco_profile.h5" + if os.path.isfile(profile_modisco): + args_json["sequence motifs tar"]["profile"]["file.paths"].append((profile_modisco, "tfmodisco.raw_output.profile."+encid+".hd5")) + else: + print(profile_modisco) + continue + + args_json["sequence motifs tar"]["counts"]["tfmodisco.seq_contrib.counts.meme."+encid] = {"file.paths": []} + + for formats in ["CWM", "CWM-PFM", "hCWM", "hCWM-PFM", "PFM"]: + meme_file=odir+name+"/merge_folds_new_may_05_24/counts/"+formats + if os.path.isfile(meme_file): + args_json["sequence motifs tar"]["counts"]["tfmodisco.seq_contrib.counts.meme."+encid]["file.paths"].append((meme_file,"tfmodisco.seq_contrib.counts."+formats+".meme."+encid)) + else: + print(meme_file) + continue + args_json["sequence motifs tar"]["profile"]["tfmodisco.seq_contrib.profile.meme."+encid] = {"file.paths": []} + + for formats in ["CWM", "CWM-PFM", "hCWM", "hCWM-PFM", "PFM"]: + meme_file=odir+name+"/merge_folds_new_may_05_24/profile/"+formats + if 
os.path.isfile(meme_file): + args_json["sequence motifs tar"]["profile"]["tfmodisco.seq_contrib.profile.meme."+encid]["file.paths"].append((meme_file,"tfmodisco.seq_contrib.profile."+formats+".meme."+encid)) + else: + print(meme_file) + success=False + break + if not success: + continue + + if not os.path.isfile(ooutdir+encode_id[name]+".json"): + f = open(ooutdir+encode_id[name]+".json", "w") + json.dump(args_json, f, indent=4) + f.close() + + diff --git a/upload_jsons/upload_jsons_scripts/predction_uploads/chrombpnet/READMEs/bc.predicted.README b/upload_jsons/upload_jsons_scripts/predction_uploads/chrombpnet/READMEs/bc.predicted.README new file mode 100644 index 00000000..68a4c99f --- /dev/null +++ b/upload_jsons/upload_jsons_scripts/predction_uploads/chrombpnet/READMEs/bc.predicted.README @@ -0,0 +1,71 @@ +# Directory Structure Format +. +├── input_regions.pred.chrombpnet_nobias.encid.bed.gz # Input bed regions to obtain prediction h5s from chrombpnet_nobias.h5 for each fold +├── pred.chrombpnet_nobias.fold_mean.encid.h5 # Average of prediction h5s from chrombpnet_nobias.h5 across all folds (input format discussed below) +├── logs.pred.chrombpnet_nobias.fold_mean.encid # Directory containing log files +├── fold_0 +│ ├── pred.chrombpnet_nobias.fold_0.encid.h5 # prediction h5s for fold_0 from chrombpnet_nobias.h5 (input format discussed below) +│ └── logs.pred.chrombpnet_nobias.fold_0.encid # Directory containing log files +│ +├── fold_1 +│ └── ... # similar directory structure as fold_0 directory above +│ +├── fold_2 +│ └── ... # similar directory structure as fold_0 directory above +│ +├── fold_3 +│ └── ... # similar directory structure as fold_0 directory above +│ +└── fold_4 + └── ... # similar directory structure as fold_0 directory above + +# Format of bed file + +* All the bed files are in narrowpeak format with 10 columns and follow GRCh38 assembly coordinates. + +1) chrom - Name of the chromosome (or contig, scaffold, etc.). 
+2) chromStart - The starting position of the feature in the chromosome or scaffold. The first base in a chromosome is numbered 0. +3) chromEnd - The ending position of the feature in the chromosome or scaffold. The chromEnd is a 1-based coordinate. For example, the first 100 bases of a chromosome are defined as chromStart=0, chromEnd=100, and span the bases 1 to 100. +4) name - Name given to a region (preferably unique). Use "." if no name is assigned. +5) score - Indicates how dark the peak will be displayed in the browser (0-1000). If all scores were "0" when the data were submitted to the DCC, the DCC assigned scores 1-1000 based on signal value. Ideally the average signalValue per base spread is between 100-1000. +6) strand - +/- to denote strand or orientation (whenever applicable). Use "." if no orientation is assigned. +7) signalValue - Measurement of overall (usually, average) enrichment for the region. +8) pValue - Measurement of statistical significance (-log10). Use -1 if no pValue is assigned. +9) qValue - Measurement of statistical significance using false discovery rate (-log10). Use -1 if no qValue is assigned. +10) peak summit - Point-source called for this peak; 0-based offset from chromStart. Use -1 if no point-source called. + +# Format of h5s + +The h5py object consists of two keys: `coords`, `predictions` + +Each `coords` object has three keys - `coords_chrom`, `coords_start_dset`, `coords_end_dset` +(a) The `coords_chrom` has an array of length N (number of regions) containing chromosome names +(b) The `coords_start_dset` has an array of length N containing chromosome start coordinates. The first base in a chromosome is numbered 0. Follows GRCh38 assembly coordinates. +(c) The `coords_end_dset` has an array of length N containing chromosome end coordinates. The `coords_end_dset` is a 1-based coordinate. 
+ +Each `predictions` object has two keys - `logcounts`, `logits` +(a) The `logcounts` is again an array of shape Nx1 with logcount (log base e) predictions +(b) The `logits` is an array of shape Nx1000, which represents the logits of the base resolution predicted probability profiles over 1000 bp for each of the N profiles + +The `predictions` align with regions specified in the bed file, centered at the summit and expanded by 500 base pairs (bp) +on each side. The 'coords' object should contain the corresponding coordinates for each prediction, and the difference +between 'coords_end_dset' and 'coords_start_dset' should equal 1000. + +# Obtaining average h5s and then prediction bigwigs from individual folds + +To create the `fold_mean.encid.h5` file from individual h5 files, we start by averaging the logcounts and logits across various folds. +Next, we utilize a softmax operation on the averaged logits to transition them into probability profiles. In parallel, we exponentiate +the logcounts to convert them into counts. Multiplying the counts with the derived probability profiles, we generate base-resolution +predictions, which are subsequently recorded into both h5 and bigWig files. + +# Pseudocode for loading h5s + +``` +import h5py +data = h5py.File(predictions_h5, "r") +logcounts_preds = data['predictions']['logcounts'] +logit_preds = data['predictions']['logits'] +chrom_coords = data['coords']['coords_chrom'] +start_coords = data['coords']['coords_start_dset'] +end_coords = data['coords']['coords_end_dset'] +``` diff --git a/upload_jsons/upload_jsons_scripts/predction_uploads/chrombpnet/READMEs/predicted.README b/upload_jsons/upload_jsons_scripts/predction_uploads/chrombpnet/READMEs/predicted.README new file mode 100644 index 00000000..847959b2 --- /dev/null +++ b/upload_jsons/upload_jsons_scripts/predction_uploads/chrombpnet/READMEs/predicted.README @@ -0,0 +1,71 @@ +# Directory Structure Format +. 
+├── input_regions.pred.chrombpnet.encid.bed.gz # Input bed regions to obtain prediction h5s from chrombpnet.h5 model for each fold +├── pred.chrombpnet.fold_mean.encid.h5 # Average of prediction h5s from chrombpnet.h5 model across all folds (input format discussed below) +├── logs.pred.chrombpnet.fold_mean.encid # Directory containing log files +├── fold_0 +│ ├── pred.chrombpnet.fold_0.encid.h5 # prediction h5s for fold_0 from chrombpnet.h5 model (input format discussed below) +│ └── logs.pred.chrombpnet.fold_0.encid # Directory containing log files +│ +├── fold_1 +│ └── ... # similar directory structure as fold_0 directory above +│ +├── fold_2 +│ └── ... # similar directory structure as fold_0 directory above +│ +├── fold_3 +│ └── ... # similar directory structure as fold_0 directory above +│ +└── fold_4 + └── ... # similar directory structure as fold_0 directory above + +# Format of bed file + +* All the bed files are in narrowpeak format with 10 columns and follow GRCh38 assembly coordinates. + +1) chrom - Name of the chromosome (or contig, scaffold, etc.). +2) chromStart - The starting position of the feature in the chromosome or scaffold. The first base in a chromosome is numbered 0. +3) chromEnd - The ending position of the feature in the chromosome or scaffold. The chromEnd is a 1-based coordinate. For example, the first 100 bases of a chromosome are defined as chromStart=0, chromEnd=100, and span the bases 1 to 100. +4) name - Name given to a region (preferably unique). Use "." if no name is assigned. +5) score - Indicates how dark the peak will be displayed in the browser (0-1000). If all scores were "0" when the data were submitted to the DCC, the DCC assigned scores 1-1000 based on signal value. Ideally the average signalValue per base spread is between 100-1000. +6) strand - +/- to denote strand or orientation (whenever applicable). Use "." if no orientation is assigned. 
+7) signalValue - Measurement of overall (usually, average) enrichment for the region. +8) pValue - Measurement of statistical significance (-log10). Use -1 if no pValue is assigned. +9) qValue - Measurement of statistical significance using false discovery rate (-log10). Use -1 if no qValue is assigned. +10) peak summit - Point-source called for this peak; 0-based offset from chromStart. Use -1 if no point-source called. + +# Format of h5s + +The h5py object consists of two keys: `coords`, `predictions` + +Each `coords` object has three keys - `coords_chrom`, `coords_start_dset`, `coords_end_dset` +(a) The `coords_chrom` has an array of length N (N is the number of regions) containing chromosome names +(b) The `coords_start_dset` has an array of length N containing chromosome start coordinates. The first base in a chromosome is numbered 0. Follows GRCh38 assembly coordinates. +(c) The `coords_end_dset` has an array of length N containing chromosome end coordinates. The `coords_end_dset` is a 1-based coordinate. + +Each `predictions` object has two keys - `logcounts`, `logits` +(a) The `logcounts` is again an array of shape Nx1 with logcount (log base e) predictions +(b) The `logits` is an array of shape Nx1000, which represents the logits of the base resolution predicted probability profiles over 1000 bp for each of the N profiles + +The `predictions` align with regions specified in the bed file, centered at the summit and expanded by 500 base pairs (bp) +on each side. The 'coords' object should contain the corresponding coordinates for each prediction, and the difference +between 'coords_end_dset' and 'coords_start_dset' should equal 1000. + +# Obtaining average h5s and then prediction bigwigs from individual folds + +To create the `fold_mean.encid.h5` file from individual h5 files, we start by averaging the logcounts and logits across various folds. +Next, we utilize a softmax operation on the averaged logits to transition them into probability profiles. 
In parallel, we exponentiate +the logcounts to convert them into counts. Multiplying the counts with the derived probability profiles, we generate base-resolution +predictions, which are subsequently recorded into both h5 and bigWig files. + +# Pseudocode for loading h5s + +``` +import h5py +data = h5py.File(predictions_h5, "r") +logcounts_preds = data['predictions']['logcounts'] +logit_preds = data['predictions']['logits'] +chrom_coords = data['coords']['coords_chrom'] +start_coords = data['coords']['coords_start_dset'] +end_coords = data['coords']['coords_end_dset'] +``` diff --git a/upload_jsons/upload_jsons_scripts/predction_uploads/chrombpnet/atac_prepare.py b/upload_jsons/upload_jsons_scripts/predction_uploads/chrombpnet/atac_prepare.py new file mode 100644 index 00000000..d8014055 --- /dev/null +++ b/upload_jsons/upload_jsons_scripts/predction_uploads/chrombpnet/atac_prepare.py @@ -0,0 +1,32 @@ +import os +import json + + +encids = ["K562", "HEPG2", "IMR90", "H1ESC", "GM12878"] + +encode_id = {"K562": "ENCSR868FGK", +"GM12878": "ENCSR637XSC", +"HEPG2": "ENCSR291GJU", +"IMR90": "ENCSR200OML", +"H1ESC": "GSE267154"} + +odir='atac/' +for encid in encids: + ofile = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+encid+"/preds_upload/average_preds/"+encid+".mean_preds_wo_bias.stat" + if os.path.isfile(ofile): + print(encid) + wbias = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+encid+"/preds_upload/average_preds/"+encid+".mean_preds_w_bias.bw" + nobias = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+encid+"/preds_upload/average_preds/"+encid+".mean_preds_wo_bias.bw" + + assert(os.path.isfile(wbias)==True) + assert(os.path.isfile(nobias)==True) + + output_json = {} + output_json["experiment"] = encode_id[encid] + output_json["predicted signal profile bigWig"] = wbias + output_json["bias-corrected predicted signal profile bigWig"] = nobias + + if not 
os.path.isfile(odir+encode_id[encid]+".json"): + f = open(odir+encode_id[encid]+".json", "w") + json.dump(output_json, f, indent=4) + f.close() diff --git a/upload_jsons/upload_jsons_scripts/predction_uploads/chrombpnet/atac_prepare_tar.py b/upload_jsons/upload_jsons_scripts/predction_uploads/chrombpnet/atac_prepare_tar.py new file mode 100644 index 00000000..4b336606 --- /dev/null +++ b/upload_jsons/upload_jsons_scripts/predction_uploads/chrombpnet/atac_prepare_tar.py @@ -0,0 +1,139 @@ +import os +import json +import pandas as pd + +names = ["K562", "HEPG2", "IMR90", "H1ESC", "GM12878"] + +encode_id = {"K562": "ENCSR868FGK", +"GM12878": "ENCSR637XSC", +"HEPG2": "ENCSR291GJU", +"IMR90": "ENCSR200OML", +"H1ESC": "GSE267154"} + +encode_id_dnase = { +"GM12878": "ENCSR000EMT", +"IMR90": "ENCSR477RTP", +"H1ESC": "ENCSR000EMU"} + +outdir='atac_tar/' + +def fetch_per_fold_preds(odir,model_path, encid, i, name): + + data_paths = [] + log_paths = [] + log_paths_opt = [] + + odir="/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+name+"/preds_upload/fold_"+str(i)+"/" + input_h5 = os.path.join(odir, name+"_wo_bias_all_predictions.h5") + data_paths.append((input_h5, "pred.chrombpnet_nobias.fold_"+str(i)+"."+encid+".h5")) + + input_log = os.path.join(odir, "pred.counts.log.e") + #print(input_log) + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.pred.chrombpnet_nobias.fold_"+str(i)+"."+encid+".stderr.txt")) + + input_log = os.path.join(odir, "pred.counts.log.o") + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.pred.chrombpnet_nobias.fold_"+str(i)+"."+encid+".stdout.txt")) + + input_log="/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+name+"/"+model_path.split("/")[-1]+"/chrombpnet_model/preds_atac/pred.counts.log.o" + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.pred.chrombpnet_nobias.fold_"+str(i)+"."+encid+".stdout_v1.txt")) + + 
input_log="/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+name+"/"+model_path.split("/")[-1]+"/chrombpnet_model/preds_atac/pred.counts.log.e" + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.pred.chrombpnet_nobias.fold_"+str(i)+"."+encid+".stderr_v1.txt")) + + input_log="/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+name+"/"+model_path.split("/")[-1]+"/chrombpnet_model/preds_dnase/pred.counts.log.o" + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.pred.chrombpnet_nobias.fold_"+str(i)+"."+encid+".stdout_v2.txt")) + + input_log="/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+name+"/"+model_path.split("/")[-1]+"/chrombpnet_model/preds_dnase/pred.counts.log.e" + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.pred.chrombpnet_nobias.fold_"+str(i)+"."+encid+".stderr_v2.txt")) + + return data_paths, log_paths, log_paths_opt + +def fetch_pred_tar(encid, args_json, model_paths, name): + success = False + args_json["bias-corrected predicted signal profile tar"] = {} + readme_file = "READMEs/bc.predicted.README" + assert(os.path.isfile(readme_file)) + args_json["bias-corrected predicted signal profile tar"]["file.paths"] = [(readme_file, "README.md")] + args_json["bias-corrected predicted signal profile tar"]["logs.pred.chrombpnet_nobias.fold_mean."+encid] = {"file.paths": []} + + odir="/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+name+"/preds_upload/average_preds/" + + input_h5 = os.path.join(odir, name+".mean_preds_wo_bias_predictions.h5") + if os.path.isfile(input_h5): + args_json["bias-corrected predicted signal profile tar"]["file.paths"].append((input_h5,"pred.chrombpnet_nobias.fold_mean."+encid+".h5")) + else: + success = False + return success, args_json + + if name in ["IMR90", "GM12878", "H1ESC"]: + bed1 = 
pd.read_csv("/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/DNASE/"+encode_id_dnase[name]+"/preprocessing/downloads/peaks.bed.gz", sep='\t', header=None) + else: + bed1 = pd.read_csv("/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/DNASE/"+name+"/peaks_no_blacklist.bed", sep='\t', header=None) + + + bed2 = pd.read_csv("/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+name+"/peaks_no_blacklist.bed", sep='\t', header=None) + + print(bed1.shape) + print(bed2.shape) + bedf = pd.concat([bed1, bed2]) + print(bedf.shape) + + input_bed = os.path.join(odir, "input.regions.bed.gz") + if os.path.isfile(input_bed): + args_json["bias-corrected predicted signal profile tar"]["file.paths"].append((input_bed,"input_regions.pred.chrombpnet_nobias."+encid+".bed.gz")) + else: + bedf.to_csv(input_bed, sep='\t', header=False, index=False, compression='gzip') + + + input_log = os.path.join(odir, "merge.preds.log.e") + if os.path.isfile(input_log): + args_json["bias-corrected predicted signal profile tar"]["logs.pred.chrombpnet_nobias.fold_mean."+encid]["file.paths"].append((input_log, "logs.pred.chrombpnet_nobias.fold_mean."+encid+".stderr.txt")) + + input_log = os.path.join(odir, "merge.preds.log.o") + if os.path.isfile(input_log): + args_json["bias-corrected predicted signal profile tar"]["logs.pred.chrombpnet_nobias.fold_mean."+encid]["file.paths"].append((input_log, "logs.pred.chrombpnet_nobias.fold_mean."+encid+".stdout.txt")) + + + for i in range(5): + data_paths, log_paths, log_paths_opt = fetch_per_fold_preds(odir,model_paths[i], encid, i, name) + + if data_paths is None: + success = False + return success, args_json + + args_json["bias-corrected predicted signal profile tar"]["fold_"+str(i)] = {} + args_json["bias-corrected predicted signal profile tar"]["fold_"+str(i)]["file.paths"] = data_paths + args_json["bias-corrected predicted signal profile 
tar"]["fold_"+str(i)]["logs.pred.chrombpnet_nobias.fold_"+str(i)+"."+encid] = {"file.paths": log_paths+log_paths_opt} + assert(len(data_paths) == 1) + print(len(log_paths)) + assert(len(log_paths) == 6) + + success=True + return success, args_json + +model_atac = pd.read_csv("/mnt/lab_data2/anusri/chrombpnet/logs/checkpoint/JAN_02_2023/model_dir_atac.csv",sep=",", header=None) + +for name in names: + ofile = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+name+"/preds_upload/average_preds/"+name+".mean_preds_wo_bias.stat" + if os.path.isfile(ofile): + args_json = {} + encid=encode_id[name] + args_json['experiment'] = encid + model_paths = model_atac[model_atac[1]==name][2].values + print(model_paths) + success, args_json = fetch_pred_tar(encid, args_json, model_paths, name) + if not success: + print("ERR preds tar") + continue + + if not os.path.isfile(outdir+encid+"_wo_bias.json"): + f = open(outdir+encode_id[name]+"_wo_bias.json", "w") + json.dump(args_json, f, indent=4) + f.close() + diff --git a/upload_jsons/upload_jsons_scripts/predction_uploads/chrombpnet/atac_prepare_tar_w_bias.py b/upload_jsons/upload_jsons_scripts/predction_uploads/chrombpnet/atac_prepare_tar_w_bias.py new file mode 100644 index 00000000..0f3bbbe7 --- /dev/null +++ b/upload_jsons/upload_jsons_scripts/predction_uploads/chrombpnet/atac_prepare_tar_w_bias.py @@ -0,0 +1,139 @@ +import os +import json +import pandas as pd + +names = ["K562", "HEPG2", "IMR90", "H1ESC", "GM12878"] + +encode_id = {"K562": "ENCSR868FGK", +"GM12878": "ENCSR637XSC", +"HEPG2": "ENCSR291GJU", +"IMR90": "ENCSR200OML", +"H1ESC": "GSE267154"} + +encode_id_dnase = { +"GM12878": "ENCSR000EMT", +"IMR90": "ENCSR477RTP", +"H1ESC": "ENCSR000EMU"} + +outdir='atac_tar/' + +def fetch_per_fold_preds(odir,model_path, encid, i, name): + + data_paths = [] + log_paths = [] + log_paths_opt = [] + + 
odir="/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+name+"/preds_upload/fold_"+str(i)+"/" + input_h5 = os.path.join(odir, name+"_w_bias_all_predictions.h5") + data_paths.append((input_h5, "pred.chrombpnet_nobias.fold_"+str(i)+"."+encid+".h5")) + + input_log = os.path.join(odir, "pred.counts.log.e") + #print(input_log) + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.pred.chrombpnet_nobias.fold_"+str(i)+"."+encid+".stderr.txt")) + + input_log = os.path.join(odir, "pred.counts.log.o") + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.pred.chrombpnet_nobias.fold_"+str(i)+"."+encid+".stdout.txt")) + + input_log="/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+name+"/"+model_path.split("/")[-1]+"/chrombpnet_model/preds_atac/pred.counts.log.o" + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.pred.chrombpnet_nobias.fold_"+str(i)+"."+encid+".stdout_v1.txt")) + + input_log="/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+name+"/"+model_path.split("/")[-1]+"/chrombpnet_model/preds_atac/pred.counts.log.e" + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.pred.chrombpnet_nobias.fold_"+str(i)+"."+encid+".stderr_v1.txt")) + + input_log="/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+name+"/"+model_path.split("/")[-1]+"/chrombpnet_model/preds_dnase/pred.counts.log.o" + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.pred.chrombpnet_nobias.fold_"+str(i)+"."+encid+".stdout_v2.txt")) + + input_log="/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+name+"/"+model_path.split("/")[-1]+"/chrombpnet_model/preds_dnase/pred.counts.log.e" + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.pred.chrombpnet_nobias.fold_"+str(i)+"."+encid+".stderr_v2.txt")) + + return data_paths, log_paths, log_paths_opt 
+ +def fetch_pred_tar(encid, args_json, model_paths, name): + success = False + args_json["bias-corrected predicted signal profile tar"] = {} + readme_file = "READMEs/bc.predicted.README" + assert(os.path.isfile(readme_file)) + args_json["bias-corrected predicted signal profile tar"]["file.paths"] = [(readme_file, "README.md")] + args_json["bias-corrected predicted signal profile tar"]["logs.pred.chrombpnet_nobias.fold_mean."+encid] = {"file.paths": []} + + odir="/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+name+"/preds_upload/average_preds/" + + input_h5 = os.path.join(odir, name+".mean_preds_w_bias_predictions.h5") + if os.path.isfile(input_h5): + args_json["bias-corrected predicted signal profile tar"]["file.paths"].append((input_h5,"pred.chrombpnet_nobias.fold_mean."+encid+".h5")) + else: + success = False + return success, args_json + + if name in ["IMR90", "GM12878", "H1ESC"]: + bed1 = pd.read_csv("/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/DNASE/"+encode_id_dnase[name]+"/preprocessing/downloads/peaks.bed.gz", sep='\t', header=None) + else: + bed1 = pd.read_csv("/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/DNASE/"+name+"/peaks_no_blacklist.bed", sep='\t', header=None) + + + bed2 = pd.read_csv("/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+name+"/peaks_no_blacklist.bed", sep='\t', header=None) + + print(bed1.shape) + print(bed2.shape) + bedf = pd.concat([bed1, bed2]) + print(bedf.shape) + + input_bed = os.path.join(odir, "input.regions.bed.gz") + if os.path.isfile(input_bed): + args_json["bias-corrected predicted signal profile tar"]["file.paths"].append((input_bed,"input_regions.pred.chrombpnet_nobias."+encid+".bed.gz")) + else: + bedf.to_csv(input_bed, sep='\t', header=False, index=False, compression='gzip') + + + input_log = os.path.join(odir, "merge.preds.log.e") + if os.path.isfile(input_log): + args_json["bias-corrected predicted 
signal profile tar"]["logs.pred.chrombpnet_nobias.fold_mean."+encid]["file.paths"].append((input_log, "logs.pred.chrombpnet_nobias.fold_mean."+encid+".stderr.txt")) + + input_log = os.path.join(odir, "merge.preds.log.o") + if os.path.isfile(input_log): + args_json["bias-corrected predicted signal profile tar"]["logs.pred.chrombpnet_nobias.fold_mean."+encid]["file.paths"].append((input_log, "logs.pred.chrombpnet_nobias.fold_mean."+encid+".stdout.txt")) + + + for i in range(5): + data_paths, log_paths, log_paths_opt = fetch_per_fold_preds(odir,model_paths[i], encid, i, name) + + if data_paths is None: + success = False + return success, args_json + + args_json["bias-corrected predicted signal profile tar"]["fold_"+str(i)] = {} + args_json["bias-corrected predicted signal profile tar"]["fold_"+str(i)]["file.paths"] = data_paths + args_json["bias-corrected predicted signal profile tar"]["fold_"+str(i)]["logs.pred.chrombpnet_nobias.fold_"+str(i)+"."+encid] = {"file.paths": log_paths+log_paths_opt} + assert(len(data_paths) == 1) + print(len(log_paths)) + assert(len(log_paths) == 6) + + success=True + return success, args_json + +model_atac = pd.read_csv("/mnt/lab_data2/anusri/chrombpnet/logs/checkpoint/JAN_02_2023/model_dir_atac.csv",sep=",", header=None) + +for name in names: + ofile = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+name+"/preds_upload/average_preds/"+name+".mean_preds_w_bias.stat" + if os.path.isfile(ofile): + args_json = {} + encid=encode_id[name] + args_json["experiment"] = encid + model_paths = model_atac[model_atac[1]==name][2].values + print(model_paths) + success, args_json = fetch_pred_tar(encid, args_json, model_paths, name) + if not success: + print("ERR preds tar") + continue + + if not os.path.isfile(outdir+encid+"_w_bias.json"): + f = open(outdir+encode_id[name]+".json", "w") + json.dump(args_json, f, indent=4) + f.close() + diff --git 
a/upload_jsons/upload_jsons_scripts/predction_uploads/chrombpnet/dnase_prepare.py b/upload_jsons/upload_jsons_scripts/predction_uploads/chrombpnet/dnase_prepare.py new file mode 100644 index 00000000..6f31826f --- /dev/null +++ b/upload_jsons/upload_jsons_scripts/predction_uploads/chrombpnet/dnase_prepare.py @@ -0,0 +1,31 @@ +import os +import json + + +encids = ["K562", "HEPG2", "IMR90_new", "H1ESC_new", "GM12878_new"] + +encode_id = {"HEPG2": "ENCSR149XIL", + "K562": "ENCSR000EOT", + "IMR90_new": "ENCSR477RTP", + "GM12878_new": "ENCSR000EMT", + "H1ESC_new": "ENCSR000EMU"} +odir='dnase/' +for encid in encids: + ofile = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/DNASE/"+encid+"/preds_upload/average_preds_with_ccre_vf/"+encid+".mean_preds_wo_bias.stat" + if os.path.isfile(ofile): + print(encid) + wbias = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/DNASE/"+encid+"/preds_upload/average_preds_with_ccre_vf/"+encid+".mean_preds_w_bias.bw" + nobias = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/DNASE/"+encid+"/preds_upload/average_preds_with_ccre_vf/"+encid+".mean_preds_wo_bias.bw" + + assert(os.path.isfile(wbias)==True) + assert(os.path.isfile(nobias)==True) + + output_json = {} + output_json["experiment"] = encode_id[encid] + output_json["predicted signal profile bigWig"] = wbias + output_json["bias-corrected predicted signal profile bigWig"] = nobias + + if not os.path.isfile(odir+encode_id[encid]+".json"): + f = open(odir+encode_id[encid]+".json", "w") + json.dump(output_json, f, indent=4) + f.close() diff --git a/upload_jsons/upload_jsons_scripts/predction_uploads/chrombpnet/dnase_prepare_tar.py b/upload_jsons/upload_jsons_scripts/predction_uploads/chrombpnet/dnase_prepare_tar.py new file mode 100644 index 00000000..c1d0ce01 --- /dev/null +++ b/upload_jsons/upload_jsons_scripts/predction_uploads/chrombpnet/dnase_prepare_tar.py @@ -0,0 +1,146 @@ +import os +import 
import os
import json

import pandas as pd

# Build upload-manifest JSONs for the bias-corrected ("wo_bias") DNASE
# prediction tarballs, one per cell line.  "_new" marks re-processed runs.
names = ["IMR90_new", "H1ESC_new", "GM12878_new"]
#names = ["K562", "HEPG2"]

# Bookkeeping table (no header): column 1 = cell line, column 2 = per-fold model dir.
model_atac = pd.read_csv("/mnt/lab_data2/anusri/chrombpnet/logs/checkpoint/JAN_02_2023/v1/model_dir_dnase_v2_interpret.csv", sep=",", header=None)
#model_atac = pd.read_csv("/mnt/lab_data2/anusri/chrombpnet/logs/checkpoint/JAN_02_2023/v1/model_dir_dnase_v2.csv", sep=",", header=None)

# ENCODE experiment accession per cell line.
encode_id = {"HEPG2": "ENCSR149XIL",
             "K562": "ENCSR000EOT",
             "IMR90_new": "ENCSR477RTP",
             "GM12878_new": "ENCSR000EMT",
             "H1ESC_new": "ENCSR000EMU"}

# DNase accessions used to locate the preprocessing peak downloads.
encode_id_dnase = {
    "GM12878_new": "ENCSR000EMT",
    "IMR90_new": "ENCSR477RTP",
    "H1ESC_new": "ENCSR000EMU"}

outdir = 'dnase_tar/'

# Common root of all per-cell-line DNASE fold outputs on oak.
_DNASE_ROOT = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/DNASE/"


def _append_if_exists(paths, src, dst):
    """Append (src, dst) to paths when src exists on disk; return True when appended."""
    if os.path.isfile(src):
        paths.append((src, dst))
        return True
    return False


def fetch_per_fold_preds(odir, model_path, encid, i, name):
    """Collect the fold-i bias-corrected prediction h5 plus its log files.

    Returns (data_paths, log_paths, log_paths_opt): lists of
    (local_path, upload_name) tuples.  data_paths always holds exactly one
    entry; a log is listed only when the file is present on disk.
    """
    data_paths = []
    log_paths = []
    log_paths_opt = []

    # The incoming odir is ignored and recomputed per fold (parameter kept
    # for caller compatibility).
    odir = _DNASE_ROOT + name + "/preds_upload/fold_" + str(i) + "/"
    input_h5 = os.path.join(odir, name + "_wo_bias_all_predictions.h5")
    data_paths.append((input_h5, "pred.chrombpnet_nobias.fold_" + str(i) + "." + encid + ".h5"))

    prefix = "logs.pred.chrombpnet_nobias.fold_" + str(i) + "." + encid

    input_log = os.path.join(odir, "pred.counts.log.e")
    print(input_log)
    _append_if_exists(log_paths, input_log, prefix + ".stderr.txt")
    _append_if_exists(log_paths, os.path.join(odir, "pred.counts.log.o"), prefix + ".stdout.txt")

    # Older (v1/v2) logs kept under the fold's model directory.
    model_dir = _DNASE_ROOT + name + "/" + model_path.split("/")[-2] + "/chrombpnet_model/"
    input_log = model_dir + "preds_atac/pred.counts.log.o"
    _append_if_exists(log_paths, input_log, prefix + ".stdout_v1.txt")
    print(input_log)
    _append_if_exists(log_paths, model_dir + "preds_atac/pred.counts.log.e", prefix + ".stderr_v1.txt")
    _append_if_exists(log_paths, model_dir + "preds_dnase/pred.counts.log.o", prefix + ".stdout_v2.txt")
    _append_if_exists(log_paths, model_dir + "preds_dnase/pred.counts.log.e", prefix + ".stderr_v2.txt")

    return data_paths, log_paths, log_paths_opt


def fetch_pred_tar(encid, args_json, model_paths, name):
    """Fill args_json with the manifest for the bias-corrected prediction tar.

    Returns (success, args_json); success is False when the required
    fold-mean prediction h5 is missing.
    """
    key = "bias-corrected predicted signal profile tar"
    args_json[key] = {}
    readme_file = "READMEs/bc.predicted.README"
    assert os.path.isfile(readme_file)
    args_json[key]["file.paths"] = [(readme_file, "README.md")]
    mean_key = "logs.pred.chrombpnet_nobias.fold_mean." + encid
    args_json[key][mean_key] = {"file.paths": []}

    odir = _DNASE_ROOT + name + "/preds_upload/average_preds/"

    input_h5 = os.path.join(odir, name + ".mean_preds_wo_bias_predictions.h5")
    if not os.path.isfile(input_h5):
        return False, args_json
    args_json[key]["file.paths"].append((input_h5, "pred.chrombpnet_nobias.fold_mean." + encid + ".h5"))

    # Input regions = DNase peaks plus the matched ATAC peaks.
    if name in ["IMR90_new", "GM12878_new", "H1ESC_new"]:
        bed1 = pd.read_csv("/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/DNASE/" + encode_id_dnase[name] + "/preprocessing/downloads/peaks.bed.gz", sep='\t', header=None)
    else:
        bed1 = pd.read_csv(_DNASE_ROOT + name + "/peaks_no_blacklist.bed", sep='\t', header=None)

    bed2 = pd.read_csv("/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/" + name.replace("_new", "") + "/peaks_no_blacklist.bed", sep='\t', header=None)

    print(bed1.shape)
    print(bed2.shape)
    bedf = pd.concat([bed1, bed2])
    print(bedf.shape)

    input_bed = os.path.join(odir, "input.regions.bed.gz")
    if os.path.isfile(input_bed):
        args_json[key]["file.paths"].append((input_bed, "input_regions.pred.chrombpnet_nobias." + encid + ".bed.gz"))
    else:
        # First pass only materialises the bed; it is registered on a rerun.
        # NOTE(review): the freshly written file is not appended in the same
        # pass -- confirm this two-pass behaviour is intentional.
        bedf.to_csv(input_bed, sep='\t', header=False, index=False, compression='gzip')

    _append_if_exists(args_json[key][mean_key]["file.paths"],
                      os.path.join(odir, "merge.preds.log.e"), mean_key + ".stderr.txt")
    _append_if_exists(args_json[key][mean_key]["file.paths"],
                      os.path.join(odir, "merge.preds.log.o"), mean_key + ".stdout.txt")

    for i in range(5):
        data_paths, log_paths, log_paths_opt = fetch_per_fold_preds(odir, model_paths[i], encid, i, name)
        if data_paths is None:  # defensive; fetch_per_fold_preds never returns None
            return False, args_json

        fold_key = "fold_" + str(i)
        args_json[key][fold_key] = {"file.paths": data_paths}
        args_json[key][fold_key]["logs.pred.chrombpnet_nobias.fold_" + str(i) + "." + encid] = {"file.paths": log_paths + log_paths_opt}
        assert len(data_paths) == 1
        print(len(log_paths))
        assert len(log_paths) == 6  # all six per-fold logs must be present

    return True, args_json


for name in names:
    # Only package cell lines whose fold-merge stat file exists.
    ofile = _DNASE_ROOT + name + "/preds_upload/average_preds/" + name + ".mean_preds_wo_bias.stat"
    if os.path.isfile(ofile):
        encid = encode_id[name]
        args_json = {"experiment": encid}
        model_paths = model_atac[model_atac[1] == name.replace("_new", "")][2].values
        print(model_paths)
        success, args_json = fetch_pred_tar(encid, args_json, model_paths, name)
        if not success:
            print("ERR preds tar")
            continue

        out_json = outdir + encid + "_wo_bias.json"
        if not os.path.isfile(out_json):
            with open(out_json, "w") as f:
                json.dump(args_json, f, indent=4)
+"H1ESC_new": "ENCSR000EMU"} + +outdir='dnase_tar/' + +def fetch_per_fold_preds(odir,model_path, encid, i, name): + + data_paths = [] + log_paths = [] + log_paths_opt = [] + + odir="/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/DNASE/"+name+"/preds_upload/fold_"+str(i)+"/" + input_h5 = os.path.join(odir, name+"_w_bias_all_predictions.h5") + data_paths.append((input_h5, "pred.chrombpnet_nobias.fold_"+str(i)+"."+encid+".h5")) + + input_log = os.path.join(odir, "pred.counts.log.e") + print(input_log) + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.pred.chrombpnet_nobias.fold_"+str(i)+"."+encid+".stderr.txt")) + + input_log = os.path.join(odir, "pred.counts.log.o") + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.pred.chrombpnet_nobias.fold_"+str(i)+"."+encid+".stdout.txt")) + + input_log="/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/DNASE/"+name+"/"+model_path.split("/")[-2]+"/chrombpnet_model/preds_atac/pred.counts.log.o" + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.pred.chrombpnet_nobias.fold_"+str(i)+"."+encid+".stdout_v1.txt")) + + print(input_log) + input_log="/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/DNASE/"+name+"/"+model_path.split("/")[-2]+"/chrombpnet_model/preds_atac/pred.counts.log.e" + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.pred.chrombpnet_nobias.fold_"+str(i)+"."+encid+".stderr_v1.txt")) + + input_log="/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/DNASE/"+name+"/"+model_path.split("/")[-2]+"/chrombpnet_model/preds_dnase/pred.counts.log.o" + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.pred.chrombpnet_nobias.fold_"+str(i)+"."+encid+".stdout_v2.txt")) + + 
input_log="/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/DNASE/"+name+"/"+model_path.split("/")[-2]+"/chrombpnet_model/preds_dnase/pred.counts.log.e" + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.pred.chrombpnet_nobias.fold_"+str(i)+"."+encid+".stderr_v2.txt")) + + return data_paths, log_paths, log_paths_opt + +def fetch_pred_tar(encid, args_json, model_paths, name): + success = False + args_json["bias-corrected predicted signal profile tar"] = {} + readme_file = "READMEs/bc.predicted.README" + assert(os.path.isfile(readme_file)) + args_json["bias-corrected predicted signal profile tar"]["file.paths"] = [(readme_file, "README.md")] + args_json["bias-corrected predicted signal profile tar"]["logs.pred.chrombpnet_nobias.fold_mean."+encid] = {"file.paths": []} + + odir="/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/DNASE/"+name+"/preds_upload/average_preds/" + + input_h5 = os.path.join(odir, name+".mean_preds_w_bias_predictions.h5") + if os.path.isfile(input_h5): + args_json["bias-corrected predicted signal profile tar"]["file.paths"].append((input_h5,"pred.chrombpnet_nobias.fold_mean."+encid+".h5")) + else: + success = False + return success, args_json + + if name in ["IMR90_new", "GM12878_new", "H1ESC_new"]: + bed1 = pd.read_csv("/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/DNASE/"+encode_id_dnase[name]+"/preprocessing/downloads/peaks.bed.gz", sep='\t', header=None) + else: + bed1 = pd.read_csv("/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/DNASE/"+name+"/peaks_no_blacklist.bed", sep='\t', header=None) + + + bed2 = pd.read_csv("/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+name.replace("_new","")+"/peaks_no_blacklist.bed", sep='\t', header=None) + + print(bed1.shape) + print(bed2.shape) + bedf = pd.concat([bed1, bed2]) + print(bedf.shape) + + input_bed = os.path.join(odir, "input.regions.bed.gz") + 
if os.path.isfile(input_bed): + args_json["bias-corrected predicted signal profile tar"]["file.paths"].append((input_bed,"input_regions.pred.chrombpnet_nobias."+encid+".bed.gz")) + else: + bedf.to_csv(input_bed, sep='\t', header=False, index=False, compression='gzip') + + + input_log = os.path.join(odir, "merge.preds.log.e") + if os.path.isfile(input_log): + args_json["bias-corrected predicted signal profile tar"]["logs.pred.chrombpnet_nobias.fold_mean."+encid]["file.paths"].append((input_log, "logs.pred.chrombpnet_nobias.fold_mean."+encid+".stderr.txt")) + + input_log = os.path.join(odir, "merge.preds.log.o") + if os.path.isfile(input_log): + args_json["bias-corrected predicted signal profile tar"]["logs.pred.chrombpnet_nobias.fold_mean."+encid]["file.paths"].append((input_log, "logs.pred.chrombpnet_nobias.fold_mean."+encid+".stdout.txt")) + + + for i in range(5): + data_paths, log_paths, log_paths_opt = fetch_per_fold_preds(odir,model_paths[i], encid, i, name) + + if data_paths is None: + success = False + return success, args_json + + args_json["bias-corrected predicted signal profile tar"]["fold_"+str(i)] = {} + args_json["bias-corrected predicted signal profile tar"]["fold_"+str(i)]["file.paths"] = data_paths + args_json["bias-corrected predicted signal profile tar"]["fold_"+str(i)]["logs.pred.chrombpnet_nobias.fold_"+str(i)+"."+encid] = {"file.paths": log_paths+log_paths_opt} + assert(len(data_paths) == 1) + print(len(log_paths)) + assert(len(log_paths) == 6) + + success=True + return success, args_json + + +for name in names: + ofile = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/DNASE/"+name+"/preds_upload/average_preds/"+name+".mean_preds_w_bias.stat" + if os.path.isfile(ofile): + args_json = {} + encid=encode_id[name] + args_json["experiment"] = encid + model_paths = model_atac[model_atac[1]==name.replace("_new","")][2].values + print(model_paths) + success, args_json = fetch_pred_tar(encid, args_json, model_paths, name) + 
if not success: + print("ERR preds tar") + continue + + if not os.path.isfile(outdir+encid+".json"): + f = open(outdir+encode_id[name]+".json", "w") + json.dump(args_json, f, indent=4) + f.close() + diff --git a/upload_jsons/upload_jsons_scripts/profile_bigwigs_uploads/dnase_prepare_tar.py b/upload_jsons/upload_jsons_scripts/profile_bigwigs_uploads/dnase_prepare_tar.py new file mode 100644 index 00000000..e69de29b diff --git a/upload_jsons/upload_jsons_scripts/profile_contrib_upload/READMES/profile.deepshap.README b/upload_jsons/upload_jsons_scripts/profile_contrib_upload/READMES/profile.deepshap.README new file mode 100644 index 00000000..e69de29b diff --git a/upload_jsons/upload_jsons_scripts/profile_contrib_upload/atac_tar.py b/upload_jsons/upload_jsons_scripts/profile_contrib_upload/atac_tar.py new file mode 100644 index 00000000..b44dfb3c --- /dev/null +++ b/upload_jsons/upload_jsons_scripts/profile_contrib_upload/atac_tar.py @@ -0,0 +1,215 @@ +import os +import json +import pandas as pd + +#encids = ["K562", "HEPG2", "IMR90_new", "H1ESC_new", "GM12878_new"] +encids = ["IMR90", "H1ESC", "GM12878"] + +encode_id = {"K562": "ENCSR868FGK", +"GM12878": "ENCSR637XSC", +"HEPG2": "ENCSR291GJU", +"IMR90": "ENCSR200OML", +"H1ESC": "GSE267154"} +odir='atac/' + +model_atac = pd.read_csv("/mnt/lab_data2/anusri/chrombpnet/logs/checkpoint/JAN_02_2023/model_dir_atac.csv",sep=",", header=None) + +def fetch_per_fold_profile(odir,model_path, encid, i, name): + + model_path_orig=model_path + model_path="/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+name+"/"+model_path.split("/")[-1] + data_paths = [] + log_paths = [] + log_paths_opt = [] + + odir="/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+name+"/interpret_upload/fold_"+str(i)+"/" + input_h5 = os.path.join(odir, name+"_profile_attribs_reformatted.h5") + data_paths.append((input_h5, "seq_contrib.profile.fold_"+str(i)+"."+encid+".h5")) + + 
#model_path="/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+name+"/ATAC_SE_04.27.2024//chrombpnet_model" + + # ATAC regions logs + + model_path=model_path+"/chrombpnet_model" + input_log=model_path+"/interpret_dnase/full_"+name+".interpret.args.json" + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.seq_contrib.profile.dnase_regions.fold_"+str(i)+"."+encid+".args.json")) + else: + + print(input_log) + input_log=model_path+"/interpret_dnase/full_"+name+".interpet.log" + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.seq_contrib.profile.dnase_regions.fold_"+str(i)+"."+encid+".log")) + else: + print(input_log) + + input_log=model_path+"/interpret_dnase/ATAC_peaks_full.profile.interpret.log.e" + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.seq_contrib.profile.dnase_regions.fold_"+str(i)+"."+encid+".stderr.txt")) + else: + print(input_log) + + input_log=model_path+"/interpret_dnase/ATAC_peaks_full.profile.interpret.log.o" + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.seq_contrib.profile.dnase_regions.fold_"+str(i)+"."+encid+".stdout.txt")) + else: + print(input_log) + + # atac regions logs + + input_log=model_path_orig+"/interpret/merged."+name+".interpret.args.json" + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.seq_contrib.profile.atac_regions.fold_"+str(i)+"."+encid+".args.json")) + else: + print(input_log) + + input_log=model_path_orig+"/interpret/merged."+name+".interpet.log" + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.seq_contrib.profile.atac_regions.fold_"+str(i)+"."+encid+".log")) + else: + print(input_log) + + # atac regions logs + + + input_log=model_path+"/interpret/full_"+name+".interpret.args.json" + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.seq_contrib.profile.atac_regions.fold_"+str(i)+"."+encid+".args.json")) + else: + print(input_log) + + 
input_log=model_path+"/interpret/full_"+name+".interpet.log" + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.seq_contrib.profile.atac_regions.fold_"+str(i)+"."+encid+".log")) + else: + print(input_log) + + input_log=model_path+"/interpret/full.profile.interpret.log.e" + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.seq_contrib.profile.atac_regions.fold_"+str(i)+"."+encid+".stderr.txt")) + else: + print(input_log) + + input_log=model_path+"/interpret/full.profile.interpret.log.o" + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.seq_contrib.profile.atac_regions.fold_"+str(i)+"."+encid+".stdout.txt")) + else: + print(input_log) + + + return data_paths, log_paths, log_paths_opt + +def fetch_profile_tar(encid, args_json, model_paths, name): + success = False + args_json["profile sequence contribution scores tar"] = {} + readme_file = "READMES/profile.deepshap.README" + assert(os.path.isfile(readme_file)) + args_json["profile sequence contribution scores tar"]["file.paths"] = [(readme_file, "README.md")] + args_json["profile sequence contribution scores tar"]["logs.seq_contrib.profile."+encid] = {"file.paths": []} + + ## full h5 path + + odir="/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+name+"/interpret_upload/average_preds/" + + input_h5 = os.path.join(odir, name+"_profile_attribs_reformatted.h5") + if os.path.isfile(input_h5): + args_json["profile sequence contribution scores tar"]["file.paths"].append((input_h5,"seq_contrib.profile.fold_mean."+encid+".h5")) + else: + success = False + return success, args_json + + ## modisoc h5 path + + modisco_input = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+name+"/merge_folds_new_may_05_24/in_peaks.profile_scores_new_compressed.h5" + if os.path.isfile(modisco_input): + args_json["profile sequence contribution scores 
tar"]["file.paths"].append((modisco_input,"seq_contrib.profile.fold_mean.modisco_input."+encid+".h5")) + else: + success = False + return success, args_json + + # log files + + + input_file=model_paths[0]+"/chrombpnet_model/interpret_all/full_"+name+".interpreted_regions_profile.bed" + newf="/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+name+"/interpret_upload/average_preds/per_folds.inputs.bed.gz" + input_bed = pd.read_csv(input_file, compression='gzip', sep='\t', header=None) + if os.path.isfile(input_file): + if not os.path.isfile(newf): + input_bed.to_csv(newf, sep='\t', header=False, index=False, compression='gzip') + args_json["profile sequence contribution scores tar"]["logs.seq_contrib.profile."+encid]["file.paths"].append((newf,"logs.seq_contrib.profile.input_regions.per_fold."+encid+".bed.gz")) + + + input_file="/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+name+"/merge_folds_new_may_05_24/in_peaks.profile.interpreted_regions.bed" + newf="/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+name+"/interpret_upload/average_preds/modisco.inputs.bed.gz" + input_bed = pd.read_csv(input_file, sep='\t', header=None) + if os.path.isfile(input_file): + if not os.path.isfile(newf): + input_bed.to_csv(newf, sep='\t', header=False, index=False, compression='gzip') + args_json["profile sequence contribution scores tar"]["logs.seq_contrib.profile."+encid]["file.paths"].append((newf,"logs.seq_contrib.profile.input_regions."+encid+".bed.gz")) + + odir="/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+name+"/interpret_upload/average_preds/" + + input_log = os.path.join(odir, "reformat.log.e") + if os.path.isfile(input_log): + args_json["profile sequence contribution scores tar"]["logs.seq_contrib.profile."+encid]["file.paths"].append((input_log, "logs.seq_contrib.profile.fold_mean.reformat"+encid+".stderr.txt")) + + input_log = 
os.path.join(odir, "reformat.log.e") + if os.path.isfile(input_log): + args_json["profile sequence contribution scores tar"]["logs.seq_contrib.profile."+encid]["file.paths"].append((input_log, "logs.seq_contrib.profile.fold_mean.reformat"+encid+".stdout.txt")) + + assert(len(args_json["profile sequence contribution scores tar"]["logs.seq_contrib.profile."+encid]["file.paths"])==4) + + for i in range(5): + data_paths, log_paths, log_paths_opt = fetch_per_fold_profile(odir,model_paths[i], encid, i, name) + + if data_paths is None: + success = False + return success, args_json + + args_json["profile sequence contribution scores tar"]["fold_"+str(i)] = {} + args_json["profile sequence contribution scores tar"]["fold_"+str(i)]["file.paths"] = data_paths + args_json["profile sequence contribution scores tar"]["fold_"+str(i)]["logs.seq_contrib.profile.fold_"+str(i)+"."+encid] = {"file.paths": log_paths+log_paths_opt} + assert(len(data_paths) == 1) + print(len(log_paths)) + assert(len(log_paths) >= 4) + + success=True + return success, args_json + +for encid in encids: + print(encid) + + + ofile = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+encid+"/interpret_upload/average_preds/"+encid+"_folds_merged.profile_scores_new_compressed.stats" + if os.path.isfile(ofile): + profile_bw = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+encid+"/interpret_upload/average_preds/"+encid+"_folds_merged.profile_scores_new_compressed.bw" + else: + profile_bw = None + print(ofile) + + ofile = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+encid+"/interpret_upload/average_preds/"+encid+"_folds_merged.profile_scores_new_compressed.stats" + if os.path.isfile(ofile): + profile_bw = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+encid+"/interpret_upload/average_preds/"+encid+"_folds_merged.profile_scores_new_compressed.bw" + else: + profile_bw 
= None + print(ofile) + continue + + assert(os.path.isfile(profile_bw)==True) + assert(os.path.isfile(profile_bw)==True) + + model_paths = model_atac[model_atac[1]==encid.replace("_new","")][2].values + print(model_paths) + args_json = {} + args_json["experiment"] = encode_id[encid] + + + success, args_json = fetch_profile_tar(encode_id[encid], args_json, model_paths, encid) + if not success: + print("ERR profile tar") + continue + + if not os.path.isfile(odir+encode_id[encid]+".json"): + f = open(odir+encode_id[encid]+".json", "w") + json.dump(args_json, f, indent=4) + f.close() diff --git a/upload_jsons/upload_jsons_scripts/profile_contrib_upload/atac_tar_k5_and_hep.py b/upload_jsons/upload_jsons_scripts/profile_contrib_upload/atac_tar_k5_and_hep.py new file mode 100644 index 00000000..326ac362 --- /dev/null +++ b/upload_jsons/upload_jsons_scripts/profile_contrib_upload/atac_tar_k5_and_hep.py @@ -0,0 +1,193 @@ +import os +import json +import pandas as pd + +#encids = ["K562", "HEPG2", "IMR90_new", "H1ESC_new", "GM12878_new"] +encids = ["K562", "HEPG2"] + +encode_id = {"K562": "ENCSR868FGK", +"GM12878": "ENCSR637XSC", +"HEPG2": "ENCSR291GJU", +"IMR90": "ENCSR200OML", +"H1ESC": "GSE267154"} +odir='atac/' + +model_atac = pd.read_csv("/mnt/lab_data2/anusri/chrombpnet/logs/checkpoint/JAN_02_2023/model_dir_atac.csv",sep=",", header=None) + +def fetch_per_fold_profile(odir,model_path, encid, i, name): + + model_path_orig=model_path + model_path="/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+name+"/"+model_path.split("/")[-1] + data_paths = [] + log_paths = [] + log_paths_opt = [] + + odir="/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+name+"/interpret_upload/fold_"+str(i)+"/" + input_h5 = os.path.join(odir, name+"_profile_attribs_reformatted.h5") + data_paths.append((input_h5, "seq_contrib.profile.fold_"+str(i)+"."+encid+".h5")) + + 
#model_path="/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+name+"/ATAC_SE_04.27.2024//chrombpnet_model" + + + model_path = model_path+"/chrombpnet_model" + + # all regs logs + + input_log=model_path_orig+"/interpret/merged."+name+".interpret.args.json" + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.seq_contrib.profile.atacs_regs.fold_"+str(i)+"."+encid+".args.json")) + else: + print(input_log) + + input_log=model_path_orig+"/interpret/merged."+name+".interpet.log" + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.seq_contrib.profile.atacs_regs.fold_"+str(i)+"."+encid+".log")) + else: + print(input_log) + + # atac regs logs + + + input_log=model_path+"/interpret/full_"+name+".interpret.args.json" + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.seq_contrib.profile.atacs_regs.fold_"+str(i)+"."+encid+".args.json")) + else: + print(input_log) + + input_log=model_path+"/interpret/full_"+name+".interpet.log" + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.seq_contrib.profile.atacs_regs.fold_"+str(i)+"."+encid+".log")) + else: + print(input_log) + + input_log=model_path+"/interpret/full.profile.interpret.log.e" + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.seq_contrib.profile.atacs_regs.fold_"+str(i)+"."+encid+".stderr.txt")) + else: + print(input_log) + + input_log=model_path+"/interpret/full.profile.interpret.log.o" + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.seq_contrib.profile.atacs_regs.fold_"+str(i)+"."+encid+".stdout.txt")) + else: + print(input_log) + + + return data_paths, log_paths, log_paths_opt + +def fetch_profile_tar(encid, args_json, model_paths, name): + success = False + args_json["profile sequence contribution scores tar"] = {} + readme_file = "READMES/profile.deepshap.README" + assert(os.path.isfile(readme_file)) + args_json["profile sequence contribution scores tar"]["file.paths"] = 
[(readme_file, "README.md")] + args_json["profile sequence contribution scores tar"]["logs.seq_contrib.profile."+encid] = {"file.paths": []} + + ## full h5 path + + odir="/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+name+"/interpret_upload/average_preds/" + + input_h5 = os.path.join(odir, name+"_profile_attribs_reformatted.h5") + if os.path.isfile(input_h5): + args_json["profile sequence contribution scores tar"]["file.paths"].append((input_h5,"seq_contrib.profile.fold_mean."+encid+".h5")) + else: + print(input_h5) + success = False + return success, args_json + + ## modisoc h5 path + + modisco_input = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+name+"/merge_folds_new_may_05_24/in_peaks.profile_scores_new_compressed.h5" + if os.path.isfile(modisco_input): + args_json["profile sequence contribution scores tar"]["file.paths"].append((modisco_input,"seq_contrib.profile.fold_mean.modisco_input."+encid+".h5")) + else: + print(modisco_input) + success = False + return success, args_json + + # log files + + + input_file=model_paths[1]+"/chrombpnet_model/interpret/full_"+name+".interpreted_regions_profile.bed" + newf="/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+name+"/interpret_upload/average_preds/per_folds.inputs.bed.gz" + input_bed = pd.read_csv(input_file, sep='\t', header=None) + if os.path.isfile(input_file): + if not os.path.isfile(newf): + input_bed.to_csv(newf, sep='\t', header=False, index=False, compression='gzip') + args_json["profile sequence contribution scores tar"]["logs.seq_contrib.profile."+encid]["file.paths"].append((newf,"logs.seq_contrib.profile.input_regions.per_fold."+encid+".bed.gz")) + + + input_file="/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+name+"/merge_folds_new_may_05_24/in_peaks.profile.interpreted_regions.bed" + 
newf="/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+name+"/interpret_upload/average_preds/modisco.inputs.bed.gz" + input_bed = pd.read_csv(input_file, sep='\t', header=None) + if os.path.isfile(input_file): + if not os.path.isfile(newf): + input_bed.to_csv(newf, sep='\t', header=False, index=False, compression='gzip') + args_json["profile sequence contribution scores tar"]["logs.seq_contrib.profile."+encid]["file.paths"].append((newf,"logs.seq_contrib.profile.input_regions."+encid+".bed.gz")) + + odir="/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+name+"/interpret_upload/average_preds/" + + input_log = os.path.join(odir, "reformat.log.e") + if os.path.isfile(input_log): + args_json["profile sequence contribution scores tar"]["logs.seq_contrib.profile."+encid]["file.paths"].append((input_log, "logs.seq_contrib.profile.fold_mean.reformat"+encid+".stderr.txt")) + + input_log = os.path.join(odir, "reformat.log.e") + if os.path.isfile(input_log): + args_json["profile sequence contribution scores tar"]["logs.seq_contrib.profile."+encid]["file.paths"].append((input_log, "logs.seq_contrib.profile.fold_mean.reformat"+encid+".stdout.txt")) + + assert(len(args_json["profile sequence contribution scores tar"]["logs.seq_contrib.profile."+encid]["file.paths"])==4) + + for i in range(5): + data_paths, log_paths, log_paths_opt = fetch_per_fold_profile(odir,model_paths[i], encid, i, name) + + if data_paths is None: + success = False + return success, args_json + + args_json["profile sequence contribution scores tar"]["fold_"+str(i)] = {} + args_json["profile sequence contribution scores tar"]["fold_"+str(i)]["file.paths"] = data_paths + args_json["profile sequence contribution scores tar"]["fold_"+str(i)]["logs.seq_contrib.profile.fold_"+str(i)+"."+encid] = {"file.paths": log_paths+log_paths_opt} + assert(len(data_paths) == 1) + print(len(log_paths)) + assert(len(log_paths) >= 1) + + success=True + return 
success, args_json + +for encid in encids: + print(encid) + + + ofile = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+encid+"/interpret_upload/average_preds/"+encid+"_folds_merged.profile_scores.stats" + if os.path.isfile(ofile): + profile_bw = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+encid+"/interpret_upload/average_preds/"+encid+"_folds_merged.profile_scores.bw" + else: + profile_bw = None + print(ofile) + + ofile = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+encid+"/interpret_upload/average_preds/"+encid+"_folds_merged.profile_scores.stats" + if os.path.isfile(ofile): + profile_bw = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/ATAC/"+encid+"/interpret_upload/average_preds/"+encid+"_folds_merged.profile_scores.bw" + else: + profile_bw = None + print(ofile) + continue + + assert(os.path.isfile(profile_bw)==True) + assert(os.path.isfile(profile_bw)==True) + + model_paths = model_atac[model_atac[1]==encid.replace("_new","")][2].values + print(model_paths) + args_json = {} + args_json["experiment"] = encode_id[encid] + + + success, args_json = fetch_profile_tar(encode_id[encid], args_json, model_paths, encid) + if not success: + print("ERR profile tar") + continue + + if not os.path.isfile(odir+encode_id[encid]+".json"): + f = open(odir+encode_id[encid]+".json", "w") + json.dump(args_json, f, indent=4) + f.close() diff --git a/upload_jsons/upload_jsons_scripts/profile_contrib_upload/dnase_tar.py b/upload_jsons/upload_jsons_scripts/profile_contrib_upload/dnase_tar.py new file mode 100644 index 00000000..65c84a25 --- /dev/null +++ b/upload_jsons/upload_jsons_scripts/profile_contrib_upload/dnase_tar.py @@ -0,0 +1,221 @@ +import os +import json +import pandas as pd + +#encids = ["K562", "HEPG2", "IMR90_new", "H1ESC_new", "GM12878_new"] +encids = ["IMR90_new", "H1ESC_new", "GM12878_new"] + +encode_id = {"HEPG2": 
"ENCSR149XIL", + "K562": "ENCSR000EOT", + "IMR90_new": "ENCSR477RTP", + "GM12878_new": "ENCSR000EMT", + "H1ESC_new": "ENCSR000EMU"} +odir='dnase/' + +model_atac = pd.read_csv("/mnt/lab_data2/anusri/chrombpnet/logs/checkpoint/JAN_02_2023/v1/model_dir_dnase_v2_interpret.csv",sep=",", header=None) + +def fetch_per_fold_profile(odir,model_path, encid, i, name): + + data_paths = [] + log_paths = [] + log_paths_opt = [] + + odir="/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/DNASE/"+name+"/interpret_upload/fold_"+str(i)+"/" + input_h5 = os.path.join(odir, name+"_profile_attribs_reformatted.h5") + data_paths.append((input_h5, "seq_contrib.profile.fold_"+str(i)+"."+encid+".h5")) + + #model_path="/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/DNASE/"+name+"/DNASE_SE_04.27.2024//chrombpnet_model" + + # dnase regions logs + + model_path=model_path+"/chrombpnet_model" + input_log=model_path+"/interpret_orig/full_"+name+".interpret.args.json" + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.seq_contrib.profile.dnase_regions.fold_"+str(i)+"."+encid+".args.json")) + else: + print(input_log) + input_log=model_path+"/interpret_orig/full_"+name+".interpet.log" + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.seq_contrib.profile.dnase_regions.fold_"+str(i)+"."+encid+".log")) + else: + print(input_log) + + input_log=model_path+"/interpret_orig/ATAC_peaks_full.profile.interpret.log.e" + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.seq_contrib.profile.dnase_regions.fold_"+str(i)+"."+encid+".stderr.txt")) + else: + print(input_log) + + input_log=model_path+"/interpret_orig/ATAC_peaks_full.profile.interpret.log.o" + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.seq_contrib.profile.dnase_regions.fold_"+str(i)+"."+encid+".stdout.txt")) + else: + print(input_log) + + # atac regions logs + + 
input_log=model_path+"/interpret/full_"+name+".interpret.args.json" + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.seq_contrib.profile.atac_regions.fold_"+str(i)+"."+encid+".args.json")) + else: + print(input_log) + + input_log=model_path+"/interpret/full_"+name+".interpet.log" + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.seq_contrib.profile.atac_regions.fold_"+str(i)+"."+encid+".log")) + else: + print(input_log) + + input_log=model_path+"/interpret/ATAC_peaks_full.profile.interpret.log.e" + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.seq_contrib.profile.atac_regions.fold_"+str(i)+"."+encid+".stderr.txt")) + else: + print(input_log) + + input_log=model_path+"/interpret/ATAC_peaks_full.profile.interpret.log.o" + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.seq_contrib.profile.atac_regions.fold_"+str(i)+"."+encid+".stdout.txt")) + else: + print(input_log) + + # ccre regions logs + + input_log=model_path+"/interpret_ccre/full_"+name+".interpret.args.json" + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.seq_contrib.profile.ccre_regions.fold_"+str(i)+"."+encid+".args.json")) + else: + print(input_log) + + input_log=model_path+"/interpret_ccre/full_"+name+".interpet.log" + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.seq_contrib.profile.ccre_regions.fold_"+str(i)+"."+encid+".log")) + else: + print(input_log) + + input_log=model_path+"/interpret_ccre/ATAC_peaks_full.profile.interpret.log.e" + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.seq_contrib.profile.ccre_regions.fold_"+str(i)+"."+encid+".stderr.txt")) + else: + print(input_log) + input_log=model_path+"/interpret_ccre/ATAC_peaks_full.profile.interpret.log.o" + if os.path.isfile(input_log): + log_paths.append((input_log, "logs.seq_contrib.profile.ccre_regions.fold_"+str(i)+"."+encid+".stdout.txt")) + else: + print(input_log) + + return data_paths, log_paths, 
log_paths_opt + +def fetch_profile_tar(encid, args_json, model_paths, name): + success = False + args_json["profile sequence contribution scores tar"] = {} + readme_file = "READMES/profile.deepshap.README" + assert(os.path.isfile(readme_file)) + args_json["profile sequence contribution scores tar"]["file.paths"] = [(readme_file, "README.md")] + args_json["profile sequence contribution scores tar"]["logs.seq_contrib.profile."+encid] = {"file.paths": []} + + ## full h5 path + + odir="/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/DNASE/"+name+"/interpret_upload/average_preds/" + + input_h5 = os.path.join(odir, name+"_profile_attribs_reformatted.h5") + if os.path.isfile(input_h5): + args_json["profile sequence contribution scores tar"]["file.paths"].append((input_h5,"seq_contrib.profile.fold_mean."+encid+".h5")) + else: + success = False + return success, args_json + + ## modisoc h5 path + + modisco_input = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/DNASE/"+name+"/merge_folds_new_may_05_24/in_peaks.profile_scores_new_compressed.h5" + if os.path.isfile(modisco_input): + args_json["profile sequence contribution scores tar"]["file.paths"].append((modisco_input,"seq_contrib.profile.fold_mean.modisco_input."+encid+".h5")) + else: + success = False + return success, args_json + + # log files + + + input_file=model_paths[0]+"/chrombpnet_model/interpret_all_with_ccre/full_"+name+".interpreted_regions_profile.bed" + newf="/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/DNASE/"+name+"/interpret_upload/average_preds/per_folds.inputs.bed.gz" + input_bed = pd.read_csv(input_file, compression='gzip', sep='\t', header=None) + if os.path.isfile(input_file): + if not os.path.isfile(newf): + input_bed.to_csv(newf, sep='\t', header=False, index=False, compression='gzip') + args_json["profile sequence contribution scores 
tar"]["logs.seq_contrib.profile."+encid]["file.paths"].append((newf,"logs.seq_contrib.profile.input_regions.per_fold."+encid+".bed.gz")) + + + input_file="/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/DNASE/"+name+"/merge_folds_new_may_05_24/in_peaks.profile_scores_new_compressed.bed" + newf="/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/DNASE/"+name+"/interpret_upload/average_preds/modisco.inputs.bed.gz" + input_bed = pd.read_csv(input_file, compression='gzip', sep='\t', header=None) + if os.path.isfile(input_file): + if not os.path.isfile(newf): + input_bed.to_csv(newf, sep='\t', header=False, index=False, compression='gzip') + args_json["profile sequence contribution scores tar"]["logs.seq_contrib.profile."+encid]["file.paths"].append((newf,"logs.seq_contrib.profile.input_regions."+encid+".bed.gz")) + + odir="/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/DNASE/"+name+"/interpret_upload/average_preds/" + + input_log = os.path.join(odir, "reformat.log.e") + if os.path.isfile(input_log): + args_json["profile sequence contribution scores tar"]["logs.seq_contrib.profile."+encid]["file.paths"].append((input_log, "logs.seq_contrib.profile.fold_mean.reformat"+encid+".stderr.txt")) + + input_log = os.path.join(odir, "reformat.log.e") + if os.path.isfile(input_log): + args_json["profile sequence contribution scores tar"]["logs.seq_contrib.profile."+encid]["file.paths"].append((input_log, "logs.seq_contrib.profile.fold_mean.reformat"+encid+".stdout.txt")) + + assert(len(args_json["profile sequence contribution scores tar"]["logs.seq_contrib.profile."+encid]["file.paths"])==4) + + for i in range(5): + data_paths, log_paths, log_paths_opt = fetch_per_fold_profile(odir,model_paths[i], encid, i, name) + + if data_paths is None: + success = False + return success, args_json + + args_json["profile sequence contribution scores tar"]["fold_"+str(i)] = {} + args_json["profile sequence 
contribution scores tar"]["fold_"+str(i)]["file.paths"] = data_paths + args_json["profile sequence contribution scores tar"]["fold_"+str(i)]["logs.seq_contrib.profile.fold_"+str(i)+"."+encid] = {"file.paths": log_paths+log_paths_opt} + assert(len(data_paths) == 1) + print(len(log_paths)) + assert(len(log_paths) == 12) + + success=True + return success, args_json + +for encid in encids: + print(encid) + + + ofile = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/DNASE/"+encid+"/interpret_upload/average_preds/"+encid+"_folds_merged.profile_scores_new_compressed.stats" + if os.path.isfile(ofile): + profile_bw = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/DNASE/"+encid+"/interpret_upload/average_preds/"+encid+"_folds_merged.profile_scores_new_compressed.bw" + else: + profile_bw = None + print(ofile) + + ofile = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/DNASE/"+encid+"/interpret_upload/average_preds/"+encid+"_folds_merged.profile_scores_new_compressed.stats" + if os.path.isfile(ofile): + profile_bw = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/DNASE/"+encid+"/interpret_upload/average_preds/"+encid+"_folds_merged.profile_scores_new_compressed.bw" + else: + profile_bw = None + print(ofile) + continue + + assert(os.path.isfile(profile_bw)==True) + assert(os.path.isfile(profile_bw)==True) + + model_paths = model_atac[model_atac[1]==encid.replace("_new","")][2].values + print(model_paths) + args_json = {} + args_json["experiment"] = encode_id[encid] + + + success, args_json = fetch_profile_tar(encode_id[encid], args_json, model_paths, encid) + if not success: + print("ERR profile tar") + continue + + if not os.path.isfile(odir+encode_id[encid]+".json"): + f = open(odir+encode_id[encid]+".json", "w") + json.dump(args_json, f, indent=4) + f.close() diff --git a/upload_jsons/upload_jsons_scripts/profile_contrib_upload/dnase_tar_k5_and_hep.py 
# --- dnase_tar_k5_and_hep.py: upload JSONs for DNASE profile contribution
# tars, K562/HEPG2 variant (different model-dir layout and log locations). ---
import os
import json
import pandas as pd

#encids = ["K562", "HEPG2", "IMR90_new", "H1ESC_new", "GM12878_new"]
encids = ["K562", "HEPG2"]

encode_id = {"HEPG2": "ENCSR149XIL",
             "K562": "ENCSR000EOT",
             "IMR90_new": "ENCSR477RTP",
             "GM12878_new": "ENCSR000EMT",
             "H1ESC_new": "ENCSR000EMU"}
odir = 'dnase/'

# Root of the per-cell-type DNASE fold outputs on oak.
OAK_BASE = "/oak/stanford/groups/akundaje/projects/chromatin-atlas-2022/chrombpnet/folds/DNASE/"


def _append_if_exists(paths, src, dst):
    """Append (src, upload_name) to paths when src exists; else print the missing path."""
    if os.path.isfile(src):
        paths.append((src, dst))
    else:
        print(src)


def fetch_per_fold_profile(odir, model_path, encid, i, name):
    """Collect one fold's profile contribution h5 and its interpret logs.

    `odir` is unused (kept for signature compatibility). `model_path` is the
    fold's original model directory; a re-rooted copy under OAK_BASE (same
    leaf directory name) is used for the ccre / full_* logs, while the
    merged.* logs are read from the original location.

    Returns (data_paths, log_paths, log_paths_opt) lists of (src, upload_name).
    """
    model_path_orig = model_path
    # Re-root the fold's model directory under the oak DNASE tree.
    model_path = OAK_BASE + name + "/" + model_path.split("/")[-1]
    data_paths = []
    log_paths = []
    log_paths_opt = []

    fold_dir = OAK_BASE + name + "/interpret_upload/fold_" + str(i) + "/"
    input_h5 = os.path.join(fold_dir, name + "_profile_attribs_reformatted.h5")
    # The h5 is recorded unconditionally; its existence is not checked here.
    data_paths.append((input_h5, "seq_contrib.profile.fold_" + str(i) + "." + encid + ".h5"))

    model_path = model_path + "/chrombpnet_model"
    ccre_tag = "logs.seq_contrib.profile.ccre_regions.fold_" + str(i) + "." + encid
    all_tag = "logs.seq_contrib.profile.all_regions.fold_" + str(i) + "." + encid

    # ccre regions logs
    _append_if_exists(log_paths, model_path + "/interpret_ccre/full_" + name + ".interpret.args.json",
                      ccre_tag + ".args.json")
    # NOTE(review): "interpet" is a typo preserved from the on-disk log names — confirm.
    _append_if_exists(log_paths, model_path + "/interpret_ccre/full_" + name + ".interpet.log",
                      ccre_tag + ".log")
    _append_if_exists(log_paths, model_path + "/interpret_ccre/ATAC_peaks_full.profile.interpret.log.e",
                      ccre_tag + ".stderr.txt")
    # BUGFIX: the original uploaded this ccre stdout under an "all_regions"
    # name; its three siblings above all use "ccre_regions".
    _append_if_exists(log_paths, model_path + "/interpret_ccre/ATAC_peaks_full.profile.interpret.log.o",
                      ccre_tag + ".stdout.txt")

    # all regions logs (merged.* from the original model dir)
    _append_if_exists(log_paths, model_path_orig + "/interpret/merged." + name + ".interpret.args.json",
                      all_tag + ".args.json")
    _append_if_exists(log_paths, model_path_orig + "/interpret/merged." + name + ".interpet.log",
                      all_tag + ".log")

    # NOTE(review): these two map to the SAME upload names as the merged.*
    # logs above; presumably only one set exists per fold — confirm.
    _append_if_exists(log_paths, model_path + "/interpret/full_" + name + ".interpret.args.json",
                      all_tag + ".args.json")
    _append_if_exists(log_paths, model_path + "/interpret/full_" + name + ".interpet.log",
                      all_tag + ".log")
    _append_if_exists(log_paths, model_path + "/interpret/full.profile.interpret.log.e",
                      all_tag + ".stderr.txt")
    _append_if_exists(log_paths, model_path + "/interpret/full.profile.interpret.log.o",
                      all_tag + ".stdout.txt")

    return data_paths, log_paths, log_paths_opt


def fetch_profile_tar(encid, args_json, model_paths, name):
    """Populate args_json with the profile contribution-score tar manifest.

    Requires the fold-mean h5 and the modisco-input h5 to exist; otherwise
    returns (False, args_json) early.

    Returns
    -------
    (success, args_json) : (bool, dict)
    """
    tar_key = "profile sequence contribution scores tar"
    log_key = "logs.seq_contrib.profile." + encid
    args_json[tar_key] = {}
    readme_file = "READMES/profile.deepshap.README"
    assert os.path.isfile(readme_file)
    args_json[tar_key]["file.paths"] = [(readme_file, "README.md")]
    args_json[tar_key][log_key] = {"file.paths": []}

    avg_dir = OAK_BASE + name + "/interpret_upload/average_preds/"

    # Fold-mean contribution h5 (required).
    input_h5 = os.path.join(avg_dir, name + "_profile_attribs_reformatted.h5")
    if not os.path.isfile(input_h5):
        print(input_h5)
        return False, args_json
    args_json[tar_key]["file.paths"].append(
        (input_h5, "seq_contrib.profile.fold_mean." + encid + ".h5"))

    # Modisco-input h5 (required).
    modisco_input = OAK_BASE + name + "/merge_folds_new_may_05_24/in_peaks.profile_scores_new_compressed.h5"
    if not os.path.isfile(modisco_input):
        print(modisco_input)
        return False, args_json
    args_json[tar_key]["file.paths"].append(
        (modisco_input, "seq_contrib.profile.fold_mean.modisco_input." + encid + ".h5"))

    # Per-fold input-regions bed, re-compressed for upload.
    # BUGFIX: the original called pd.read_csv BEFORE checking the file exists,
    # crashing whenever the bed was missing; it also re-read the bed even when
    # the gzipped copy was already present.
    input_file = model_paths[1] + "/chrombpnet_model/interpret_all_with_ccre/full_" + name + ".interpreted_regions_profile.bed"
    newf = avg_dir + "per_folds.inputs.bed.gz"
    if os.path.isfile(input_file):
        if not os.path.isfile(newf):
            input_bed = pd.read_csv(input_file, compression='gzip', sep='\t', header=None)
            input_bed.to_csv(newf, sep='\t', header=False, index=False, compression='gzip')
        args_json[tar_key][log_key]["file.paths"].append(
            (newf, "logs.seq_contrib.profile.input_regions.per_fold." + encid + ".bed.gz"))

    # Modisco input-regions bed (plain bed in, gzip out; same fix as above).
    input_file = OAK_BASE + name + "/merge_folds_new_may_05_24/in_peaks.profile.interpreted_regions.bed"
    newf = avg_dir + "modisco.inputs.bed.gz"
    if os.path.isfile(input_file):
        if not os.path.isfile(newf):
            input_bed = pd.read_csv(input_file, sep='\t', header=None)
            input_bed.to_csv(newf, sep='\t', header=False, index=False, compression='gzip')
        args_json[tar_key][log_key]["file.paths"].append(
            (newf, "logs.seq_contrib.profile.input_regions." + encid + ".bed.gz"))

    # Reformatting logs for the fold-mean h5.
    # NOTE(review): upload name lacks a "." between "reformat" and encid in the
    # original; preserved to keep upload names stable — confirm intent.
    input_log = os.path.join(avg_dir, "reformat.log.e")
    if os.path.isfile(input_log):
        args_json[tar_key][log_key]["file.paths"].append(
            (input_log, "logs.seq_contrib.profile.fold_mean.reformat" + encid + ".stderr.txt"))

    # BUGFIX: the original re-used reformat.log.e for the stdout entry too,
    # uploading the stderr file under both names.
    input_log = os.path.join(avg_dir, "reformat.log.o")
    if os.path.isfile(input_log):
        args_json[tar_key][log_key]["file.paths"].append(
            (input_log, "logs.seq_contrib.profile.fold_mean.reformat" + encid + ".stdout.txt"))

    # Both beds and both reformat logs must have been recorded.
    assert len(args_json[tar_key][log_key]["file.paths"]) == 4

    for i in range(5):
        data_paths, log_paths, log_paths_opt = fetch_per_fold_profile(
            avg_dir, model_paths[i], encid, i, name)

        if data_paths is None:
            return False, args_json

        fold_key = "fold_" + str(i)
        args_json[tar_key][fold_key] = {"file.paths": data_paths}
        args_json[tar_key][fold_key]["logs.seq_contrib.profile.fold_" + str(i) + "." + encid] = {
            "file.paths": log_paths + log_paths_opt}
        assert len(data_paths) == 1
        print(len(log_paths))
        # Looser bound than the sibling script: log locations vary per fold here.
        assert len(log_paths) >= 5

    return True, args_json


def main():
    """Emit one upload JSON per cell type whose merged contribution bigwig exists."""
    model_atac = pd.read_csv(
        "/mnt/lab_data2/anusri/chrombpnet/logs/checkpoint/JAN_02_2023/model_dir_dnase.csv",
        sep=",", header=None)

    for encid in encids:
        print(encid)

        prefix = OAK_BASE + encid + "/interpret_upload/average_preds/" + encid + "_folds_merged.profile_scores"
        stats_file = prefix + ".stats"
        # BUGFIX: when the stats file was missing the original set
        # profile_bw=None and then asserted os.path.isfile(None) → TypeError.
        # Skip the cell type instead (matches the sibling dnase_tar.py).
        if not os.path.isfile(stats_file):
            print(stats_file)
            continue
        profile_bw = prefix + ".bw"
        assert os.path.isfile(profile_bw)

        # Column 1 holds the cell-type name; column 2 the per-fold model dirs.
        model_paths = model_atac[model_atac[1] == encid.replace("_new", "")][2].values
        print(model_paths)
        args_json = {"experiment": encode_id[encid]}

        success, args_json = fetch_profile_tar(encode_id[encid], args_json, model_paths, encid)
        if not success:
            print("ERR profile tar")
            continue

        out_path = odir + encode_id[encid] + ".json"
        if not os.path.isfile(out_path):
            with open(out_path, "w") as f:
                json.dump(args_json, f, indent=4)


if __name__ == "__main__":
    main()