Merge pull request #1 from moka-guys/v1.0.0

V1.0.0
moka-guys · Jan 5, 2022 · b4d1f2b · b4d1f2b
2 parents 296a499 + 2f74afa
commit b4d1f2b
Show file tree

Hide file tree

Showing 5 changed files with 253 additions and 1 deletion.
diff --git a/README.md b/README.md
@@ -1,2 +1,47 @@
-# dnanexus_ED_cnv_calling
+# dnanexus_ED_cnv_analysis_v1.0.0
 Performs CNV calling using ExomeDepth.
+
+ExomeDepth is run in two stages: first read counts are calculated, then CNVs are called using those read counts. Read counts are calculated over the entire genome, whereas the CNV calling can be performed using a subpanel.
+
+# What does the app do?
+This app runs the CNV calling stage.
+
+# How does the app work?
+
+A Docker image containing ExomeDepth is downloaded from 001. In this release, [#1220d31](https://github.com/moka-guys/seglh-cnv/tree/1220d31e2eed1d4488eb461e70615a0fad8b5eb1) is used.
+
+The app takes in a readcount file generated by https://github.com/moka-guys/dnanexus\_ED\_readcount_analysis. The Docker image picks the most appropriate reference samples and runs ExomeDepth for a given sample.
+
+
+For further details on the usage of the docker image please refer to https://github.com/moka-guys/seglh-cnv/tree/main/exomedepth
+
+# Input
+- DNAnexus project name where the BAM and the indexes are stored (BAMs/BAIs MUST be present in the /output folder)
+- Readcount file generated using https://github.com/moka-guys/dnanexus\_ED\_readcount_analysis
+- QC file containing QC parameters (Optional) - generated using https://github.com/moka-guys/dnanexus\_ED\_readcount_analysis
+- Comma-separated list of Pan numbers (e.g. Pan4127,Pan4129,Pan4130,Pan4049)
+- Test specific BED file
+See CLI command below for an example of inputs.
+
+# Output
+- output.pdf - Exomedepth CNV report with all QC information
+- output.tex - Intermediate file used to create PDF
+- tables-1.pdf, tables-2.pdf etc - Plots for inclusion in the generated reports
+- output.bed - CNVs in BED format (whole panel)
+- output.RData 
+
+# Running from the CLI:
+
+The app can be run from the dx CLI.  The example below shows the code used to run test samples through this app:
+
+```bash
+dx run project-G0pKxX80pgqFk9Vy8p6vQbKv:applet-G7B5Zxj0pgq9Q8JfP0jpY3y4 -iproject_name=003_220103_exomeDepth_calling_test -ireadcount_file=project-G6jb1k807Xjj1J984K6kfP13:file-G6kg5q80gvvz37qZ4ZPbvZ8Q -ibamfile_pannumbers=Pan4127,Pan4129,Pan4130,Pan4049 -isubpanel_bed=project-ByfFPz00jy1fk6PjpZ95F27J:file-G6kZpqQ0jy1q1Zk94G3qbVyV -iQC_file=project-G6jb1k807Xjj1J984K6kfP13:file-G66X1Z80p6PG52GFK4zfpY7y
+
+```
+# Debugging
+
+For debugging issues with the `docker` image it can be helpful to `ssh` into a 'held for debugging' job in DNAnexus and run the `docker` image in interactive mode:
+
+```bash
+docker run -v /home/dnanexus:/home/dnanexus/ --rm -it --entrypoint /bin/bash seglh/exomedepth:1220d31
+```
diff --git a/dxapp.json b/dxapp.json
@@ -0,0 +1,80 @@
+{
+  "name": "ED_cnv_calling_v1.0.0",
+  "title": "ED_cnv_calling_v1.0.0",
+  "summary": "v1.0.0 - Performs CNV calling using ExomeDepth",
+  "dxapi": "1.0.0",
+  "inputSpec": [
+    {
+      "name": "project_name",
+      "label": "project_name",
+      "help": "The project containing the bamfiles.",
+      "class": "string"
+    },
+    {
+      "name": "readcount_file",
+      "label": "readcount.Rdata",
+      "help": "readcount file",
+      "class": "file",
+      "patterns": ["*.RData"],
+      "optional": false
+    },
+    {
+      "name": "QC_file",
+      "label": "QC_file",
+      "help": "QC metrics specific for each panel",
+      "class": "file",
+      "patterns": ["*.RData"],
+      "optional": true
+    },
+    {
+      "name": "bamfile_pannumbers",
+      "label": "bamfile_pannumbers",
+      "help": "comma separated string on pan numbers found within the BAM file name",
+      "class": "string"
+    },
+    {
+      "name": "subpanel_bed",
+      "label": "Exomedepth BED file",
+      "help": "BED file used to call variants",
+      "class": "file",
+      "patterns": ["*.bed"],
+      "optional": false
+    }
+  ],
+  "outputSpec": [
+    {
+      "name": "exomedepth_output",
+      "label": "exomedepth output",
+      "help": "PDF output from ExomeDepth.",
+      "class": "array:file"
+    }
+  ],
+  "runSpec": {
+    "interpreter": "bash",
+    "timeoutPolicy": {
+      "*": {
+        "hours": 48
+      }
+    },
+    "distribution": "Ubuntu",
+    "release": "20.04",
+    "version": "0",
+    "file": "src/code.sh"
+  },
+  "access": {
+    "network": [
+      "*"
+    ],
+    "allProjects": "VIEW"
+  },
+  "ignoreReuse": false,
+  "regionalOptions": {
+    "aws:us-east-1": {
+      "systemRequirements": {
+        "*": {
+          "instanceType": "mem1_ssd1_v2_x4"
+        }
+      }
+    }
+  }
+}
diff --git a/resources/usr/bin/mark-section b/resources/usr/bin/mark-section
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+echo '{"error": {"type": "AppError", "message": "Error while '"$@"'; please refer to the job log for more details."}}' > ~/job_error.json
diff --git a/resources/usr/bin/mark-success b/resources/usr/bin/mark-success
@@ -0,0 +1,3 @@
+#!/bin/bash
+# Remove the error record from the home directory; a missing file is not an error.
+rm --force -- ~/job_error.json
diff --git a/src/code.sh b/src/code.sh
@@ -0,0 +1,121 @@
+#!/bin/bash
+# exomedepth_cnv_analysis_v1.0.0
+#
+# Set-up stage: derives run-specific names from the app inputs (project_name,
+# subpanel_bed, readcount_file), reads the API key, creates the working and
+# output folders and downloads the app inputs.
+
+# Exit at any point if there is any error (including inside pipelines) and
+# output each line as it is executed -- useful for debugging
+set -e -x -o pipefail
+
+### Set up parameters
+# Extract the NGS run number(s) from the project name: the text starting at the
+# last "NGS" that follows an underscore. Left empty when the name has no "_NGS".
+run=$(printf '%s\n' "$project_name" | sed -n 's/^.*_\(NGS.*\)\.*/\1/p')
+
+# Get names of input files as strings
+subpanel_bed_name=$(dx describe --name "$subpanel_bed")
+readcount_file_name=$(dx describe --name "$readcount_file")
+
+# Pan number (PanNNNN) taken from the BED file name; the name is kept as-is
+# when it does not match the expected pattern.
+subpanel_bed_prefix=$(printf '%s\n' "$subpanel_bed_name" | sed -r 's/^[^0-9]*(Pan[0-9]+).*/\1/')
+
+# Location of the ExomeDepth docker file
+docker_file=project-ByfFPz00jy1fk6PjpZ95F27J:file-G6kfZYQ0jy1vZ0BF33KZpQjJ
+
+# Read the DNAnexus API key into a variable. Tracing is paused around the
+# assignment so that `set -x` does not print the key into the job log.
+{ set +x; } 2>/dev/null
+API_KEY=$(dx cat project-FQqXfYQ0Z0gqx7XG9Z2b4K43:mokaguys_nexus_auth_key)
+set -x
+
+# Make output dir
+mkdir -p /home/dnanexus/out/exomedepth_output/exomedepth_output/
+# Make folder to hold downloaded files (-p: do not fail if it already exists)
+mkdir -p to_test
+
+# Download inputs
+dx-download-all-inputs --parallel
+
+# cd to test dir
+cd to_test
+
+mark-section "determine run specific variables"
+echo "Run = " "$run"
+echo "sub_panel_BED = " "$subpanel_bed_name"
+echo "All Pan numbers to be assessed using this BED file = " "$bamfile_pannumbers"
+# echo "QC_file = " $QC_file_name
+
+mark-section "Check that there are bam files matching provided Pan numbers"
+
+# Create an array of all the Pan numbers from the bam files in the provided project
+readarray -t pans_from_bams < <(dx find data --name "*.bam" --project "$project_name" --folder /output --auth "$API_KEY" |  sed -n 's/^.*_\(Pan[0-9]*\)\_.*/\1/p' | sort | uniq)
+
+mark-section "download bams files and indexes"
+# $bamfile_pannumbers is a comma seperated list of pannumbers that should be analysed together.
+# split this into an array and loop through to download BAM and BAI files
+IFS=',' read -ra pannum_array <<<  $bamfile_pannumbers
+for panel in "${pannum_array[@]}"; do
+
+if [[ " ${pans_from_bams[*]} " =~ " ${panel} " ]]; then
+    # If requested pan number has matching bam files
+	dx download "$project_name":output/*"$panel"*001.ba* --auth "$API_KEY"
+else
+    echo "WARNING: No bam/bai files found for ${panel}"
+fi
+done
+
+#Get list of all BAMs 
+bam_list=(/home/dnanexus/to_test/*bam)
+echo "bam list = " "${bam_list[@]}"
+
+#count the files. Make sure there are at least 3 samples for this pan number as this is a requirement of the dockerised R script, else stop
+bamfilecount=$(find . -maxdepth 1 -name "*001.bam"  | wc -l)
+if (( bamfilecount < 3 )); then
+	echo "LESS THAN THREE BAM FILES FOUND FOR THIS ANALYSIS" 1>&2
+	exit 1
+fi
+
+# Ensure that every bam file has a bai file
+baifilecount=$(find . -maxdepth 1 -name "*001.bai"  | wc -l)
+if (( baifilecount < bamfilecount )); then
+	echo "ONE OR MORE BAM FILE IS MISSING A BAI INDEX FILE" 1>&2
+	exit 1
+fi
+
+# cd out of to_test
+cd ..
+
+mark-section "setting up Exomedepth docker image"
+# download the docker file from 001_Tools...
+dx download $docker_file --auth "${API_KEY}"
+docker load -i '/home/dnanexus/seglh_exomedepth_1220d31.tgz'
+
+mark-section "Run CNV analysis using docker image"
+
+
+# docker run - mount the home directory as a share
+# Write log direct into output folder
+# Get read count for all samples
+
+for bam in /home/dnanexus/to_test/*bam
+do
+samplename=$(basename "$bam" _R1_001.bam) 
+echo "samplename:" "$samplename"
+echo "bam:" "$bam"
+echo "subpanel:" "$subpanel_bed_name" 
+
+# Handle optional argument relating to QC file
+if [ -z "$QC_file" ]; then
+	echo "Optional QC file not provided by user"
+	bam_and_QC_command="$bam":"$samplename"
+	else
+	QC_file_path="/home/dnanexus/in/QC_file/*.RData"
+	bam_and_QC_command="$bam:$samplename $QC_file_path"
+fi
+
+#for each bam run exomedepth - the string in the format v1.0.0 will be concatenated to the ouput as the app version
+docker run -v /home/dnanexus:/home/dnanexus/ \
+	--rm  seglh/exomedepth:1220d31 \
+	exomeDepth.R \
+	'v1.0.0' \
+	/home/dnanexus/out/exomedepth_output/exomedepth_output/"$samplename"_output.pdf \
+	/home/dnanexus/in/subpanel_bed/"$subpanel_bed_name":"$subpanel_bed_prefix" \
+	/home/dnanexus/in/readcount_file/"$readcount_file_name" \
+	"$bam_and_QC_command"
+done
+
+# Upload results
+dx-upload-all-outputs
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		#!/bin/bash
		# Write ~/job_error.json containing an AppError message that names the section
		# of the job described by the arguments.
		# Arguments: one or more words describing the section (joined with spaces).

		mark_section() {
		    # "$*" joins all arguments into ONE string; splicing "$@" into the middle of
		    # a quoted string splits it into several words instead.
		    local section="$*"
		    # Escape backslashes and double quotes so the message stays valid JSON.
		    section=${section//\\/\\\\}
		    section=${section//\"/\\\"}
		    printf '%s\n' '{"error": {"type": "AppError", "message": "Error while '"$section"'; please refer to the job log for more details."}}' > ~/job_error.json
		}

		mark_section "$@"