From 1ddb55bb4227d9673d23271aced5e215c3e1c5e7 Mon Sep 17 00:00:00 2001
From: npklein
Date: Mon, 10 Oct 2016 17:14:18 +0200
Subject: [PATCH 1/4] ConvertPLtoGL use gzipped vcf instead of vcf

---
 .../protocols/MergeFastq.sh                   | 67 +++++++++++++++++++
 .../workflowKallisto.csv                      |  3 +
 .../BIOS_phasing/protocols/ConvertPLtoGL.sh   | 52 ++++++++++++++
 3 files changed, 122 insertions(+)
 create mode 100755 compute5/Public_RNA-seq_quantification/protocols/MergeFastq.sh
 create mode 100644 compute5/Public_RNA-seq_quantification/workflowKallisto.csv
 create mode 100755 molgenis-pipelines/compute5/BIOS_phasing/protocols/ConvertPLtoGL.sh

diff --git a/compute5/Public_RNA-seq_quantification/protocols/MergeFastq.sh b/compute5/Public_RNA-seq_quantification/protocols/MergeFastq.sh
new file mode 100755
index 00000000..e48f943a
--- /dev/null
+++ b/compute5/Public_RNA-seq_quantification/protocols/MergeFastq.sh
@@ -0,0 +1,67 @@
+#MOLGENIS nodes=1 ppn=1 mem=10gb walltime=01:00:00
+
+### variables to help adding to database (have to use weave)
+#string sampleName
+#string project
+###
+#list reads1FqGz,reads2FqGz
+
+
+#Return 0 if the array whose NAME is given as $1 contains the value $2, else return 1
+array_contains () {
+  local array="$1[@]"
+  local seeking=$2
+  local in=1
+  for element in "${!array-}"; do
+    if [[ "$element" == "$seeking" ]]; then
+      in=0
+      break
+    fi
+  done
+  return $in
+}
+
+
+echo "## "$(date)" Start $0"
+echo "ID (project-sampleName): ${project}-${sampleName}"
+
+#check modules
+module list
+
+for file in "${reads1FqGz[@]}" "${reads2FqGz[@]}"; do
+  echo "getFile file='$file'"
+  getFile "$file"
+done
+
+
+#Create arrays with the unique input fastq files to merge
+#This check needs to be performed because Compute generates duplicate values in array
+INPUTFQ1=()
+INPUTFQ2=()
+
+echo "merging"
+for fq in "${reads1FqGz[@]}"
+do
+  echo "$fq"
+  array_contains INPUTFQ1 "$fq" || INPUTFQ1+=("$fq") # If fqFile does not exist in array add it
+done
+echo "done"
+for fq in "${reads2FqGz[@]}"
+do
+  echo "$fq"
+  array_contains INPUTFQ2 "$fq" || INPUTFQ2+=("$fq") # If fqFile does not exist in array add it
+done
+outDir=$(dirname "${reads1FqGz[0]}") # merged files are written next to the first R1 input file
+echo "writing to ${outDir}/${sampleName}_R2.fq.gz"
+
+if cat "${INPUTFQ1[@]}" > "${outDir}/${sampleName}_R1.fq.gz" && cat "${INPUTFQ2[@]}" > "${outDir}/${sampleName}_R2.fq.gz"
+then
+  echo "returncode: $?"; putFile "${outDir}/${sampleName}_R2.fq.gz"
+  putFile "${outDir}/${sampleName}_R1.fq.gz"
+  echo "succes moving files";
+else
+  echo "returncode: $?";
+  echo "fail";
+fi
+
+echo "## "$(date)" ## $0 Done "
diff --git a/compute5/Public_RNA-seq_quantification/workflowKallisto.csv b/compute5/Public_RNA-seq_quantification/workflowKallisto.csv
new file mode 100644
index 00000000..18e02536
--- /dev/null
+++ b/compute5/Public_RNA-seq_quantification/workflowKallisto.csv
@@ -0,0 +1,3 @@
+step,protocol,dependencies
+MergeFastq,protocols/MergeFastq.sh,
+Kallisto,protocols/Kallisto.sh,MergeFastq
diff --git a/molgenis-pipelines/compute5/BIOS_phasing/protocols/ConvertPLtoGL.sh b/molgenis-pipelines/compute5/BIOS_phasing/protocols/ConvertPLtoGL.sh
new file mode 100755
index 00000000..205df01d
--- /dev/null
+++ b/molgenis-pipelines/compute5/BIOS_phasing/protocols/ConvertPLtoGL.sh
@@ -0,0 +1,52 @@
+#MOLGENIS walltime=23:59:00 mem=8gb nodes=1 ppn=2
+
+### variables to help adding to database (have to use weave)
+#string project
+###
+#string stage
+#string checkStage
+
+#string WORKDIR
+#string projectDir
+#string genotypedChrVcfGLDir
+#string genotypedChrVcfGL
+#string vcf
+#string biopythonVersion
+#string genotypedChrVcfGL
+#string ngsutilsVersion
+
+echo "## "$(date)" Start $0"
+
+getFile ${vcf}
+
+${stage} Biopython/${biopythonVersion}
+${stage} ngs-utils/${ngsutilsVersion}
+${checkStage}
+
+mkdir -p ${genotypedChrVcfGLDir}
+
+echo "Starting conversion."
+
+
+#Run the PL to GL conversion script on the input vcf
+if python $EBROOTNGSMINUTILS/PL_to_GL_reorder.py \
+  --vcf ${vcf} \
+  --out ${genotypedChrVcfGL}
+
+then
+  echo "returncode: $?";
+  putFile ${genotypedChrVcfGL}
+  cd ${genotypedChrVcfGLDir}
+  bname=$(basename ${genotypedChrVcfGL})
+  md5sum ${bname} > ${bname}.md5
+  cd -
+  echo "succes moving files";
+else
+  echo "returncode: $?";
+  echo "fail";
+fi
+
+echo "Finished conversion."
+
+echo "## "$(date)" ## $0 Done "
+

From 5b198fc39cd91c1882819479f2b85a414d662425 Mon Sep 17 00:00:00 2001
From: npklein
Date: Tue, 11 Oct 2016 16:35:51 +0200
Subject: [PATCH 2/4] load GCC version without GCCXX error

---
 .../compute5/BIOS_phasing/parameters.csv      | 63 +++++++++++++++++++
 .../protocols/ConvertBeagleToShapeit.sh       | 62 ++++++++++++++++++
 2 files changed, 125 insertions(+)
 create mode 100755 molgenis-pipelines/compute5/BIOS_phasing/parameters.csv
 create mode 100755 molgenis-pipelines/compute5/BIOS_phasing/protocols/ConvertBeagleToShapeit.sh

diff --git a/molgenis-pipelines/compute5/BIOS_phasing/parameters.csv b/molgenis-pipelines/compute5/BIOS_phasing/parameters.csv
new file mode 100755
index 00000000..5cd0e7ad
--- /dev/null
+++ b/molgenis-pipelines/compute5/BIOS_phasing/parameters.csv
@@ -0,0 +1,63 @@
+########################################################################
+## General parameters for SLURM settings and cluster directory paths
+#
+queue,ll
+defaultInterpreter,#!/bin/bash
+stage,module load
+checkStage,module list
+WORKDIR,/groups/
+root,${WORKDIR}
+group,umcg-bios
+tmp,tmp04
+resDir,/groups/umcg-wijmenga/tmp04/resources/
+toolDir,/apps/software/
+projectDir,${root}/${group}/${tmp}/projects/${project}/
+fvdProjectDir,${root}/${group}/${tmp}/projects/umcg-fvandijk/projects/
+########################################################################
+## Software and data versions/builds/paths
+#
+beagleVersion,09Feb16.2b7-Java-1.8.0_45
+beagleJarVersion,09Feb16.2b7
+shapeitVersion,v2.r837-static
+GCCVersion,4.9.3-binutils-2.25
+intervaltreeVersion,2.1.0-foss-2015b-Python-2.7.9
+pyvcfVersion,0.6.7-foss-2015b-Python-2.7.9
+samtoolsVersion,1.2-foss-2015b
+bedtoolsVersion,2.23.0-foss-2015b
+pythonVersion,3.4.1-foss-2015b
+biopythonVersion,1.65-foss-2015b-Python-3.4.1
+ngsutilsVersion,16.06.1
+zlibVersion,1.2.8
+bzip2Version,1.0.6-foss-2015b
+GLibVersion,2.45.2-foss-2015b
+vcftoolsVersion,0.1.12b-goolf-1.7.20-Perl-5.20.2-bare
+RVersion,3.2.1-foss-2015b
+phaserVersion,f085550
+tabixVersion,0.2.6-goolf-1.7.20
+referenceFastaName,human_g1k_v37
+genomeBuild,b37
+onekgGenomeFasta,${resDir}/${genomeBuild}/indices/${referenceFastaName}.fasta
+geneticMapDir,/apps/data/www.shapeit.fr/genetic_map_b37/
+geneticMapChr,${geneticMapDir}/genetic_map_chr${chromosome}_combined_b37.txt
+OneKgPhase3VCF,/apps/data/1000G/release/20130502//ALL.wgs.phase3_shapeit2_mvncall_integrated_v5b.20130502.sites.vcf.gz
+########################################################################
+## Specific tools paths
+#
+## Input Beagle from GATK GenotypeGVF
+genotypedChrVcfGLDir,${projectDir}/genotypeVcfGL/
+genotypedChrVcfGL,${genotypedChrVcfGLDir}/${project}.chr${chromosome}.genotypeGVCF.gg.vcf.gz
+genotypedChrVcfTbi,${genotypedChrVcfGL}.tbi
+## Beagle
+beagleDir,${projectDir}/beagle/
+genotypedChrVcfBeagleGenotypeProbabilities,${beagleDir}/${project}.chr${chromosome}.beagle.genotype.probs.gg
+genotypedChrVcfShapeitInputPrefix,${beagleDir}/${project}.chr${chromosome}.beagle.genotype.probs.gg
+## Shapeit
+shapeitDir,${projectDir}/shapeit/
+phasedScaffoldDir,/groups/umcg-lld/tmp04/projects/genotypingRelease3/selectionLldeep/lldeepPhased/
+shapeitPhasedOutputPrefix,${shapeitDir}/${project}.chr${chromosome}.shapeit.phased
+## phASER
+mapq,0
+baseq,0
+phaserDir,${projectDir}/phASER
+## genotype concordance
+comparisonFileDir,${fvdProjectDir}RNA-seq_rare_variants/comparison_files/
diff --git a/molgenis-pipelines/compute5/BIOS_phasing/protocols/ConvertBeagleToShapeit.sh b/molgenis-pipelines/compute5/BIOS_phasing/protocols/ConvertBeagleToShapeit.sh
new file mode 100755
index 00000000..fe003a22
--- /dev/null
+++ b/molgenis-pipelines/compute5/BIOS_phasing/protocols/ConvertBeagleToShapeit.sh
@@ -0,0 +1,62 @@
+#MOLGENIS walltime=23:59:00 mem=8gb nodes=1 ppn=2
+
+### variables to help adding to database (have to use weave)
+#string project
+###
+#string stage
+#string checkStage
+
+#string WORKDIR
+#string projectDir
+#string beagleDir
+#string genotypedChrVcfGL
+#string genotypedChrVcfBeagleGenotypeProbabilities
+#string genotypedChrVcfShapeitInputPrefix
+#string GLibVersion
+#string ngsutilsVersion
+#string zlibVersion
+#string bzip2Version
+
+
+echo "## "$(date)" Start $0"
+
+getFile ${genotypedChrVcfGL}
+getFile ${genotypedChrVcfBeagleGenotypeProbabilities}.vcf.gz
+
+${stage} ngs-utils/${ngsutilsVersion}
+${stage} GLib/${GLibVersion}
+${stage} zlib/${zlibVersion}
+${stage} bzip2/${bzip2Version}
+${stage} GCC/4.9.3-binutils-2.25
+${checkStage}
+
+#Run conversion script beagle vcf to shapeit format
+if $EBROOTNGSMINUTILS/prepareGenFromBeagle4_modified20160601/bin/prepareGenFromBeagle4 \
+  --likelihoods ${genotypedChrVcfGL} \
+  --posteriors ${genotypedChrVcfBeagleGenotypeProbabilities}.vcf.gz \
+  --threshold 0.995 \
+  --output ${genotypedChrVcfShapeitInputPrefix}
+then
+  echo "returncode: $?";
+  putFile ${genotypedChrVcfShapeitInputPrefix}.gen.gz
+  putFile ${genotypedChrVcfShapeitInputPrefix}.gen.sample
+  putFile ${genotypedChrVcfShapeitInputPrefix}.hap.gz
+  putFile ${genotypedChrVcfShapeitInputPrefix}.hap.sample
+  cd ${beagleDir}
+  bname=$(basename ${genotypedChrVcfShapeitInputPrefix}.gen.gz)
+  md5sum ${bname} > ${bname}.md5
+  bname=$(basename ${genotypedChrVcfShapeitInputPrefix}.gen.sample)
+  md5sum ${bname} > ${bname}.md5
+  bname=$(basename ${genotypedChrVcfShapeitInputPrefix}.hap.gz)
+  md5sum ${bname} > ${bname}.md5
+  bname=$(basename ${genotypedChrVcfShapeitInputPrefix}.hap.sample)
+  md5sum ${bname} > ${bname}.md5
+  cd -
+  echo "succes moving files";
+else
+  echo "returncode: $?";
+  echo "fail";
+fi
+
+echo "## "$(date)" ## $0 Done "
+

From 4305fcb50764bef689b197baa41fbe9ba98657c5 Mon Sep 17 00:00:00 2001
From: npklein
Date: Tue, 11 Oct 2016 16:40:30 +0200
Subject: [PATCH 3/4] add comments

---
 .../compute5/BIOS_phasing/protocols/ConvertBeagleToShapeit.sh | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/molgenis-pipelines/compute5/BIOS_phasing/protocols/ConvertBeagleToShapeit.sh b/molgenis-pipelines/compute5/BIOS_phasing/protocols/ConvertBeagleToShapeit.sh
index fe003a22..e2502be8 100755
--- a/molgenis-pipelines/compute5/BIOS_phasing/protocols/ConvertBeagleToShapeit.sh
+++ b/molgenis-pipelines/compute5/BIOS_phasing/protocols/ConvertBeagleToShapeit.sh
@@ -27,7 +27,8 @@ ${stage} ngs-utils/${ngsutilsVersion}
 ${stage} GLib/${GLibVersion}
 ${stage} zlib/${zlibVersion}
 ${stage} bzip2/${bzip2Version}
-${stage} GCC/4.9.3-binutils-2.25
+# THIS NEEDS TO BE LOADED AFTER NGS-UTILS TO PREVENT GCCXX ERROR
+${stage} GCC/${GCCVersion}
 ${checkStage}
 
 #Run conversion script beagle vcf to shapeit format

From adf14f5d72c57077548407007257698201e27485 Mon Sep 17 00:00:00 2001
From: npklein
Date: Tue, 11 Oct 2016 16:54:11 +0200
Subject: [PATCH 4/4] declare GCC version string

---
 .../compute5/BIOS_phasing/protocols/ConvertBeagleToShapeit.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/molgenis-pipelines/compute5/BIOS_phasing/protocols/ConvertBeagleToShapeit.sh b/molgenis-pipelines/compute5/BIOS_phasing/protocols/ConvertBeagleToShapeit.sh
index e2502be8..ece87e39 100755
--- a/molgenis-pipelines/compute5/BIOS_phasing/protocols/ConvertBeagleToShapeit.sh
+++ b/molgenis-pipelines/compute5/BIOS_phasing/protocols/ConvertBeagleToShapeit.sh
@@ -16,7 +16,7 @@
 #string ngsutilsVersion
 #string zlibVersion
 #string bzip2Version
-
+#string GCCVersion
 
 echo "## "$(date)" Start $0"
 