From d7bfcd7831cb45dbe8802e2fd3cdb6a120854a71 Mon Sep 17 00:00:00 2001
From: Adrian Seyboldt <adrian.seyboldt@gmail.com>
Date: Fri, 9 Sep 2016 15:19:09 +0200
Subject: [PATCH 01/11] Load bowtie2 in jobscript

---
 jobscript.sh | 1 +
 1 file changed, 1 insertion(+)
 mode change 100755 => 100644 jobscript.sh

diff --git a/jobscript.sh b/jobscript.sh
old mode 100755
new mode 100644
index 7e18136..28bfb4f
--- a/jobscript.sh
+++ b/jobscript.sh
@@ -12,6 +12,7 @@ set -e
 module load bio/fastqc/0.10
 module load qbic/anaconda
 module load qbic/htseq/0.6.1p2
+module load qbic/bowtie2/2.2.3
 module load qbic/tophat
 module load bio/samtools/1.2
 

From dfe0aa714fad8393a36e1a2b0d97603f106a2516 Mon Sep 17 00:00:00 2001
From: qbicStefanC <stefan.czemmel@uni-tuebingen.de>
Date: Tue, 8 Nov 2016 12:51:22 +0100
Subject: [PATCH 02/11] Update Snakefile

---
 Snakefile | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/Snakefile b/Snakefile
index 231b274..9b9b13a 100644
--- a/Snakefile
+++ b/Snakefile
@@ -208,8 +208,13 @@ rule MergeAdapters:
     output: "MergeAdapters/merged.fasta"
     shell: "cat {input} > {output}"
 
+rule subset_Adapters:
+    input: "MergeAdapters/merged.fasta",
+    output: "MergeAdapters/merged.subset.fasta"
+    shell: "awk '/^>/ {{P=index($0,""No Hit"")==0}} {{if(P) print}} ' {input} > {output}"
+
 rule CutAdapt:
-    input: "MergeAdapters/merged.fasta", "PreFilterReads/{name}.fastq"
+    input: "MergeAdapters/merged.subset.fasta", "PreFilterReads/{name}.fastq"
     output: "CutAdaptMerge/{name}.fastq"
     run:
         with open(str(input[0])) as f:

From fe25802e2570058817a8d4ec539339de1f557720 Mon Sep 17 00:00:00 2001
From: qbicStefanC <stefan.czemmel@uni-tuebingen.de>
Date: Tue, 8 Nov 2016 15:54:46 +0100
Subject: [PATCH 03/11] Update Snakefile

---
 Snakefile | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/Snakefile b/Snakefile
index 9b9b13a..8a6b065 100644
--- a/Snakefile
+++ b/Snakefile
@@ -211,7 +211,10 @@ rule MergeAdapters:
 rule subset_Adapters:
     input: "MergeAdapters/merged.fasta",
     output: "MergeAdapters/merged.subset.fasta"
-    shell: "awk '/^>/ {{P=index($0,""No Hit"")==0}} {{if(P) print}} ' {input} > {output}"
+    shell:
+        """
+	awk '/^>/ {{P=index($0,"No Hit")==0}} {{if(P) print}} ' {input} > {output}
+        """
 
 rule CutAdapt:
     input: "MergeAdapters/merged.subset.fasta", "PreFilterReads/{name}.fastq"

From cd28335ae96903b8d8173ee52df443970c0f02b0 Mon Sep 17 00:00:00 2001
From: Timo Lucas <timo-niklas.lucas@student.uni-tuebingen.de>
Date: Fri, 4 May 2018 12:20:49 +0200
Subject: [PATCH 04/11] Update Snakefile

---
 Snakefile | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/Snakefile b/Snakefile
index 8a6b065..180de5f 100644
--- a/Snakefile
+++ b/Snakefile
@@ -101,6 +101,9 @@ OUTPUT_FILES.extend(expand("TopHat2/{name}/accepted_hits.bai", name=INPUT_FILES,
 OUTPUT_FILES.extend(expand("Summary/MappingStats/{name}.txt", name=INPUT_FILES, result=RESULT))
 #OUTPUT_FILES.append("checksums.ok")
 OUTPUT_FILES.append(result('all_counts.csv'))
+OUTPUT_FILES.append("Summary/software_versions.txt")
+
+
 
 rule all:
     input: OUTPUT_FILES
@@ -213,7 +216,7 @@ rule subset_Adapters:
     output: "MergeAdapters/merged.subset.fasta"
     shell:
         """
-	awk '/^>/ {{P=index($0,"No Hit")==0}} {{if(P) print}} ' {input} > {output}
+    awk '/^>/ {{P=index($0,"No Hit")==0}} {{if(P) print}} ' {input} > {output}
         """
 
 rule CutAdapt:
@@ -293,3 +296,19 @@ rule NumreadsOrig:
     input: "fastq/{name}.fastq"
     output: "Summary/NumReads/Original/{name}.txt"
     shell: '''dc -e "$(wc -l {input} | cut -f1 -d' ') 4 / p" > {output}'''
+"""
+Rule to get software versions of used programs in workflow. Rule either calls program with --version flag if possible, or runs
+it without parameters displaying output row containing version information with unix tail command.
+It then redirects it to Summary/software_versions.txt
+"""
+rule SoftwareVersions:
+    input: result("all_counts.csv")
+    output: "Summary/software_versions.txt"
+    run:
+        shell("anaconda --version > Summary/software_versions.txt")
+        shell("conda --version >> Summary/software_versions.txt")
+        shell("fastqc --version >> Summary/software_versions.txt")
+        shell("htseq-count -h | tail -1 >> Summary/software_versions.txt")
+        shell("bowtie2 --version | head -1 >> Summary/software_versions.txt")
+        shell("tophat2 --version >> Summary/software_versions.txt")
+        shell("samtools --version | head -2 >> Summary/software_versions.txt")

From 61cb543a3aa4b3c29598d504b80c94595470e059 Mon Sep 17 00:00:00 2001
From: Timo Lucas <timo-niklas.lucas@student.uni-tuebingen.de>
Date: Fri, 4 May 2018 12:25:47 +0200
Subject: [PATCH 05/11] Software Versions Rule

Added a rule that records the versions of the software used by the workflow.
It runs each tool with the --version flag (where available) and stores the output in Summary/software_versions.txt.
As htseq-count does not provide a --version flag, the last line of its help output (`htseq-count -h`) is used instead.
---
 Snakefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Snakefile b/Snakefile
index 180de5f..d5d3a5b 100644
--- a/Snakefile
+++ b/Snakefile
@@ -299,7 +299,7 @@ rule NumreadsOrig:
 """
 Rule to get software versions of used programs in workflow. Rule either calls program with --version flag if possible, or runs
 it without parameters displaying output row containing version information with unix tail command.
-It then redirects it to Summary/software_versions.txt
+It then redirects it to Summary/software_versions.txt 
 """
 rule SoftwareVersions:
     input: result("all_counts.csv")

From cffe75ec8cedba6059b4b1f653714228e29a129d Mon Sep 17 00:00:00 2001
From: Timo Lucas <timo-niklas.lucas@student.uni-tuebingen.de>
Date: Fri, 4 May 2018 12:30:31 +0200
Subject: [PATCH 06/11] indentation

Fixed small indentation error
---
 Snakefile | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/Snakefile b/Snakefile
index d5d3a5b..1f14489 100644
--- a/Snakefile
+++ b/Snakefile
@@ -216,7 +216,7 @@ rule subset_Adapters:
     output: "MergeAdapters/merged.subset.fasta"
     shell:
         """
-    awk '/^>/ {{P=index($0,"No Hit")==0}} {{if(P) print}} ' {input} > {output}
+        awk '/^>/ {{P=index($0,"No Hit")==0}} {{if(P) print}} ' {input} > {output}
         """
 
 rule CutAdapt:
@@ -296,11 +296,13 @@ rule NumreadsOrig:
     input: "fastq/{name}.fastq"
     output: "Summary/NumReads/Original/{name}.txt"
     shell: '''dc -e "$(wc -l {input} | cut -f1 -d' ') 4 / p" > {output}'''
+        
 """
 Rule to get software versions of used programs in workflow. Rule either calls program with --version flag if possible, or runs
 it without parameters displaying output row containing version information with unix tail command.
 It then redirects it to Summary/software_versions.txt 
 """
+
 rule SoftwareVersions:
     input: result("all_counts.csv")
     output: "Summary/software_versions.txt"

From 111dd7f6ac95a9d812896540982064374cc88333 Mon Sep 17 00:00:00 2001
From: Timo Lucas <timo-niklas.lucas@student.uni-tuebingen.de>
Date: Fri, 4 May 2018 12:31:44 +0200
Subject: [PATCH 07/11] Update Snakefile

---
 Snakefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Snakefile b/Snakefile
index 1f14489..0b2c728 100644
--- a/Snakefile
+++ b/Snakefile
@@ -216,7 +216,7 @@ rule subset_Adapters:
     output: "MergeAdapters/merged.subset.fasta"
     shell:
         """
-        awk '/^>/ {{P=index($0,"No Hit")==0}} {{if(P) print}} ' {input} > {output}
+       awk '/^>/ {{P=index($0,"No Hit")==0}} {{if(P) print}} ' {input} > {output}
         """
 
 rule CutAdapt:

From b2fba368df72887730238bce846e76cdff05751b Mon Sep 17 00:00:00 2001
From: Timo Lucas <timo-niklas.lucas@student.uni-tuebingen.de>
Date: Fri, 4 May 2018 12:33:50 +0200
Subject: [PATCH 08/11] Update Snakefile

---
 Snakefile | 2 --
 1 file changed, 2 deletions(-)

diff --git a/Snakefile b/Snakefile
index 0b2c728..623a3ff 100644
--- a/Snakefile
+++ b/Snakefile
@@ -103,8 +103,6 @@ OUTPUT_FILES.extend(expand("Summary/MappingStats/{name}.txt", name=INPUT_FILES,
 OUTPUT_FILES.append(result('all_counts.csv'))
 OUTPUT_FILES.append("Summary/software_versions.txt")
 
-
-
 rule all:
     input: OUTPUT_FILES
 

From 4fb1afaae012760b5219a5a53a040d6e398214d6 Mon Sep 17 00:00:00 2001
From: Timo Lucas <timo-niklas.lucas@student.uni-tuebingen.de>
Date: Fri, 18 May 2018 13:49:30 +0200
Subject: [PATCH 09/11] Update README.md

---
 README.md | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 59 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index dce21d0..6acc3d5 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,14 @@
-rna seq workflow for use with qproject.
+RNA-Seq workflow for single-end data. The workflow can be downloaded and run on a cluster environment using the tool qproject provided on github: https://github.com/qbicsoftware/qproject
 
-Add a config file "params.json" in `etc`:
+The workflow uses a module system to load the required software. Be sure to check the `jobscript.sh` file to see which software modules are required. The modules are loaded automatically when using `qproject run` to start the workflow, otherwise they have to be loaded manually.
+
+One should use qproject to download the files. This also creates all folders necessary for the workflow.
+
+```
+qproject create -t . -w github:qbicsoftware/rnaseq
+```
+
+Be sure to add a config file "params.json" in `etc` which should look like this:
 
 ```json
 {
@@ -17,6 +25,55 @@ Add a config file "params.json" in `etc`:
 where `indexed_genome` and `gtf` are paths relative to `ref`.
 
 `indexed_genome` is the basename of a bowtie2 index.
+`gtf` is the .gtf file of the reference genome.
 
 The parameters `stranded`, `overlap_mode`, `feature_type` and `gff_attribute`
 are explained in the htseq documentation.
+
+Members of QBiC can download the data for analysis using `qpostman`: https://github.com/qbicsoftware/postman-cli
+
+It should be installed on the computing stations and can be loaded with:
+
+```
+module load qbic/qpostman/0.1.2.3
+```
+
+To download the data, navigate to the data folder and either provide a QBiC ID:
+```
+java -jar qpostman.jar -i <QBiC ID> -u <your_qbic_username>
+```
+
+or a file containing the project QBiC IDs:
+
+```
+postman-0.1.2.3 -f sample_IDs.csv -u user-name
+```
+
+If you're not using `qpostman` just put the relevant files in the data folder (formats supported: `.fastq`, `.fastq.gz`).
+
+To run the workflow navigate to the `src` folder.
+Using `snakemake -n` one can display the operations the workflow will perform.
+Using the `--dag` parameter and piping it to `dot` one can create a .pdf version of the directed acyclic graph used by snakemake to inspect the behavious of the workflow on a local machine.
+
+```
+cd src/
+snakemake -n
+snakemake --dag | dot -Tpdf > dag.pdf
+```
+
+To run the workflow:
+
+```
+qproject run -t ..
+```
+
+While running one can inspect the log files (e.g. in a different screen session) for the progress and errors generated by the workflow:
+
+```
+cd logs/
+tail snake.err -f
+```
+
+And to check the jobs on the computing cluster one can use `qstat`.
+
+As an alternative to `qproject run`, one can use `snakemake -j` to run the workflow. In that case, check `jobscript.sh` and load the required modules manually; note that the jobs will then not be submitted via `qsub`.

From f56e69b65c0a08d782788bc29e062c34a25feb50 Mon Sep 17 00:00:00 2001
From: qbicStefanC <stefan.czemmel@uni-tuebingen.de>
Date: Mon, 13 Aug 2018 15:54:11 +0200
Subject: [PATCH 10/11] Update README.md

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index 6acc3d5..ad2da05 100644
--- a/README.md
+++ b/README.md
@@ -56,6 +56,7 @@ Using `snakemake -n` one can display the operations the workflow will perform.
 Using the `--dag` parameter and piping it to `dot` one can create a .pdf version of the directed acyclic graph used by snakemake to inspect the behavious of the workflow on a local machine.
 
 ```
+module load qbic/anaconda
 cd src/
 snakemake -n
 snakemake --dag | dot -Tpdf > dag.pdf

From 8cc7f75f93a5a7e10fa3a72271fafe3dbf78c8fe Mon Sep 17 00:00:00 2001
From: qbicStefanC <stefan.czemmel@uni-tuebingen.de>
Date: Tue, 23 Oct 2018 13:03:23 +0200
Subject: [PATCH 11/11] Update jobscript.sh

Load samtools from the unversioned qbic/samtools module instead of bio/samtools/1.2.
---
 jobscript.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/jobscript.sh b/jobscript.sh
index c79a60d..8a4b49c 100644
--- a/jobscript.sh
+++ b/jobscript.sh
@@ -14,7 +14,7 @@ module load qbic/anaconda
 module load qbic/htseq/0.6.1p2
 module load qbic/bowtie2/2.2.3
 module load qbic/tophat
-module load bio/samtools/1.2
+module load qbic/samtools
 
 {exec_job}
 exit 0