genepi · lukfor · Mar 2, 2024 · Mar 1, 2024 · Mar 1, 2024 · Mar 1, 2024
diff --git a/Dockerfile b/Dockerfile
@@ -13,9 +13,10 @@ RUN \
 
 # Install mutserve (not as conda package available)
 
+ENV MUTSERV_VERSION=2.0.0-rc12
 RUN mkdir /opt/mutserve
 WORKDIR "/opt/mutserve"
-#RUN wget https://github.com/seppinho/mutserve/releases/download/v2.0.0-rc12/mutserve.zip
+#RUN wget https://github.com/seppinho/mutserve/releases/download/v${MUTSERV_VERSION}/mutserve.zip
 COPY files/mutserve.zip .
 RUN unzip mutserve.zip && \
     rm mutserve.zip
@@ -24,18 +25,20 @@ ENV PATH="/opt/mutserve:${PATH}"
 
 # Install haplocheck 1.3.3
 
+ENV HAPLOCHECK_VERSION=1.3.3
 RUN mkdir /opt/haplocheck
 WORKDIR "/opt/haplocheck"
-RUN wget https://github.com/genepi/haplocheck/releases/download/v1.3.3/haplocheck.zip
+RUN wget https://github.com/genepi/haplocheck/releases/download/v${HAPLOCHECK_VERSION}/haplocheck.zip
 RUN unzip haplocheck.zip && \
     rm haplocheck.zip
 ENV PATH="/opt/haplocheck:${PATH}"
 
+ENV HAPLOGREP_VERSION=3.2.1
 RUN mkdir /opt/haplogrep
 WORKDIR "/opt/haplogrep"
-RUN wget https://github.com/genepi/haplogrep3/releases/download/v3.2.1/haplogrep3-3.2.1-linux.zip && \
-    unzip haplogrep3-3.2.1-linux.zip && \
-    rm haplogrep3-3.2.1-linux.zip
+RUN wget https://github.com/genepi/haplogrep3/releases/download/v${HAPLOGREP_VERSION}/haplogrep3-${HAPLOGREP_VERSION}-linux.zip && \
+    unzip haplogrep3-${HAPLOGREP_VERSION}-linux.zip && \
+    rm haplogrep3-${HAPLOGREP_VERSION}-linux.zip
 ENV PATH="/opt/haplogrep:${PATH}"
 
 WORKDIR "/opt"

diff --git a/modules/local/merging_variants.nf b/modules/local/merging_variants.nf
@@ -14,7 +14,7 @@ process MERGING_VARIANTS {
 
     csvtk sort \
         -t variants.concat.txt \
-        -k ID:N -k Pos:n -k Type:nr \
+        -k ID:N -k Pos:n -k Ref:N -k Variant:N -k Type:nr \
         -T -o variants.sorted.txt
 
     if [[ ${mode} == "fusion" ]]

diff --git a/modules/local/quality_control.nf b/modules/local/quality_control.nf
@@ -1,6 +1,6 @@
 process QUALITY_CONTROL {
 
-    publishDir "${params.output_reports}", mode: "copy", pattern: '*.html'
+    publishDir "${params.output_reports}/multiqc", mode: "copy", pattern: '*.html'
 
     input:
     path excluded_samples
@@ -10,6 +10,6 @@ process QUALITY_CONTROL {
 	path "*.html"
 
 	"""
-	multiqc . 
+	multiqc . --filename index.html
 	"""
 }
diff --git a/modules/local/report.nf b/modules/local/report.nf
@@ -26,6 +26,9 @@ process REPORT {
   echo -e "Base Quality\t${params.baseQ}" >> params.txt
   echo -e "Map Quality\t${params.mapQ}" >> params.txt
   echo -e "Alignment Quality\t${params.alignQ}" >> params.txt  
+  echo -e "Mutserv\t\${MUTSERV_VERSION}" >> params.txt 
+  echo -e "Haplocheck\t\${HAPLOCHECK_VERSION}" >> params.txt 
+  echo -e "Haplogrep\t\${HAPLOGREP_VERSION}" >> params.txt 
 
   Rscript -e "require('rmarkdown'); render('${report}',
    params = list(

diff --git a/modules/local/sample_report.nf b/modules/local/sample_report.nf
@@ -0,0 +1,58 @@
+process SAMPLE_REPORT {
+
+    publishDir "${params.output_reports}/samples", mode: 'copy'
+
+    input:
+    path report
+    path variants
+    path haplogroups
+    path haplocheck
+    path statistics
+    path mapping
+    path excluded
+    tuple val(sample_filename), val(sample), file(sample_bam_file)
+
+    output:
+    file "*.html" 
+
+    """
+    echo -e "Parameter\tValue" > params.txt
+    echo -e "Version\t${workflow.manifest.version}" >> params.txt
+    echo -e "Job\t${params.project}" >> params.txt
+    echo -e "Date\t${params.project_date}" >> params.txt  
+    echo -e "Repository\t${params.service.github}" >> params.txt    
+    echo -e "Variant Caller\t${params.mode}" >> params.txt
+    echo -e "Detection Limit\t${params.detection_limit}" >> params.txt
+    echo -e "Reference\t${params.reference}" >> params.txt
+    echo -e "Base Quality\t${params.baseQ}" >> params.txt
+    echo -e "Map Quality\t${params.mapQ}" >> params.txt
+    echo -e "Alignment Quality\t${params.alignQ}" >> params.txt  
+    echo -e "Mutserv\t\${MUTSERV_VERSION}" >> params.txt 
+    echo -e "Haplocheck\t\${HAPLOCHECK_VERSION}" >> params.txt 
+    echo -e "Haplogrep\t\${HAPLOGREP_VERSION}" >> params.txt 
+
+    #TODO: split fwd and reverse? https://bioinformatics.stackexchange.com/questions/8649/how-can-i-calculate-coverage-at-single-bases-using-a-bam-file
+
+    # Extract the value of Contig and store it in a variable
+    contig=`awk '\$2 == "Contig" {print \$3; exit}' "${statistics}"`
+
+    echo -e "Contig\tPosition\tCoverage" > coverage.tsv
+    samtools index ${sample_bam_file}
+    samtools depth -a  ${sample_bam_file} -r \$contig >> coverage.tsv
+
+    Rscript -e "require('rmarkdown'); render('${report}',
+    params = list(
+        pipeline_parameters = 'params.txt',
+        variants = '${variants}',
+        haplogroups = '${haplogroups}',
+        haplocheck = '${haplocheck}',
+        statistics = '${statistics}',
+        mapping = '${mapping}',
+        excluded_samples = '${excluded}',
+        sample = '${sample}',
+        coverage = 'coverage.tsv'
+    ),
+    knit_root_dir='\$PWD', output_file='\$PWD/${sample}.html')"
+    """
+
+}
diff --git a/reports/report.Rmd b/reports/report.Rmd
@@ -4,7 +4,8 @@ output:
   flexdashboard::flex_dashboard:
     orientation: rows
     navbar:
-      - { title: "About", href: "https://mitoverse.i-med.ac.at/#!pages/contact", align: right }
+      - { title: "MultiQC", target: "_blank", href: "multiqc/index.html" }
+      - { title: "About",  target: "_blank", href: "https://mitoverse.i-med.ac.at/#!pages/contact", align: right }
 params:
   haplogroups: ../tests/data/report/haplogroups.txt
   haplocheck: ../tests/data/report/haplocheck.txt
@@ -33,15 +34,19 @@ library(knitr)
 knitr::opts_chunk$set(echo = FALSE)
 ```
 
-```{r echo=FALSE, results='asis'}
+```{r echo=FALSE, warning=FALSE, error=FALSE, include=FALSE}
+
+link_to_sample <- function(sample) {
+  paste0('<a target=_blank href=samples/', sample, '.html>', sample,'</a>' )
+}
 
 variants <- read.delim(params$variants)
 contamination <- read.delim(params$haplocheck)
 haplogroups <- read.delim(params$haplogroups)
 statistics <- read.delim(params$statistics)
 pipeline_params <- read.delim(params$pipeline_parameters)
 
-excluded_samples <- try(read.delim(params$excluded_samples, header = FALSE, ))
+excluded_samples <- try(read.delim(params$excluded_samples, header = FALSE))
 if(inherits(excluded_samples, "try-error")) {
   excluded_samples = data.frame(V1=c())
 }
@@ -125,30 +130,48 @@ Row
 
 ### Summary  {data-width=750}
 
-```{r echo=FALSE, results='asis'}
+```{r}
 
 colors <- c("#28a745", "#dc3545")
 names(colors) <- c("PASSED", "FAILED")
 
+colors_contamination <- c("#dddddd","#28a745", "#dc3545")
+names(colors_contamination) <- c("-", "NO", "YES")
+
 statistics %>%
-  select("Sample_Label", "MeanDepth", "CoveragePercentage", "CoveredBases", "MeanBaseQuality", "MeanMapQuality", "qc") %>%
+  mutate (
+    Sample_Link = link_to_sample(Sample_Label),
+    contamination = case_when(
+      qc == "FAILED" ~ "-",
+      Sample_Label %in% contaminated_samples$Sample_Label ~ "YES",
+      TRUE ~ "NO"
+    )
+  ) %>%
+  select("Sample_Link", "MeanDepth", "CoveragePercentage", "CoveredBases", "MeanBaseQuality", "MeanMapQuality", "qc", "contamination") %>%
   datatable(
-    colnames=c("Sample", "Mean Coverage", "Covered Bases (%)", "Covered Bases", "Mean Base Quality", "Mean Mapping Quality", "QC"),
+    colnames=c("Sample", "Mean Coverage", "Covered Bases (%)", "Covered Bases", "Mean Base Quality", "Mean Mapping Quality", "QC", "Cont."),
     options = list(
       bPaginate = FALSE
-    )
+    ),
+    escape = FALSE,
+    class = 'cell-border stripe'
   ) %>% 
   formatStyle(
     'qc',
     backgroundColor = styleEqual(c("PASSED", "FAILED"), colors),
     color = 'white'
+  ) %>% 
+  formatStyle(
+    'contamination',
+    backgroundColor = styleEqual(c("-", "NO", "YES"), colors_contamination),
+    color = 'white'
   )
 ```
 
 
 ### Workflow parameters and Software versions {data-width=250}
 
-```{r echo=FALSE, results='asis'}
+```{r}
 pipeline_params %>%
   kable()
 ```
@@ -159,7 +182,7 @@ Row
 
 ### Mean Coverage per sample
 
-```{r echo=FALSE, results='asis'}
+```{r}
 
 ggplotly(
   ggplot(statistics) +
@@ -175,7 +198,7 @@ ggplotly(
 
 ### Mean Base Quality per sample
 
-```{r echo=FALSE, results='asis'}
+```{r}
 ggplotly(
   ggplot(statistics) +
     geom_col(aes(x=Sample_Label, y=MeanBaseQuality, fill=qc)) +
@@ -192,14 +215,17 @@ ggplotly(
 
 # Contamination
 
-```{r echo=FALSE, results='asis'}
+```{r}
 contaminated_samples %>%
-  select("Sample_Label", "Contamination_Status", "Contamination.Level") %>%
+  mutate (Sample_Link = link_to_sample(Sample_Label)) %>%
+  select("Sample_Link", "Contamination_Status", "Contamination.Level") %>%
   datatable(
     colnames=c("Sample", "Contamination","Level"),
     options = list(
       bPaginate = FALSE
-    )
+    ),
+    escape = FALSE,
+    class = 'cell-border stripe'
   )
 ```
 
@@ -212,14 +238,17 @@ Row
 
 ### Summary
 
-```{r echo=FALSE, results='asis'}
+```{r}
 variants_count %>%
-  select("Sample_Label", "N") %>%
+  mutate (Sample_Link = link_to_sample(Sample_Label)) %>%
+  select("Sample_Link", "N") %>%
   datatable(
     colnames=c("Sample", "Number of Variants"),
     options = list(
       bPaginate = FALSE
-    )
+    ),
+    escape = FALSE,
+    class = 'cell-border stripe'
   )
 ```
 
@@ -229,7 +258,7 @@ Row
 ### Common Variants and Hetereoplasmies
 
 
-```{r echo=FALSE, results='asis'}
+```{r}
 variants <- variants %>%
   mutate(Type = case_when(
     Type == "0/1" ~ "2",
@@ -269,14 +298,17 @@ Row
 
 ### Summary
 
-```{r echo=FALSE, results='asis'}
+```{r}
 haplogroups %>%
-  select("Sample_Label", "Haplogroup", "Quality") %>%
+  mutate (Sample_Link = link_to_sample(Sample_Label)) %>%
+  select("Sample_Link", "Haplogroup", "Quality") %>%
   datatable(
     colnames=c("Sample", "Haplogroup", "Quality"),
     options = list(
       bPaginate = FALSE
-    )
+    ),
+    escape = FALSE,
+    class = 'cell-border stripe'
   )
 ```
 
@@ -285,7 +317,7 @@ Row
 
 ### Quality per Sample
 
-```{r echo=FALSE, results='asis'}
+```{r}
 ggplotly(
   ggplot(haplogroups) +
     geom_col(aes(y=Quality, x=Sample_Label), alpha=0.7) +