Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create a report per sample with details #14

Merged
merged 12 commits into from
Mar 2, 2024
13 changes: 8 additions & 5 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,10 @@ RUN \

# Install mutserve (not as conda package available)

ENV MUTSERV_VERSION=2.0.0-rc12
RUN mkdir /opt/mutserve
WORKDIR "/opt/mutserve"
#RUN wget https://github.com/seppinho/mutserve/releases/download/v2.0.0-rc12/mutserve.zip
#RUN wget https://github.com/seppinho/mutserve/releases/download/v${MUTSERV_VERSION}/mutserve.zip
COPY files/mutserve.zip .
RUN unzip mutserve.zip && \
rm mutserve.zip
Expand All @@ -24,18 +25,20 @@ ENV PATH="/opt/mutserve:${PATH}"

# Install haplocheck 1.3.3

ENV HAPLOCHECK_VERSION=1.3.3
RUN mkdir /opt/haplocheck
WORKDIR "/opt/haplocheck"
RUN wget https://github.com/genepi/haplocheck/releases/download/v1.3.3/haplocheck.zip
RUN wget https://github.com/genepi/haplocheck/releases/download/v${HAPLOCHECK_VERSION}/haplocheck.zip
RUN unzip haplocheck.zip && \
rm haplocheck.zip
ENV PATH="/opt/haplocheck:${PATH}"

ENV HAPLOGREP_VERSION=3.2.1
RUN mkdir /opt/haplogrep
WORKDIR "/opt/haplogrep"
RUN wget https://github.com/genepi/haplogrep3/releases/download/v3.2.1/haplogrep3-3.2.1-linux.zip && \
unzip haplogrep3-3.2.1-linux.zip && \
rm haplogrep3-3.2.1-linux.zip
RUN wget https://github.com/genepi/haplogrep3/releases/download/v${HAPLOGREP_VERSION}/haplogrep3-${HAPLOGREP_VERSION}-linux.zip && \
unzip haplogrep3-${HAPLOGREP_VERSION}-linux.zip && \
rm haplogrep3-${HAPLOGREP_VERSION}-linux.zip
ENV PATH="/opt/haplogrep:${PATH}"

WORKDIR "/opt"
Expand Down
2 changes: 1 addition & 1 deletion modules/local/merging_variants.nf
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ process MERGING_VARIANTS {

csvtk sort \
-t variants.concat.txt \
-k ID:N -k Pos:n -k Type:nr \
-k ID:N -k Pos:n -k Ref:N -k Variant:N -k Type:nr \
-T -o variants.sorted.txt

if [[ ${mode} == "fusion" ]]
Expand Down
4 changes: 2 additions & 2 deletions modules/local/quality_control.nf
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
process QUALITY_CONTROL {

publishDir "${params.output_reports}", mode: "copy", pattern: '*.html'
publishDir "${params.output_reports}/multiqc", mode: "copy", pattern: '*.html'

input:
path excluded_samples
Expand All @@ -10,6 +10,6 @@ process QUALITY_CONTROL {
path "*.html"

"""
multiqc .
multiqc . --filename index.html
"""
}
3 changes: 3 additions & 0 deletions modules/local/report.nf
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ process REPORT {
echo -e "Base Quality\t${params.baseQ}" >> params.txt
echo -e "Map Quality\t${params.mapQ}" >> params.txt
echo -e "Alignment Quality\t${params.alignQ}" >> params.txt
echo -e "Mutserv\t\${MUTSERV_VERSION}" >> params.txt
echo -e "Haplocheck\t\${HAPLOCHECK_VERSION}" >> params.txt
echo -e "Haplogrep\t\${HAPLOGREP_VERSION}" >> params.txt

Rscript -e "require('rmarkdown'); render('${report}',
params = list(
Expand Down
58 changes: 58 additions & 0 deletions modules/local/sample_report.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
process SAMPLE_REPORT {

publishDir "${params.output_reports}/samples", mode: 'copy'

input:
path report
path variants
path haplogroups
path haplocheck
path statistics
path mapping
path excluded
tuple val(sample_filename), val(sample), file(sample_bam_file)

output:
file "*.html"

"""
echo -e "Parameter\tValue" > params.txt
echo -e "Version\t${workflow.manifest.version}" >> params.txt
echo -e "Job\t${params.project}" >> params.txt
echo -e "Date\t${params.project_date}" >> params.txt
echo -e "Repository\t${params.service.github}" >> params.txt
echo -e "Variant Caller\t${params.mode}" >> params.txt
echo -e "Detection Limit\t${params.detection_limit}" >> params.txt
echo -e "Reference\t${params.reference}" >> params.txt
echo -e "Base Quality\t${params.baseQ}" >> params.txt
echo -e "Map Quality\t${params.mapQ}" >> params.txt
echo -e "Alignment Quality\t${params.alignQ}" >> params.txt
echo -e "Mutserv\t\${MUTSERV_VERSION}" >> params.txt
echo -e "Haplocheck\t\${HAPLOCHECK_VERSION}" >> params.txt
echo -e "Haplogrep\t\${HAPLOGREP_VERSION}" >> params.txt

#TODO: split fwd and reverse? https://bioinformatics.stackexchange.com/questions/8649/how-can-i-calculate-coverage-at-single-bases-using-a-bam-file

# Extract the value of Contig and store it in a variable
contig=`awk '\$2 == "Contig" {print \$3; exit}' "${statistics}"`

echo -e "Contig\tPosition\tCoverage" > coverage.tsv
samtools index ${sample_bam_file}
samtools depth -a ${sample_bam_file} -r \$contig >> coverage.tsv

Rscript -e "require('rmarkdown'); render('${report}',
params = list(
pipeline_parameters = 'params.txt',
variants = '${variants}',
haplogroups = '${haplogroups}',
haplocheck = '${haplocheck}',
statistics = '${statistics}',
mapping = '${mapping}',
excluded_samples = '${excluded}',
sample = '${sample}',
coverage = 'coverage.tsv'
),
knit_root_dir='\$PWD', output_file='\$PWD/${sample}.html')"
"""

}
74 changes: 53 additions & 21 deletions reports/report.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ output:
flexdashboard::flex_dashboard:
orientation: rows
navbar:
- { title: "About", href: "https://mitoverse.i-med.ac.at/#!pages/contact", align: right }
- { title: "MultiQC", target: "_blank", href: "multiqc/index.html" }
- { title: "About", target: "_blank", href: "https://mitoverse.i-med.ac.at/#!pages/contact", align: right }
params:
haplogroups: ../tests/data/report/haplogroups.txt
haplocheck: ../tests/data/report/haplocheck.txt
Expand Down Expand Up @@ -33,15 +34,19 @@ library(knitr)
knitr::opts_chunk$set(echo = FALSE)
```

```{r echo=FALSE, results='asis'}
```{r echo=FALSE, warning=FALSE, error=FALSE, include=FALSE}

link_to_sample <- function(sample) {
paste0('<a target=_blank href=samples/', sample, '.html>', sample,'</a>' )
}

variants <- read.delim(params$variants)
contamination <- read.delim(params$haplocheck)
haplogroups <- read.delim(params$haplogroups)
statistics <- read.delim(params$statistics)
pipeline_params <- read.delim(params$pipeline_parameters)

excluded_samples <- try(read.delim(params$excluded_samples, header = FALSE, ))
excluded_samples <- try(read.delim(params$excluded_samples, header = FALSE))
if(inherits(excluded_samples, "try-error")) {
excluded_samples = data.frame(V1=c())
}
Expand Down Expand Up @@ -125,30 +130,48 @@ Row

### Summary {data-width=750}

```{r echo=FALSE, results='asis'}
```{r}

colors <- c("#28a745", "#dc3545")
names(colors) <- c("PASSED", "FAILED")

colors_contamination <- c("#dddddd","#28a745", "#dc3545")
names(colors_contamination) <- c("-", "NO", "YES")

statistics %>%
select("Sample_Label", "MeanDepth", "CoveragePercentage", "CoveredBases", "MeanBaseQuality", "MeanMapQuality", "qc") %>%
mutate (
Sample_Link = link_to_sample(Sample_Label),
contamination = case_when(
qc == "FAILED" ~ "-",
Sample_Label %in% contaminated_samples$Sample_Label ~ "YES",
TRUE ~ "NO"
)
) %>%
select("Sample_Link", "MeanDepth", "CoveragePercentage", "CoveredBases", "MeanBaseQuality", "MeanMapQuality", "qc", "contamination") %>%
datatable(
colnames=c("Sample", "Mean Coverage", "Covered Bases (%)", "Covered Bases", "Mean Base Quality", "Mean Mapping Quality", "QC"),
colnames=c("Sample", "Mean Coverage", "Covered Bases (%)", "Covered Bases", "Mean Base Quality", "Mean Mapping Quality", "QC", "Cont."),
options = list(
bPaginate = FALSE
)
),
escape = FALSE,
class = 'cell-border stripe'
) %>%
formatStyle(
'qc',
backgroundColor = styleEqual(c("PASSED", "FAILED"), colors),
color = 'white'
) %>%
formatStyle(
'contamination',
backgroundColor = styleEqual(c("-", "NO", "YES"), colors_contamination),
color = 'white'
)
```


### Workflow parameters and Software versions {data-width=250}

```{r echo=FALSE, results='asis'}
```{r}
pipeline_params %>%
kable()
```
Expand All @@ -159,7 +182,7 @@ Row

### Mean Coverage per sample

```{r echo=FALSE, results='asis'}
```{r}

ggplotly(
ggplot(statistics) +
Expand All @@ -175,7 +198,7 @@ ggplotly(

### Mean Base Quality per sample

```{r echo=FALSE, results='asis'}
```{r}
ggplotly(
ggplot(statistics) +
geom_col(aes(x=Sample_Label, y=MeanBaseQuality, fill=qc)) +
Expand All @@ -192,14 +215,17 @@ ggplotly(

# Contamination

```{r echo=FALSE, results='asis'}
```{r}
contaminated_samples %>%
select("Sample_Label", "Contamination_Status", "Contamination.Level") %>%
mutate (Sample_Link = link_to_sample(Sample_Label)) %>%
select("Sample_Link", "Contamination_Status", "Contamination.Level") %>%
datatable(
colnames=c("Sample", "Contamination","Level"),
options = list(
bPaginate = FALSE
)
),
escape = FALSE,
class = 'cell-border stripe'
)
```

Expand All @@ -212,14 +238,17 @@ Row

### Summary

```{r echo=FALSE, results='asis'}
```{r}
variants_count %>%
select("Sample_Label", "N") %>%
mutate (Sample_Link = link_to_sample(Sample_Label)) %>%
select("Sample_Link", "N") %>%
datatable(
colnames=c("Sample", "Number of Variants"),
options = list(
bPaginate = FALSE
)
),
escape = FALSE,
class = 'cell-border stripe'
)
```

Expand All @@ -229,7 +258,7 @@ Row
### Common Variants and Hetereoplasmies


```{r echo=FALSE, results='asis'}
```{r}
variants <- variants %>%
mutate(Type = case_when(
Type == "0/1" ~ "2",
Expand Down Expand Up @@ -269,14 +298,17 @@ Row

### Summary

```{r echo=FALSE, results='asis'}
```{r}
haplogroups %>%
select("Sample_Label", "Haplogroup", "Quality") %>%
mutate (Sample_Link = link_to_sample(Sample_Label)) %>%
select("Sample_Link", "Haplogroup", "Quality") %>%
datatable(
colnames=c("Sample", "Haplogroup", "Quality"),
options = list(
bPaginate = FALSE
)
),
escape = FALSE,
class = 'cell-border stripe'
)
```

Expand All @@ -285,7 +317,7 @@ Row

### Quality per Sample

```{r echo=FALSE, results='asis'}
```{r}
ggplotly(
ggplot(haplogroups) +
geom_col(aes(y=Quality, x=Sample_Label), alpha=0.7) +
Expand Down
Loading
Loading