diff --git a/reports/report.Rmd b/reports/report.Rmd
index 308a955..e36b666 100644
--- a/reports/report.Rmd
+++ b/reports/report.Rmd
@@ -19,7 +19,7 @@ params:
```{r setup, include=FALSE}
-MIN_COVERAGE_PERCENTAGE = 90;
+MIN_COVERAGE_PERCENTAGE = 0.90;
MIN_MEAN_BASE_QUALITY = 15;
MIN_MEAN_DEPTH = 50
@@ -36,9 +36,27 @@ knitr::opts_chunk$set(echo = FALSE)
```{r echo=FALSE, warning=FALSE, error=FALSE, include=FALSE}
+PRIMARY <- "#2780e3"
+PRIMARY_DARK <- "#1a68be"
+RED <- "#dc3545"
+GREEN <- "#28a745"
+ORANGE <- "#ff8800"
+
link_to_sample <- function(sample) {
- return (sample)
- #paste0('', sample,'' )
+ paste0('', sample,'' )
+}
+
+
+formatStyleTresholdValue <- function(datatable, column, threshold) {
+ formatStyle(
+ datatable,
+ column,
+ color = styleInterval(threshold, c(RED, ""))
+ )
+}
+
+formatStyleSample <- function(datatable) {
+ formatStyle(datatable, 'Sample_Link', fontWeight = 'bold')
}
variants <- read.delim(params$variants)
@@ -67,6 +85,8 @@ mapping <- mapping %>%
statistics <- spread(statistics, key = Parameter, value = Value)
statistics$MeanCoverage <- as.numeric(statistics$MeanDepth)
statistics$MeanBaseQuality <- as.numeric(statistics$MeanBaseQuality)
+statistics$CoveredBases <- as.numeric(statistics$CoveredBases)
+statistics$MeanMapQuality <- as.numeric(statistics$MeanMapQuality)
statistics <- merge(statistics, mapping, by.x="Sample", by.y="Filename")
@@ -79,14 +99,21 @@ contamination <- merge(contamination, mapping, by.x="Sample", by.y="Filename") %
contaminated_samples = contamination %>%
rename("Contamination_Status" = "Contamination.Status") %>%
filter(Contamination_Status == "YES");
+#variants_count <- data.table(t(table(variants$ID)))
+#variants_count <- merge(variants_count, mapping, by.x="V2", by.y="Filename") %>%
+# arrange(Sample_Label)
-variants_count <- data.table(t(table(variants$ID)))
-variants_count <- merge(variants_count, mapping, by.x="V2", by.y="Filename") %>%
+variants <- merge(variants, mapping, by.x="ID", by.y="Filename") %>%
arrange(Sample_Label)
-variants <- merge(variants, mapping, by.x="ID", by.y="Filename") %>%
+variants_count <- variants %>% group_by(Sample_Label) %>%
+ summarise(
+ N = n(),
+ Filtered = sum(Filter != "PASS")
+ ) %>%
arrange(Sample_Label)
+
variant_caller <- pipeline_params %>%
filter(Parameter == "Variant Caller")
@@ -133,17 +160,17 @@ Row
```{r}
-colors <- c("#28a745", "#dc3545")
+colors <- c(GREEN, RED)
names(colors) <- c("PASSED", "FAILED")
-colors_contamination <- c("#dddddd","#28a745", "#dc3545")
+colors_contamination <- c("", GREEN, RED)
names(colors_contamination) <- c("-", "NO", "YES")
statistics %>%
mutate (
Sample_Link = link_to_sample(Sample_Label),
contamination = case_when(
- qc == "FAILED" ~ "-",
+ qc == "FAILED" ~ "",
Sample_Label %in% contaminated_samples$Sample_Label ~ "YES",
TRUE ~ "NO"
)
@@ -155,8 +182,13 @@ statistics %>%
bPaginate = FALSE
),
escape = FALSE,
+ rowname = FALSE,
class = 'cell-border stripe'
- ) %>%
+ ) %>%
+ formatStyleSample() %>%
+ formatStyleTresholdValue('MeanDepth', MIN_MEAN_DEPTH) %>%
+ formatStyleTresholdValue('CoveragePercentage', MIN_COVERAGE_PERCENTAGE) %>%
+ formatStyleTresholdValue('MeanBaseQuality', MIN_MEAN_BASE_QUALITY) %>%
formatStyle(
'qc',
backgroundColor = styleEqual(c("PASSED", "FAILED"), colors),
@@ -183,41 +215,62 @@ Row
-----------------------------------------------------------------------
-### Mean Coverage per sample
+### Mean Coverage
```{r}
ggplotly(
ggplot(statistics) +
- geom_col(aes(x=Sample_Label, y=MeanCoverage, fill=qc)) +
- theme(axis.text.x=element_text(angle=+90)) +
- geom_hline(yintercept=MIN_MEAN_DEPTH, color="black", linetype="dashed") +
- scale_fill_manual(values=colors) +
- xlab("Samples") +
- ylab("Mean Coverage") +
- labs(fill = "")
+ geom_histogram(aes(x=MeanCoverage), fill=PRIMARY) +
+ geom_vline(xintercept=MIN_MEAN_DEPTH, color=RED, linetype="dashed") +
+ ylab("Samples") +
+ xlab("Mean Coverage") +
+ theme_bw()
+)
+```
+
+### Covered Bases
+
+```{r}
+
+ggplotly(
+ ggplot(statistics) +
+ geom_histogram(aes(x=CoveredBases), fill=PRIMARY) +
+ geom_vline(xintercept=MIN_COVERAGE_PERCENTAGE*16569, color=RED, linetype="dashed") +
+ ylab("Samples") +
+ xlab("Covered Bases (%)") +
+ theme_bw()
)
```
Row
-----------------------------------------------------------------------
-### Mean Base Quality per sample
+### Mean Base Quality
```{r}
ggplotly(
ggplot(statistics) +
- geom_col(aes(x=Sample_Label, y=MeanBaseQuality, fill=qc)) +
- theme(axis.text.x=element_text(angle=+90)) +
- geom_hline(yintercept=MIN_MEAN_BASE_QUALITY, color="black", linetype="dashed") +
- scale_fill_manual(values=colors) +
- xlab("Samples") +
- ylab("Mean Base Quality") +
- labs(fill = "")
+ geom_histogram(aes(x=MeanBaseQuality), fill=PRIMARY) +
+ geom_vline(xintercept=MIN_MEAN_BASE_QUALITY, color=RED, linetype="dashed") +
+ ylab("Samples") +
+ xlab("Mean Base Quality") +
+ theme_bw()
)
```
+### Mean Map Quality
+
+```{r}
+ggplotly(
+ ggplot(statistics) +
+ geom_histogram(aes(x=MeanMapQuality), fill=PRIMARY) +
+ ylab("Samples") +
+ xlab("Mean Mapping Quality") +
+ theme_bw()
+)
+```
# Contamination
@@ -231,8 +284,10 @@ contaminated_samples %>%
bPaginate = FALSE
),
escape = FALSE,
+ rowname = FALSE,
class = 'cell-border stripe'
- )
+ ) %>%
+ formatStyleSample()
```
---
@@ -247,50 +302,17 @@ Row
```{r}
variants_count %>%
mutate (Sample_Link = link_to_sample(Sample_Label)) %>%
- select("Sample_Link", "N") %>%
+ select("Sample_Link", "N", "Filtered") %>%
datatable(
- colnames=c("Sample", "Number of Variants"),
+ colnames=c("Sample", "Number of Variants", "Flagged Variants"),
options = list(
bPaginate = FALSE
),
escape = FALSE,
+ rowname = FALSE,
class = 'cell-border stripe'
- )
-```
-
-Row
--------------------------------------
-
-### Common Variants and Hetereoplasmies
-
-
-```{r}
-variants <- variants %>%
- mutate(Type = case_when(
- Type == "0/1" ~ "2",
- Type == "1/0" ~ "2",
- Type == "INDEL" ~ "3",
- TRUE ~ as.character(Type)
- ))
-
-summary_data <- aggregate( Sample_Label ~ Pos + Type , data = variants, FUN = length)
-summary_data <- summary_data %>% filter(Sample_Label >= 1)
-summary_data <- summary_data %>% mutate(
- label = case_when(
- Type == 1 ~ "Variant",
- Type == 2 ~ "Heteroplasmy",
- Type == 3 ~ "INDEL",
- TRUE ~ "Unknown"
- )
-)
-summary_data$AF<-summary_data$Sample_Label / max(length(unique(variants$Sample_Label)))
-ggplotly(
- ggplot(summary_data, aes(x = Pos, y = AF, color=label)) +
- geom_point() +
- xlab("Position") +
- ylab("Allele Frequency") +
- labs(color = "Type")
-)
+ ) %>%
+ formatStyleSample()
```
---
@@ -314,7 +336,14 @@ haplogroups %>%
bPaginate = FALSE
),
escape = FALSE,
+ rowname = FALSE,
class = 'cell-border stripe'
+ ) %>%
+ formatStyleSample() %>%
+ formatStyle(
+ "Quality",
+ backgroundColor = styleInterval(c(0.8,0.9), c(RED, ORANGE, GREEN)),
+ color ="white"
)
```
@@ -326,12 +355,12 @@ Row
```{r}
ggplotly(
ggplot(haplogroups) +
- geom_col(aes(y=Quality, x=Sample_Label), alpha=0.7) +
- geom_hline(yintercept=0.90, color="green", linetype="dashed") +
- geom_hline(yintercept=0.80, color="orange", linetype="dashed") +
- geom_hline(yintercept=0.70, color="red", linetype="dashed") +
- xlab("Samples") +
- ylab("Haplogroup Quality") +
- theme(axis.text.x=element_text(angle=+90))
+ geom_histogram(aes(x=Quality), fill=PRIMARY) +
+ geom_vline(xintercept=0.90, color=GREEN, linetype="dashed") +
+ geom_vline(xintercept=0.80, color="orange", linetype="dashed") +
+ geom_vline(xintercept=0.70, color=RED, linetype="dashed") +
+ ylab("Samples") +
+ xlab("Haplogroup Quality") +
+ theme_bw()
)
```
diff --git a/reports/sample.Rmd b/reports/sample.Rmd
index 2b1cfcf..5cf1d1f 100644
--- a/reports/sample.Rmd
+++ b/reports/sample.Rmd
@@ -133,6 +133,10 @@ variants <- variants %>%
Type == 2 ~ create_label('Heteroplasmy', 'default'),
Type == 3 ~ create_label('InDel', 'info'),
TRUE ~ "Unknown"
+ )) %>%
+ mutate(Filter_Label = case_when(
+ Filter == "PASS" ~ create_label('PASS', 'success'),
+ TRUE ~ create_label(Filter, 'warning')
))
variant_caller <- pipeline_params %>%
@@ -182,19 +186,40 @@ statistics %>%
#)
```
+### Variant Flags
+
+```{r}
+variants %>% count(Filter_Label) %>%
+ datatable(
+ colnames=c("Filter Flag", "Variants"),
+ options = list(
+ bPaginate = FALSE,
+ bFilter = FALSE,
+ bSort = FALSE,
+ bFooter = FALSE,
+ info = FALSE
+ ),
+ escape = FALSE,
+ rownames = FALSE,
+ class = 'cell-border stripe'
+ )
+
+```
+
### Variants and Hetroplasmies
Detected **`r variants %>% filter(Type == 1) %>% nrow()` variants**, **`r variants %>% filter(Type == 2) %>% nrow()` heteroplasmies** and **`r variants %>% filter(Type == 3) %>% nrow()` InDels**.
+
```{r}
variants %>%
mutate(
Variant_Label = link_to_variant(Pos, Ref, Variant)
) %>%
arrange(Pos, Ref, Variant) %>%
- select("Variant_Label", "VariantLevel", "Type_Label") %>%
+ select("Variant_Label", "VariantLevel", "Type_Label","Filter_Label") %>%
datatable(
- colnames=c("Variant", "Level", ""),
+ colnames=c("Variant", "Level", "Type","Flags"),
options = list(
bPaginate = FALSE,
bFilter = FALSE,