Skip to content

Commit

Permalink
Use histograms in quality control reports (#18)
Browse files Browse the repository at this point in the history
* Use histograms in quality control reports

* Add flags to reports

* Remove common variants plot
  • Loading branch information
lukfor authored Mar 7, 2024
1 parent 59fb63d commit b82d38b
Show file tree
Hide file tree
Showing 2 changed files with 128 additions and 74 deletions.
173 changes: 101 additions & 72 deletions reports/report.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ params:
```{r setup, include=FALSE}
MIN_COVERAGE_PERCENTAGE = 90;
MIN_COVERAGE_PERCENTAGE = 0.90;
MIN_MEAN_BASE_QUALITY = 15;
MIN_MEAN_DEPTH = 50
Expand All @@ -36,9 +36,27 @@ knitr::opts_chunk$set(echo = FALSE)

```{r echo=FALSE, warning=FALSE, error=FALSE, include=FALSE}
PRIMARY <- "#2780e3"
PRIMARY_DARK <- "#1a68be"
RED <- "#dc3545"
GREEN <- "#28a745"
ORANGE <- "#ff8800"
link_to_sample <- function(sample) {
return (sample)
#paste0('<a target=_blank href=samples/', sample, '.html>', sample,'</a>' )
paste0('<a target=_blank href="samples/', sample, '.html">', sample,'</a>' )
}
formatStyleTresholdValue <- function(datatable, column, threshold) {
formatStyle(
datatable,
column,
color = styleInterval(threshold, c(RED, ""))
)
}
formatStyleSample <- function(datatable) {
formatStyle(datatable, 'Sample_Link', fontWeight = 'bold')
}
variants <- read.delim(params$variants)
Expand Down Expand Up @@ -67,6 +85,8 @@ mapping <- mapping %>%
statistics <- spread(statistics, key = Parameter, value = Value)
statistics$MeanCoverage <- as.numeric(statistics$MeanDepth)
statistics$MeanBaseQuality <- as.numeric(statistics$MeanBaseQuality)
statistics$CoveredBases <- as.numeric(statistics$CoveredBases)
statistics$MeanMapQuality <- as.numeric(statistics$MeanMapQuality)
statistics <- merge(statistics, mapping, by.x="Sample", by.y="Filename")
Expand All @@ -79,14 +99,21 @@ contamination <- merge(contamination, mapping, by.x="Sample", by.y="Filename") %
contaminated_samples = contamination %>%
rename("Contamination_Status" = "Contamination.Status") %>%
filter(Contamination_Status == "YES");
#variants_count <- data.table(t(table(variants$ID)))
#variants_count <- merge(variants_count, mapping, by.x="V2", by.y="Filename") %>%
# arrange(Sample_Label)
variants_count <- data.table(t(table(variants$ID)))
variants_count <- merge(variants_count, mapping, by.x="V2", by.y="Filename") %>%
variants <- merge(variants, mapping, by.x="ID", by.y="Filename") %>%
arrange(Sample_Label)
variants <- merge(variants, mapping, by.x="ID", by.y="Filename") %>%
variants_count <- variants %>% group_by(Sample_Label) %>%
summarise(
N = n(),
Filtered = sum(Filter != "PASS")
) %>%
arrange(Sample_Label)
variant_caller <- pipeline_params %>%
filter(Parameter == "Variant Caller")
Expand Down Expand Up @@ -133,17 +160,17 @@ Row

```{r}
colors <- c("#28a745", "#dc3545")
colors <- c(GREEN, RED)
names(colors) <- c("PASSED", "FAILED")
colors_contamination <- c("#dddddd","#28a745", "#dc3545")
colors_contamination <- c("", GREEN, RED)
names(colors_contamination) <- c("-", "NO", "YES")
statistics %>%
mutate (
Sample_Link = link_to_sample(Sample_Label),
contamination = case_when(
qc == "FAILED" ~ "-",
qc == "FAILED" ~ "",
Sample_Label %in% contaminated_samples$Sample_Label ~ "YES",
TRUE ~ "NO"
)
Expand All @@ -155,8 +182,13 @@ statistics %>%
bPaginate = FALSE
),
escape = FALSE,
rowname = FALSE,
class = 'cell-border stripe'
) %>%
) %>%
formatStyleSample() %>%
formatStyleTresholdValue('MeanDepth', MIN_MEAN_DEPTH) %>%
formatStyleTresholdValue('CoveragePercentage', MIN_COVERAGE_PERCENTAGE) %>%
formatStyleTresholdValue('MeanBaseQuality', MIN_MEAN_BASE_QUALITY) %>%
formatStyle(
'qc',
backgroundColor = styleEqual(c("PASSED", "FAILED"), colors),
Expand All @@ -183,41 +215,62 @@ Row
-----------------------------------------------------------------------


### Mean Coverage per sample
### Mean Coverage

```{r}
ggplotly(
ggplot(statistics) +
geom_col(aes(x=Sample_Label, y=MeanCoverage, fill=qc)) +
theme(axis.text.x=element_text(angle=+90)) +
geom_hline(yintercept=MIN_MEAN_DEPTH, color="black", linetype="dashed") +
scale_fill_manual(values=colors) +
xlab("Samples") +
ylab("Mean Coverage") +
labs(fill = "")
geom_histogram(aes(x=MeanCoverage), fill=PRIMARY) +
geom_vline(xintercept=MIN_MEAN_DEPTH, color=RED, linetype="dashed") +
ylab("Samples") +
xlab("Mean Coverage") +
theme_bw()
)
```

### Covered Bases

```{r}
ggplotly(
ggplot(statistics) +
geom_histogram(aes(x=CoveredBases), fill=PRIMARY) +
geom_vline(xintercept=MIN_COVERAGE_PERCENTAGE*16569, color=RED, linetype="dashed") +
ylab("Samples") +
xlab("Covered Bases (%)") +
theme_bw()
)
```

Row
-----------------------------------------------------------------------

### Mean Base Quality per sample
### Mean Base Quality

```{r}
ggplotly(
ggplot(statistics) +
geom_col(aes(x=Sample_Label, y=MeanBaseQuality, fill=qc)) +
theme(axis.text.x=element_text(angle=+90)) +
geom_hline(yintercept=MIN_MEAN_BASE_QUALITY, color="black", linetype="dashed") +
scale_fill_manual(values=colors) +
xlab("Samples") +
ylab("Mean Base Quality") +
labs(fill = "")
geom_histogram(aes(x=MeanBaseQuality), fill=PRIMARY) +
geom_vline(xintercept=MIN_MEAN_BASE_QUALITY, color=RED, linetype="dashed") +
ylab("Samples") +
xlab("Mean Base Quality") +
theme_bw()
)
```


### Mean Map Quality

```{r}
ggplotly(
ggplot(statistics) +
geom_histogram(aes(x=MeanMapQuality), fill=PRIMARY) +
ylab("Samples") +
xlab("Mean Mapping Quality") +
theme_bw()
)
```

# Contamination

Expand All @@ -231,8 +284,10 @@ contaminated_samples %>%
bPaginate = FALSE
),
escape = FALSE,
rowname = FALSE,
class = 'cell-border stripe'
)
) %>%
formatStyleSample()
```

---
Expand All @@ -247,50 +302,17 @@ Row
```{r}
variants_count %>%
mutate (Sample_Link = link_to_sample(Sample_Label)) %>%
select("Sample_Link", "N") %>%
select("Sample_Link", "N", "Filtered") %>%
datatable(
colnames=c("Sample", "Number of Variants"),
colnames=c("Sample", "Number of Variants", "Flagged Variants"),
options = list(
bPaginate = FALSE
),
escape = FALSE,
rowname = FALSE,
class = 'cell-border stripe'
)
```

Row
-------------------------------------

### Common Variants and Hetereoplasmies


```{r}
variants <- variants %>%
mutate(Type = case_when(
Type == "0/1" ~ "2",
Type == "1/0" ~ "2",
Type == "INDEL" ~ "3",
TRUE ~ as.character(Type)
))
summary_data <- aggregate( Sample_Label ~ Pos + Type , data = variants, FUN = length)
summary_data <- summary_data %>% filter(Sample_Label >= 1)
summary_data <- summary_data %>% mutate(
label = case_when(
Type == 1 ~ "Variant",
Type == 2 ~ "Heteroplasmy",
Type == 3 ~ "INDEL",
TRUE ~ "Unknown"
)
)
summary_data$AF<-summary_data$Sample_Label / max(length(unique(variants$Sample_Label)))
ggplotly(
ggplot(summary_data, aes(x = Pos, y = AF, color=label)) +
geom_point() +
xlab("Position") +
ylab("Allele Frequency") +
labs(color = "Type")
)
) %>%
formatStyleSample()
```

---
Expand All @@ -314,7 +336,14 @@ haplogroups %>%
bPaginate = FALSE
),
escape = FALSE,
rowname = FALSE,
class = 'cell-border stripe'
) %>%
formatStyleSample() %>%
formatStyle(
"Quality",
backgroundColor = styleInterval(c(0.8,0.9), c(RED, ORANGE, GREEN)),
color ="white"
)
```

Expand All @@ -326,12 +355,12 @@ Row
```{r}
ggplotly(
ggplot(haplogroups) +
geom_col(aes(y=Quality, x=Sample_Label), alpha=0.7) +
geom_hline(yintercept=0.90, color="green", linetype="dashed") +
geom_hline(yintercept=0.80, color="orange", linetype="dashed") +
geom_hline(yintercept=0.70, color="red", linetype="dashed") +
xlab("Samples") +
ylab("Haplogroup Quality") +
theme(axis.text.x=element_text(angle=+90))
geom_histogram(aes(x=Quality), fill=PRIMARY) +
geom_vline(xintercept=0.90, color=GREEN, linetype="dashed") +
geom_vline(xintercept=0.80, color="orange", linetype="dashed") +
geom_vline(xintercept=0.70, color=RED, linetype="dashed") +
ylab("Samples") +
xlab("Haplogroup Quality") +
theme_bw()
)
```
29 changes: 27 additions & 2 deletions reports/sample.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,10 @@ variants <- variants %>%
Type == 2 ~ create_label('Heteroplasmy', 'default'),
Type == 3 ~ create_label('InDel', 'info'),
TRUE ~ "Unknown"
)) %>%
mutate(Filter_Label = case_when(
Filter == "PASS" ~ create_label('PASS', 'success'),
TRUE ~ create_label(Filter, 'warning')
))
variant_caller <- pipeline_params %>%
Expand Down Expand Up @@ -182,19 +186,40 @@ statistics %>%
#)
```

### Variant Flags

```{r}
variants %>% count(Filter_Label) %>%
datatable(
colnames=c("Filter Flag", "Variants"),
options = list(
bPaginate = FALSE,
bFilter = FALSE,
bSort = FALSE,
bFooter = FALSE,
info = FALSE
),
escape = FALSE,
rownames = FALSE,
class = 'cell-border stripe'
)
```

### Variants and Hetroplasmies

Detected **`r variants %>% filter(Type == 1) %>% nrow()` variants**, **`r variants %>% filter(Type == 2) %>% nrow()` heteroplasmies** and **`r variants %>% filter(Type == 3) %>% nrow()` InDels**.


```{r}
variants %>%
mutate(
Variant_Label = link_to_variant(Pos, Ref, Variant)
) %>%
arrange(Pos, Ref, Variant) %>%
select("Variant_Label", "VariantLevel", "Type_Label") %>%
select("Variant_Label", "VariantLevel", "Type_Label","Filter_Label") %>%
datatable(
colnames=c("Variant", "Level", ""),
colnames=c("Variant", "Level", "Type","Flags"),
options = list(
bPaginate = FALSE,
bFilter = FALSE,
Expand Down

0 comments on commit b82d38b

Please sign in to comment.