Skip to content

Commit

Permalink
fix QC report issues (#249)
Browse files Browse the repository at this point in the history
* force scientific notation on NMR dose effects plot

* tidy gitignore

* smaller plate figure sizes when there's a single plate
  • Loading branch information
LMBradford authored Jun 14, 2024
1 parent d3916b6 commit cb6faaf
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 33 deletions.
35 changes: 14 additions & 21 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,17 @@
*.log
# Python stuff
*.pyc

## Inputs
*metadata.txt
*contrasts.txt
*config.yaml
inputs*/reference/*
*.gmt
*.csv
# For Lauren's testing setup
inputs_*
*remove.txt

# Custom
## Sequence files
*fq
Expand All @@ -42,14 +53,6 @@
*sam
*.bai

## Inputs
*metadata.txt
*contrasts.txt
*config.yaml
inputs/reference/*
*.gmt
*.csv

## Test data repo
test-data/

Expand All @@ -66,10 +69,6 @@ count_table*.tsv
checksum*
truth_checksums/*

TODO.txt

run.sh
preprocessing.txt

## Misc
*Log.out
Expand All @@ -92,13 +91,7 @@ analysis_complete
reports_complete

*.bak
*remove.txt
data/raw/run_folders.txt


c2.cp.kegg_medicus.v2023.2.Hs.symbols.gmt
reference/
wikipathways-20210810-gmt-Homo_sapiens.gmt
unify_temposeq_manifests/
data/
Human_S1500_1.2_standardized.csv
test-data*
truth_checksums*

38 changes: 26 additions & 12 deletions Rmd/Sample_QC.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ library(dendsort)
library(rrcov)
library(vtree) # Richard Webster and Nick Barrowman
library(ggh4x)
library(scales)
```


Expand Down Expand Up @@ -1283,7 +1284,7 @@ are_there_plates <- any(grepl("column", colnames(QAQC_annotated), ignore.case =
if (!is.na(params$batch_var)) {
batch_length <- length(unique(QAQC_annotated[, params$batch_var]))
} else{
batch_length <- 5
batch_length <- 1
}
colnum <- which(grepl("column", colnames(QAQC_annotated), ignore.case = T))[1]
rownum <- which(grepl("row", colnames(QAQC_annotated), ignore.case = T))[1]
Expand All @@ -1301,18 +1302,31 @@ plate_plot <- ggplot(QAQC_annotated, aes(x = factor(.data[[colname]]),
scale_x_discrete(position = "top")
if (!is.na(params$batch_var)) {
plate_plot <- plate_plot + facet_wrap(~ .data[[params$batch_var]], scales = "free", ncol = 2)
plate_plot <- plate_plot + facet_wrap2(
~ .data[[params$batch_var]],
scales = "free", axes = "all", remove_labels = FALSE,
ncol = 2)
} else {
plate_plot <- plate_plot + theme(
plot.margin = margin(0, 0, 0, 0, "cm"),
legend.text = element_text(size = 8)
)
}
```


```{r}
custom_fig_height <- ifelse(test = are_there_plates, yes = batch_length, no = 5)
custom_fig_height <- ifelse(test = batch_length > 1, yes = batch_length, no = 2.75)
# Use a narrow plot width for a single plate, and a wider one for multiple plates to accommodate the two-column facet wrap
custom_fig_width <- ifelse(test = batch_length > 1, yes = batch_length, no = 4)
```

### Number of Mapped Reads

```{r fig.height = as.numeric(custom_fig_height), eval = are_there_plates}
```{r fig.height = as.numeric(custom_fig_height), fig.width = custom_fig_width, eval = are_there_plates}
(plate_plot +
geom_point(mapping = aes(color = NMR_data, val = NMR_data), data = QAQC_annotated, size = 3) +
ggtitle("Number of mapped reads")) %>%
Expand All @@ -1321,7 +1335,7 @@ custom_fig_height <- ifelse(test = are_there_plates, yes = batch_length, no = 5)

### Fraction of Mapped Reads

```{r fig.height = as.numeric(custom_fig_height), eval = are_there_plates}
```{r fig.height = as.numeric(custom_fig_height), fig.width = custom_fig_width, eval = are_there_plates}
(plate_plot +
geom_point(mapping = aes(color = FMR_data, val = FMR_data), data = QAQC_annotated, size = 3) +
ggtitle("Fraction of mapped reads")) %>%
Expand All @@ -1330,7 +1344,7 @@ custom_fig_height <- ifelse(test = are_there_plates, yes = batch_length, no = 5)

### Nsig80

```{r fig.height = as.numeric(custom_fig_height), eval = are_there_plates}
```{r fig.height = as.numeric(custom_fig_height), fig.width = custom_fig_width, eval = are_there_plates}
(plate_plot +
geom_point(mapping = aes(color = Nsig80_data, val = Nsig80_data), data = QAQC_annotated, size = 3) +
ggtitle("Nsig80")) %>%
Expand All @@ -1339,7 +1353,7 @@ custom_fig_height <- ifelse(test = are_there_plates, yes = batch_length, no = 5)

### Ncov5

```{r fig.height = as.numeric(custom_fig_height), eval = are_there_plates}
```{r fig.height = as.numeric(custom_fig_height), fig.width = custom_fig_width, eval = are_there_plates}
(plate_plot +
geom_point(mapping = aes(color = Ncov5_data, val = Ncov5_data), data = QAQC_annotated, size = 3) +
ggtitle("Ncov5")) %>%
Expand All @@ -1348,7 +1362,7 @@ custom_fig_height <- ifelse(test = are_there_plates, yes = batch_length, no = 5)

### Gini

```{r fig.height = as.numeric(custom_fig_height), eval = are_there_plates}
```{r fig.height = as.numeric(custom_fig_height), fig.width = custom_fig_width, eval = are_there_plates}
(plate_plot +
geom_point(mapping = aes(color = Gini_data, val = Gini_data), data = QAQC_annotated, size = 3) +
ggtitle("Gini")) %>%
Expand All @@ -1357,7 +1371,7 @@ custom_fig_height <- ifelse(test = are_there_plates, yes = batch_length, no = 5)

### Any samples which failed

```{r fig.height = as.numeric(custom_fig_height), eval = are_there_plates}
```{r fig.height = as.numeric(custom_fig_height), fig.width = custom_fig_width, eval = are_there_plates}
(plate_plot +
geom_tile(mapping = aes(fill = log(.data[[params$dose]] + 1)),
data = QAQC_annotated) +
Expand Down Expand Up @@ -1679,7 +1693,7 @@ plot_batches <- ggplot(QAQC_annotated, aes(x = .data[[params$treatment_var]], co
coord_flip()
plot_batches + geom_boxplot(aes(y = Gini_data)) + ggtitle("Gini")
plot_batches + geom_boxplot(aes(y = NMR_data)) + ggtitle("Number of Mapped Reads")
plot_batches + geom_boxplot(aes(y = NMR_data)) + ggtitle("Number of Mapped Reads")
plot_batches + geom_boxplot(aes(y = FMR_data)) + ggtitle("Fraction of Mapped Reads")
plot_batches + geom_boxplot(aes(y = Ncov5_data)) + ggtitle("Proportion of active probes (% probes with >5 mapped reads)")
plot_batches + geom_boxplot(aes(y = Nsig80_data)) + ggtitle("Proportion of probes required to account for 80% of signal")
Expand All @@ -1697,8 +1711,8 @@ plot_dose_effect <- ggplot(QAQC_annotated, aes(x = factor(.data[[params$dose]]),
theme(axis.text.x = element_text(angle = 90))
plot_dose_effect + geom_boxplot(aes(y = Gini_data)) + ggtitle("Gini")
plot_dose_effect + geom_boxplot(aes(y = NMR_data)) + ggtitle("Number of Mapped Reads")
plot_dose_effect + geom_boxplot(aes(y = FMR_data)) + ggtitle("Fraction of Mapped Reads")
plot_dose_effect + geom_boxplot(aes(y = NMR_data)) + ggtitle("Number of Mapped Reads") + scale_y_continuous(labels = scales::scientific)
plot_dose_effect + geom_boxplot(aes(y = FMR_data)) + ggtitle("Fraction of Mapped Reads")
plot_dose_effect + geom_boxplot(aes(y = Ncov5_data)) + ggtitle("Proportion of active probes (% probes with >5 mapped reads)")
plot_dose_effect + geom_boxplot(aes(y = Nsig80_data)) + ggtitle("Proportion of probes required to account for 80% of signal")
```
Expand Down

0 comments on commit cb6faaf

Please sign in to comment.