-
Notifications
You must be signed in to change notification settings - Fork 3
/
ddPCR_analysis.R
228 lines (152 loc) · 7.34 KB
/
ddPCR_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
# ddPCR_analysis.R
# S057j ; S072 ..
# Prelims ----
source('./0-general_functions_main.R') # Source the general_functions file before running this
source('./0.5-user-inputs.R') # source the user inputs from a different script
title_name <- base_title_name
# pre-processing function ----
#' Renames colnames from Quantasoft analysis Pro to match the Quantasoft run data (typical)
#' source: wastewater covid data analysis pipeline/g.10-sheet_columns_renaming_funs.R
#' @param .df dataframe to rename
#' @return dataframe with renamed columns
raw_ddpcr_renamer <- function(.df)
{
.df %>%
rename(PositiveDroplets = Positives) %>% # rename to be more meaningful
# rename the column names - if exported from Quantasoft analysis Pro
rename(CopiesPer20uLWell = matches('Copies/20.*LWell'),
Concentration = matches('Conc\\(copies/.*L)'),
AcceptedDroplets = any_of('Accepted Droplets'),
Threshold = any_of('Threshold1'),
MeanAmplitudeofPositives = any_of('MeanAmplitudeOfPositives'), # 'Of' to 'of'
MeanAmplitudeofNegatives = any_of('MeanAmplitudeOfNegatives')) %>%
# rename the column name - if exported from Quantasoft analysis Pro
mutate(across(any_of('Concentration'), as.numeric)) %>% # Remove the NO CALLS and make it numeric column
# convert ambiguous columns into numeric
mutate(across(matches('Total|Poisson|Mean|Ch|Ratio|Abundance|Linkage|CNV|Copies|Det'), as.numeric)) %>%
mutate(across(where(is.list), as.character)) %>% # convert any stray lists into character
# remove leading 0 (zero) from A01..A09..H01..H09
mutate(across('Well', ~ str_replace(., '(?<=[:alpha:])0(?=[:digit:])', '') ))
}
# Load data + meta ----
get_data <- read_csv(str_c('excel files/', flnm, '.csv'))
# Read the sample names and metadata from google sheet
plate_template <- get_and_parse_plate_layout(flnm, read_target_name = FALSE) %>%
rename(Well = 'Well Position') # rename well position to match the data and to be consise
# Process data ----
## attach metadata ----
# attach metadata, flag manual removals
compiled_w_metadata <-
get_data %>%
raw_ddpcr_renamer %>% # rename columns (for quantasoft analysis pro exported data)
# join the plate layout/template
left_join(plate_template) %>%
relocate(colnames(plate_template)) %>% # move the columns to the front
rename(Target_name = Target) %>% # rename target to qPCR format
# control column types and order
mutate(across(Concentration, as.numeric)) %>% # force conc to be numeric
# bring controls to the end
mutate(across(Sample_category, ~ fct_relevel(.x, 'C-', 'ntc', 'Negative', after = Inf))) %>%
## custom processing ordering data ----
mutate(across(assay_variable, ~ fct_relevel(.x, assay_var_order))) %>% # order the assay variables
## Flag data to remove/manual ----
# flag wells to remove from analysis (keeps them in the data output for transparency)
# remove before doing ratios!
mutate(flag_wells =
# str_detect(Well, wells_to_remove) | # flag wells to remove or
(Well %in% wells_to_remove | # flag wells to remove or
str_detect(Sample_category, categories_to_remove)) %>% # categories to remove
if_else(., 'Manual', ''), # flag manually removed samples
.after = Well) # keep the flag next to the well
## get LOD ----
# more details on LOD in WW-CoV2-project g.7 file
LOD <-
filter(compiled_w_metadata, Sample_category == 'Negative',
!flag_wells == 'Manual') %>% # remove flagged wells if any negatives were flagged manually
# get the LOD for each target
reframe(.by = Target_name,
# LOD in copies per 20uL well
LOD_max_copieswell = max(CopiesPer20uLWell, na.rm = T),
LOD_mean_copieswell = mean(CopiesPer20uLWell, na.rm = T),
# LOD in number of positive droplets ; minimum 3 if blanks are clean
LOD_max_pos_droplets = max(3, PositiveDroplets, na.rm = T)
)
## flag wells below LOD -----
processed_data <- compiled_w_metadata %>%
# join the LOD values
left_join(LOD) %>%
# flag wells with positive droplets below LOD
mutate(flag_wells =
if_else(flag_wells == 'Manual', 'Manual', # keep manually flagged wells
if_else(PositiveDroplets < LOD_max_pos_droplets, 'Below LOD', flag_wells))
)
## Custom/ratios ----
# custom analysis for Swetha's marine ddPCR data
ratio_data <- processed_data %>%
# remove flagged wells
filter(!flag_wells == 'Manual', # remove manually flagged wells
# remove wells below LOD for chromosome
!(flag_wells == 'Below LOD' & Target_name == 'Chromosome'),
!str_detect(Sample_category, 'Negative')) %>% # remove negatives [junk ratios]
# calculate copy number : ratio of plasmid to chromosome
select(Sample_category, assay_variable, Well, Target_name, Concentration) %>%
pivot_wider(names_from = Target_name, values_from = Concentration) %>%
mutate(copy_number = Plasmid / Chromosome)
# Save data ----
# save processed data
write_csv(processed_data,
str_c('excel files/processed_data/', flnm, '-ddPCR-processed.csv'),
na = '')
# Save ratio data
write_csv(ratio_data,
str_c('excel files/processed_data/', flnm, '-ddPCR-ratio.csv'),
na = '')
# Plotting ----
## copies by target ----
# plot copies per target
copies_all_targets <-
{plot_facetted_assay(.data =
filter(processed_data, !flag_wells == 'Manual'), # removed flagged wells
.yvar_plot = CopiesPer20uLWell, .xvar_plot = assay_variable,
.label.var = Well,
.facetvar_plot = Target_name) +
# add LOD line
geom_hline(data = LOD, aes(yintercept = LOD_max_copieswell), linetype = 'dashed', color = 'red') +
geom_hline(data = LOD, aes(yintercept = LOD_mean_copieswell), linetype = 'dashed', color = 'gray')} %>%
print()
# geom_line(aes(group = Sample_category), alpha = 0.2, show.legend = F) # connect points for easy visual
# save the plot
ggsave(plot_as(title_name, '-ddPCR_raw'), plot = copies_all_targets,
width = 4.2, height = 4)
# interactive plot
ggplotly(copies_all_targets)
## plasmid copy number (ratio) ----
copy_num <-
plot_facetted_assay(.data = ratio_data,
.yvar_plot = copy_number, .xvar_plot = assay_variable,
.label.var = Well,
.facetvar_plot = NULL)
# copy_num +
# geom_line(aes(group = Sample_category), alpha = 0.2, show.legend = F) # connect points for easy visual
# save the plot
ggsave(plot_as(title_name, '-ddPCR_plasmid-copies'), plot = copy_num,
width = 4.2, height = 3)
# interactive plot
ggplotly(copy_num)
# Custom analysis ----
# run line by line to see the outputs
if(0)
{
# zoom in
copies_all_targets #+ ylim(c(0, 4300))
ggsave(plot_as(title_name, '-ddPCR_raw'), width = 4.2, height = 4)
# reorder columns for visual inspection
relocate(processed_data, 1, 3, 4, 7, PositiveDroplets) %>%
# filter(Sample_category == 'Negative') %>%
view
# plot positive droplets by target (dot plot)
ggplot(data = processed_data %>% filter(!flag_wells),
aes(x = PositiveDroplets)) +
geom_dotplot() +
facet_wrap(~Target_name, scales = 'free')
}