Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Muting errors that may arise from statistical tests #9

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 45 additions & 11 deletions R/accessory_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,16 @@ twosamplecompare <- function(template1, index1=FALSE, ploidy1=2, cellularity1=1,
template2, index2=FALSE, ploidy2=2, cellularity2=1, standard2, name2,
equalsegments=FALSE, altmethod=FALSE, cap=12, qcap=12, bottom=0,
plot=TRUE, trncname=FALSE, legend=TRUE, chrsubset, onlyautosomes=TRUE,
sgc=c(),showcorrelation=TRUE) {
sgc=c(),showcorrelation=TRUE,quieterrors=TRUE) {

# Error handler for errors produced by stat functions when those tests fail
# These would meant that the test is invalid and it should produce p.value of 1
# so we return that here for convenience, by default
stat_test_error_handler <- function(err) {
if (!quieterrors) {warning(geterrmessage())}
return(1)
}

if (missing(template2)) {template2<-template1}
if(index1) {
pd1<-Biobase::pData(template1)
Expand Down Expand Up @@ -304,9 +313,16 @@ twosamplecompare <- function(template1, index1=FALSE, ploidy1=2, cellularity1=1,
if(altmethod=="MAD"){Mean2[i] <- median(template2na$copynumbers[seq(primer,allsegments[i])])}
SE2[i] <- sd(template2na$copynumbers[seq(primer,allsegments[i])])/sqrt(Num_Bins[i])
if(altmethod=="MAD"){SE2[i] <- mad(template2na$copynumbers[seq(primer,allsegments[i])])/sqrt(Num_Bins[i])}
if (Num_Bins[i]>1) {p_value[i] <- t.test(template1na$copynumbers[seq(primer,allsegments[i])],
template2na$copynumbers[seq(primer,allsegments[i])])$p.value
} else {p_value[i]<-1}
if (Num_Bins[i] > 1) {
p_value[i] <- {
tryCatch({
t.test(
template1na$copynumbers[seq(primer, allsegments[i])],
template2na$copynumbers[seq(primer, allsegments[i])]
)$p.value
}, error = stat_test_error_handler)
}
} else {p_value[i] <- 1}
primer <- allsegments[i]+1
}
if(altmethod=="SD"){
Expand All @@ -322,8 +338,11 @@ twosamplecompare <- function(template1, index1=FALSE, ploidy1=2, cellularity1=1,
for (i in seq_along(allsegments)) {
cn1 <- (template1na$copynumbers[seq(primer,allsegments[i])]-ns1)/sd1
cn2 <- (template2na$copynumbers[seq(primer,allsegments[i])]-ns2)/sd2
if (Num_Bins[i]>1) {p_value[i] <- t.test(cn1,cn2)$p.value
} else {p_value[i]<-1}
if (Num_Bins[i] > 1) {
p_value[i] <- {
tryCatch({t.test(cn1, cn2)$p.value}, error = stat_test_error_handler)
}
} else {p_value[i] <- 1}
primer <- allsegments[i]+1
}
}
Expand All @@ -340,7 +359,10 @@ twosamplecompare <- function(template1, index1=FALSE, ploidy1=2, cellularity1=1,
for (i in seq_along(allsegments)) {
cn1 <- (template1na$copynumbers[seq(primer,allsegments[i])]-ns1)/mad1
cn2 <- (template2na$copynumbers[seq(primer,allsegments[i])]-ns2)/mad2
if (Num_Bins[i]>1) {p_value[i] <- wilcox.test(cn1,cn2)$p.value
if (Num_Bins[i]>1) {
p_value[i] <- {
tryCatch({wilcox.test(cn1, cn2)$p.value}, error = stat_test_error_handler)
}
} else {p_value[i]<-1}
primer <- allsegments[i]+1
}
Expand Down Expand Up @@ -379,8 +401,12 @@ twosamplecompare <- function(template1, index1=FALSE, ploidy1=2, cellularity1=1,
SE1[segmentcounter + i] <- sd(template1na$copynumbers[seq(bincounter, bincounter+nobs[i]-1)])/sqrt(nobs[i])
Mean2[segmentcounter + i] <- mean(template2na$copynumbers[seq(bincounter, bincounter+nobs[i]-1)])
SE2[segmentcounter + i] <- sd(template2na$copynumbers[seq(bincounter, bincounter+nobs[i]-1)])/sqrt(nobs[i])
p_value[segmentcounter + i] <- t.test(template1na$copynumbers[seq(bincounter, bincounter+nobs[i]-1)],
p_value[segmentcounter + i] <- {
tryCatch({
t.test(template1na$copynumbers[seq(bincounter, bincounter+nobs[i]-1)],
template2na$copynumbers[seq(bincounter, bincounter+nobs[i]-1)])$p.value
}, error = stat_test_error_handler)
}
bincounter <- bincounter + nobs[i]
}
}
Expand All @@ -400,8 +426,13 @@ twosamplecompare <- function(template1, index1=FALSE, ploidy1=2, cellularity1=1,
lastbin <- which(template1na$chr==Chromosome[i] & template1na$end==End[i])
cn1 <- (template1na$copynumbers[seq(firstbin, lastbin)]-ns1)/sd1
cn2 <- (template2na$copynumbers[seq(firstbin, lastbin)]-ns2)/sd2
if (Num_Bins[i]>1) {p_value[i] <- t.test(cn1,cn2)$p.value
} else {p_value[i]<-1}
if (Num_Bins[i] > 1) {
p_value[i] <- {
tryCatch({
t.test(cn1, cn2)$p.value
}, error = stat_test_error_handler)
}
} else {p_value[i] <- 1}
}
}
if(altmethod=="MAD"){
Expand All @@ -418,7 +449,10 @@ twosamplecompare <- function(template1, index1=FALSE, ploidy1=2, cellularity1=1,
lastbin <- which(template1na$chr==Chromosome[i] & template1na$end==End[i])
cn1 <- (template1na$copynumbers[seq(firstbin,lastbin)]-ns1)/mad1
cn2 <- (template2na$copynumbers[seq(firstbin,lastbin)]-ns2)/mad2
if (Num_Bins[i]>1) {p_value[i] <- wilcox.test(cn1,cn2)$p.value
if (Num_Bins[i]>1) {
p_value[i] <- {
tryCatch({wilcox.test(cn1, cn2)$p.value}, error = stat_test_error_handler)
}
} else {p_value[i]<-1}
}
}
Expand Down
1 change: 1 addition & 0 deletions man/twosamplecompare.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ twosamplecompare(template1, index1 = FALSE, ploidy1 = 2,
\item{onlyautosomes}{Logical or integer. You can fill in an integer to specify how many autosomes your species has. When TRUE, \code{twosamplecompare} defaults to 22 (human) autosomes. When FALSE, \code{twosamplecompare} will also plot whichever other chromosomes are specified in the template, e.g. "X", "Y", "MT"}
\item{sgc}{Integer or character vector. Specify which chromosomes occur with only a single copy in the germline. Note that this is assumed for both samples.}
\item{showcorrelation}{Logical. Add the correlation to the two-sample plot. Default = TRUE}
\item{quieterrors}{Logical. Mute warnings about errors that the statistical tests produce. Default = TRUE}
}
\details{
This function can be used for different types of comparisons. It can be used to compare a tumor sample with a healthy (preferably matched) control. In this case, it may not be necessary to fill in the cellularity, because it will not make a difference for the statistical tests. In this ability the function helps to determine which (if any) segments are significantly different from normal. The other major use is to compare two tumors from potentially the same origin, but that were separated in space or time. You can then assess if changes have occurred, or even whether the two samples are from different clonal origin. In this case it is important to achieve maximum similarity in segments. Now the argument altmethod may come in handy, because it does not require model fitting and optimization. The q-values that are obtained with this function should be interpreted with caution. The two-sample statistical tests will easily reach significance when the sample sizes, in this case bins per segment, are large. By creating equal segment sizes with the argument equalsegments, these biases disappear.
Expand Down