From 59b38dc73d42c5934eab57cc0f3cbc375424e582 Mon Sep 17 00:00:00 2001 From: kkleinoros Date: Fri, 16 Dec 2016 14:11:53 -0500 Subject: [PATCH 1/2] next version --- DESCRIPTION | 6 +- vignettes/funtooNorm.Rmd | 2 +- vignettes/nature.csl | 117 --------------------------------------- 3 files changed, 4 insertions(+), 121 deletions(-) delete mode 100644 vignettes/nature.csl diff --git a/DESCRIPTION b/DESCRIPTION index 30a784d..7c78f03 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,10 +1,10 @@ Package: funtooNorm Type: Package Title: Normalization Procedure for Infinium HumanMethylation450 BeadChip Kit -Version: 0.99.6 +Version: 0.99.7 Date: 2016-12-14 Author: Celia Greenwood ,Stepan Grinek - , Kathleen Klein + , Kathleen Klein Maintainer: Kathleen Klein Description: Provides a function to normalize Illumina Infinium Human Methylation 450 BeadChip (Illumina 450K), correcting for tissue and/or cell @@ -31,4 +31,4 @@ Depends: LazyData: true VignetteBuilder: knitr biocViews: DNAMethylation, Preprocessing, Normalization -RoxygenNote: 5.0.1.9000 +RoxygenNote: 5.0.1 diff --git a/vignettes/funtooNorm.Rmd b/vignettes/funtooNorm.Rmd index a605d24..d1d6019 100644 --- a/vignettes/funtooNorm.Rmd +++ b/vignettes/funtooNorm.Rmd @@ -156,7 +156,7 @@ genomerange <- getGRanges(mySampleSet) grs <- GenomicRatioSet(gr=genomerange, Beta=normBeta, preprocessMethod="funtooNorm", - pData=phenoData) + metadata=list(pData=phenoData)) grs ``` diff --git a/vignettes/nature.csl b/vignettes/nature.csl deleted file mode 100644 index 35efd0a..0000000 --- a/vignettes/nature.csl +++ /dev/null @@ -1,117 +0,0 @@ - - From 591eb1cb430cd8524c2eb16635cce682575d9aa5 Mon Sep 17 00:00:00 2001 From: kkleinoros Date: Tue, 20 Dec 2016 14:58:32 -0500 Subject: [PATCH 2/2] change to S4 --- NAMESPACE | 2 +- R/SampleSet.R | 610 ++++++++++-------- R/funtoonorm.R | 2 +- man/SampleSet-class.Rd | 22 +- man/agreement.Rd | 4 +- man/fromGenStudFiles.Rd | 4 +- man/fromRGChannelSet.Rd | 8 +- man/funtooNorm.Rd | 28 +- man/getNormBeta.Rd | 4 +- man/getNormM.Rd | 10 +- man/getRawBeta.Rd | 4 +- man/getSnpM.Rd | 11 +- man/plotValidationGraph.Rd | 14 +- ....SampleSet.Rd => show-SampleSet-method.Rd} | 13 +- vignettes/funtooNorm.Rmd | 4 +- 15 files changed, 394 insertions(+), 346 deletions(-) rename man/{print.SampleSet.Rd => show-SampleSet-method.Rd} (54%) diff --git a/NAMESPACE b/NAMESPACE index c967bbe..69fa8ad 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,5 @@ # Generated by roxygen2: do not edit by hand -S3method(print,SampleSet) export(agreement) export(fromGenStudFiles) export(fromRGChannelSet) @@ -12,6 +11,7 @@ export(getRawBeta) export(getSnpM) export(plotValidationGraph) exportClasses(SampleSet) +exportMethods(show) import(IlluminaHumanMethylation450kanno.ilmn12.hg19) import(IlluminaHumanMethylation450kmanifest) import(methods) diff --git a/R/SampleSet.R b/R/SampleSet.R index 9237a29..5cfc7ce 100644 --- a/R/SampleSet.R +++ b/R/SampleSet.R @@ -1,15 +1,14 @@ ################################################################################ -## This file illustrates how to create a 'SampleSet' object, -## which are list containing -## signal data and other covariables utilized by funtooNorm. -## The data is separated into the 3 kind positions are quantified -## Each way have then 2 channel (methylated and unmethylated ie : A and B -## We define then the 6 (2*3) labels: AIGrn BIGrn AIRed BIRed AII BII - - -## This file allow to create some 'SampleSet' objects, they are list containing - -#' SampleSet is an S3 class defined for the purpose of running the +## This function creates a S4 object of class 'SampleSet'. +## SampleSet object contain both signal data as well as +## other covariables needed for the funtooNorm function. +## The data is divided by colour and probe type. +## Each type have 2 channels (methylated and unmethylated ie : A and B +## We define the six groups as (2*3): AIGrn BIGrn AIRed BIRed AII BII + +#' @title S4 class object SampleSet +#' +#' @description SampleSet is an S4 class defined for the purpose of running the #' funtooNorm algorithm. They are lists containing signal data and different #' variables useful for funtooNorm. The data is separated into the 3 probes #' types, each having 2 channels (methylated and unmethylated ie : A and B) @@ -23,28 +22,31 @@ #' @slot nPos numeric: the number of positions in the ILLUMINA chip #' @slot annotation IlluminaMethylationAnnotation: the annotation object from #' minfi package -#' @slot cell_type list: list matching each sample to define the categories -#' @slot qntllist numeric: vector of ordered quantiles -#' @slot quantiles numeric: list of 6 quantiles tables for 6 type of signals -#' @slot ctl.covmat numeric: covariance matrix for the model fit -#' @slot signal numeric: list of 6 signal tables the 6 type of signals +#' @slot cell_type factor: vector of the cell type for each sample as factors +#' @slot qntllist list: vector of ordered quantiles +#' @slot quantiles list: list of 6 quantiles tables for the 6 signal types +#' @slot ctl.covmat matrix: covariance matrix for the model fit +#' @slot signal list: list of the values for all 6 probe types. +#' @slot names list: list of probes for each type #' -#' @return a SampleSet object +#' @return a S4 object of class SampleSet #' @export #' #' @examples showClass("SampleSet") #' @importClassesFrom minfi IlluminaMethylationAnnotation #' -setClass("SampleSet", representation(type="character", - sampleNames="character", - sampleSize="numeric", - nPos="numeric", - annotation="IlluminaMethylationAnnotation", - cell_type="list", - qntllist="numeric", - quantiles="numeric", - ctl.covmat="numeric", - signal="numeric") +setClass("SampleSet", representation(type = "character", + sampleNames = "character", + sampleSize = "numeric", + nPos = "numeric", + annotation = "character", + cell_type = "factor", + qntllist = "numeric", + quantiles = "list", + ctl.covmat = "matrix", + signal = "list", + names = "list", + predmat = "list") ) @@ -56,12 +58,14 @@ setClass("SampleSet", representation(type="character", ################################################################################ -#' create a SampleSet from RGChannelSet from minfi package +#' @title Creates an object of class SampleSet from a RGChannelSet {minfi} +#' +#' @description Creates a object of class SampleSet from the raw unprocessed data in RGChannelSet #' #' @param myRGChannelSet : RGChannelSet, from minfi package, should contain a -#' cell_type vector in it s phenotypes data pData +#' cell_type vector in pData #' -#' @return a SampleSet object +#' @return a class 'SampleSet' object #' @export #' #' @examples require(minfiData) @@ -70,19 +74,19 @@ setClass("SampleSet", representation(type="character", #' #' @importClassesFrom minfi RGChannelSet fromRGChannelSet <- function(myRGChannelSet){ - object <- list(type="minfi") - class(object) <- "SampleSet" - + object = new("SampleSet") + object@type = "minfi" + if(! "cell_type" %in% colnames(minfi::pData(myRGChannelSet))){ stop("Your object should contain a field \"cell_type\" in it's phenotype data minfi::pData(rgset) in order to use funtooNorm") } cell_type <- minfi::pData(myRGChannelSet)$cell_type - object$sampleSize=length(cell_type) - object$cell_type=cell_type - object$sampleNames=rownames(minfi::pData(myRGChannelSet)) - object$annotation=myRGChannelSet@annotation + object@sampleSize=length(cell_type) + object@cell_type=as.factor(cell_type) + object@sampleNames=rownames(minfi::pData(myRGChannelSet)) + object@annotation=myRGChannelSet@annotation if (any(cell_type == '' | is.na(cell_type))) { @@ -111,80 +115,76 @@ fromRGChannelSet <- function(myRGChannelSet){ controlgrn <- log2(1 + controlgrn) controlred <- log2(1 + controlred) - object$ctl.covmat=constructProbCovMat(controlred,controlgrn, - cp.types,object$cell_type) + object@ctl.covmat=constructProbCovMat(controlred,controlgrn, + cp.types,object@cell_type) message("A covariance Matrix was build") loc=minfi::getLocations(sprintf("%sanno.%s", - object$annotation["array"], - object$annotation["annotation"]), + object@annotation["array"], + object@annotation["annotation"]), orderByLocation = FALSE) pos=cbind(names(loc),as.character(GenomeInfoDb::seqnames(loc)),start(loc)) chrYnames=names(loc)[as.character(GenomeInfoDb::seqnames(loc))=="chrY"] - object$signal=list() - object$names=list() - - SnpI <- minfi::getProbeInfo(object$annotation, type = "SnpI") + SnpI <- minfi::getProbeInfo(object@annotation, type = "SnpI") ## Type I Green - TypeI.Green <- rbind(minfi::getProbeInfo(object$annotation, + TypeI.Green <- rbind(minfi::getProbeInfo(object@annotation, type = "I-Green"), SnpI[SnpI$Color == "Grn",]) sub=TypeI.Green$Name %in% chrYnames - object$names$IGrn=TypeI.Green$Name[!sub] - object$names$chrY=TypeI.Green$Name[sub] + object@names$IGrn=TypeI.Green$Name[!sub] + object@names$chrY=TypeI.Green$Name[sub] sigA=minfi::getGreen(myRGChannelSet)[TypeI.Green$AddressA,] sigB=minfi::getGreen(myRGChannelSet)[TypeI.Green$AddressB,] - object$signal$AIGrn=sigA[!sub,] - object$signal$BIGrn=sigB[!sub,] - object$signal$BchrY=sigB[sub,] - object$signal$AchrY=sigA[sub,] + object@signal$AIGrn=sigA[!sub,] + object@signal$BIGrn=sigB[!sub,] + object@signal$BchrY=sigB[sub,] + object@signal$AchrY=sigA[sub,] ## Type I Red - TypeI.Red <- rbind(minfi::getProbeInfo(object$annotation, type = "I-Red"), + TypeI.Red <- rbind(minfi::getProbeInfo(object@annotation, type = "I-Red"), SnpI[SnpI$Color == "Red",]) sub=TypeI.Red$Name %in% chrYnames - object$names$IRed=TypeI.Red$Name[!sub] - object$names$chrY=c(object$names$chrY,TypeI.Red$Name[sub]) + object@names$IRed=TypeI.Red$Name[!sub] + object@names$chrY=c(object@names$chrY,TypeI.Red$Name[sub]) sigA=minfi::getRed(myRGChannelSet)[TypeI.Red$AddressA,] sigB=minfi::getRed(myRGChannelSet)[TypeI.Red$AddressB,] - object$signal$AIRed=sigA[!sub,] - object$signal$BIRed=sigB[!sub,] - object$signal$AchrY=rbind(object$signal$AchrY,sigA[sub,]) - object$signal$BchrY=rbind(object$signal$BchrY,sigB[sub,]) + object@signal$AIRed=sigA[!sub,] + object@signal$BIRed=sigB[!sub,] + object@signal$AchrY=rbind(object@signal$AchrY,sigA[sub,]) + object@signal$BchrY=rbind(object@signal$BchrY,sigB[sub,]) ## Type II - TypeII <- rbind(minfi::getProbeInfo(object$annotation, type = "II"), - minfi::getProbeInfo(object$annotation, type = "SnpII")) + TypeII <- rbind(minfi::getProbeInfo(object@annotation, type = "II"), + minfi::getProbeInfo(object@annotation, type = "SnpII")) sub=TypeII$Name %in% chrYnames - object$names$II=TypeII$Name[!sub] - object$names$chrY=c(object$names$chrY,TypeII$Name[sub]) + object@names$II=TypeII$Name[!sub] + object@names$chrY=c(object@names$chrY,TypeII$Name[sub]) sigA=minfi::getRed(myRGChannelSet)[TypeII$AddressA,] sigB=minfi::getGreen(myRGChannelSet)[TypeII$AddressA,] - object$signal$AII=sigA[!sub,] - object$signal$BII=sigB[!sub,] - object$signal$AchrY=rbind(object$signal$AchrY,sigA[sub,]) - object$signal$BchrY=rbind(object$signal$BchrY,sigB[sub,]) - - object$nPos=sum(length(object$names$chrY), - length(object$names$II), - length(object$names$IRed), - length(object$names$IGrn)) - - for(i in names(object$signal)){ - object$signal[[i]]=log2(1 + object$signal[[i]]) + object@signal$AII=sigA[!sub,] + object@signal$BII=sigB[!sub,] + object@signal$AchrY=rbind(object@signal$AchrY,sigA[sub,]) + object@signal$BchrY=rbind(object@signal$BchrY,sigB[sub,]) + + object@nPos=sum(length(object@names$chrY), + length(object@names$II), + length(object@names$IRed), + length(object@names$IGrn)) + + for(i in names(object@signal)){ + object@signal[[i]]=log2(1 + object@signal[[i]]) } message("Signal data loaded") - object$qntllist=buildQuantileList(object$nPos) - object$quantiles=list() - for(i in names(object$signal)[!grepl("Y",names(object$signal))]){ - object$quantiles[[i]]=matrixStats::colQuantiles(object$signal[[i]], - prob=object$qntllist) + object@qntllist=buildQuantileList(object@nPos) + for(i in names(object@signal)[!grepl("Y",names(object@signal))]){ + object@quantiles[[i]]=matrixStats::colQuantiles(object@signal[[i]], + prob=object@qntllist) } message("Quantiles done") @@ -193,7 +193,7 @@ fromRGChannelSet <- function(myRGChannelSet){ ################################################################################ -#' create a SampleSet from GenomeStudio files +#' Creates a S4 object of class 'SampleSet' from GenomeStudio files #' #' @param controlProbeFile file of control probe data exported from GenomeStudio #' @param signalFile file exported from GenomeStudio with the exact same samples @@ -213,44 +213,44 @@ fromGenStudFiles <- function(controlProbeFile,signalFile,cell_type){ if (length(unique(cell_type))<2) { stop("There should be at least 2 cell types\n") } - class(object) <- "SampleSet" - object$sampleSize=length(cell_type) - object$cell_type=cell_type - object$annotation=c(array="IlluminaHumanMethylation450k", + object <- new("SampleSet") + object@sampleSize=length(cell_type) + object@cell_type=cell_type + object@annotation=c(array="IlluminaHumanMethylation450k", annotation="ilmn12.hg19") ## Construct the control Table controlTable=utils::read.table(controlProbeFile,sep='\t',header=TRUE) - object$sampleNames=unlist(strsplit(colnames(controlTable) - [1:object$sampleSize*3+1],".Signal_Grn")) + object@sampleNames=unlist(strsplit(colnames(controlTable) + [1:object@sampleSize*3+1],".Signal_Grn")) - controlred=controlTable[,paste(object$sampleName,".Signal_Red",sep="")] - controlgrn=controlTable[,paste(object$sampleName,".Signal_Grn",sep="")] + controlred=controlTable[,paste(object@sampleName,".Signal_Red",sep="")] + controlgrn=controlTable[,paste(object@sampleName,".Signal_Grn",sep="")] cp.types=controlTable$TargetID - if(any(! object$sampleNames %in% names(object$cell_type))){ - i=which(! object$sampleNames%in%names(object$cell_type))[1] - stop("STOP: ",object$sampleNames[i],"from file ",controlProbeFile, + if(any(! object@sampleNames %in% names(object@cell_type))){ + i=which(! object@sampleNames%in%names(object@cell_type))[1] + stop("STOP: ",object@sampleNames[i],"from file ",controlProbeFile, " should be present in the cell_types names:\n") } - object$cell_type=object$cell_type[object$sampleNames] + object@cell_type=object@cell_type[object@sampleNames] controlgrn <- log2(1 + controlgrn) controlred <- log2(1 + controlred) - object$ctl.covmat=constructProbCovMat(controlred,controlgrn, - cp.types,object$cell_type) + object@ctl.covmat=constructProbCovMat(controlred,controlgrn, + cp.types,object@cell_type) message("A covariance Matrix was build") ## Building the colClasses to help reading the file - colClasses <- rep("double", object$sampleSize*2+4) + colClasses <- rep("double", object@sampleSize*2+4) colClasses[1:4]=list(NULL) colClasses[2]="character" signalTable=utils::read.table(signalFile,sep='\t', header=TRUE,colClasses=colClasses) - sigA=signalTable[,1:object$sampleSize*2] - sigB=signalTable[,1:object$sampleSize*2+1] + sigA=signalTable[,1:object@sampleSize*2] + sigB=signalTable[,1:object@sampleSize*2+1] names=signalTable[,1] rm(signalTable) @@ -259,17 +259,17 @@ fromGenStudFiles <- function(controlProbeFile,signalFile,cell_type){ stop("There are non-numeric values in the matrix", '\n')} if (any(!is.finite(as.matrix(sigB)))){ stop("There are non-numeric values in the matrix", '\n')} - if(any(unlist(strsplit(colnames(sigA),".Signal_A"))!=object$sampleNames)){ + if(any(unlist(strsplit(colnames(sigA),".Signal_A"))!=object@sampleNames)){ stop("Those two files should contain the same samples in the same order:\n", controlProbeFile,signalFile)} #Naming the column consistently - colnames(sigA)=object$sampleNames - colnames(sigB)=object$sampleNames + colnames(sigA)=object@sampleNames + colnames(sigB)=object@sampleNames - object$nPos=nrow(sigA) + object@nPos=nrow(sigA) - object$signal=list(AIGrn=sigA[orderIGrn,], + object@signal=list(AIGrn=sigA[orderIGrn,], BIGrn=sigB[orderIGrn,], AIRed=sigA[orderIRed,], BIRed=sigB[orderIRed,], @@ -278,7 +278,7 @@ fromGenStudFiles <- function(controlProbeFile,signalFile,cell_type){ AchrY=sigA[orderchrY,], BchrY=sigB[orderchrY,]) - object$names=list(IGrn=names[orderIGrn], + object@names=list(IGrn=names[orderIGrn], IRed=names[orderIRed], II=names[orderII], chrY=names[orderchrY]) @@ -286,16 +286,16 @@ fromGenStudFiles <- function(controlProbeFile,signalFile,cell_type){ message("Signal data loaded") - for(i in names(object$signal)){ - object$signal[[i]]=log2(1 + object$signal[[i]]) + for(i in names(object@signal)){ + object@signal[[i]]=log2(1 + object@signal[[i]]) } - object$qntllist=buildQuantileList(object$nPos) - object$quantiles=list() - object$quantiles=list() - for(i in names(object$signal)[!grepl("Y",names(object$signal))]){ - object$quantiles[[i]]=matrixStats::colQuantiles(object$signal[[i]], - prob=object$qntllist) + object@qntllist=buildQuantileList(object@nPos) + object@quantiles=list() + object@quantiles=list() + for(i in names(object@signal)[!grepl("Y",names(object@signal))]){ + object@quantiles[[i]]=matrixStats::colQuantiles(object@signal[[i]], + prob=object@qntllist) } message("Quantiles done") @@ -304,10 +304,11 @@ fromGenStudFiles <- function(controlProbeFile,signalFile,cell_type){ ################################################################################ -#' Print information about the SampleSet +#' Show Object SampleSet #' +#' @description Display informations about the SampleSet object #' @param x an object of class SampleSet -#' @param ... further arguments passed to or from other methods. +#' @param ... optional arguments passed to or from other methods. #' #' @export #' @@ -316,43 +317,64 @@ fromGenStudFiles <- function(controlProbeFile,signalFile,cell_type){ #' mySampleSet=fromRGChannelSet(RGsetEx) #' mySampleSet #' -print.SampleSet <- function(x, ...){ - cat("SampleSet object built from ",x$type,'\n') - cat("Data: ",x$nPos,"positions ") - cat("and ",x$sampleSize, "samples",'\n') - cat(" cell type:",levels(x$cell_type),'\n') - cat(" ",length(x$qntllist),"quantiles",'\n') - if(is.null(x$predmat)){ - cat("funtooNorm Normalization was not applied",'\n') - }else{ - cat("funtooNorm Normalization was applied",'\n') - } -} - -getLogSigA <- function(signal){ - return(rbind(signal$AIGrn, - signal$AIRed, - signal$AII, - signal$AchrY)) -} - -getLogSigB <- function(signal){ - return(rbind(signal$BIGrn, - signal$BIRed, - signal$BII, - signal$BchrY)) -} +setMethod(f="show", + signature = "SampleSet", + definition = function(object){ + cat("SampleSet object built from ",object@type,'\n') + cat("Data: ",object@nPos,"positions ") + cat("and ",object@sampleSize, "samples",'\n') + cat(" cell type:",levels(object@cell_type),'\n') + cat(" ",length(object@qntllist),"quantiles",'\n') + if(length(object@predmat)==0){ + cat("funtooNorm Normalization was not applied",'\n')}else{ + cat("funtooNorm Normalization was applied",'\n') + } + } +) + +setGeneric(name="getLogSigA", + def=function(object) standardGeneric("getLogSigA") +) +setMethod("getLogSigA", + signature = "SampleSet", + definition = function(object){ + return(rbind(object@signal$AIGrn, + object@signal$AIRed, + object@signal$AII, + object@signal$AchrY)) + } +) + +setGeneric(name="getLogSigB", + def=function(object) standardGeneric("getLogSigB") +) +setMethod("getLogSigB", + signature = "SampleSet", + definition = function(object){ + return(rbind(object@signal$BIGrn, + object@signal$BIRed, + object@signal$BII, + object@signal$BchrY)) + } +) ################################################################################ ## internal function to get the position names returning a vector of ## position names the preserving the order define by this package -getPositionNames <- function(names){ - return(c(names$IGrn, - names$IRed, - names$II, - names$chrY)) -} +setGeneric(name="getPositionNames", + def=function(object) standardGeneric("getPositionNames") +) + +setMethod("getPositionNames", + signature = "SampleSet", + definition = function(object){ + return(c(object@names$IGrn, + object@names$IRed, + object@names$II, + object@names$chrY)) + } +) #' Return a list @@ -367,19 +389,26 @@ getPositionNames <- function(names){ #' mySampleSet=fromRGChannelSet(RGsetEx) #' gr=getGRanges(mySampleSet) #' -getGRanges <- function(object){ - methWithoutSNPs=getPositionNames(object$names) - methWithoutSNPs=methWithoutSNPs[!grepl("^rs",methWithoutSNPs)] - loc=minfi::getLocations(sprintf("%sanno.%s", - object$annotation["array"], - object$annotation["annotation"]), - orderByLocation = FALSE) +setGeneric(name="getGRanges", + def=function(object) standardGeneric("getGRanges") +) + +setMethod("getGRanges", + signature = "SampleSet", + definition = function(object){ + methWithoutSNPs=getPositionNames(object) + methWithoutSNPs=methWithoutSNPs[!grepl("^rs",methWithoutSNPs)] + loc=minfi::getLocations(sprintf("%sanno.%s", + object@annotation["array"], + object@annotation["annotation"]), + orderByLocation = FALSE) return(loc[methWithoutSNPs]) -} + } +) ################################################################################ -#' compute the beta value of the raw signal for each position and each +#' Computes the beta value from the raw signal at each position for each #' sample #' #' @param object object of class SampleSet @@ -394,17 +423,24 @@ getGRanges <- function(object){ #' mySampleSet=fromRGChannelSet(RGsetEx) #' r=getRawBeta(mySampleSet) #' -getRawBeta <- function(object,offset=100){ - mat=calcBeta(getLogSigA(object$signal), - getLogSigB(object$signal), - offset) - colnames(mat)=object$sampleNames - rownames(mat)=getPositionNames(object$names) - return(mat[!grepl("^rs",rownames(mat)),]) -} +setGeneric(name="getRawBeta", + def=function(object, offset=100) standardGeneric("getRawBeta") +) + +setMethod("getRawBeta", + signature = "SampleSet", + definition = function(object,offset){ + mat=calcBeta(getLogSigA(object), + getLogSigB(object), + offset) + colnames(mat)=object@sampleNames + rownames(mat)=getPositionNames(object) + return(mat[!grepl("^rs",rownames(mat)),]) + } +) ################################################################################ -#' compute the beta value after normalization for each position and each +#' Computes the beta value after normalization at each position for each #' sample #' #' @param object of type SampleSet @@ -419,52 +455,65 @@ getRawBeta <- function(object,offset=100){ #' mySampleSet=fromRGChannelSet(RGsetEx) #' b=getNormBeta(funtooNorm(mySampleSet)) #' -getNormBeta <- function(object,offset=100){ - if(any(is.null(object$predmat))){ - stop("WARNING: please call funtooNorm") - } - mat=calcBeta(getLogSigA(object$predmat), - getLogSigB(object$predmat), - offset) - colnames(mat)=object$sampleNames - rownames(mat)=getPositionNames(object$names) - return(mat[!grepl("^rs",rownames(mat)),]) -} +setGeneric(name="getNormBeta", + def=function(object, offset=100) standardGeneric("getNormBeta") +) + +setMethod("getNormBeta", + signature = "SampleSet", + definition = function(object,offset){ + if(length(object@predmat)==0){ + stop("WARNING: please call funtooNorm") + } + mat=calcBeta(getLogSigA(object), + getLogSigB(object), + offset) + colnames(mat)=object@sampleNames + rownames(mat)=getPositionNames(object) + return(mat[!grepl("^rs",rownames(mat)),]) + } +) ################################################################################ -#' compute the M value after normalization for each position and each +#' Computes the M value after normalization at each position for each #' sample #' #' @param object of type SampleSet #' @param offset default is 100 as Illumina standard #' -#' @return a matrix containing M after normalization value for each position -#' and each samples log2(Meth/Unmeth) +#' @return a matrix containing M values, log2(Meth/Unmeth), after normalization at each position +#' for each samples. #' @export #' #' @examples require(minfiData) #' pData(RGsetEx)$cell_type <- rep(c("type1","type2"),3) #' mySampleSet=fromRGChannelSet(RGsetEx) #' m=getNormM(funtooNorm(mySampleSet)) -#' -getNormM <- function(object,offset=100){ - if(any(is.null(object$predmat))){ - stop("WARNING: please call funtooNorm") - } - mat=getLogSigB(object$predmat)-getLogSigA(object$predmat) - colnames(mat)=object$sampleNames - rownames(mat)=getPositionNames(object$names) - return(mat[!grepl("^rs",rownames(mat)),]) -} +#' +setGeneric(name="getNormM", + def=function(object) standardGeneric("getNormM") +) + +setMethod("getNormM", + signature = "SampleSet", + definition = function(object){ + if(length(object@predmat)==0){ + stop("WARNING: please call funtooNorm") + } + mat=getLogSigB(object)-getLogSigA(object) + colnames(mat)=object@sampleNames + rownames(mat)=getPositionNames(object) + return(mat[!grepl("^rs",rownames(mat)),]) + } +) ################################################################################ -#' compute the M value after normalization for each SNP position and -#' each sample +#' Computes the M value after normalization for each SNP. #' -#' @param object of type SampleSet +#' @param object of class SampleSet +#' +#' @return a matrix containing M values, log2(Meth/Unmeth), after normalization for each SNP #' -#' @return a matrix containing M after normalization value for each SNP of the -#' chip and each sample log2(Meth/Unmeth) #' @export #' #' @examples require(minfiData) @@ -472,93 +521,98 @@ getNormM <- function(object,offset=100){ #' mySampleSet=fromRGChannelSet(RGsetEx) #' snp=getSnpM(funtooNorm(mySampleSet)) #' -getSnpM <- function(object){ - if(any(is.null(object$predmat))){ - stop("WARNING: please call funtooNorm") - } - mat=getLogSigA(object$predmat)-getLogSigB(object$predmat) - colnames(mat)=object$sampleNames - rownames(mat)=getPositionNames(object$names) - return(mat[grepl("^rs",rownames(mat)),]) -} +setGeneric(name="getSnpM", + def=function(object) standardGeneric("getSnpM") +) + +setMethod("getSnpM", + signature = "SampleSet", + definition = function(object){ + if(length(object@predmat)==0){ + stop("WARNING: please call funtooNorm") + } + mat=getLogSigA(object)-getLogSigB(object) + colnames(mat)=object@sampleNames + rownames(mat)=getPositionNames(object) + return(mat[grepl("^rs",rownames(mat)),]) + } +) ################################################################################ #' This is the main normalization function which applies to autosomes and the X chromosome. -#' Chromosome Y requires separate analysis, since there are few probes on Y. -#' Therefore we use a straightforward quantile normalization applied to males only. +#' Chromosome Y requires separate analysis as there are few probes on Y. +#' We use a straightforward quantile normalization applied to males only. #' -#' @param object of type SampleSet +#' @param object of class SampleSet #' @param type.fits can be "PCR" or "PLS" (default "PCR") #' @param ncmp number of components used in the analysis (default 4) -#' @param force set it to TRUE in order to re-compute the normalisation whent -#' it is already done -#' @param sex boolean vector: when not null force the chrY normalization to use -#' treat the TRUE values as mens +#' @param force when set to TRUE forces the normalization procedure to re-compute +#' @param sex boolean vector: if NULL Beta values from ChrY are used for classification. #' -#' @return a SampleSet containing the normalised signal +#' @return a S4 object of class SampleSet containing the normalized signal #' @export #' #' @examples require(minfiData) #' pData(RGsetEx)$cell_type <- rep(c("type1","type2"),3) #' mySampleSet=fromRGChannelSet(RGsetEx) #' mySampleSet=funtooNorm(mySampleSet) -#' -funtooNorm <- function(object, type.fits="PCR",ncmp=4,force=FALSE,sex=NULL){ - - if(force | is.null(object$predmat)){ - object$predmat=list() - }else{ - message("Normalization already exist") - } - - - ######## this part deal with chrY - if(is.null(sex)){ - mens=matrixStats::colMedians(calcBeta(object$signal$AchrY, - object$signal$BchrY))<0.6 - message("we found ",sum(mens)," men and ", - sum(!mens)," women in your data set base on Y probes only") - }else{ - mens=sex - message("There is ",sum(mens)," men and ", - sum(!mens)," women") - } - # women will not have any corrections - object$predmat$AchrY=object$signal$AchrY - object$predmat$BchrY=object$signal$BchrY - if(1=0.98),8)) if (!is.finite(numcomp) | numcomp>20) { @@ -586,7 +645,7 @@ plotValidationGraph <- function(object, type.fits="PCR",file=""){ message("\nStarting validation with a max of ", numcomp , " components...\n") - if(file!=""){ + if(is.null(file)){ message("plotting in the pdf file:",file) pdf(file = file, height=7, width=7) } @@ -597,9 +656,9 @@ plotValidationGraph <- function(object, type.fits="PCR",file=""){ par(omi=c(0,0,0,0),mar = c(2,2,0.2, 0.2), mgp = c(1,0,0)) - for(i in names(object$quantiles)){ - plotValidate(object$quantiles[[i]],object$qntllist, - object$ctl.covmat,numcomp,i) + for(i in names(object@quantiles)){ + plotValidate(object@quantiles[[i]],object@qntllist, + object@ctl.covmat,numcomp,i) } @@ -610,11 +669,12 @@ plotValidationGraph <- function(object, type.fits="PCR",file=""){ legend(title='Number of components:', x = "top",inset = 0, legend = 1:numcomp, col=rainbow(numcomp), lty=ltylist, cex=1.1, horiz = TRUE) - if(file!=""){ + if(!is.null(file)){ dev.off() } -} + } +) #' @import IlluminaHumanMethylation450kmanifest #' @import IlluminaHumanMethylation450kanno.ilmn12.hg19 diff --git a/R/funtoonorm.R b/R/funtoonorm.R index d3e8f7a..27c12be 100644 --- a/R/funtoonorm.R +++ b/R/funtoonorm.R @@ -125,7 +125,7 @@ funtooNormApply <- function(signal, quantiles, qntllist, } ################################################################################ -#' Function to measure intra-replicate agreement in methylation data. +#' Function to measure intra-replicate agreement for methylation data. #' #' @param Beta : Matrix with beta-values, rows corresponding to probes, columns #' corresponding to samples. diff --git a/man/SampleSet-class.Rd b/man/SampleSet-class.Rd index 3463f3c..2046780 100644 --- a/man/SampleSet-class.Rd +++ b/man/SampleSet-class.Rd @@ -3,16 +3,12 @@ \docType{class} \name{SampleSet-class} \alias{SampleSet-class} -\title{SampleSet is an S3 class defined for the purpose of running the -funtooNorm algorithm. They are lists containing signal data and different -variables useful for funtooNorm. The data is separated into the 3 probes -types, each having 2 channels (methylated and unmethylated ie : A and B) -We then define then the 6 (2*3) labels: AIGrn BIGrn AIRed BIRed AII BII} +\title{S4 class object SampleSet} \value{ -a SampleSet object +a S4 object of class SampleSet } \description{ -SampleSet is an S3 class defined for the purpose of running the +SampleSet is an S4 class defined for the purpose of running the funtooNorm algorithm. They are lists containing signal data and different variables useful for funtooNorm. The data is separated into the 3 probes types, each having 2 channels (methylated and unmethylated ie : A and B) @@ -33,15 +29,17 @@ contain the list of sample names in order used} \item{\code{annotation}}{IlluminaMethylationAnnotation: the annotation object from minfi package} -\item{\code{cell_type}}{list: list matching each sample to define the categories} +\item{\code{cell_type}}{factor: vector of the cell type for each sample as factors} + +\item{\code{qntllist}}{list: vector of ordered quantiles} -\item{\code{qntllist}}{numeric: vector of ordered quantiles} +\item{\code{quantiles}}{list: list of 6 quantiles tables for the 6 signal types} -\item{\code{quantiles}}{numeric: list of 6 quantiles tables for 6 type of signals} +\item{\code{ctl.covmat}}{matrix: covariance matrix for the model fit} -\item{\code{ctl.covmat}}{numeric: covariance matrix for the model fit} +\item{\code{signal}}{list: list of the values for all 6 probe types.} -\item{\code{signal}}{numeric: list of 6 signal tables the 6 type of signals} +\item{\code{names}}{list: list of probes for each type} }} \examples{ showClass("SampleSet") diff --git a/man/agreement.Rd b/man/agreement.Rd index 638cb85..2b514ec 100644 --- a/man/agreement.Rd +++ b/man/agreement.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/funtoonorm.R \name{agreement} \alias{agreement} -\title{Function to measure intra-replicate agreement in methylation data.} +\title{Function to measure intra-replicate agreement for methylation data.} \usage{ agreement(Beta, individualID) } @@ -19,7 +19,7 @@ The average value of the square distance between replicates: a measure of agreement between replicates in methylation data. } \description{ -Function to measure intra-replicate agreement in methylation data. +Function to measure intra-replicate agreement for methylation data. } \details{ We expect that the values returned by the agreement function after diff --git a/man/fromGenStudFiles.Rd b/man/fromGenStudFiles.Rd index 5e77459..fa24c8f 100644 --- a/man/fromGenStudFiles.Rd +++ b/man/fromGenStudFiles.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/SampleSet.R \name{fromGenStudFiles} \alias{fromGenStudFiles} -\title{create a SampleSet from GenomeStudio files} +\title{Creates a S4 object of class 'SampleSet' from GenomeStudio files} \usage{ fromGenStudFiles(controlProbeFile, signalFile, cell_type) } @@ -19,6 +19,6 @@ the files from genome studios, and at least 2 different cell types.} a SampleSet object } \description{ -create a SampleSet from GenomeStudio files +Creates a S4 object of class 'SampleSet' from GenomeStudio files } diff --git a/man/fromRGChannelSet.Rd b/man/fromRGChannelSet.Rd index 8a5acfd..e8b2ff7 100644 --- a/man/fromRGChannelSet.Rd +++ b/man/fromRGChannelSet.Rd @@ -2,19 +2,19 @@ % Please edit documentation in R/SampleSet.R \name{fromRGChannelSet} \alias{fromRGChannelSet} -\title{create a SampleSet from RGChannelSet from minfi package} +\title{Creates an object of class SampleSet from a RGChannelSet {minfi}} \usage{ fromRGChannelSet(myRGChannelSet) } \arguments{ \item{myRGChannelSet}{: RGChannelSet, from minfi package, should contain a -cell_type vector in it s phenotypes data pData} +cell_type vector in pData} } \value{ -a SampleSet object +a class 'SampleSet' object } \description{ -create a SampleSet from RGChannelSet from minfi package +Creates a object of class SampleSet from the raw unprocessed data in RGChannelSet } \examples{ require(minfiData) diff --git a/man/funtooNorm.Rd b/man/funtooNorm.Rd index 8bfe3cd..afcf129 100644 --- a/man/funtooNorm.Rd +++ b/man/funtooNorm.Rd @@ -2,39 +2,31 @@ % Please edit documentation in R/SampleSet.R \name{funtooNorm} \alias{funtooNorm} -\title{This function applies the normalization method central to the package -to each signal. -The chrY have a deserve a specific treatment, men are asses using the median -beta estimation on the raw data positions with a cutoff at 60 percent. -We perform on the mens a quantile normalization and we do not change the -women values} +\title{This is the main normalization function which applies to autosomes and the X chromosome. +Chromosome Y requires separate analysis as there are few probes on Y. +We use a straightforward quantile normalization applied to males only.} \usage{ funtooNorm(object, type.fits = "PCR", ncmp = 4, force = FALSE, sex = NULL) } \arguments{ -\item{object}{of type SampleSet} +\item{object}{of class SampleSet} \item{type.fits}{can be "PCR" or "PLS" (default "PCR")} \item{ncmp}{number of components used in the analysis (default 4)} -\item{force}{set it to TRUE in order to re-compute the normalisation whent -it is already done} +\item{force}{when set to TRUE forces the normalization procedure to re-compute} -\item{sex}{boolean vector: when not null force the chrY normalization to use -treat the TRUE values as mens} +\item{sex}{boolean vector: if NULL Beta values from ChrY are used for classification.} } \value{ -a SampleSet containing the normalised signal +a S4 object of class SampleSet containing the normalized signal } \description{ -This function applies the normalization method central to the package -to each signal. -The chrY have a deserve a specific treatment, men are asses using the median -beta estimation on the raw data positions with a cutoff at 60 percent. -We perform on the mens a quantile normalization and we do not change the -women values +This is the main normalization function which applies to autosomes and the X chromosome. +Chromosome Y requires separate analysis as there are few probes on Y. +We use a straightforward quantile normalization applied to males only. } \examples{ require(minfiData) diff --git a/man/getNormBeta.Rd b/man/getNormBeta.Rd index 31b3bc4..7d1306e 100644 --- a/man/getNormBeta.Rd +++ b/man/getNormBeta.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/SampleSet.R \name{getNormBeta} \alias{getNormBeta} -\title{compute the beta value after normalization for each position and each +\title{Computes the beta value after normalization at each position for each sample} \usage{ getNormBeta(object, offset = 100) @@ -17,7 +17,7 @@ a matrix containing beta after normalization value for each CpG position and each samples } \description{ -compute the beta value after normalization for each position and each +Computes the beta value after normalization at each position for each sample } \examples{ diff --git a/man/getNormM.Rd b/man/getNormM.Rd index ddb4aab..4ff2f2e 100644 --- a/man/getNormM.Rd +++ b/man/getNormM.Rd @@ -2,10 +2,10 @@ % Please edit documentation in R/SampleSet.R \name{getNormM} \alias{getNormM} -\title{compute the M value after normalization for each position and each +\title{Computes the M value after normalization at each position for each sample} \usage{ -getNormM(object, offset = 100) +getNormM(object) } \arguments{ \item{object}{of type SampleSet} @@ -13,11 +13,11 @@ getNormM(object, offset = 100) \item{offset}{default is 100 as Illumina standard} } \value{ -a matrix containing M after normalization value for each position -and each samples log2(Meth/Unmeth) +a matrix containing M values, log2(Meth/Unmeth), after normalization at each position +for each samples. } \description{ -compute the M value after normalization for each position and each +Computes the M value after normalization at each position for each sample } \examples{ diff --git a/man/getRawBeta.Rd b/man/getRawBeta.Rd index b3b07c1..b6caafb 100644 --- a/man/getRawBeta.Rd +++ b/man/getRawBeta.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/SampleSet.R \name{getRawBeta} \alias{getRawBeta} -\title{compute the beta value of the raw signal for each position and each +\title{Computes the beta value from the raw signal at each position for each sample} \usage{ getRawBeta(object, offset = 100) @@ -17,7 +17,7 @@ a matrix containing the raw beta value for each position and each samples } \description{ -compute the beta value of the raw signal for each position and each +Computes the beta value from the raw signal at each position for each sample } \examples{ diff --git a/man/getSnpM.Rd b/man/getSnpM.Rd index 958b0de..834f51a 100644 --- a/man/getSnpM.Rd +++ b/man/getSnpM.Rd @@ -2,21 +2,18 @@ % Please edit documentation in R/SampleSet.R \name{getSnpM} \alias{getSnpM} -\title{compute the M value after normalization for each SNP position and -each sample} +\title{Computes the M value after normalization for each SNP.} \usage{ getSnpM(object) } \arguments{ -\item{object}{of type SampleSet} +\item{object}{of class SampleSet} } \value{ -a matrix containing M after normalization value for each SNP of the -chip and each sample log2(Meth/Unmeth) +a matrix containing M values, log2(Meth/Unmeth), after normalization for each SNP } \description{ -compute the M value after normalization for each SNP position and -each sample +Computes the M value after normalization for each SNP. } \examples{ require(minfiData) diff --git a/man/plotValidationGraph.Rd b/man/plotValidationGraph.Rd index a333e61..126f35e 100644 --- a/man/plotValidationGraph.Rd +++ b/man/plotValidationGraph.Rd @@ -2,22 +2,22 @@ % Please edit documentation in R/SampleSet.R \name{plotValidationGraph} \alias{plotValidationGraph} -\title{Plot a series of graphs with different numbers of components for -each signal} +\title{Plot a series of graphs for each signal type, illustrating the effects + of the number of components included} \usage{ -plotValidationGraph(object, type.fits = "PCR", file = "") +plotValidationGraph(object, type.fits = "PCR", file = NULL) } \arguments{ -\item{object}{of type SampleSet} +\item{object}{of class SampleSet} \item{type.fits}{can be "PCR" or "PLS" (default "PCR")} -\item{file}{if not empty will write a pdf using this name, path can be +\item{file}{if not NULL will write a pdf using this name, path can be included} } \description{ -Plot a series of graphs with different numbers of components for -each signal +Plot a series of graphs for each signal type, illustrating the effects + of the number of components included } \examples{ require(minfiData) diff --git a/man/print.SampleSet.Rd b/man/show-SampleSet-method.Rd similarity index 54% rename from man/print.SampleSet.Rd rename to man/show-SampleSet-method.Rd index 98f6cad..45f3ac6 100644 --- a/man/print.SampleSet.Rd +++ b/man/show-SampleSet-method.Rd @@ -1,18 +1,19 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/SampleSet.R -\name{print.SampleSet} -\alias{print.SampleSet} -\title{Print information about the SampleSet} +\docType{methods} +\name{show,SampleSet-method} +\alias{show,SampleSet-method} +\title{Show Object SampleSet} \usage{ -\method{print}{SampleSet}(x, ...) +\S4method{show}{SampleSet}(object) } \arguments{ \item{x}{an object of class SampleSet} -\item{...}{further arguments passed to or from other methods.} +\item{...}{optional arguments passed to or from other methods.} } \description{ -Print information about the SampleSet +Display informations about the SampleSet object } \examples{ require(minfiData) diff --git a/vignettes/funtooNorm.Rmd b/vignettes/funtooNorm.Rmd index d1d6019..38ec615 100644 --- a/vignettes/funtooNorm.Rmd +++ b/vignettes/funtooNorm.Rmd @@ -25,7 +25,7 @@ knitr::opts_chunk$set(tidy = FALSE, ``` ```{r library, echo=FALSE, results='hide', message=FALSE} -library(minfi) +require(minfi) ``` #Introduction @@ -48,7 +48,7 @@ Note that the current version of the package does not do a good job of normalizi different colors provide the methylated and the unmethylated measurements. ``` -Therefore, we dissociate the 6 types of signals in our method : +Therefore, we separate the 6 types of signals in our method : _AIGrn_, _BIGrn_, _AIRed_, _BIRed_, _AII_ and _BII_, where the __A__ (methylated) and __B__ (unmethylated) are on __Green__ or __Red__ channel depending on the 3 types : __Type I Red__, __Type I Green__ or