Skip to content

Commit

Permalink
Revert "Clenaups for LinearModelAnalyzer"
Browse files Browse the repository at this point in the history
This reverts commit 7935d5d.
  • Loading branch information
arteymix committed Jun 7, 2024
1 parent 1962417 commit e6124b7
Show file tree
Hide file tree
Showing 25 changed files with 1,772 additions and 1,603 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/*
* The Gemma project
*
* Copyright (c) 2006 University of British Columbia
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package ubic.gemma.core.analysis.expression.diff;

import org.springframework.beans.factory.annotation.Autowired;
import ubic.gemma.core.analysis.service.ExpressionDataMatrixService;
import ubic.gemma.persistence.service.expression.designElement.CompositeSequenceService;

/**
 * Shared base for analyzers in this package. It only holds the service collaborators that subclasses use; it
 * defines no analysis logic of its own.
 *
 * @author keshav
 */
public abstract class AbstractAnalyzer {

    /**
     * Injected by Spring; available to subclasses.
     */
    @Autowired
    protected CompositeSequenceService compositeSequenceService;

    /**
     * Injected by Spring; may also be replaced through {@link #setExpressionDataMatrixService}.
     */
    @Autowired
    protected ExpressionDataMatrixService expressionDataMatrixService;

    /**
     * Replaces the expression data matrix service used by this analyzer.
     *
     * @param expressionDataMatrixService the service instance to use from now on
     */
    @SuppressWarnings("unused") // needed for tests.
    public void setExpressionDataMatrixService( ExpressionDataMatrixService expressionDataMatrixService ) {
        this.expressionDataMatrixService = expressionDataMatrixService;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
/*
* The Gemma project
*
* Copyright (c) 2006-2010 University of British Columbia
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package ubic.gemma.core.analysis.expression.diff;

import cern.colt.list.DoubleArrayList;
import cern.colt.matrix.DoubleMatrix1D;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import ubic.basecode.dataStructure.matrix.DenseDoubleMatrix1D;
import ubic.basecode.math.MultipleTestCorrection;
import ubic.basecode.math.Rank;
import ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix;
import ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysis;
import ubic.gemma.model.expression.experiment.BioAssaySet;
import ubic.gemma.model.expression.experiment.ExpressionExperiment;

import javax.annotation.Nullable;
import java.util.Collection;

/**
* An abstract differential expression analyzer to be extended
*
* @author keshav
*/
public abstract class AbstractDifferentialExpressionAnalyzer extends AbstractAnalyzer implements DiffExAnalyzer {

private final Log log = LogFactory.getLog( this.getClass() );

@Override
public abstract Collection<DifferentialExpressionAnalysis> run( ExpressionExperiment expressionExperiment,
DifferentialExpressionAnalysisConfig config );

@Override
public abstract Collection<DifferentialExpressionAnalysis> run( ExpressionExperiment expressionExperiment,
ExpressionDataDoubleMatrix dmatrix, DifferentialExpressionAnalysisConfig config );

/**
* @param pvalues pvalues
* @return normalized ranks of the pvalues, or null if they were invalid/unusable.
*/
double[] computeRanks( double[] pvalues ) {
if ( pvalues == null ) {
log.error( "Null pvalues" );
return null;
}
if ( pvalues.length == 0 ) {
log.error( "Empty pvalues array" );
return null;
}

DoubleArrayList ranks = Rank.rankTransform( new DoubleArrayList( pvalues ) );

if ( ranks == null ) {
log.error( "Pvalue ranks could not be computed" );
return null;
}

double[] normalizedRanks = new double[ranks.size()];
for ( int i = 0; i < ranks.size(); i++ ) {
normalizedRanks[i] = ranks.get( i ) / ranks.size();
}
return normalizedRanks;
}

/**
* @param pvalues pvalues
* @return Qvalues, or null if they could not be computed.
*/
@Nullable
double[] benjaminiHochberg( Double[] pvalues ) {
DoubleMatrix1D benjaminiHochberg = MultipleTestCorrection
.benjaminiHochberg( new DenseDoubleMatrix1D( ArrayUtils.toPrimitive( pvalues ) ) );
return benjaminiHochberg != null ? benjaminiHochberg.toArray() : null;
}

DifferentialExpressionAnalysis initAnalysisEntity( BioAssaySet bioAssaySet,
DifferentialExpressionAnalysisConfig config ) {

if ( config == null ) {
config = new DifferentialExpressionAnalysisConfig();
}
DifferentialExpressionAnalysis expressionAnalysis = config.toAnalysis();
expressionAnalysis.setExperimentAnalyzed( bioAssaySet );
return expressionAnalysis;
}

/**
* Needed to convert NaN or infinity values to a value we can store in the database.
*
* @param e e
* @return converted
*/
Double nan2Null( Double e ) {
boolean isNaN = ( e == null || Double.isNaN( e ) || e == Double.NEGATIVE_INFINITY
|| e == Double.POSITIVE_INFINITY );
if ( isNaN ) {
return null;
}
return e;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
*/
package ubic.gemma.core.analysis.expression.diff;

import org.springframework.context.ApplicationContextAware;
import ubic.gemma.core.analysis.expression.diff.DifferentialExpressionAnalyzerServiceImpl.AnalysisType;
import ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysis;
import ubic.gemma.model.expression.experiment.BioAssaySet;
Expand All @@ -27,7 +28,7 @@
* @author paul
*/
@SuppressWarnings({ "unused", "WeakerAccess" }) // Possible external use
public interface AnalysisSelectionAndExecutionService {
public interface AnalysisSelectionAndExecutionService extends ApplicationContextAware {

Collection<DifferentialExpressionAnalysis> analyze( ExpressionExperiment expressionExperiment,
DifferentialExpressionAnalysisConfig config );
Expand All @@ -46,6 +47,11 @@ Collection<DifferentialExpressionAnalysis> analyze( ExpressionExperiment express
AnalysisType determineAnalysis( BioAssaySet bioAssaySet, Collection<ExperimentalFactor> experimentalFactors,
ExperimentalFactor subsetFactor, boolean includeInteractions );

/**
* @return a new instance of a linear model analyzer.
*/
DiffExAnalyzer getAnalyzer();

DifferentialExpressionAnalysis analyze( ExpressionExperimentSubSet subset,
DifferentialExpressionAnalysisConfig config );

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.BeansException;
import org.springframework.context.ApplicationContext;
import org.springframework.stereotype.Component;
import ubic.gemma.core.analysis.expression.diff.DifferentialExpressionAnalyzerServiceImpl.AnalysisType;
import ubic.gemma.core.analysis.preprocess.batcheffects.BatchInfoPopulationServiceImpl;
Expand All @@ -45,8 +46,10 @@ public class AnalysisSelectionAndExecutionServiceImpl implements AnalysisSelecti

private static final Log log = LogFactory.getLog( AnalysisSelectionAndExecutionServiceImpl.class );

@Autowired
private LinearModelAnalyzer linearModelAnalyzer;
/*
* We are context-aware so we can get prototype beans.
*/
private ApplicationContext applicationContext;

@Override
public Collection<DifferentialExpressionAnalysis> analyze( ExpressionExperiment expressionExperiment,
Expand All @@ -57,11 +60,11 @@ public Collection<DifferentialExpressionAnalysis> analyze( ExpressionExperiment
throw new RuntimeException( "Could not locate an appropriate analyzer" );
}

return linearModelAnalyzer.run( expressionExperiment, config );
return this.applicationContext.getBean( DiffExAnalyzer.class ).run( expressionExperiment, config );
}

/**
* FIXME this should probably deal with the case of outliers and also the {@link LinearModelAnalyzerImpl}'s
* FIXME this should probably deal with the case of outliers and also the {@link LinearModelAnalyzer}'s
* EXCLUDE_CHARACTERISTICS_VALUES
*
* @return selected type of analysis such as t-test, two-way ANOVA, etc.
Expand Down Expand Up @@ -153,7 +156,7 @@ public AnalysisType determineAnalysis( BioAssaySet bioAssaySet, DifferentialExpr
}

/**
* FIXME this should probably deal with the case of outliers and also the {@link LinearModelAnalyzerImpl}'s
* FIXME this should probably deal with the case of outliers and also the {@link LinearModelAnalyzer}'s
* EXCLUDE_CHARACTERISTICS_VALUES
*
* @return AnalysisType
Expand Down Expand Up @@ -270,6 +273,11 @@ public AnalysisType determineAnalysis( BioAssaySet bioAssaySet, Collection<Exper
}
}

/**
* Looks up a {@link DiffExAnalyzer} bean by type in the application context held by this service.
* NOTE(review): whether each call yields a fresh instance depends on the bean's scope (presumably prototype) —
* confirm on the analyzer implementation.
*
* @return the analyzer bean supplied by the application context
*/
@Override
public DiffExAnalyzer getAnalyzer() {
return this.applicationContext.getBean( DiffExAnalyzer.class );
}

@Override
public DifferentialExpressionAnalysis analyze( ExpressionExperimentSubSet subset,
DifferentialExpressionAnalysisConfig config ) {
Expand All @@ -279,7 +287,12 @@ public DifferentialExpressionAnalysis analyze( ExpressionExperimentSubSet subset
throw new RuntimeException( "Could not locate an appropriate analyzer" );
}

return linearModelAnalyzer.run( subset, config );
return this.applicationContext.getBean( DiffExAnalyzer.class ).run( subset, config );
}

/**
* Stores the application context so analyzer beans can be looked up from it later.
*
* @param applicationContext the context to keep a reference to
* @throws BeansException declared by the overridden method; this implementation only assigns the field
*/
@Override
public void setApplicationContext( ApplicationContext applicationContext ) throws BeansException {
this.applicationContext = applicationContext;
}

private Collection<ExperimentalFactor> getFactorsToUse( BioAssaySet bioAssaySet,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
/*
* The Gemma project
*
* Copyright (c) 2012 University of British Columbia
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*/
package ubic.gemma.core.analysis.expression.diff;

import ubic.gemma.core.analysis.service.ExpressionDataMatrixService;
import ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix;
import ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysis;
import ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisResult;
import ubic.gemma.model.analysis.expression.diff.HitListSize;
import ubic.gemma.model.common.quantitationtype.QuantitationType;
import ubic.gemma.model.expression.designElement.CompositeSequence;
import ubic.gemma.model.expression.experiment.ExperimentalFactor;
import ubic.gemma.model.expression.experiment.ExpressionExperiment;
import ubic.gemma.model.expression.experiment.ExpressionExperimentSubSet;
import ubic.gemma.model.genome.Gene;

import java.util.Collection;
import java.util.Map;
import java.util.Set;

/**
* Contract for differential expression analyzers: running an analysis on an experiment (optionally with a
* caller-supplied data matrix) or on a subset, plus helpers for hit list sizes and gene counts.
*
* @author paul
*/
@SuppressWarnings({ "unused", "WeakerAccess" }) // Possible external use
public interface DiffExAnalyzer {

/**
* NOTE(review): presumably picks, from the given factors, the one to be treated as the intercept of the model
* for data of the given quantitation type; the selection rules (and whether null can be returned) are defined
* by the implementation — confirm there.
*
* @param factors candidate factors
* @param quantitationType quantitation type of the data being analyzed
* @return the factor determined by the implementation
*/
ExperimentalFactor determineInterceptFactor( Collection<ExperimentalFactor> factors,
QuantitationType quantitationType );

/**
* @param expressionExperiment the experiment
* @param config config
* @return analyses. There will be more than one if a subset factor is defined.
*/
Collection<DifferentialExpressionAnalysis> run( ExpressionExperiment expressionExperiment,
DifferentialExpressionAnalysisConfig config );

/**
* Allows entry of modified data matrices into the workflow.
*
* @param expressionExperiment the experiment
* @param dmatrix the data matrix to analyze
* @param config config
* @return analyses
*/
Collection<DifferentialExpressionAnalysis> run( ExpressionExperiment expressionExperiment,
ExpressionDataDoubleMatrix dmatrix, DifferentialExpressionAnalysisConfig config );

/**
* Generate HitListSize entities that will be stored to count the number of diff. ex probes at various preset
* thresholds, to avoid wasting time generating these counts on the fly later. This is done automatically during
* analysis, so is just here to allow 'backfilling'.
*
* @param results results
* @param probeToGeneMap map of probes to the genes associated with them
* @return hit list sizes
*/
Set<HitListSize> computeHitListSizes( Collection<DifferentialExpressionAnalysisResult> results,
Map<CompositeSequence, Collection<Gene>> probeToGeneMap );

/**
* Utility method
*
* @param resultList result list
* @param probeToGeneMap map of probes to the genes associated with them
* @return number of genes tested
*/
int getNumberOfGenesTested( Collection<DifferentialExpressionAnalysisResult> resultList,
Map<CompositeSequence, Collection<Gene>> probeToGeneMap );

/**
* this is needed so we can alter this in tests
*
* @param expressionDataMatrixService EE data matrix service
*/
void setExpressionDataMatrixService( ExpressionDataMatrixService expressionDataMatrixService );

/**
* Note that normally when we run a subset analysis, the subsetting is done internally, so we pass in the expression
* experiment, not the subset. This method is used for exceptions to that.
*
* @param subset subset
* @param config config
* @return analysis
*/
DifferentialExpressionAnalysis run( ExpressionExperimentSubSet subset,
DifferentialExpressionAnalysisConfig config );

}
Loading

0 comments on commit e6124b7

Please sign in to comment.