Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Store additional metadata for ExpressionExperiment and BioAssay in the database #668

Draft
wants to merge 1 commit into
base: development
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,10 @@
import ubic.gemma.model.expression.experiment.ExpressionExperiment;
import ubic.gemma.persistence.service.expression.arrayDesign.ArrayDesignService;

import java.io.File;
import java.io.IOException;
import java.util.Collection;
import java.util.Collections;

/**
* Designed to add count and/or RPKM data to a data set that has only meta-data.
Expand All @@ -50,6 +52,7 @@ public class RNASeqDataAddCli extends ExpressionExperimentManipulatingCLI {
private Integer readLength = null;
private String rpkmFile = null;
private boolean justbackfillLog2cpm = false;
private File[] additionalMetadata;

@Override
public CommandGroup getCommandGroup() {
Expand All @@ -70,6 +73,10 @@ protected void buildOptions( Options options ) {

options.addOption( "log2cpm", "Just compute log2cpm from the existing stored count data (backfill); batchmode OK, no other options needed" );

// The option has to declare an argument: without hasArg() Commons CLI treats it as a bare flag
// and no file path is ever captured, so the declared File type would never be applied.
options.addOption( Option.builder( "am" )
        .longOpt( "additional-metadata" )
        .desc( "Additional metadata file to attach to the experiment" )
        .hasArg()
        .argName( "file" )
        .type( File.class )
        .build() );
}

@Override
Expand Down Expand Up @@ -192,6 +199,8 @@ protected void doWork() throws Exception {
serv.addCountData( ee, targetArrayDesign, countMatrix, rpkmMatrix, readLength, isPairedReads,
allowMissingSamples );

serv.addAdditionalMetadata( ee, additionalMetadata, Collections.emptyMap() );

} catch ( IOException e ) {
throw new Exception( "Failed while processing " + ee, e );
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,12 @@
import ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix;
import ubic.gemma.model.common.quantitationtype.QuantitationType;
import ubic.gemma.model.expression.arrayDesign.ArrayDesign;
import ubic.gemma.model.expression.bioAssay.BioAssay;
import ubic.gemma.model.expression.experiment.ExpressionExperiment;

import java.io.File;
import java.io.IOException;
import java.util.Map;

public interface DataUpdater {
void addAffyDataFromAPTOutput( ExpressionExperiment ee, String pathToAptOutputFile ) throws IOException;
Expand All @@ -26,4 +29,6 @@ void replaceData( ExpressionExperiment ee, ArrayDesign targetPlatform, Quantitat

ExpressionExperiment replaceData( ExpressionExperiment ee, ArrayDesign targetPlatform,
ExpressionDataDoubleMatrix data );

/**
 * Attach additional metadata files to an experiment and to individual bioassays.
 *
 * @param ee                            experiment receiving the metadata
 * @param additionalMetadata            files to attach to the experiment itself
 * @param additionalMetadataPerBioAssay files to attach to specific bioassays, keyed by the bioassay they
 *                                      describe
 */
void addAdditionalMetadata( ExpressionExperiment ee, File[] additionalMetadata, Map<BioAssay, File[]> additionalMetadataPerBioAssay );
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.MediaType;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Propagation;
import org.springframework.transaction.annotation.Transactional;
Expand Down Expand Up @@ -49,6 +50,7 @@
import ubic.gemma.model.expression.biomaterial.BioMaterial;
import ubic.gemma.model.expression.designElement.CompositeSequence;
import ubic.gemma.model.expression.experiment.ExpressionExperiment;
import ubic.gemma.model.expression.experiment.MetadataType;
import ubic.gemma.persistence.service.analysis.expression.pca.PrincipalComponentAnalysisService;
import ubic.gemma.persistence.service.analysis.expression.sampleCoexpression.SampleCoexpressionAnalysisService;
import ubic.gemma.persistence.service.common.auditAndSecurity.AuditTrailService;
Expand All @@ -61,6 +63,7 @@
import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService;
import ubic.gemma.persistence.util.EntityUtils;

import java.io.File;
import java.io.IOException;
import java.util.*;

Expand Down Expand Up @@ -189,9 +192,10 @@ public void addAffyDataFromAPTOutput( ExpressionExperiment ee, String pathToAptO
* switched to use it.
* @param countMatrix Representing 'raw' counts (added after rpkm, if provided).
* @param rpkmMatrix Representing per-gene normalized data, optional (RPKM or FPKM)
* @param allowMissingSamples if true, samples that are missing data will be deleted from the experiment.
* @param isPairedReads is paired reads
* @param readLength read length
* @param isPairedReads is paired reads
* @param allowMissingSamples if true, samples that are missing data will be deleted from the experiment.
* @param additionalMetadata
*/
@Override
@Transactional(propagation = Propagation.NEVER)
Expand Down Expand Up @@ -280,7 +284,6 @@ public void addCountData( ExpressionExperiment ee, ArrayDesign targetArrayDesign

this.addData( ee, targetArrayDesign, rpkmEEMatrix );
}

}

/**
Expand Down Expand Up @@ -545,11 +548,11 @@ public void reprocessAffyDataFromCel( ExpressionExperiment ee ) {
* selected experiment. Will do postprocessing if the data quantitationType is 'preferred', but if there is already
* a preferred quantitation type, an error will be thrown.
*
* @param ee ee
* @param targetPlatform optional; if null, uses the platform already used (if there is just one; you can't use
* this
* for a multi-platform dataset)
* @param data to slot in
* @param ee ee
* @param targetPlatform optional; if null, uses the platform already used (if there is just one; you can't use
* this
* for a multi-platform dataset)
* @param data to slot in
* @return ee
*/
@Override
Expand Down Expand Up @@ -673,6 +676,19 @@ public ExpressionExperiment replaceData( ExpressionExperiment ee, ArrayDesign ta
return ee;
}

/**
 * Attach additional metadata files to an experiment and to individual bioassays.
 * <p>
 * Each file is handed to the experiment service one at a time, declared as plain text. A {@code null} array or
 * map is treated as "nothing to attach" so that callers with no files (for example an unset command-line
 * option) do not fail.
 * <p>
 * NOTE(review): {@code MetadataType.PREPROCESSING} is not among the constants declared in the
 * {@code MetadataType} enum introduced alongside this method; confirm which constant is intended.
 *
 * @param ee                            experiment receiving the metadata
 * @param additionalMetadata            files to attach to the experiment itself, may be null
 * @param additionalMetadataPerBioAssay files to attach to specific bioassays, keyed by bioassay, may be null
 */
@Override
@Transactional(propagation = Propagation.NEVER)
public void addAdditionalMetadata( ExpressionExperiment ee, File[] additionalMetadata, Map<BioAssay, File[]> additionalMetadataPerBioAssay ) {
    if ( additionalMetadata != null ) {
        for ( File am : additionalMetadata ) {
            experimentService.addAdditionalMetadata( ee, MetadataType.PREPROCESSING, am, MediaType.TEXT_PLAIN_VALUE );
        }
    }
    if ( additionalMetadataPerBioAssay != null ) {
        for ( Map.Entry<BioAssay, File[]> e : additionalMetadataPerBioAssay.entrySet() ) {
            File[] files = e.getValue();
            if ( files == null ) {
                // a bioassay mapped to no files has nothing to attach
                continue;
            }
            for ( File am : files ) {
                experimentService.addAdditionalMetadata( ee, e.getKey(), MetadataType.PREPROCESSING, am, MediaType.TEXT_PLAIN_VALUE );
            }
        }
    }
}

/**
* RNA-seq
*
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package ubic.gemma.model.expression;

import lombok.Data;
import lombok.EqualsAndHashCode;
import ubic.gemma.model.common.Describable;
import ubic.gemma.model.expression.bioAssay.BioAssay;
import ubic.gemma.model.expression.experiment.ExpressionExperiment;
import ubic.gemma.model.expression.experiment.MetadataType;

import java.sql.Blob;

/**
 * Metadata associated to an {@link ExpressionExperiment} or {@link BioAssay}.
 * <p>
 * Accessors and {@code toString()} are generated by Lombok's {@code @Data}; {@code equals()} and
 * {@code hashCode()} are restricted to the {@code id} field.
 * <p>
 * NOTE(review): with id-only equality, two instances whose id is still {@code null} compare equal, which
 * matters when they are placed in a {@link java.util.Set} before an id has been assigned -- confirm ids are
 * always assigned first.
 *
 * @author poirigui
 */
@Data
@EqualsAndHashCode(of = { "id" })
public class AdditionalMetadata implements Describable {

// Identifier; the only field taking part in equals() and hashCode().
private Long id;
// Name of this piece of metadata.
private String name;
// Free-text description of this piece of metadata.
private String description;
// Kind of metadata held in the contents.
private MetadataType type;
// The metadata contents, held as a SQL BLOB.
private Blob contents;
// Media type of the contents, as a string such as "text/plain".
private String mediaType;
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,13 @@
import ubic.gemma.model.common.description.DatabaseEntry;
import ubic.gemma.model.expression.arrayDesign.ArrayDesign;
import ubic.gemma.model.expression.biomaterial.BioMaterial;
import ubic.gemma.model.expression.AdditionalMetadata;

import javax.persistence.Transient;
import java.io.Serializable;
import java.util.Date;
import java.util.HashSet;
import java.util.Set;

/**
* Represents the bringing together of a biomaterial with an assay of some sort (typically an expression assay). We
Expand Down Expand Up @@ -60,6 +63,8 @@ public class BioAssay extends AbstractDescribable implements gemma.gsec.model.Se
*/
private String fastqHeaders;

private Set<AdditionalMetadata> additionalMetadata = new HashSet<>();

@Override
public int hashCode() {
int hashCode;
Expand Down Expand Up @@ -215,6 +220,14 @@ public void setFastqHeaders( String fastqHeaders ) {
this.fastqHeaders = fastqHeaders;
}

/**
 * @return the additional metadata attached to this bioassay; this is the backing set itself, not a copy
 */
public Set<AdditionalMetadata> getAdditionalMetadata() {
    return this.additionalMetadata;
}

/**
 * Replace the set of additional metadata attached to this bioassay.
 *
 * @param additionalMetadata the set to use from now on; it is stored as-is, not copied
 */
public void setAdditionalMetadata( Set<AdditionalMetadata> additionalMetadata ) {
this.additionalMetadata = additionalMetadata;
}

public static final class Factory {

public static BioAssay newInstance() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
*/
package ubic.gemma.model.expression.experiment;

import java.util.Collection;
import java.util.HashSet;
import java.util.Set;

Expand All @@ -26,6 +25,7 @@
import ubic.gemma.model.common.auditAndSecurity.curation.CurationDetails;
import ubic.gemma.model.common.description.Characteristic;
import ubic.gemma.model.common.quantitationtype.QuantitationType;
import ubic.gemma.model.expression.AdditionalMetadata;
import ubic.gemma.model.expression.bioAssay.BioAssay;
import ubic.gemma.model.expression.bioAssayData.MeanVarianceRelation;
import ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector;
Expand Down Expand Up @@ -81,6 +81,11 @@ public void setNumberOfSamples( Integer numberofSamples ) {

private Set<Characteristic> allCharacteristics;

/**
* A collection of additional metadata blobs.
*/
private Set<AdditionalMetadata> additionalMetadata = new HashSet<>();

@Override
public ExpressionExperimentValueObject createValueObject() {
return new ExpressionExperimentValueObject( this );
Expand Down Expand Up @@ -280,6 +285,14 @@ public void setTaxon( Taxon taxon ) {
this.taxon = taxon;
}

/**
 * @return the additional metadata attached to this experiment; this is the backing set itself, not a copy
 */
public Set<AdditionalMetadata> getAdditionalMetadata() {
    return this.additionalMetadata;
}

/**
 * Replace the set of additional metadata attached to this experiment.
 *
 * @param additionalMetadata the set to use from now on; it is stored as-is, not copied
 */
public void setAdditionalMetadata( Set<AdditionalMetadata> additionalMetadata ) {
this.additionalMetadata = additionalMetadata;
}

@Override
public String toString() {
return super.toString() + ( shortName != null ? " Short Name=" + shortName : "" );
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package ubic.gemma.model.expression.experiment;

import ubic.gemma.model.expression.bioAssay.BioAssay;

/**
 * Kind of content held by a piece of additional metadata attached to an {@link ExpressionExperiment} or a
 * {@link BioAssay}.
 */
public enum MetadataType {
/**
 * A sequencing QC report.
 * <p>
 * Example: a FastQC report attached to a specific {@link BioAssay}.
 */
SEQUENCING_QC_REPORT,
/**
 * A sequencing alignment report.
 * <p>
 * Example: STAR's Log.final.out file on a {@link BioAssay}.
 */
SEQUENCING_ALIGNMENT_REPORT,
/**
 * An overall sequencing report.
 * <p>
 * Example: a MultiQC report on an {@link ExpressionExperiment}.
 */
SEQUENCING_OVERALL_REPORT,
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ppavlidis I'm looking for feedback on possible values to put here, for example whether it would make sense to include things for microarray platforms, such as the output of the APT tools.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is already some infrastructure to support this (surfaced on diagnostics tab) but it is a low priority now

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I reviewed it this morning and I synchronized all the MultiQC reports we have generated from the RNA-Seq pipeline.

I also finished adding a new argument to rnaseqDataAdd that takes care of copying the report over to the Gemma data directory, so that it can be fully integrated into the pipeline.

}
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import ubic.gemma.model.common.description.Characteristic;
import ubic.gemma.model.common.description.DatabaseEntry;
import ubic.gemma.model.common.quantitationtype.QuantitationType;
import ubic.gemma.model.expression.AdditionalMetadata;
import ubic.gemma.model.expression.arrayDesign.ArrayDesign;
import ubic.gemma.model.expression.bioAssay.BioAssay;
import ubic.gemma.model.expression.bioAssayData.BioAssayDimension;
Expand All @@ -18,11 +19,13 @@
import ubic.gemma.persistence.service.BrowsingDao;
import ubic.gemma.persistence.service.FilteringVoEnabledDao;
import ubic.gemma.persistence.service.common.auditAndSecurity.curation.CuratableDao;
import ubic.gemma.persistence.service.expression.bioAssay.BioAssayDao;
import ubic.gemma.persistence.util.Filters;
import ubic.gemma.persistence.util.Slice;
import ubic.gemma.persistence.util.Sort;

import javax.annotation.Nullable;
import java.io.InputStream;
import java.util.Collection;
import java.util.Date;
import java.util.List;
Expand Down Expand Up @@ -232,4 +235,17 @@ Map<ExpressionExperiment, Collection<AuditEvent>> getSampleRemovalEvents(
long countTroubledPlatforms( ExpressionExperiment ee );

MeanVarianceRelation updateMeanVarianceRelation( ExpressionExperiment ee, MeanVarianceRelation mvr );

/**
 * Add metadata on a given dataset.
 *
 * @param ee                 dataset the metadata is attached to
 * @param type               kind of metadata being added
 * @param additionalMetadata stream supplying the metadata contents
 * @param length             number of bytes to read from the stream
 * @param mediaType          media type of the contents (e.g. {@code text/plain})
 * @return the newly created metadata
 */
AdditionalMetadata addAdditionalMetadata( ExpressionExperiment ee, MetadataType type, InputStream additionalMetadata, long length, String mediaType );

/**
 * Add metadata on a specific bioassay.
 * <p>
 * FIXME: this should probably be relocated in {@link BioAssayDao}.
 *
 * @param ee           dataset the bioassay is expected to belong to
 * @param sample       bioassay the metadata is attached to
 * @param metadataType kind of metadata being added
 * @param stream       stream supplying the metadata contents
 * @param length       number of bytes to read from the stream
 * @param mediaType    media type of the contents (e.g. {@code text/plain})
 * @return the newly created metadata
 * @throws IllegalArgumentException if the bioassay does not belong to the expression experiment
 */
AdditionalMetadata addAdditionalMetadata( ExpressionExperiment ee, BioAssay sample, MetadataType metadataType, InputStream stream, long length, String mediaType ) throws IllegalArgumentException;
}
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
import ubic.gemma.model.common.description.Characteristic;
import ubic.gemma.model.common.description.DatabaseEntry;
import ubic.gemma.model.common.quantitationtype.QuantitationType;
import ubic.gemma.model.expression.AdditionalMetadata;
import ubic.gemma.model.expression.arrayDesign.ArrayDesign;
import ubic.gemma.model.expression.arrayDesign.ArrayDesignValueObject;
import ubic.gemma.model.expression.bioAssay.BioAssay;
Expand All @@ -60,8 +61,8 @@
import ubic.gemma.persistence.util.*;

import javax.annotation.Nullable;
import java.io.InputStream;
import java.util.*;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import static java.util.stream.Collectors.groupingBy;
Expand Down Expand Up @@ -430,7 +431,7 @@ public Collection<ExpressionExperiment> findByTaxon( Taxon taxon ) {
//language=HQL
// final String queryString =
// "select distinct ee from ExpressionExperiment as ee " + "inner join ee.bioAssays as ba "
// + "inner join ba.sampleUsed as sample where sample.sourceTaxon = :taxon ";
// + "inner join ba.sampleUsed as bioAssay where bioAssay.sourceTaxon = :taxon ";
final String queryString = "select ee from ExpressionExperiment as ee where ee.taxon = (:taxon)";

//noinspection unchecked
Expand Down Expand Up @@ -701,6 +702,32 @@ public MeanVarianceRelation updateMeanVarianceRelation( ExpressionExperiment ee,
return mvr;
}

/**
 * Create a new piece of metadata from the given stream and attach it to the experiment.
 *
 * @param ee        experiment the metadata is attached to
 * @param type      kind of metadata being added
 * @param stream    stream supplying the metadata contents
 * @param length    number of bytes to read from the stream
 * @param mediaType media type of the contents; plain text is assumed when null
 * @return the newly persisted metadata
 */
@Override
public AdditionalMetadata addAdditionalMetadata( ExpressionExperiment ee, MetadataType type, InputStream stream, long length, String mediaType ) {
    AdditionalMetadata am = createAdditionalMetadata( type, stream, length, mediaType );
    ee.getAdditionalMetadata().add( am );
    return am;
}

/**
 * Create a new piece of metadata from the given stream and attach it to a bioassay of the experiment.
 *
 * @param ee        experiment the bioassay must belong to
 * @param bioAssay  bioassay the metadata is attached to
 * @param type      kind of metadata being added
 * @param stream    stream supplying the metadata contents
 * @param length    number of bytes to read from the stream
 * @param mediaType media type of the contents; plain text is assumed when null
 * @return the newly persisted metadata
 * @throws IllegalArgumentException if the bioassay is not one of the experiment's bioassays
 */
@Override
public AdditionalMetadata addAdditionalMetadata( ExpressionExperiment ee, BioAssay bioAssay, MetadataType type, InputStream stream, long length, String mediaType ) throws IllegalArgumentException {
    // Reject foreign bioassays before anything is persisted.
    if ( !ee.getBioAssays().contains( bioAssay ) ) {
        throw new IllegalArgumentException( String.format( "%s is not part of %s", bioAssay, ee ) );
    }
    AdditionalMetadata am = createAdditionalMetadata( type, stream, length, mediaType );
    bioAssay.getAdditionalMetadata().add( am );
    return am;
}

/**
 * Build and persist a metadata entity whose contents are a blob backed by the given stream.
 */
private AdditionalMetadata createAdditionalMetadata( MetadataType type, InputStream stream, long length, @Nullable String mediaType ) {
    AdditionalMetadata meta = new AdditionalMetadata();
    meta.setType( type );
    meta.setContents( getSessionFactory().getCurrentSession().getLobHelper().createBlob( stream, length ) );
    // Honour the caller-supplied media type; fall back to the previous hard-coded value when none is given.
    meta.setMediaType( mediaType != null ? mediaType : "text/plain" );
    getSessionFactory().getCurrentSession().persist( meta );
    return meta;
}

@Override
public Collection<ArrayDesign> getArrayDesignsUsed( BioAssaySet bas ) {

Expand Down Expand Up @@ -1701,6 +1728,7 @@ protected void configureFilterableProperties( FilterablePropertiesConfigurer con
configurer.unregisterProperty( "source" );
configurer.unregisterProperty( "otherParts.size" );
configurer.unregisterProperty( "otherRelevantPublications.size" );
configurer.unregisterProperty( "additionalMetadata.size" );

configurer.unregisterProperties( p -> p.endsWith( "externalDatabases.size" ) );

Expand Down
Loading