Skip to content

Commit

Permalink
WIP
Browse files Browse the repository at this point in the history
Metadata can be attached at both ExpressionExperiment and BioAssay
levels.
  • Loading branch information
arteymix committed Apr 27, 2023
1 parent b2f9237 commit 026e3c8
Show file tree
Hide file tree
Showing 17 changed files with 278 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,10 @@
import ubic.gemma.model.expression.experiment.ExpressionExperiment;
import ubic.gemma.persistence.service.expression.arrayDesign.ArrayDesignService;

import java.io.File;
import java.io.IOException;
import java.util.Collection;
import java.util.Collections;

/**
* Designed to add count and/or RPKM data to a data set that has only meta-data.
Expand All @@ -50,6 +52,7 @@ public class RNASeqDataAddCli extends ExpressionExperimentManipulatingCLI {
private Integer readLength = null;
private String rpkmFile = null;
private boolean justbackfillLog2cpm = false;
private File[] additionalMetadata;

@Override
public CommandGroup getCommandGroup() {
Expand All @@ -70,6 +73,10 @@ protected void buildOptions( Options options ) {

options.addOption( "log2cpm", "Just compute log2cpm from the existing stored count data (backfill); batchmode OK, no other options needed" );

options.addOption( Option.builder( "am" )
.longOpt( "additional-metadata" )
.type( File.class )
.build() );
}

@Override
Expand Down Expand Up @@ -192,6 +199,8 @@ protected void doWork() throws Exception {
serv.addCountData( ee, targetArrayDesign, countMatrix, rpkmMatrix, readLength, isPairedReads,
allowMissingSamples );

serv.addAdditionalMetadata( ee, additionalMetadata, Collections.emptyMap() );

} catch ( IOException e ) {
throw new Exception( "Failed while processing " + ee, e );
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,12 @@
import ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix;
import ubic.gemma.model.common.quantitationtype.QuantitationType;
import ubic.gemma.model.expression.arrayDesign.ArrayDesign;
import ubic.gemma.model.expression.bioAssay.BioAssay;
import ubic.gemma.model.expression.experiment.ExpressionExperiment;

import java.io.File;
import java.io.IOException;
import java.util.Map;

public interface DataUpdater {
void addAffyDataFromAPTOutput( ExpressionExperiment ee, String pathToAptOutputFile ) throws IOException;
Expand All @@ -26,4 +29,6 @@ void replaceData( ExpressionExperiment ee, ArrayDesign targetPlatform, Quantitat

ExpressionExperiment replaceData( ExpressionExperiment ee, ArrayDesign targetPlatform,
ExpressionDataDoubleMatrix data );

/**
 * Attach additional metadata files to an experiment and to individual bioassays.
 *
 * @param ee                            the experiment the metadata relates to
 * @param additionalMetadata            files to attach at the experiment level
 * @param additionalMetadataPerBioAssay files to attach at the bioassay level, keyed by the bioassay they describe
 */
void addAdditionalMetadata( ExpressionExperiment ee, File[] additionalMetadata, Map<BioAssay, File[]> additionalMetadataPerBioAssay );
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.MediaType;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Propagation;
import org.springframework.transaction.annotation.Transactional;
Expand Down Expand Up @@ -49,6 +50,7 @@
import ubic.gemma.model.expression.biomaterial.BioMaterial;
import ubic.gemma.model.expression.designElement.CompositeSequence;
import ubic.gemma.model.expression.experiment.ExpressionExperiment;
import ubic.gemma.model.expression.experiment.MetadataType;
import ubic.gemma.persistence.service.analysis.expression.pca.PrincipalComponentAnalysisService;
import ubic.gemma.persistence.service.analysis.expression.sampleCoexpression.SampleCoexpressionAnalysisService;
import ubic.gemma.persistence.service.common.auditAndSecurity.AuditTrailService;
Expand All @@ -61,6 +63,7 @@
import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService;
import ubic.gemma.persistence.util.EntityUtils;

import java.io.File;
import java.io.IOException;
import java.util.*;

Expand Down Expand Up @@ -189,9 +192,10 @@ public void addAffyDataFromAPTOutput( ExpressionExperiment ee, String pathToAptO
* switched to use it.
* @param countMatrix Representing 'raw' counts (added after rpkm, if provided).
* @param rpkmMatrix Representing per-gene normalized data, optional (RPKM or FPKM)
* @param allowMissingSamples if true, samples that are missing data will be deleted from the experiment.
* @param isPairedReads is paired reads
* @param readLength read length
* @param isPairedReads is paired reads
* @param allowMissingSamples if true, samples that are missing data will be deleted from the experiment.
* @param additionalMetadata
*/
@Override
@Transactional(propagation = Propagation.NEVER)
Expand Down Expand Up @@ -280,7 +284,6 @@ public void addCountData( ExpressionExperiment ee, ArrayDesign targetArrayDesign

this.addData( ee, targetArrayDesign, rpkmEEMatrix );
}

}

/**
Expand Down Expand Up @@ -545,11 +548,11 @@ public void reprocessAffyDataFromCel( ExpressionExperiment ee ) {
* selected experiment. Will do postprocessing if the data quantitationType is 'preferred', but if there is already
* a preferred quantitation type, an error will be thrown.
*
* @param ee ee
* @param targetPlatform optional; if null, uses the platform already used (if there is just one; you can't use
* this
* for a multi-platform dataset)
* @param data to slot in
* @param ee ee
* @param targetPlatform optional; if null, uses the platform already used (if there is just one; you can't use
* this
* for a multi-platform dataset)
* @param data to slot in
* @return ee
*/
@Override
Expand Down Expand Up @@ -673,6 +676,19 @@ public ExpressionExperiment replaceData( ExpressionExperiment ee, ArrayDesign ta
return ee;
}

/**
 * Attach additional metadata files to an experiment and to individual bioassays.
 * <p>
 * Every file is registered with the {@link MetadataType#PREPROCESSING} type and the {@code text/plain} media type.
 * A {@code null} array or map (e.g. when the caller has no metadata to supply) is treated as empty instead of
 * failing with a {@link NullPointerException}.
 *
 * @param ee                            the experiment the metadata relates to
 * @param additionalMetadata            files to attach at the experiment level, may be {@code null}
 * @param additionalMetadataPerBioAssay files to attach at the bioassay level, keyed by bioassay, may be
 *                                      {@code null}; entries with a {@code null} value are skipped
 */
@Override
@Transactional(propagation = Propagation.NEVER)
public void addAdditionalMetadata( ExpressionExperiment ee, File[] additionalMetadata, Map<BioAssay, File[]> additionalMetadataPerBioAssay ) {
    if ( additionalMetadata != null ) {
        for ( File am : additionalMetadata ) {
            experimentService.addAdditionalMetadata( ee, MetadataType.PREPROCESSING, am, MediaType.TEXT_PLAIN_VALUE );
        }
    }
    if ( additionalMetadataPerBioAssay != null ) {
        for ( Map.Entry<BioAssay, File[]> e : additionalMetadataPerBioAssay.entrySet() ) {
            if ( e.getValue() == null ) {
                // nothing to attach for this bioassay
                continue;
            }
            for ( File am : e.getValue() ) {
                experimentService.addAdditionalMetadata( ee, e.getKey(), MetadataType.PREPROCESSING, am, MediaType.TEXT_PLAIN_VALUE );
            }
        }
    }
}

/**
* RNA-seq
*
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package ubic.gemma.model.expression;

import lombok.Data;
import lombok.EqualsAndHashCode;
import ubic.gemma.model.common.Describable;
import ubic.gemma.model.expression.bioAssay.BioAssay;
import ubic.gemma.model.expression.experiment.ExpressionExperiment;
import ubic.gemma.model.expression.experiment.MetadataType;

import java.sql.Blob;

/**
* Metadata associated to an {@link ExpressionExperiment} or {@link BioAssay}.
* <p>
* Accessors are generated by Lombok's {@code @Data}; equality and hash code are based solely on the {@code id} field.
* @author poirigui
*/
@Data
@EqualsAndHashCode(of = { "id" })
public class AdditionalMetadata implements Describable {

// Identifier; the only field taking part in equals() and hashCode().
private Long id;
// Name and description of this metadata (presumably the Describable properties -- confirm against the interface).
private String name;
private String description;
// Kind of metadata held in the contents.
private MetadataType type;
// Raw contents, held as a SQL BLOB.
private Blob contents;
// Media type of the contents, e.g. "text/plain".
private String mediaType;
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,13 @@
import ubic.gemma.model.common.description.DatabaseEntry;
import ubic.gemma.model.expression.arrayDesign.ArrayDesign;
import ubic.gemma.model.expression.biomaterial.BioMaterial;
import ubic.gemma.model.expression.AdditionalMetadata;

import javax.persistence.Transient;
import java.io.Serializable;
import java.util.Date;
import java.util.HashSet;
import java.util.Set;

/**
* Represents the bringing together of a biomaterial with an assay of some sort (typically an expression assay). We
Expand Down Expand Up @@ -60,6 +63,8 @@ public class BioAssay extends AbstractDescribable implements gemma.gsec.model.Se
*/
private String fastqHeaders;

private Set<AdditionalMetadata> additionalMetadata = new HashSet<>();

@Override
public int hashCode() {
int hashCode;
Expand Down Expand Up @@ -215,6 +220,14 @@ public void setFastqHeaders( String fastqHeaders ) {
this.fastqHeaders = fastqHeaders;
}

/**
 * @return the additional metadata attached to this bioassay
 */
public Set<AdditionalMetadata> getAdditionalMetadata() {
return additionalMetadata;
}

/**
 * @param additionalMetadata the additional metadata attached to this bioassay; replaces the current set
 */
public void setAdditionalMetadata( Set<AdditionalMetadata> additionalMetadata ) {
this.additionalMetadata = additionalMetadata;
}

public static final class Factory {

public static BioAssay newInstance() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
*/
package ubic.gemma.model.expression.experiment;

import java.util.Collection;
import java.util.HashSet;
import java.util.Set;

Expand All @@ -26,6 +25,7 @@
import ubic.gemma.model.common.auditAndSecurity.curation.CurationDetails;
import ubic.gemma.model.common.description.Characteristic;
import ubic.gemma.model.common.quantitationtype.QuantitationType;
import ubic.gemma.model.expression.AdditionalMetadata;
import ubic.gemma.model.expression.bioAssay.BioAssay;
import ubic.gemma.model.expression.bioAssayData.MeanVarianceRelation;
import ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector;
Expand Down Expand Up @@ -81,6 +81,11 @@ public void setNumberOfSamples( Integer numberofSamples ) {

private Set<Characteristic> allCharacteristics;

/**
* A collection of additional metadata blobs.
*/
private Set<AdditionalMetadata> additionalMetadata = new HashSet<>();

@Override
public ExpressionExperimentValueObject createValueObject() {
return new ExpressionExperimentValueObject( this );
Expand Down Expand Up @@ -280,6 +285,14 @@ public void setTaxon( Taxon taxon ) {
this.taxon = taxon;
}

/**
 * @return the additional metadata blobs attached to this experiment
 */
public Set<AdditionalMetadata> getAdditionalMetadata() {
return additionalMetadata;
}

/**
 * @param additionalMetadata the additional metadata blobs attached to this experiment; replaces the current set
 */
public void setAdditionalMetadata( Set<AdditionalMetadata> additionalMetadata ) {
this.additionalMetadata = additionalMetadata;
}

@Override
public String toString() {
return super.toString() + ( shortName != null ? " Short Name=" + shortName : "" );
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package ubic.gemma.model.expression.experiment;

import ubic.gemma.model.expression.bioAssay.BioAssay;

/**
 * Kind of content held by an additional metadata attachment.
 * <p>
 * New constants are appended at the end so that the ordinals of existing constants never change.
 */
public enum MetadataType {
    /**
     * A sequencing QC report.
     * <p>
     * Example: a FastQC report attached to a specific {@link BioAssay}.
     */
    SEQUENCING_QC_REPORT,
    /**
     * A sequencing alignment report.
     * <p>
     * Example: STAR's Log.final.out file on a {@link BioAssay}
     */
    SEQUENCING_ALIGNMENT_REPORT,
    /**
     * An overall sequencing report.
     * <p>
     * Example: a MultiQC report on an {@link ExpressionExperiment}
     */
    SEQUENCING_OVERALL_REPORT,
    /**
     * Metadata attached while data is being added to an experiment or its bioassays.
     * <p>
     * Example: files supplied alongside RNA-Seq count data.
     */
    PREPROCESSING
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import ubic.gemma.model.common.description.Characteristic;
import ubic.gemma.model.common.description.DatabaseEntry;
import ubic.gemma.model.common.quantitationtype.QuantitationType;
import ubic.gemma.model.expression.AdditionalMetadata;
import ubic.gemma.model.expression.arrayDesign.ArrayDesign;
import ubic.gemma.model.expression.bioAssay.BioAssay;
import ubic.gemma.model.expression.bioAssayData.BioAssayDimension;
Expand All @@ -18,11 +19,13 @@
import ubic.gemma.persistence.service.BrowsingDao;
import ubic.gemma.persistence.service.FilteringVoEnabledDao;
import ubic.gemma.persistence.service.common.auditAndSecurity.curation.CuratableDao;
import ubic.gemma.persistence.service.expression.bioAssay.BioAssayDao;
import ubic.gemma.persistence.util.Filters;
import ubic.gemma.persistence.util.Slice;
import ubic.gemma.persistence.util.Sort;

import javax.annotation.Nullable;
import java.io.InputStream;
import java.util.Collection;
import java.util.Date;
import java.util.List;
Expand Down Expand Up @@ -232,4 +235,17 @@ Map<ExpressionExperiment, Collection<AuditEvent>> getSampleRemovalEvents(
long countTroubledPlatforms( ExpressionExperiment ee );

MeanVarianceRelation updateMeanVarianceRelation( ExpressionExperiment ee, MeanVarianceRelation mvr );

/**
* Add metadata on a given dataset.
*
* @param ee                 the dataset to attach the metadata to
* @param type               the kind of metadata
* @param additionalMetadata stream from which the contents are read
* @param length             number of bytes in the stream
* @param mediaType          media type of the contents
* @return the newly created metadata
*/
AdditionalMetadata addAdditionalMetadata( ExpressionExperiment ee, MetadataType type, InputStream additionalMetadata, long length, String mediaType );

/**
* Add metadata on a specific bioassay.
* <p>
* FIXME: this should probably be relocated in {@link BioAssayDao}.
*
* @param ee           the dataset the bioassay is expected to belong to
* @param sample       the bioassay to attach the metadata to
* @param metadataType the kind of metadata
* @param stream       stream from which the contents are read
* @param length       number of bytes in the stream
* @param mediaType    media type of the contents
* @return the newly created metadata
* @throws IllegalArgumentException if the bioassay does not belong to the expression experiment
*/
AdditionalMetadata addAdditionalMetadata( ExpressionExperiment ee, BioAssay sample, MetadataType metadataType, InputStream stream, long length, String mediaType ) throws IllegalArgumentException;
}
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
import ubic.gemma.model.common.description.Characteristic;
import ubic.gemma.model.common.description.DatabaseEntry;
import ubic.gemma.model.common.quantitationtype.QuantitationType;
import ubic.gemma.model.expression.AdditionalMetadata;
import ubic.gemma.model.expression.arrayDesign.ArrayDesign;
import ubic.gemma.model.expression.arrayDesign.ArrayDesignValueObject;
import ubic.gemma.model.expression.bioAssay.BioAssay;
Expand All @@ -60,8 +61,8 @@
import ubic.gemma.persistence.util.*;

import javax.annotation.Nullable;
import java.io.InputStream;
import java.util.*;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import static java.util.stream.Collectors.groupingBy;
Expand Down Expand Up @@ -430,7 +431,7 @@ public Collection<ExpressionExperiment> findByTaxon( Taxon taxon ) {
//language=HQL
// final String queryString =
// "select distinct ee from ExpressionExperiment as ee " + "inner join ee.bioAssays as ba "
// + "inner join ba.sampleUsed as sample where sample.sourceTaxon = :taxon ";
// + "inner join ba.sampleUsed as bioAssay where bioAssay.sourceTaxon = :taxon ";
final String queryString = "select ee from ExpressionExperiment as ee where ee.taxon = (:taxon)";

//noinspection unchecked
Expand Down Expand Up @@ -701,6 +702,32 @@ public MeanVarianceRelation updateMeanVarianceRelation( ExpressionExperiment ee,
return mvr;
}

/**
 * Create a new metadata entry from a stream and attach it to the given experiment.
 *
 * @param ee        the experiment to attach the metadata to
 * @param type      the kind of metadata
 * @param stream    stream from which the contents are read
 * @param length    number of bytes in the stream
 * @param mediaType media type of the contents; {@code text/plain} is used if {@code null}
 * @return the persisted metadata
 */
@Override
public AdditionalMetadata addAdditionalMetadata( ExpressionExperiment ee, MetadataType type, InputStream stream, long length, String mediaType ) {
    AdditionalMetadata am = createAdditionalMetadata( type, stream, length, mediaType );
    ee.getAdditionalMetadata().add( am );
    return am;
}

/**
 * Create a new metadata entry from a stream and attach it to a bioassay of the given experiment.
 *
 * @param ee        the experiment the bioassay must belong to
 * @param bioAssay  the bioassay to attach the metadata to
 * @param type      the kind of metadata
 * @param stream    stream from which the contents are read
 * @param length    number of bytes in the stream
 * @param mediaType media type of the contents; {@code text/plain} is used if {@code null}
 * @return the persisted metadata
 * @throws IllegalArgumentException if the bioassay is not part of the experiment
 */
@Override
public AdditionalMetadata addAdditionalMetadata( ExpressionExperiment ee, BioAssay bioAssay, MetadataType type, InputStream stream, long length, String mediaType ) throws IllegalArgumentException {
    // reject foreign bioassays before anything is persisted
    if ( !ee.getBioAssays().contains( bioAssay ) ) {
        throw new IllegalArgumentException( String.format( "%s is not part of %s", bioAssay, ee ) );
    }
    AdditionalMetadata am = createAdditionalMetadata( type, stream, length, mediaType );
    bioAssay.getAdditionalMetadata().add( am );
    return am;
}

/**
 * Build a metadata entry whose contents are a blob backed by the given stream and persist it in the current
 * session.
 */
private AdditionalMetadata createAdditionalMetadata( MetadataType type, InputStream stream, long length, String mediaType ) {
    AdditionalMetadata meta = new AdditionalMetadata();
    meta.setType( type );
    meta.setContents( getSessionFactory().getCurrentSession().getLobHelper().createBlob( stream, length ) );
    // honour the caller-supplied media type; keep the previous default when none is given
    meta.setMediaType( mediaType != null ? mediaType : "text/plain" );
    getSessionFactory().getCurrentSession().persist( meta );
    return meta;
}

@Override
public Collection<ArrayDesign> getArrayDesignsUsed( BioAssaySet bas ) {

Expand Down Expand Up @@ -1701,6 +1728,7 @@ protected void configureFilterableProperties( FilterablePropertiesConfigurer con
configurer.unregisterProperty( "source" );
configurer.unregisterProperty( "otherParts.size" );
configurer.unregisterProperty( "otherRelevantPublications.size" );
configurer.unregisterProperty( "additionalMetadata.size" );

configurer.unregisterProperties( p -> p.endsWith( "externalDatabases.size" ) );

Expand Down
Loading

0 comments on commit 026e3c8

Please sign in to comment.