Skip to content

Commit

Permalink
Eliminate offset/limit parameters and improve the code for retrieving…
Browse files Browse the repository at this point in the history
… analysis VOs

Remove unnecessary offset/limit parameters of getDatasetDifferentialExpressionAnalyses() (fix #841).

Add a parameter to include analyses of subsets.

Tidy up the DAO:

 - don't retrieve FVs for subsets if includeAnalysesOfSubsets is false
 - initialize proxies of both experiments and subsets

Include analyses of subsets in the output of the endpoint.

Add two new service methods to retrieve analysis VOs: one by a
collection of BioAssaySet and another one for a single BioAssaySet. Use
the latter for the REST API.

Rename sourceExperiment to sourceExperimentId and retain the older name
marked as deprecated (fix #844).
  • Loading branch information
arteymix committed Sep 15, 2023
1 parent 91758b9 commit 40dda98
Show file tree
Hide file tree
Showing 13 changed files with 138 additions and 120 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -465,7 +465,7 @@ private void getStats( ExpressionExperimentDetailsValueObject eeVo ) {
assert id != null;

Map<ExpressionExperimentDetailsValueObject, List<DifferentialExpressionAnalysisValueObject>> analysis = differentialExpressionAnalysisService
.getAnalysesByExperiment( Collections.singleton( id ) );
.getAnalysesByExperimentIds( Collections.singleton( id ), true );
if ( analysis != null && analysis.containsKey( eeVo ) ) {
eeVo.setDifferentialExpressionAnalyses( analysis.get( eeVo ) );
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ private List<DiffExResultSetSummaryValueObject> addConditionsToSearchResultValue

// database hit: important that this be fast.
Map<ExpressionExperimentDetailsValueObject, List<DifferentialExpressionAnalysisValueObject>> analyses = differentialExpressionAnalysisService
.getAnalysesByExperiment( EntityUtils.getIds( experimentGroup ) );
.getAnalysesByExperimentIds( EntityUtils.getIds( experimentGroup ), true );

experiment:
for ( ExpressionExperimentDetailsValueObject bas : analyses.keySet() ) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@

import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonProperty;
import lombok.*;
import lombok.Getter;
import lombok.Setter;
import org.hibernate.Hibernate;
import ubic.gemma.model.analysis.AnalysisValueObject;
import ubic.gemma.model.expression.experiment.ExperimentalFactorValueObject;
Expand Down Expand Up @@ -50,8 +51,15 @@ public class DifferentialExpressionAnalysisValueObject extends AnalysisValueObje
private Collection<DiffExResultSetSummaryValueObject> resultSets = new HashSet<>();
@JsonInclude(JsonInclude.Include.NON_NULL)
private Collection<Long> arrayDesignsUsed;
/**
* The ID of the experiment being analyzed. Either an experiment or a subset.
*/
private Long bioAssaySetId;
private Long sourceExperiment;
/**
* If this is an analysis of a subset, the ID of the source experiment.
*/
@Nullable
private Long sourceExperimentId;
private ExperimentalFactorValueObject subsetFactor;
private FactorValueValueObject subsetFactorValue;

Expand All @@ -70,7 +78,7 @@ public DifferentialExpressionAnalysisValueObject( DifferentialExpressionAnalysis
// experimentAnalyzed is eagerly fetched
if ( analysis.getExperimentAnalyzed() instanceof ExpressionExperimentSubSet ) {
// sourceExperiment is eagerly fetched too
this.sourceExperiment = ( ( ExpressionExperimentSubSet ) analysis.getExperimentAnalyzed() ).getSourceExperiment().getId();
this.sourceExperimentId = ( ( ExpressionExperimentSubSet ) analysis.getExperimentAnalyzed() ).getSourceExperiment().getId();
}
if ( analysis.getSubsetFactorValue() != null && Hibernate.isInitialized( ( analysis.getSubsetFactorValue() ) ) ) {
this.subsetFactorValue = new FactorValueValueObject( analysis.getSubsetFactorValue() );
Expand All @@ -80,6 +88,14 @@ public DifferentialExpressionAnalysisValueObject( DifferentialExpressionAnalysis
}
}

/**
* Obtain the ID of the source experiment under its former accessor name.
* <p>
* This simply returns the {@code sourceExperimentId} field.
*
* @return the value of {@code sourceExperimentId}, which may be {@code null}
* @deprecated this was renamed to {@link #getSourceExperimentId()} for consistency
*/
@Deprecated
public Long getSourceExperiment() {
return sourceExperimentId;
}

/**
* @deprecated This was renamed for clarity.
* @see #getFactorValuesUsedByExperimentalFactorId()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import ubic.gemma.model.expression.experiment.BioAssaySet;
import ubic.gemma.model.expression.experiment.ExperimentalFactor;
import ubic.gemma.model.expression.experiment.ExpressionExperiment;
import ubic.gemma.model.expression.experiment.ExpressionExperimentSubSet;
import ubic.gemma.model.genome.Gene;
import ubic.gemma.model.genome.Taxon;
import ubic.gemma.persistence.service.analysis.SingleExperimentAnalysisDao;
Expand Down Expand Up @@ -71,6 +72,11 @@ Map<ExpressionExperiment, Collection<DifferentialExpressionAnalysis>> getAnalyse

Collection<Long> getExperimentsWithAnalysis( Taxon taxon );

/**
* Obtain analyses per experiment IDs which can be either IDs of {@link ExpressionExperiment} or {@link ExpressionExperimentSubSet}.
* @param includeAnalysesOfSubsets whether to include the analyses of subsets if the experiment ID is that of an {@link ExpressionExperiment}
* @return a mapping of IDs of {@link BioAssaySet} to lists of {@link DifferentialExpressionAnalysisValueObject}
*/
Map<Long, List<DifferentialExpressionAnalysisValueObject>> getAnalysesByExperimentIds(
Collection<Long> expressionExperimentIds, int offset, int limit );
Collection<Long> experimentIds, boolean includeAnalysesOfSubsets );
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
*/
package ubic.gemma.persistence.service.analysis.expression.diff;

import org.apache.commons.collections4.ListUtils;
import org.apache.commons.lang3.time.StopWatch;
import org.apache.commons.lang3.tuple.Pair;
import org.hibernate.Hibernate;
Expand Down Expand Up @@ -477,16 +476,15 @@ public Collection<Long> getExperimentsWithAnalysis( Taxon taxon ) {

@Override
public Map<Long, List<DifferentialExpressionAnalysisValueObject>> getAnalysesByExperimentIds(
Collection<Long> expressionExperimentIds, int offset, int limit ) {
Collection<Long> experimentIds, boolean includeAnalysesOfSubsets ) {

/*
* There are three cases to consider: the ids are experiments; the ids are experiment subsets; the ids are
* experiments that have subsets.
*/
Map<Long, List<DifferentialExpressionAnalysisValueObject>> r = new HashMap<>();

Map<Long, Collection<Long>> arrayDesignsUsed = CommonQueries
.getArrayDesignsUsedEEMap( expressionExperimentIds, this.getSessionFactory().getCurrentSession() );
.getArrayDesignsUsedEEMap( experimentIds, this.getSessionFactory().getCurrentSession() );

/*
* Fetch analyses of experiments or subsets.
Expand All @@ -496,102 +494,81 @@ public Map<Long, List<DifferentialExpressionAnalysisValueObject>> getAnalysesByE
"select distinct a from DifferentialExpressionAnalysis a "
+ "join fetch a.experimentAnalyzed e "
+ "where e.id in (:eeIds)" )
.setParameterList( "eeIds", expressionExperimentIds )
.setFirstResult( offset )
.setMaxResults( limit )
.setParameterList( "eeIds", experimentIds )
.list();

// initialize result sets and hit list sizes
// this is necessary because the DEA VO constructor will ignore uninitialized associations
for ( DifferentialExpressionAnalysis hit : hits ) {
Hibernate.initialize( hit.getResultSets() );
for ( ExpressionAnalysisResultSet rs : hit.getResultSets() ) {
Hibernate.initialize( rs.getHitListSizes() );
}
}

Map<Long, Collection<FactorValue>> ee2fv = new HashMap<>();
List<Object[]> fvs;

if ( !hits.isEmpty() ) {
// factor values for the experiments.
//noinspection unchecked
fvs = this.getSessionFactory().getCurrentSession().createQuery(
"select distinct ee.id, fv from " + "ExpressionExperiment"
+ " ee join ee.bioAssays ba join ba.sampleUsed bm join bm.factorValues fv where ee.id in (:ees)" )
.setParameterList( "ees", expressionExperimentIds ).list();
List<Object[]> fvs = this.getSessionFactory().getCurrentSession().createQuery(
"select ee.id, fv from ExpressionExperiment ee "
+ "join ee.bioAssays ba join ba.sampleUsed bm join bm.factorValues fv "
+ "where ee.id in (:ees) "
+ "group by ee, fv" )
.setParameterList( "ees", experimentIds ).list();
this.addFactorValues( ee2fv, fvs );
}

// also get factor values for subsets - those not found yet.
Collection<Long> used = new HashSet<>();
for ( DifferentialExpressionAnalysis a : hits ) {
used.add( a.getExperimentAnalyzed().getId() );
}

List probableSubSetIds = ListUtils.removeAll( used, ee2fv.keySet() );
if ( !probableSubSetIds.isEmpty() ) {
if ( includeAnalysesOfSubsets ) {
// Subsets of those same experiments (there might not be any)
//noinspection unchecked
List<DifferentialExpressionAnalysis> analysesOfSubsets = this.getSessionFactory().getCurrentSession()
.createQuery( "select distinct a from ExpressionExperimentSubSet ee, DifferentialExpressionAnalysis a "
+ "join ee.sourceExperiment see "
+ "join fetch a.experimentAnalyzed eeanalyzed "
+ "where see.id in (:eeids) and ee=eeanalyzed" )
.setParameterList( "eeids", experimentIds ).list();

if ( !analysesOfSubsets.isEmpty() ) {
hits.addAll( analysesOfSubsets );
Collection<Long> experimentSubsetIds = new HashSet<>();
for ( DifferentialExpressionAnalysis a : analysesOfSubsets ) {
experimentSubsetIds.add( a.getExperimentAnalyzed().getId() );
}
// factor value information for the subset. The key output is the ID of the subset, not of the source
// experiment.
//noinspection unchecked
fvs = this.getSessionFactory().getCurrentSession().createQuery(
"select distinct ee.id, fv from " + "ExpressionExperimentSubSet"
+ " ee join ee.bioAssays ba join ba.sampleUsed bm join bm.factorValues fv where ee.id in (:ees)" )
.setParameterList( "ees", probableSubSetIds ).list();
List<Object[]> fvs = this.getSessionFactory().getCurrentSession()
.createQuery( "select ee.id, fv from ExpressionExperimentSubSet ee "
+ "join ee.bioAssays ba join ba.sampleUsed bm join bm.factorValues fv "
+ "where ee.id in (:ees) "
+ "group by ee, fv" )
.setParameterList( "ees", experimentSubsetIds ).list();
this.addFactorValues( ee2fv, fvs );
}
}

// postprocess...
if ( hits.isEmpty() ) {
return Collections.emptyMap();
}

/*
* Subsets of those same experiments (there might not be any)
*/
//noinspection unchecked
List<DifferentialExpressionAnalysis> analysesOfSubsets = this.getSessionFactory().getCurrentSession()
.createQuery( "select distinct a from " + "ExpressionExperimentSubSet"
+ " ee, DifferentialExpressionAnalysis a" + " join ee.sourceExperiment see "
+ " join fetch a.experimentAnalyzed eeanalyzed where see.id in (:eeids) and ee=eeanalyzed" )
.setParameterList( "eeids", expressionExperimentIds ).list();

if ( !analysesOfSubsets.isEmpty() ) {
hits.addAll( analysesOfSubsets );

Collection<Long> experimentSubsetIds = new HashSet<>();
for ( DifferentialExpressionAnalysis a : analysesOfSubsets ) {
ExpressionExperimentSubSet subset = ( ExpressionExperimentSubSet ) a.getExperimentAnalyzed();
experimentSubsetIds.add( subset.getId() );
// initialize result sets and hit list sizes
// this is necessary because the DEA VO constructor will ignore uninitialized associations
for ( DifferentialExpressionAnalysis hit : hits ) {
Hibernate.initialize( hit.getResultSets() );
for ( ExpressionAnalysisResultSet rs : hit.getResultSets() ) {
Hibernate.initialize( rs.getHitListSizes() );
}

// factor value information for the subset. The key output is the ID of the subset, not of the source
// experiment.
//noinspection unchecked
fvs = this.getSessionFactory().getCurrentSession().createQuery(
"select distinct ee.id, fv from " + "ExpressionExperimentSubSet"
+ " ee join ee.bioAssays ba join ba.sampleUsed bm join bm.factorValues fv where ee.id in (:ees)" )
.setParameterList( "ees", experimentSubsetIds ).list();
this.addFactorValues( ee2fv, fvs );
}

// postprocess...
if ( hits.isEmpty() ) {
return r;
}
Collection<DifferentialExpressionAnalysisValueObject> summaries = this
.convertToValueObjects( hits, arrayDesignsUsed, ee2fv );

Map<Long, List<DifferentialExpressionAnalysisValueObject>> r = new HashMap<>();
for ( DifferentialExpressionAnalysisValueObject an : summaries ) {

Long bioAssaySetId;
if ( an.getSourceExperiment() != null ) {
bioAssaySetId = an.getSourceExperiment();
if ( an.getSourceExperimentId() != null ) {
bioAssaySetId = an.getSourceExperimentId();
} else {
bioAssaySetId = an.getBioAssaySetId();
}
if ( !r.containsKey( bioAssaySetId ) ) {
r.put( bioAssaySetId, new ArrayList<DifferentialExpressionAnalysisValueObject>() );
}
r.get( bioAssaySetId ).add( an );
r.computeIfAbsent( bioAssaySetId, k -> new ArrayList<>() ).add( an );
}

return r;

}

@Override
Expand Down Expand Up @@ -699,12 +676,12 @@ private Collection<DifferentialExpressionAnalysisValueObject> convertToValueObje
avo.setSubsetFactor(
new ExperimentalFactorValueObject( analysis.getSubsetFactorValue().getExperimentalFactor() ) );
assert bioAssaySet instanceof ExpressionExperimentSubSet;
avo.setSourceExperiment( ( ( ExpressionExperimentSubSet ) bioAssaySet ).getSourceExperiment().getId() );
avo.setSourceExperimentId( ( ( ExpressionExperimentSubSet ) bioAssaySet ).getSourceExperiment().getId() );
if ( arrayDesignsUsed.containsKey( bioAssaySet.getId() ) ) {
avo.setArrayDesignsUsed( arrayDesignsUsed.get( bioAssaySet.getId() ) );
} else {
assert arrayDesignsUsed.containsKey( avo.getSourceExperiment() );
avo.setArrayDesignsUsed( arrayDesignsUsed.get( avo.getSourceExperiment() ) );
assert arrayDesignsUsed.containsKey( avo.getSourceExperimentId() );
avo.setArrayDesignsUsed( arrayDesignsUsed.get( avo.getSourceExperimentId() ) );
}
} else {
Collection<Long> adids = arrayDesignsUsed.get( bioAssaySet.getId() );
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,7 @@
import ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysis;
import ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisValueObject;
import ubic.gemma.model.analysis.expression.diff.ExpressionAnalysisResultSet;
import ubic.gemma.model.expression.experiment.BioAssaySet;
import ubic.gemma.model.expression.experiment.ExperimentalFactor;
import ubic.gemma.model.expression.experiment.ExpressionExperiment;
import ubic.gemma.model.expression.experiment.ExpressionExperimentDetailsValueObject;
import ubic.gemma.model.expression.experiment.*;
import ubic.gemma.model.genome.Taxon;
import ubic.gemma.persistence.service.BaseService;
import ubic.gemma.persistence.service.analysis.SingleExperimentAnalysisService;
Expand Down Expand Up @@ -118,19 +115,37 @@ Map<ExpressionExperiment, Collection<DifferentialExpressionAnalysis>> getAnalyse
boolean canDelete( DifferentialExpressionAnalysis differentialExpressionAnalysis );

/**
* Given a set of ids, find experiments or experimentsubsets that have differential expression analyses. Subsets are
* handled two ways: if the ID given is of a subset, or if the ID is of an experiment that has subsets. In the
* Retrieve all the analysis VOs for a given {@link BioAssaySet}.
* <p>
* If the given experiment has subsets, the returned list will contain analyses of its {@link ExpressionExperimentSubSet}
* as per {@link #getAnalysesByExperiments(Collection, boolean)}.
*/
@Secured({ "IS_AUTHENTICATED_ANONYMOUSLY", "ACL_SECURABLE_READ" })
List<DifferentialExpressionAnalysisValueObject> getAnalysesByExperiment( BioAssaySet experiment, boolean includeAnalysesOfSubsets );

/**
* Retrieve differential expression analyses by their associated experiment.
* <p>
* If the experiment is a {@link ExpressionExperiment} that has subsets, the returned values will contain analyses
* of its {@link ExpressionExperimentSubSet}.
* <p>
* Subsets are handled two ways: if the given experiment is a subset, or if the experiment has subsets. In the
* latter case, the return value will contain experiments that were not explicitly queried for.
*
* @param ids of experiments or experimentsubsets.
* @return map of bioassayset (valueobjects) to analyses (valueobjects) for each.
* @param experiments a collection of {@link ExpressionExperiment} or {@link ExpressionExperimentSubSet}
* @return a mapping of {@link BioAssaySet} VOs to analysis VOs
*/
@Secured({ "IS_AUTHENTICATED_ANONYMOUSLY", "AFTER_ACL_VALUE_OBJECT_MAP_READ" })
Map<ExpressionExperimentDetailsValueObject, List<DifferentialExpressionAnalysisValueObject>> getAnalysesByExperiment(
Collection<Long> ids );
Map<ExpressionExperimentDetailsValueObject, List<DifferentialExpressionAnalysisValueObject>> getAnalysesByExperiments( Collection<? extends BioAssaySet> experiments, boolean includeAnalysesOfSubsets );

/**
* Retrieve differential expression analyses by IDs of their associated experiment.
* <p>
* If an ID represents an {@link ExpressionExperiment} that has subsets, the returned values will contain analyses of
* its {@link ExpressionExperimentSubSet}.
*
* @see DifferentialExpressionAnalysisDao#getAnalysesByExperimentIds(Collection, boolean)
*/
@Secured({ "IS_AUTHENTICATED_ANONYMOUSLY", "AFTER_ACL_VALUE_OBJECT_MAP_READ" })
Map<ExpressionExperimentDetailsValueObject, List<DifferentialExpressionAnalysisValueObject>> getAnalysesByExperiment(
Collection<Long> ids, int offset, int limit );

Map<ExpressionExperimentDetailsValueObject, List<DifferentialExpressionAnalysisValueObject>> getAnalysesByExperimentIds( Collection<Long> experimentIds, boolean includeAnalysesOfSubsets );
}
Loading

0 comments on commit 40dda98

Please sign in to comment.