Skip to content

Commit

Permalink
Add support for multiple cell type labelling
Browse files Browse the repository at this point in the history
Makes it so that a single-cell dimension can have multiple cell type
labellings. The cell type labelling is represented as a subclass of
Analysis and its protocol describes the labelling process in detail.

Create an experimental factor for the cell type when labels are assigned
to the preferred single-cell vectors. The factor is re-created when new
labels are assigned and removed when those labels are removed.
  • Loading branch information
arteymix committed Feb 13, 2024
1 parent 5032005 commit 80c6409
Show file tree
Hide file tree
Showing 12 changed files with 487 additions and 118 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
package ubic.gemma.model.common.description;

import ubic.gemma.core.ontology.OntologyService;

/**
* Enumeration of commonly used categories for referring to in the code.
* <p>
* Entries here have corresponding declarations in {@code EFO.factor.categories.txt} and are also available via
* {@link OntologyService#getCategoryTerms()} in the form of ontology terms.
*/
public final class Categories {

    /**
     * The "cell type" category, identified by the EFO term {@code EFO_0000324}.
     * <p>
     * Declared {@code final} so that this shared constant cannot be reassigned at runtime.
     */
    public static final Category CELL_TYPE = new Category( "cell type", "http://www.ebi.ac.uk/efo/EFO_0000324" );

    private Categories() {
        // constants holder, not meant to be instantiated
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package ubic.gemma.model.common.description;

import lombok.Value;

import javax.annotation.Nullable;

/**
* Represents a category.
* <p>
* We intend to <a href="https://github.com/PavlidisLab/Gemma/issues/913">revamp the characteristic hierarchy</a>
* which will make categories persistent alongside {@link ubic.gemma.model.expression.experiment.Statement} and terms.
* @author poirigui
* @see Categories
*/
// Lombok's @Value is relied upon for the two-argument constructor and the getters used elsewhere in this change
// (e.g. new Category( label, uri ), getCategory(), getCategoryUri()).
@Value
public class Category {
/**
* Label of the category (e.g. "cell type").
*/
String category;
/**
* URI identifying the category; may be null.
* NOTE(review): presumably null denotes a category that is not backed by an ontology term -- confirm.
*/
@Nullable
String categoryUri;
}
Original file line number Diff line number Diff line change
Expand Up @@ -240,8 +240,11 @@ public boolean equals( Object object ) {
* the fields; we can't just compare the hashcodes because they also look at the id, so comparing one transient
* and one persistent would always fail...
*/
return CharacteristicUtils.equals( category, categoryUri, that.category, that.categoryUri )
&& CharacteristicUtils.equals( value, valueUri, that.value, that.valueUri );
if ( !CharacteristicUtils.equals( category, categoryUri, that.category, that.categoryUri ) ) return false;
if ( valueUri != null ^ that.valueUri != null ) {
return false; // free-text v.s. ontology term, always false
}
return valueUri != null ? org.apache.commons.lang.StringUtils.equalsIgnoreCase( valueUri, that.valueUri ) : org.apache.commons.lang.StringUtils.equalsIgnoreCase( value, that.value );
}

@Override
Expand Down Expand Up @@ -291,6 +294,22 @@ public static Characteristic newInstance( String name, String description, Strin
entity.setEvidenceCode( evidenceCode );
return entity;
}

/**
 * Create a characteristic that only has its category populated.
 *
 * @param category the category whose label and URI are copied onto the new characteristic
 * @return a new characteristic carrying the given category; no value is set by this method
 */
public static Characteristic newInstance( Category category ) {
    final Characteristic result = new Characteristic();
    final String label = category.getCategory();
    final String uri = category.getCategoryUri();
    result.setCategory( label );
    result.setCategoryUri( uri );
    return result;
}

/**
 * Create a characteristic with both its category and its value populated.
 *
 * @param category the category whose label and URI are copied onto the new characteristic
 * @param value    the value of the characteristic
 * @param valueUri the URI of the value; it is passed through {@code StringUtils.stripToNull} before being stored
 * @return a new characteristic carrying the given category and value
 */
public static Characteristic newInstance( Category category, String value, @Nullable String valueUri ) {
    final Characteristic result = new Characteristic();
    final String label = category.getCategory();
    final String uri = category.getCategoryUri();
    result.setCategory( label );
    result.setCategoryUri( uri );
    result.setValue( value );
    final String normalizedValueUri = StringUtils.stripToNull( valueUri );
    result.setValueUri( normalizedValueUri );
    return result;
}
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
package ubic.gemma.model.expression.bioAssayData;

import lombok.Getter;
import lombok.Setter;
import org.springframework.util.Assert;
import ubic.gemma.model.analysis.Analysis;
import ubic.gemma.model.common.description.Characteristic;

import java.util.Arrays;
import java.util.List;
import java.util.Objects;

/**
 * A labelling of cells with cell types, modelled as an {@link Analysis}.
 * <p>
 * Each cell is assigned an index ({@link #cellTypes}) that points into the list of available labels
 * ({@link #cellTypeLabels}). Accessors and mutators for all fields are generated by Lombok.
 */
@Getter
@Setter
public class CellTypeLabelling extends Analysis {

    /**
     * Whether this labelling is the preferred one.
     */
    private boolean preferred;

    /**
     * Per-cell assignment: each entry is an index into {@link #cellTypeLabels}.
     */
    private int[] cellTypes;

    /**
     * The cell type labels that {@link #cellTypes} refers to.
     */
    private List<Characteristic> cellTypeLabels;

    /**
     * Number of distinct cell types.
     * <p>
     * This must always be equal to number of distinct elements of {@link #cellTypeLabels}.
     */
    private Integer numberOfCellTypeLabels;

    /**
     * Look up the label assigned to a cell.
     *
     * @param index position of the cell in {@link #cellTypes}
     * @return the label that the cell at the given position points to
     * @throws IllegalArgumentException if either the assignments or the labels have not been set
     */
    public Characteristic getCellTypeLabel( int index ) {
        Assert.notNull( cellTypes, "No cell types have been assigned." );
        Assert.notNull( cellTypeLabels, "No cell labels exist." );
        final int labelIndex = cellTypes[index];
        return cellTypeLabels.get( labelIndex );
    }

    /**
     * Hash the per-cell assignments together with the labels.
     * <p>
     * NOTE(review): equality is delegated to the superclass while this hash is computed from fields declared here;
     * confirm that both stay consistent with what {@code Analysis} implements.
     */
    @Override
    public int hashCode() {
        final int assignmentsHash = Arrays.hashCode( cellTypes );
        return Objects.hash( assignmentsHash, cellTypeLabels );
    }

    /**
     * Equality is entirely delegated to the superclass.
     */
    @Override
    public boolean equals( Object object ) {
        return super.equals( object );
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import lombok.Getter;
import lombok.Setter;
import org.springframework.util.Assert;
import ubic.gemma.core.util.ListUtils;
import ubic.gemma.model.common.Identifiable;
import ubic.gemma.model.expression.bioAssay.BioAssay;
Expand Down Expand Up @@ -45,30 +44,10 @@ public class SingleCellDimension implements Identifiable {
private int numberOfCells = 0;

/**
* Cell types assignment to individual cells from the {@link #cellTypeLabels} collections.
* <p>
* If supplied, its size must be equal to that of {@link #cellIds}.
*/
@Nullable
private int[] cellTypes;

/**
* Cell type labels, or null if unknown.
* <p>
* Those are user-supplied cell type identifiers. Its size must be equal to that of {@link #cellIds}.
* <p>
* This is stored as a compressed, gzipped blob in the database. See {@link CompressedStringListType} for more details.
*/
@Nullable
private List<String> cellTypeLabels;

/**
* Number of distinct cell types.
* <p>
* This must always be equal to number of distinct elements of {@link #cellTypes}.
* Set of cell type assignments to individual cells. This is empty if no cell types have been assigned and should
* always contain a preferred labelling as per {@link CellTypeLabelling#preferred} if non-empty.
*/
@Nullable
private Integer numberOfCellTypeLabels;
private Set<CellTypeLabelling> cellTypeLabellings = new HashSet<>();

/**
* List of bioassays that each cell belongs to.
Expand Down Expand Up @@ -107,19 +86,6 @@ public BioAssay getBioAssayByCellId( String cellId ) {
return getBioAssay( getCellIndex( cellId ) );
}

/**
 * Obtain a cell type label by cell position.
 *
 * @param index position of the cell in the cell type assignments
 * @return the label the cell at that position points to
 * @throws IllegalArgumentException if either the assignments or the labels have not been set
 */
public String getCellTypeLabel( int index ) {
    Assert.notNull( cellTypes, "No cell types have been assigned." );
    Assert.notNull( cellTypeLabels, "No cell labels exist." );
    final int labelIndex = cellTypes[index];
    return cellTypeLabels.get( labelIndex );
}

/**
 * Obtain a cell type label by cell ID.
 * <p>
 * The cell ID is first resolved to its position, then the label at that position is returned.
 */
public String getCellTypeLabelByCellId( String cellId ) {
    final int cellIndex = getCellIndex( cellId );
    return getCellTypeLabel( cellIndex );
}

private int getCellIndex( String cellId ) {
if ( cellIdToIndex == null ) {
cellIdToIndex = ListUtils.indexOfElements( cellIds );
Expand All @@ -137,7 +103,7 @@ public int hashCode() {
return Objects.hash( id );
}
// no need to hash numberOfCells, it's derived from cellIds's size
return Objects.hash( cellIds, Arrays.hashCode( cellTypes ), cellTypeLabels, bioAssays, Arrays.hashCode( bioAssaysOffset ) );
return Objects.hash( cellIds, bioAssays, Arrays.hashCode( bioAssaysOffset ) );
}

@Override
Expand All @@ -149,10 +115,8 @@ public boolean equals( Object obj ) {
SingleCellDimension scd = ( SingleCellDimension ) obj;
if ( id != null && scd.id != null )
return id.equals( scd.id );
return Objects.equals( cellTypeLabels, scd.cellTypeLabels )
&& Objects.equals( bioAssays, scd.bioAssays )
return Objects.equals( bioAssays, scd.bioAssays )
&& Arrays.equals( bioAssaysOffset, scd.bioAssaysOffset )
&& Arrays.equals( cellTypes, scd.cellTypes )
&& Objects.equals( cellIds, scd.cellIds ); // this is the most expensive to compare
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import ubic.gemma.model.expression.arrayDesign.TechnologyType;
import ubic.gemma.model.expression.bioAssay.BioAssay;
import ubic.gemma.model.expression.bioAssayData.BioAssayDimension;
import ubic.gemma.model.expression.bioAssayData.CellTypeLabelling;
import ubic.gemma.model.expression.bioAssayData.MeanVarianceRelation;
import ubic.gemma.model.expression.bioAssayData.SingleCellDimension;
import ubic.gemma.model.expression.biomaterial.BioMaterial;
Expand Down Expand Up @@ -312,12 +313,22 @@ Map<ExpressionExperiment, Collection<AuditEvent>> getSampleRemovalEvents(

void deleteSingleCellDimension( ExpressionExperiment ee, SingleCellDimension singleCellDimension );

/**
* Obtain the cell type labellings attached to the single-cell dimensions used by the single-cell vectors of the
* given experiment.
* <p>
* The Hibernate implementation in this change selects distinct labellings, so each one is returned once.
* @param ee an expression experiment
*/
List<CellTypeLabelling> getCellTypeLabellings( ExpressionExperiment ee );

/**
* Replace the SCD of a given dataset.
* @param ee an expression experiment; its vectors will be refreshed
* @param dimension the existing dimension
* @param newDimension the new dimension
* @return the number of updated vectors as a result
* Obtain the preferred labelling of the preferred single-cell vectors.
* @throws org.springframework.dao.IncorrectResultSizeDataAccessException if there are multiple preferred cell-type
* labellings
*/
int replaceSingleCellDimension( ExpressionExperiment ee, SingleCellDimension dimension, SingleCellDimension newDimension );
@Nullable
CellTypeLabelling getPreferredCellTypeLabelling( ExpressionExperiment ee );

/**
* Add the given cell type labelling to the single-cell dimension.
* <p>
* If the new labelling is preferred, any existing one is marked as non-preferred.
*/
void addCellTypeLabelling( ExpressionExperiment ee, SingleCellDimension singleCellDimension, CellTypeLabelling cellTypeLabelling );

/**
* Obtain the distinct cell type labels of the given experiment.
* <p>
* The Hibernate implementation in this change only considers labellings flagged as preferred that are attached to
* vectors whose quantitation type is preferred.
* @param ee an expression experiment
*/
List<Characteristic> getCellTypes( ExpressionExperiment ee );
}
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
import ubic.gemma.model.expression.arrayDesign.TechnologyType;
import ubic.gemma.model.expression.bioAssay.BioAssay;
import ubic.gemma.model.expression.bioAssayData.BioAssayDimension;
import ubic.gemma.model.expression.bioAssayData.CellTypeLabelling;
import ubic.gemma.model.expression.bioAssayData.MeanVarianceRelation;
import ubic.gemma.model.expression.bioAssayData.SingleCellDimension;
import ubic.gemma.model.expression.biomaterial.BioMaterial;
Expand Down Expand Up @@ -1949,9 +1950,8 @@ public void thawForFrontEnd( final ExpressionExperiment expressionExperiment ) {
public List<SingleCellDimension> getSingleCellDimensions( ExpressionExperiment ee ) {
//noinspection unchecked
return getSessionFactory().getCurrentSession()
.createQuery( "select scedv.singleCellDimension from SingleCellExpressionDataVector scedv "
+ "where scedv.expressionExperiment = :ee "
+ "group by scedv.singleCellDimension" )
.createQuery( "select distinct scedv.singleCellDimension from SingleCellExpressionDataVector scedv "
+ "where scedv.expressionExperiment = :ee" )
.setParameter( "ee", ee )
.list();
}
Expand All @@ -1967,18 +1967,55 @@ public void deleteSingleCellDimension( ExpressionExperiment ee, SingleCellDimens
}

@Override
public int replaceSingleCellDimension( ExpressionExperiment ee, SingleCellDimension dimension, SingleCellDimension newDimension ) {
int updatedVectors = getSessionFactory().getCurrentSession()
.createQuery( "update SingleCellExpressionDataVector scd set scd.singleCellDimension = :newDimension where scd.singleCellDimension = :dim" )
.setParameter( "dim", dimension )
.setParameter( "newDimension", newDimension )
.executeUpdate();
if ( updatedVectors > 0 && Hibernate.isInitialized( ee.getSingleCellExpressionDataVectors() ) ) {
// will reload vectors with the updated SCD
// if the vectors are not initialized, they will be loaded with the updated SCD when they are accessed
getSessionFactory().getCurrentSession().refresh( ee );
public List<CellTypeLabelling> getCellTypeLabellings( ExpressionExperiment ee ) {
    // walk from the experiment's single-cell vectors to their dimension, then to the labellings of that dimension;
    // "distinct" collapses the duplicates produced by joining through the vectors
    final String hql = "select distinct ctl from SingleCellExpressionDataVector scedv "
            + "join scedv.singleCellDimension scd "
            + "join scd.cellTypeLabellings ctl "
            + "where scedv.expressionExperiment = :ee";
    //noinspection unchecked
    return getSessionFactory().getCurrentSession()
            .createQuery( hql )
            .setParameter( "ee", ee )
            .list();
}

/**
 * Retrieve the labelling flagged as preferred among those attached to the vectors of the experiment whose
 * quantitation type is preferred.
 * <p>
 * The query is expected to match at most one labelling since {@code uniqueResult()} is used to fetch it.
 */
@Nullable
@Override
public CellTypeLabelling getPreferredCellTypeLabelling( ExpressionExperiment ee ) {
    final String hql = "select distinct ctl from SingleCellExpressionDataVector scedv "
            + "join scedv.singleCellDimension scd "
            + "join scd.cellTypeLabellings ctl "
            + "where scedv.quantitationType.isPreferred = true and ctl.preferred = true and scedv.expressionExperiment = :ee";
    final Object preferred = getSessionFactory().getCurrentSession()
            .createQuery( hql )
            .setParameter( "ee", ee )
            .uniqueResult();
    return ( CellTypeLabelling ) preferred;
}

/**
 * Persist a cell type labelling and attach it to the given single-cell dimension.
 * <p>
 * When the new labelling is preferred, every other labelling of the dimension that is currently flagged as
 * preferred is demoted, so that the dimension is left with a single preferred labelling.
 *
 * @param ee        the experiment the dimension belongs to (not used by this implementation)
 * @param dimension the dimension receiving the labelling
 * @param labelling the labelling to persist and attach
 */
@Override
public void addCellTypeLabelling( ExpressionExperiment ee, SingleCellDimension dimension, CellTypeLabelling labelling ) {
    if ( labelling.isPreferred() ) {
        // do not stop at the first match: if more than one labelling is flagged as preferred, leaving any of them
        // behind would make lookups of the unique preferred labelling fail
        for ( CellTypeLabelling l : dimension.getCellTypeLabellings() ) {
            // never demote the labelling being added, in case it is already part of the dimension
            if ( l != labelling && l.isPreferred() ) {
                log.info( "Marking existing cell type labelling as non-preferred, a new preferred labelling will be added." );
                l.setPreferred( false );
            }
        }
    }
    getSessionFactory().getCurrentSession().persist( labelling );
    dimension.getCellTypeLabellings().add( labelling );
}

/**
 * Retrieve the distinct cell type labels of the preferred labelling(s) attached to the vectors of the experiment
 * whose quantitation type is preferred.
 */
@Override
public List<Characteristic> getCellTypes( ExpressionExperiment ee ) {
    final String hql = "select distinct ct from SingleCellExpressionDataVector scedv "
            + "join scedv.singleCellDimension scd "
            + "join scd.cellTypeLabellings ctl "
            + "join ctl.cellTypeLabels ct "
            + "where scedv.expressionExperiment = :ee and scedv.quantitationType.isPreferred = true and ctl.preferred = true";
    //noinspection unchecked
    return getSessionFactory().getCurrentSession()
            .createQuery( hql )
            .setParameter( "ee", ee )
            .list();
}

@Override
Expand Down
Loading

0 comments on commit 80c6409

Please sign in to comment.