diff --git a/gemma-core/src/main/java/ubic/gemma/model/common/description/Categories.java b/gemma-core/src/main/java/ubic/gemma/model/common/description/Categories.java
new file mode 100644
index 0000000000..0ef5cf8b75
--- /dev/null
+++ b/gemma-core/src/main/java/ubic/gemma/model/common/description/Categories.java
@@ -0,0 +1,14 @@
+package ubic.gemma.model.common.description;
+
+import ubic.gemma.core.ontology.OntologyService;
+
+/**
+ * Enumeration of commonly used categories that can be referred to from the code.
+ *
+ * Entries here have corresponding declarations in {@code EFO.factor.categories.txt} and are also available via
+ * {@link OntologyService#getCategoryTerms()} in the form of ontology terms.
+ */
+public final class Categories {
+ /** The "cell type" category, identified by the EFO term EFO_0000324. */
+ public static final Category CELL_TYPE = new Category( "cell type", "http://www.ebi.ac.uk/efo/EFO_0000324" );
+}
diff --git a/gemma-core/src/main/java/ubic/gemma/model/common/description/Category.java b/gemma-core/src/main/java/ubic/gemma/model/common/description/Category.java
new file mode 100644
index 0000000000..8f353cf5c5
--- /dev/null
+++ b/gemma-core/src/main/java/ubic/gemma/model/common/description/Category.java
@@ -0,0 +1,20 @@
+package ubic.gemma.model.common.description;
+
+import lombok.Value;
+
+import javax.annotation.Nullable;
+
+/**
+ * Immutable pairing of a category label with its optional URI.
+ *
+ * We intend to revamp the characteristic hierarchy,
+ * which will make categories persistent alongside {@link ubic.gemma.model.expression.experiment.Statement} and terms.
+ * @author poirigui
+ * @see Categories
+ */
+@Value
+public class Category {
+ String category;
+ @Nullable
+ String categoryUri;
+}
diff --git a/gemma-core/src/main/java/ubic/gemma/model/common/description/Characteristic.java b/gemma-core/src/main/java/ubic/gemma/model/common/description/Characteristic.java
index d82c265c0d..9671e77387 100644
--- a/gemma-core/src/main/java/ubic/gemma/model/common/description/Characteristic.java
+++ b/gemma-core/src/main/java/ubic/gemma/model/common/description/Characteristic.java
@@ -240,8 +240,11 @@ public boolean equals( Object object ) {
* the fields; we can't just compare the hashcodes because they also look at the id, so comparing one transient
* and one persistent would always fail...
*/
- return CharacteristicUtils.equals( category, categoryUri, that.category, that.categoryUri )
- && CharacteristicUtils.equals( value, valueUri, that.value, that.valueUri );
+ if ( !CharacteristicUtils.equals( category, categoryUri, that.category, that.categoryUri ) ) return false;
+ if ( valueUri != null ^ that.valueUri != null ) {
+ return false; // free-text v.s. ontology term, always false
+ }
+ return valueUri != null ? org.apache.commons.lang.StringUtils.equalsIgnoreCase( valueUri, that.valueUri ) : org.apache.commons.lang.StringUtils.equalsIgnoreCase( value, that.value );
}
@Override
@@ -291,6 +294,22 @@ public static Characteristic newInstance( String name, String description, Strin
entity.setEvidenceCode( evidenceCode );
return entity;
}
+
+ /** Create a characteristic carrying only the given category and its URI; no value is set. */
+ public static Characteristic newInstance( Category category ) {
+ Characteristic result = new Characteristic();
+ result.setCategory( category.getCategory() );
+ result.setCategoryUri( category.getCategoryUri() );
+ return result;
+ }
+
+ /** Create a characteristic with the given category and value; the value URI goes through stripToNull. */
+ public static Characteristic newInstance( Category category, String value, @Nullable String valueUri ) {
+ Characteristic result = newInstance( category );
+ result.setValue( value );
+ result.setValueUri( StringUtils.stripToNull( valueUri ) );
+ return result;
+ }
}
}
\ No newline at end of file
diff --git a/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/CellTypeLabelling.java b/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/CellTypeLabelling.java
new file mode 100644
index 0000000000..b67bf182e0
--- /dev/null
+++ b/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/CellTypeLabelling.java
@@ -0,0 +1,57 @@
+package ubic.gemma.model.expression.bioAssayData;
+
+import lombok.Getter;
+import lombok.Setter;
+import org.springframework.util.Assert;
+import ubic.gemma.model.analysis.Analysis;
+import ubic.gemma.model.common.description.Characteristic;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * A labelling of cells with cell types: one label position per cell, plus the list of labels being referred to.
+ */
+@Getter
+@Setter
+public class CellTypeLabelling extends Analysis {
+
+ /**
+ * Indicate if this labelling is the preferred one.
+ */
+ private boolean preferred;
+
+ /**
+ * For each cell, the position of its cell type within {@link #cellTypeLabels}.
+ */
+ private int[] cellTypes;
+
+ /**
+ * Cell type labels, referred to by position from {@link #cellTypes}.
+ */
+ private List<Characteristic> cellTypeLabels;
+
+ /**
+ * Number of distinct cell types.
+ *
+ * This must always be equal to the number of distinct elements of {@link #cellTypeLabels}.
+ */
+ private Integer numberOfCellTypeLabels;
+ /** Obtain the cell type label of the cell at the given position; fails if cell types or labels are unset. */
+ public Characteristic getCellTypeLabel( int index ) {
+ Assert.notNull( cellTypes, "No cell types have been assigned." );
+ Assert.notNull( cellTypeLabels, "No cell labels exist." );
+ return cellTypeLabels.get( cellTypes[index] );
+ }
+ // NOTE(review): hashes mutable content while equals() defers to Analysis; confirm the two stay consistent
+ @Override
+ public int hashCode() {
+ return Objects.hash( Arrays.hashCode( cellTypes ), cellTypeLabels );
+ }
+
+ @Override
+ public boolean equals( Object object ) {
+ return super.equals( object );
+ }
+}
diff --git a/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/SingleCellDimension.java b/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/SingleCellDimension.java
index 9321cf15af..ea6eeac693 100644
--- a/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/SingleCellDimension.java
+++ b/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/SingleCellDimension.java
@@ -2,7 +2,6 @@
import lombok.Getter;
import lombok.Setter;
-import org.springframework.util.Assert;
import ubic.gemma.core.util.ListUtils;
import ubic.gemma.model.common.Identifiable;
import ubic.gemma.model.expression.bioAssay.BioAssay;
@@ -45,30 +44,10 @@ public class SingleCellDimension implements Identifiable {
private int numberOfCells = 0;
/**
- * Cell types assignment to individual cells from the {@link #cellTypeLabels} collections.
- *
- * If supplied, its size must be equal to that of {@link #cellIds}.
- */
- @Nullable
- private int[] cellTypes;
-
- /**
- * Cell type labels, or null if unknown.
- *
- * Those are user-supplied cell type identifiers. Its size must be equal to that of {@link #cellIds}.
- *
- * This is stored as a compressed, gzipped blob in the database. See {@link CompressedStringListType} for more details.
- */
- @Nullable
- private List cellTypeLabels;
-
- /**
- * Number of distinct cell types.
- *
- * This must always be equal to number of distinct elements of {@link #cellTypes}.
+ * Set of cell types assignment to individual cells. This is empty if no cell types have been assigned and should
+ * always contain a preferred labelling as per {@link CellTypeLabelling#preferred} if non-empty.
*/
- @Nullable
- private Integer numberOfCellTypeLabels;
+ private Set<CellTypeLabelling> cellTypeLabellings = new HashSet<>();
/**
* List of bioassays that each cell belongs to.
@@ -107,19 +86,6 @@ public BioAssay getBioAssayByCellId( String cellId ) {
return getBioAssay( getCellIndex( cellId ) );
}
- public String getCellTypeLabel( int index ) {
- Assert.notNull( cellTypes, "No cell types have been assigned." );
- Assert.notNull( cellTypeLabels, "No cell labels exist." );
- return cellTypeLabels.get( cellTypes[index] );
- }
-
- /**
- * Obtain a cell type label by cell ID.
- */
- public String getCellTypeLabelByCellId( String cellId ) {
- return getCellTypeLabel( getCellIndex( cellId ) );
- }
-
private int getCellIndex( String cellId ) {
if ( cellIdToIndex == null ) {
cellIdToIndex = ListUtils.indexOfElements( cellIds );
@@ -137,7 +103,7 @@ public int hashCode() {
return Objects.hash( id );
}
// no need to hash numberOfCells, it's derived from cellIds's size
- return Objects.hash( cellIds, Arrays.hashCode( cellTypes ), cellTypeLabels, bioAssays, Arrays.hashCode( bioAssaysOffset ) );
+ return Objects.hash( cellIds, bioAssays, Arrays.hashCode( bioAssaysOffset ) );
}
@Override
@@ -149,10 +115,8 @@ public boolean equals( Object obj ) {
SingleCellDimension scd = ( SingleCellDimension ) obj;
if ( id != null && scd.id != null )
return id.equals( scd.id );
- return Objects.equals( cellTypeLabels, scd.cellTypeLabels )
- && Objects.equals( bioAssays, scd.bioAssays )
+ return Objects.equals( bioAssays, scd.bioAssays )
&& Arrays.equals( bioAssaysOffset, scd.bioAssaysOffset )
- && Arrays.equals( cellTypes, scd.cellTypes )
&& Objects.equals( cellIds, scd.cellIds ); // this is the most expensive to compare
}
diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDao.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDao.java
index e65426c4a4..bf253e1820 100644
--- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDao.java
+++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDao.java
@@ -11,6 +11,7 @@
import ubic.gemma.model.expression.arrayDesign.TechnologyType;
import ubic.gemma.model.expression.bioAssay.BioAssay;
import ubic.gemma.model.expression.bioAssayData.BioAssayDimension;
+import ubic.gemma.model.expression.bioAssayData.CellTypeLabelling;
import ubic.gemma.model.expression.bioAssayData.MeanVarianceRelation;
import ubic.gemma.model.expression.bioAssayData.SingleCellDimension;
import ubic.gemma.model.expression.biomaterial.BioMaterial;
@@ -312,12 +313,22 @@ Map> getSampleRemovalEvents(
void deleteSingleCellDimension( ExpressionExperiment ee, SingleCellDimension singleCellDimension );
+ List<CellTypeLabelling> getCellTypeLabellings( ExpressionExperiment ee );
+
/**
- * Replace the SCD of a given dataset.
- * @param ee an expression experiment; its vectors will be refreshed
- * @param dimension the existing dimension
- * @param newDimension the new dimension
- * @return the number of updated vectors as a result
+ * Obtain the preferred labelling of the preferred single-cell vectors.
+ * @throws org.springframework.dao.IncorrectResultSizeDataAccessException if there are multiple preferred cell-type
+ * labellings
*/
- int replaceSingleCellDimension( ExpressionExperiment ee, SingleCellDimension dimension, SingleCellDimension newDimension );
+ @Nullable
+ CellTypeLabelling getPreferredCellTypeLabelling( ExpressionExperiment ee );
+
+ /**
+ * Add the given cell type labelling to the single-cell dimension.
+ *
+ * If the new labelling is preferred, any existing one is marked as non-preferred.
+ */
+ void addCellTypeLabelling( ExpressionExperiment ee, SingleCellDimension singleCellDimension, CellTypeLabelling cellTypeLabelling );
+
+ List<Characteristic> getCellTypes( ExpressionExperiment ee );
}
diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java
index 9425aa54f4..ecc8156b9b 100644
--- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java
+++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java
@@ -45,6 +45,7 @@
import ubic.gemma.model.expression.arrayDesign.TechnologyType;
import ubic.gemma.model.expression.bioAssay.BioAssay;
import ubic.gemma.model.expression.bioAssayData.BioAssayDimension;
+import ubic.gemma.model.expression.bioAssayData.CellTypeLabelling;
import ubic.gemma.model.expression.bioAssayData.MeanVarianceRelation;
import ubic.gemma.model.expression.bioAssayData.SingleCellDimension;
import ubic.gemma.model.expression.biomaterial.BioMaterial;
@@ -1949,9 +1950,8 @@ public void thawForFrontEnd( final ExpressionExperiment expressionExperiment ) {
public List<SingleCellDimension> getSingleCellDimensions( ExpressionExperiment ee ) {
//noinspection unchecked
return getSessionFactory().getCurrentSession()
- .createQuery( "select scedv.singleCellDimension from SingleCellExpressionDataVector scedv "
- + "where scedv.expressionExperiment = :ee "
- + "group by scedv.singleCellDimension" )
+ .createQuery( "select distinct scedv.singleCellDimension from SingleCellExpressionDataVector scedv "
+ + "where scedv.expressionExperiment = :ee" )
.setParameter( "ee", ee )
.list();
}
@@ -1967,18 +1967,55 @@ public void deleteSingleCellDimension( ExpressionExperiment ee, SingleCellDimens
}
@Override
- public int replaceSingleCellDimension( ExpressionExperiment ee, SingleCellDimension dimension, SingleCellDimension newDimension ) {
- int updatedVectors = getSessionFactory().getCurrentSession()
- .createQuery( "update SingleCellExpressionDataVector scd set scd.singleCellDimension = :newDimension where scd.singleCellDimension = :dim" )
- .setParameter( "dim", dimension )
- .setParameter( "newDimension", newDimension )
- .executeUpdate();
- if ( updatedVectors > 0 && Hibernate.isInitialized( ee.getSingleCellExpressionDataVectors() ) ) {
- // will reload vectors with the updated SCD
- // if the vectors are not initialized, they will be loaded with the updated SCD when they are accessed
- getSessionFactory().getCurrentSession().refresh( ee );
+ public List<CellTypeLabelling> getCellTypeLabellings( ExpressionExperiment ee ) {
+ //noinspection unchecked
+ return getSessionFactory().getCurrentSession()
+ .createQuery( "select distinct ctl from SingleCellExpressionDataVector scedv "
+ + "join scedv.singleCellDimension scd "
+ + "join scd.cellTypeLabellings ctl "
+ + "where scedv.expressionExperiment = :ee" )
+ .setParameter( "ee", ee )
+ .list();
+ }
+
+ @Nullable
+ @Override
+ public CellTypeLabelling getPreferredCellTypeLabelling( ExpressionExperiment ee ) {
+ return ( CellTypeLabelling ) getSessionFactory().getCurrentSession()
+ .createQuery( "select distinct ctl from SingleCellExpressionDataVector scedv "
+ + "join scedv.singleCellDimension scd "
+ + "join scd.cellTypeLabellings ctl "
+ + "where scedv.quantitationType.isPreferred = true and ctl.preferred = true and scedv.expressionExperiment = :ee" )
+ .setParameter( "ee", ee )
+ .uniqueResult();
+ }
+
+ @Override
+ public void addCellTypeLabelling( ExpressionExperiment ee, SingleCellDimension dimension, CellTypeLabelling labelling ) {
+ if ( labelling.isPreferred() ) {
+ for ( CellTypeLabelling l : dimension.getCellTypeLabellings() ) {
+ if ( l.isPreferred() ) {
+ log.info( "Marking existing cell type labelling as non-preferred, a new preferred labelling will be added." );
+ l.setPreferred( false );
+ break;
+ }
+ }
}
- return updatedVectors;
+ getSessionFactory().getCurrentSession().persist( labelling );
+ dimension.getCellTypeLabellings().add( labelling );
+ }
+
+ @Override
+ public List<Characteristic> getCellTypes( ExpressionExperiment ee ) {
+ //noinspection unchecked
+ return getSessionFactory().getCurrentSession()
+ .createQuery( "select distinct ct from SingleCellExpressionDataVector scedv "
+ + "join scedv.singleCellDimension scd "
+ + "join scd.cellTypeLabellings ctl "
+ + "join ctl.cellTypeLabels ct "
+ + "where scedv.expressionExperiment = :ee and scedv.quantitationType.isPreferred = true and ctl.preferred = true" )
+ .setParameter( "ee", ee )
+ .list();
}
@Override
diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/SingleCellExpressionExperimentService.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/SingleCellExpressionExperimentService.java
index b683e42bb9..0388b076ed 100644
--- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/SingleCellExpressionExperimentService.java
+++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/SingleCellExpressionExperimentService.java
@@ -1,11 +1,16 @@
package ubic.gemma.persistence.service.expression.experiment;
import org.springframework.security.access.annotation.Secured;
+import ubic.gemma.model.common.description.Characteristic;
+import ubic.gemma.model.common.protocol.Protocol;
import ubic.gemma.model.common.quantitationtype.QuantitationType;
+import ubic.gemma.model.expression.bioAssayData.CellTypeLabelling;
import ubic.gemma.model.expression.bioAssayData.SingleCellDimension;
import ubic.gemma.model.expression.bioAssayData.SingleCellExpressionDataVector;
+import ubic.gemma.model.expression.experiment.ExperimentalFactor;
import ubic.gemma.model.expression.experiment.ExpressionExperiment;
+import javax.annotation.Nullable;
import java.util.Collection;
import java.util.List;
@@ -39,8 +44,51 @@ void replaceSingleCellDataVectors( ExpressionExperiment ee, QuantitationType qua
/**
* Relabel the cell types of an existing set of single-cell vectors.
- * @return a new dimension with the relabeled cell types, the original one is deleted
+ * @param newCellTypeLabels the new cell types labels, must match the number of cells
+ * @param labellingProtocol the protocol used to generate the new labelling, or null if unknown
+ * @return a new, preferred cell type labelling
*/
@Secured({ "GROUP_USER", "ACL_SECURABLE_READ" })
- SingleCellDimension relabelCellTypes( ExpressionExperiment ee, SingleCellDimension dimension, List<String> newCellTypeLabels );
+ CellTypeLabelling relabelCellTypes( ExpressionExperiment ee, SingleCellDimension dimension, List<String> newCellTypeLabels, @Nullable Protocol labellingProtocol, @Nullable String description );
+
+ /**
+ * Remove the given cell type labelling.
+ *
+ * If the cell type labelling is preferred and applies to the preferred vectors as per {@link #getPreferredCellTypeLabelling(ExpressionExperiment)}, the cell type factor will be removed.
+ */
+ @Secured({ "GROUP_USER", "ACL_SECURABLE_READ" })
+ void removeCellTypeLabels( ExpressionExperiment ee, SingleCellDimension scd, CellTypeLabelling cellTypeLabelling );
+
+ /**
+ * Obtain all the cell type labellings from all single-cell vectors.
+ */
+ @Secured({ "GROUP_USER", "ACL_SECURABLE_READ" })
+ List<CellTypeLabelling> getCellTypeLabellings( ExpressionExperiment ee );
+
+ /**
+ * Obtain the preferred cell type labelling from the preferred single-cell vectors.
+ */
+ @Nullable
+ @Secured({ "GROUP_USER", "ACL_SECURABLE_READ" })
+ CellTypeLabelling getPreferredCellTypeLabelling( ExpressionExperiment ee );
+
+ /**
+ * Obtain the cell types of a given single-cell dataset.
+ *
+ * Only the cell types applicable to the preferred single-cell vectors and labelling are returned.
+ */
+ @Secured({ "GROUP_USER", "ACL_SECURABLE_READ" })
+ List<Characteristic> getCellTypes( ExpressionExperiment ee );
+
+ /**
+ * Recreate the cell type factor based on the preferred labelling of the preferred single-cell vectors.
+ *
+ * Analyses involving the factor are removed and samples mentioning the factor values are updated as per
+ * {@link ExperimentalFactorService#remove(ExperimentalFactor)}.
+ * @return the created cell type factor
+ * @throws IllegalStateException if the dataset does not have a preferred cell type labelling for its preferred set
+ * of single-cell vectors
+ */
+ @Secured({ "GROUP_USER", "ACL_SECURABLE_READ" })
+ ExperimentalFactor recreateCellTypeFactor( ExpressionExperiment ee );
}
diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/SingleCellExpressionExperimentServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/SingleCellExpressionExperimentServiceImpl.java
index 0dfc8db3d3..618e64e3d4 100644
--- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/SingleCellExpressionExperimentServiceImpl.java
+++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/SingleCellExpressionExperimentServiceImpl.java
@@ -9,13 +9,18 @@
import ubic.gemma.model.common.auditAndSecurity.eventType.DataAddedEvent;
import ubic.gemma.model.common.auditAndSecurity.eventType.DataRemovedEvent;
import ubic.gemma.model.common.auditAndSecurity.eventType.DataReplacedEvent;
+import ubic.gemma.model.common.description.Categories;
+import ubic.gemma.model.common.description.Characteristic;
+import ubic.gemma.model.common.description.CharacteristicUtils;
+import ubic.gemma.model.common.protocol.Protocol;
import ubic.gemma.model.common.quantitationtype.PrimitiveType;
import ubic.gemma.model.common.quantitationtype.QuantitationType;
import ubic.gemma.model.expression.arrayDesign.ArrayDesign;
+import ubic.gemma.model.expression.bioAssayData.CellTypeLabelling;
import ubic.gemma.model.expression.bioAssayData.SingleCellDimension;
import ubic.gemma.model.expression.bioAssayData.SingleCellExpressionDataVector;
import ubic.gemma.model.expression.designElement.CompositeSequence;
-import ubic.gemma.model.expression.experiment.ExpressionExperiment;
+import ubic.gemma.model.expression.experiment.*;
import ubic.gemma.persistence.service.common.auditAndSecurity.AuditTrailService;
import javax.annotation.Nullable;
@@ -34,6 +39,9 @@ public class SingleCellExpressionExperimentServiceImpl implements SingleCellExpr
@Autowired
private ExpressionExperimentDao expressionExperimentDao;
+ @Autowired
+ private ExperimentalFactorService experimentalFactorService;
+
@Autowired
private AuditTrailService auditTrailService;
@@ -54,9 +62,11 @@ public void addSingleCellDataVectors( ExpressionExperiment ee, QuantitationType
ee, quantitationType ) );
validateSingleCellDataVectors( ee, quantitationType, vectors );
SingleCellDimension scd = vectors.iterator().next().getSingleCellDimension();
+ boolean scdCreated = false;
if ( scd.getId() == null ) {
log.info( "Creating a new single-cell dimension for " + ee + ": " + scd );
expressionExperimentDao.createSingleCellDimension( ee, scd );
+ scdCreated = true;
}
for ( SingleCellExpressionDataVector v : vectors ) {
v.setExpressionExperiment( ee );
@@ -77,6 +87,16 @@ public void addSingleCellDataVectors( ExpressionExperiment ee, QuantitationType
}
ee.getQuantitationTypes().add( quantitationType );
expressionExperimentDao.update( ee ); // will take care of creating vectors
+ if ( quantitationType.getIsPreferred() && scdCreated ) {
+ CellTypeLabelling preferredLabelling = scd.getCellTypeLabellings().stream().filter( CellTypeLabelling::isPreferred ).findFirst().orElse( null );
+ if ( preferredLabelling != null ) {
+ log.info( "New single-cell preferred vectors were added, recreating the cell type factor." );
+ recreateCellTypeFactor( ee, preferredLabelling );
+ } else {
+ log.info( "New single-cell preferred vectors do not have cell type labelling, removing any existing cell type factor..." );
+ removeCellTypeFactorIfExists( ee );
+ }
+ }
auditTrailService.addUpdateEvent( ee, DataAddedEvent.class,
String.format( "Added %d vectors for %s with dimension %s", numVectorsAdded, quantitationType, scd ) );
}
@@ -114,6 +134,16 @@ public void replaceSingleCellDataVectors( ExpressionExperiment ee, QuantitationT
ee.getSingleCellExpressionDataVectors().addAll( vectors );
int numVectorsAdded = ee.getSingleCellExpressionDataVectors().size() - ( previousSize - numVectorsRemoved );
expressionExperimentDao.update( ee );
+ if ( quantitationType.getIsPreferred() && scdCreated ) {
+ CellTypeLabelling preferredLabelling = scd.getCellTypeLabellings().stream().filter( CellTypeLabelling::isPreferred ).findFirst().orElse( null );
+ if ( preferredLabelling != null ) {
+ log.info( "Preferred single-cell vectors were replaced, recreating the cell type factor." );
+ recreateCellTypeFactor( ee, preferredLabelling );
+ } else {
+ log.info( "Preferred single-cell vectors do not have cell type labelling, removing any existing cell type factor..." );
+ removeCellTypeFactorIfExists( ee );
+ }
+ }
auditTrailService.addUpdateEvent( ee, DataReplacedEvent.class,
String.format( "Replaced %d vectors with %d vectors for %s with dimension %s.", numVectorsRemoved, numVectorsAdded, quantitationType, scd ) );
}
@@ -224,31 +254,69 @@ public List getSingleCellDimensions( ExpressionExperiment e
@Override
@Transactional
- public SingleCellDimension relabelCellTypes( ExpressionExperiment ee, SingleCellDimension dimension, List<String> newCellTypeLabels ) {
+ public CellTypeLabelling relabelCellTypes( ExpressionExperiment ee, SingleCellDimension dimension, List<String> newCellTypeLabels, Protocol protocol, String description ) {
Assert.notNull( ee.getId(), "Dataset must be persistent." );
Assert.notNull( dimension.getId(), "Single-cell dimension must be persistent." );
- SingleCellDimension newDimension = new SingleCellDimension();
- newDimension.getCellIds().addAll( dimension.getCellIds() );
- newDimension.setNumberOfCells( dimension.getNumberOfCells() );
- newDimension.getBioAssays().addAll( dimension.getBioAssays() );
- newDimension.setBioAssaysOffset( dimension.getBioAssaysOffset() );
+ Assert.isTrue( ee.getBioAssays().containsAll( dimension.getBioAssays() ), "Single-cell dimension does not belong to the dataset." );
+ // fail early: each cell needs exactly one label, otherwise the per-cell lookup below reads past the list or ignores labels
+ Assert.isTrue( newCellTypeLabels.size() == dimension.getCellIds().size(), "The number of cell type labels must match the number of cells." );
+ CellTypeLabelling labelling = new CellTypeLabelling();
+ labelling.setPreferred( true );
+ labelling.setProtocol( protocol );
+ labelling.setDescription( description );
int[] ct = new int[dimension.getCellIds().size()];
List<String> labels = newCellTypeLabels.stream().sorted().distinct().collect( Collectors.toList() );
for ( int i = 0; i < ct.length; i++ ) {
ct[i] = Collections.binarySearch( labels, newCellTypeLabels.get( i ) );
}
- newDimension.setCellTypes( ct );
- newDimension.setCellTypeLabels( labels );
- newDimension.setNumberOfCellTypeLabels( labels.size() );
- validateSingleCellDimension( ee, newDimension );
- expressionExperimentDao.createSingleCellDimension( ee, newDimension );
- int updatedVectors = expressionExperimentDao.replaceSingleCellDimension( ee, dimension, newDimension );
- if ( updatedVectors == 0 ) {
- throw new IllegalStateException( "There are no vectors with the dimension: " + dimension + ", cannot relabel cell types." );
+ labelling.setCellTypes( ct );
+ labelling.setCellTypeLabels( labels.stream()
+ .map( l -> Characteristic.Factory.newInstance( Categories.CELL_TYPE, l, null ) )
+ .collect( Collectors.toList() ) );
+ labelling.setNumberOfCellTypeLabels( labels.size() );
+ expressionExperimentDao.addCellTypeLabelling( ee, dimension, labelling );
+ validateSingleCellDimension( ee, dimension );
+ log.info( "Relabelled single-cell vectors for " + ee + " with: " + labelling );
+ // checking labelling.isPreferred() is not enough, the labelling might apply to non-preferred vectors
+ if ( labelling.equals( getPreferredCellTypeLabelling( ee ) ) ) {
+ log.info( "New labels are preferred and also apply to preferred single-cell vectors, recreating the cell type factor..." );
+ recreateCellTypeFactor( ee, labelling );
+ }
+ return labelling;
+ }
+
+ @Override
+ @Transactional
+ public void removeCellTypeLabels( ExpressionExperiment ee, SingleCellDimension dimension, CellTypeLabelling cellTypeLabelling ) {
+ Assert.notNull( ee.getId(), "Dataset must be persistent." );
+ Assert.notNull( dimension.getId(), "Single-cell dimension must be persistent." );
+ Assert.isTrue( ee.getBioAssays().containsAll( dimension.getBioAssays() ), "Single-cell dimension does not belong to the dataset." );
+ Assert.isTrue( dimension.getCellTypeLabellings().contains( cellTypeLabelling ),
+ "The supplied labelling does not belong to the dimension." );
+ boolean alsoRemoveFactor = cellTypeLabelling.equals( getPreferredCellTypeLabelling( ee ) );
+ dimension.getCellTypeLabellings().remove( cellTypeLabelling );
+ if ( alsoRemoveFactor ) {
+ log.info( "The preferred cell type labels have been removed, removing the cell type factor..." );
+ removeCellTypeFactorIfExists( ee );
}
- expressionExperimentDao.deleteSingleCellDimension( ee, dimension );
- log.info( "Relabelled " + updatedVectors + " single-cell vectors for " + ee + " with new dimension: " + newDimension );
- return newDimension;
+ }
+
+ @Override
+ @Transactional(readOnly = true)
+ public List<CellTypeLabelling> getCellTypeLabellings( ExpressionExperiment ee ) {
+ return expressionExperimentDao.getCellTypeLabellings( ee );
+ }
+
+ @Override
+ @Transactional(readOnly = true)
+ public CellTypeLabelling getPreferredCellTypeLabelling( ExpressionExperiment ee ) {
+ return expressionExperimentDao.getPreferredCellTypeLabelling( ee );
+ }
+
+ @Override
+ @Transactional(readOnly = true)
+ public List<Characteristic> getCellTypes( ExpressionExperiment ee ) {
+ return expressionExperimentDao.getCellTypes( ee );
}
/**
@@ -260,28 +328,75 @@ private void validateSingleCellDimension( ExpressionExperiment ee, SingleCellDim
"Cell IDs must be unique." );
Assert.isTrue( scbad.getCellIds().size() == scbad.getNumberOfCells(),
"The number of cell IDs must match the number of cells." );
- if ( scbad.getCellTypes() != null ) {
- Assert.notNull( scbad.getNumberOfCellTypeLabels() );
- Assert.notNull( scbad.getCellTypeLabels() );
- Assert.isTrue( scbad.getCellTypes().length == scbad.getCellIds().size(),
+ Assert.isTrue( scbad.getCellTypeLabellings().stream().filter( CellTypeLabelling::isPreferred ).count() <= 1,
+ "There must be at most one preferred cell type labelling." );
+ for ( CellTypeLabelling labelling : scbad.getCellTypeLabellings() ) {
+ Assert.notNull( labelling.getNumberOfCellTypeLabels() );
+ Assert.notNull( labelling.getCellTypeLabels() );
+ Assert.isTrue( labelling.getCellTypes().length == scbad.getCellIds().size(),
"The number of cell types must match the number of cell IDs." );
- int numberOfCellTypeLabels = scbad.getCellTypeLabels().size();
+ int numberOfCellTypeLabels = labelling.getCellTypeLabels().size();
Assert.isTrue( numberOfCellTypeLabels > 0,
"There must be at least one cell type label declared in the cellTypeLabels collection." );
- Assert.isTrue( scbad.getCellTypeLabels().stream().distinct().count() == scbad.getCellTypeLabels().size(),
+ Assert.isTrue( labelling.getCellTypeLabels().stream().distinct().count() == labelling.getCellTypeLabels().size(),
"Cell type labels must be unique." );
- Assert.isTrue( numberOfCellTypeLabels == scbad.getNumberOfCellTypeLabels(),
+ Assert.isTrue( numberOfCellTypeLabels == labelling.getNumberOfCellTypeLabels(),
"The number of cell types must match the number of values the cellTypeLabels collection." );
- for ( int k : scbad.getCellTypes() ) {
+ for ( int k : labelling.getCellTypes() ) {
Assert.isTrue( k >= 0 && k < numberOfCellTypeLabels,
String.format( "Cell type vector values must be within the [%d, %d[ range.", 0, numberOfCellTypeLabels ) );
}
- } else {
- Assert.isNull( scbad.getCellTypeLabels() );
- Assert.isNull( scbad.getNumberOfCellTypeLabels(), "There is no cell types assigned, the number of cell types must be null." );
}
Assert.isTrue( !scbad.getBioAssays().isEmpty(), "There must be at least one BioAssay." );
Assert.isTrue( ee.getBioAssays().containsAll( scbad.getBioAssays() ), "Not all supplied BioAssays belong to " + ee );
validateSparseRangeArray( scbad.getBioAssays(), scbad.getBioAssaysOffset(), scbad.getNumberOfCells() );
}
+
+ // a missing preferred labelling is a state problem: Assert.state raises the IllegalStateException the interface documents
+ @Override
+ @Transactional
+ public ExperimentalFactor recreateCellTypeFactor( ExpressionExperiment ee ) {
+ CellTypeLabelling ctl = getPreferredCellTypeLabelling( ee );
+ Assert.state( ctl != null, "There must be a preferred cell type labelling for " + ee + " to update the cell type factor." );
+ return recreateCellTypeFactor( ee, ctl );
+ }
+
+ private ExperimentalFactor recreateCellTypeFactor( ExpressionExperiment ee, CellTypeLabelling ctl ) {
+ removeCellTypeFactorIfExists( ee );
+ // create a new categorical cell type factor, then one factor value (holding one statement) per cell type label
+ ExperimentalFactor cellTypeFactor = ExperimentalFactor.Factory.newInstance();
+ cellTypeFactor.setType( FactorType.CATEGORICAL );
+ cellTypeFactor.setCategory( Characteristic.Factory.newInstance( Categories.CELL_TYPE ) );
+ cellTypeFactor.setExperimentalDesign( ee.getExperimentalDesign() );
+ ee.getExperimentalDesign().getExperimentalFactors().add( cellTypeFactor );
+ for ( Characteristic ct : ctl.getCellTypeLabels() ) {
+ FactorValue fv = new FactorValue();
+ Statement s = new Statement();
+ s.setCategory( ct.getCategory() );
+ s.setCategoryUri( ct.getCategoryUri() );
+ s.setSubject( ct.getValue() );
+ s.setSubjectUri( ct.getValueUri() );
+ fv.getCharacteristics().add( s );
+ fv.setExperimentalFactor( cellTypeFactor );
+ cellTypeFactor.getFactorValues().add( fv );
+ }
+ // the factor is handed to the service only once all of its factor values are attached
+ return experimentalFactorService.create( cellTypeFactor );
+ }
+
+ private void removeCellTypeFactorIfExists( ExpressionExperiment ee ) {
+ // only the first factor whose category matches the "cell type" category is removed
+ for ( ExperimentalFactor factor : ee.getExperimentalDesign().getExperimentalFactors() ) {
+ if ( factor.getCategory() != null
+ && CharacteristicUtils.equals( factor.getCategory().getCategory(), factor.getCategory().getCategoryUri(),
+ Categories.CELL_TYPE.getCategory(), Categories.CELL_TYPE.getCategoryUri() ) ) {
+ // this will remove analysis involving the factor and also sample-fv associations
+ log.info( "Removing existing cell type factor for " + ee );
+ experimentalFactorService.remove( factor );
+ return;
+ }
+ }
+ // reached only when no factor matched
+ log.info( "There's no cell type factor for " + ee );
+ }
}
diff --git a/gemma-core/src/main/resources/ubic/gemma/model/analysis/Analysis.hbm.xml b/gemma-core/src/main/resources/ubic/gemma/model/analysis/Analysis.hbm.xml
index d8b840107e..51f001613f 100644
--- a/gemma-core/src/main/resources/ubic/gemma/model/analysis/Analysis.hbm.xml
+++ b/gemma-core/src/main/resources/ubic/gemma/model/analysis/Analysis.hbm.xml
@@ -142,5 +142,28 @@
+
+
+
+
+
+
+
+ int
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/gemma-core/src/main/resources/ubic/gemma/model/expression/bioAssayData/SingleCellDimension.hbm.xml b/gemma-core/src/main/resources/ubic/gemma/model/expression/bioAssayData/SingleCellDimension.hbm.xml
index 3afd549558..f673b92e86 100644
--- a/gemma-core/src/main/resources/ubic/gemma/model/expression/bioAssayData/SingleCellDimension.hbm.xml
+++ b/gemma-core/src/main/resources/ubic/gemma/model/expression/bioAssayData/SingleCellDimension.hbm.xml
@@ -20,24 +20,12 @@
-
-
-
- int
-
-
-
-
+
+
-
-
-
-
-
-
-
-
+
+
diff --git a/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/SingleCellExpressionExperimentServiceTest.java b/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/SingleCellExpressionExperimentServiceTest.java
index 5d415405c7..79728e4a46 100644
--- a/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/SingleCellExpressionExperimentServiceTest.java
+++ b/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/SingleCellExpressionExperimentServiceTest.java
@@ -2,6 +2,7 @@
import gemma.gsec.SecurityService;
import org.apache.commons.lang3.RandomStringUtils;
+import org.hibernate.NonUniqueResultException;
import org.hibernate.SessionFactory;
import org.junit.After;
import org.junit.Before;
@@ -18,13 +19,18 @@
import ubic.gemma.model.common.auditAndSecurity.eventType.DataAddedEvent;
import ubic.gemma.model.common.auditAndSecurity.eventType.DataRemovedEvent;
import ubic.gemma.model.common.auditAndSecurity.eventType.DataReplacedEvent;
+import ubic.gemma.model.common.description.Categories;
+import ubic.gemma.model.common.description.Characteristic;
import ubic.gemma.model.common.quantitationtype.*;
import ubic.gemma.model.expression.arrayDesign.ArrayDesign;
import ubic.gemma.model.expression.bioAssay.BioAssay;
+import ubic.gemma.model.expression.bioAssayData.CellTypeLabelling;
import ubic.gemma.model.expression.bioAssayData.SingleCellDimension;
import ubic.gemma.model.expression.bioAssayData.SingleCellExpressionDataVector;
import ubic.gemma.model.expression.biomaterial.BioMaterial;
import ubic.gemma.model.expression.designElement.CompositeSequence;
+import ubic.gemma.model.expression.experiment.ExperimentalDesign;
+import ubic.gemma.model.expression.experiment.ExperimentalFactor;
import ubic.gemma.model.expression.experiment.ExpressionExperiment;
import ubic.gemma.model.genome.Taxon;
import ubic.gemma.persistence.service.analysis.expression.coexpression.CoexpressionAnalysisService;
@@ -187,6 +193,7 @@ public void setUp() {
}
sessionFactory.getCurrentSession().persist( ad );
ee = new ExpressionExperiment();
+ ee.setExperimentalDesign( new ExperimentalDesign() );
ee.setTaxon( taxon );
// TODO: model bioassays against sub-biomaterial to represent cell aggregates
BioMaterial bm = BioMaterial.Factory.newInstance( "a", taxon );
@@ -218,8 +225,8 @@ public void testAddSingleCellDataVectors() {
assertThat( scExpressionExperimentService.getSingleCellDimensions( ee ) )
.hasSize( 1 )
.allSatisfy( scd -> {
- assertThat( scd.getCellTypeLabel( 0 ) ).isEqualTo( "A" );
- assertThat( scd.getCellTypeLabel( 50 ) ).isEqualTo( "B" );
+ assertThat( scd.getCellTypeLabellings().iterator().next().getCellTypeLabel( 0 ).getValue() ).isEqualTo( "A" );
+ assertThat( scd.getCellTypeLabellings().iterator().next().getCellTypeLabel( 50 ).getValue() ).isEqualTo( "B" );
} );
Collection vectors2 = createSingleCellVectors( true );
@@ -339,15 +346,76 @@ public void testRelabelCellTypes() {
SingleCellDimension scd = vectors.iterator().next().getSingleCellDimension();
scExpressionExperimentService.addSingleCellDataVectors( ee, qt, vectors );
sessionFactory.getCurrentSession().flush();
+ assertThat( scExpressionExperimentService.getCellTypeLabellings( ee ) )
+ .hasSize( 1 );
+ assertThat( scExpressionExperimentService.getPreferredCellTypeLabelling( ee ) ).isNotNull();
+ assertThat( scExpressionExperimentService.getCellTypes( ee ) ).hasSize( 2 )
+ .extracting( Characteristic::getValue )
+ .containsExactlyInAnyOrder( "A", "B" );
String[] ct = new String[100];
for ( int i = 0; i < ct.length; i++ ) {
ct[i] = i < 75 ? "A" : "B";
}
- SingleCellDimension newScd = scExpressionExperimentService.relabelCellTypes( ee, scd, Arrays.asList( ct ) );
- assertThat( newScd.getId() ).isNotNull();
+ CellTypeLabelling newLabelling = scExpressionExperimentService.relabelCellTypes( ee, scd, Arrays.asList( ct ), null, null );
+ assertThat( newLabelling ).satisfies( l -> {
+ assertThat( l.getId() ).isNotNull();
+ assertThat( l.isPreferred() ).isTrue();
+ } );
assertThat( ee.getSingleCellExpressionDataVectors() )
.hasSize( 10 )
- .allSatisfy( v -> assertThat( v.getSingleCellDimension() ).isEqualTo( newScd ) );
+ .allSatisfy( v -> assertThat( v.getSingleCellDimension().getCellTypeLabellings() ).contains( newLabelling ) );
+ assertThat( scExpressionExperimentService.getCellTypeLabellings( ee ) )
+ .hasSize( 1 )
+ .contains( newLabelling );
+ assertThat( scExpressionExperimentService.getPreferredCellTypeLabelling( ee ) ).isEqualTo( newLabelling );
+ assertThat( scExpressionExperimentService.getCellTypes( ee ) ).hasSize( 2 )
+ .extracting( Characteristic::getValue )
+ .containsExactlyInAnyOrder( "A", "B" );
+
+ scExpressionExperimentService.removeCellTypeLabels( ee, scd, newLabelling );
+ assertThat( scExpressionExperimentService.getPreferredCellTypeLabelling( ee ) ).isNull();
+
+ // FIXME: add proper assertions for the created factor, but the ExperimentalFactorService is mocked
+ verify( experimentalFactorService, times( 2 ) ).create( any( ExperimentalFactor.class ) );
+ verify( experimentalFactorService, times( 2 ) ).remove( any( ExperimentalFactor.class ) );
+ }
+
+ /**
+  * This is a behaviour test for when the preferred cell type labelling is not unique. This can be caused by
+  * multiple preferred single-cell QTs or multiple preferred cell type labellings.
+  * <p>
+  * Two sets of vectors are added, then the first quantitation type is flagged as preferred again so that two
+  * preferred QTs coexist. Looking up the preferred labelling is then expected to fail with a
+  * {@link NonUniqueResultException}.
+  */
+ @Test
+ public void testGetPreferredCellTypeLabellingWhenNonUnique() {
+     Collection<SingleCellExpressionDataVector> vectors = createSingleCellVectors( true );
+     QuantitationType qt = vectors.iterator().next().getQuantitationType();
+     scExpressionExperimentService.addSingleCellDataVectors( ee, qt, vectors );
+
+     Collection<SingleCellExpressionDataVector> vectors2 = createSingleCellVectors( true );
+     QuantitationType qt2 = vectors2.iterator().next().getQuantitationType();
+     scExpressionExperimentService.addSingleCellDataVectors( ee, qt2, vectors2 );
+     // after the second addition, only the most recently added QT is expected to be preferred
+     assertThat( qt.getIsPreferred() ).isFalse();
+     assertThat( qt2.getIsPreferred() ).isTrue();
+
+     // now we're going to do something really bad...
+     qt.setIsPreferred( true );
+
+     assertThatThrownBy( () -> scExpressionExperimentService.getPreferredCellTypeLabelling( ee ) )
+             .isInstanceOf( NonUniqueResultException.class );
+ }
+
+ // Used as a mock: tests in this class stub it with when(...) and check its invocations with verify(...),
+ // so factors passed to create()/remove() are not actually persisted or deleted.
+ @Autowired
+ private ExperimentalFactorService experimentalFactorService;
+
+ /**
+  * Check that recreating the cell type factor yields a factor categorized as "cell type" with the matching EFO
+  * URI.
+  * <p>
+  * The {@code experimentalFactorService} is stubbed so that {@code create()} hands back its argument, which lets
+  * the returned factor be inspected directly.
+  */
+ @Test
+ public void testRecreateCellTypeFactor() {
+     when( experimentalFactorService.create( any( ExperimentalFactor.class ) ) ).thenAnswer( a -> a.getArgument( 0 ) );
+     Collection<SingleCellExpressionDataVector> vectors = createSingleCellVectors( true );
+     QuantitationType qt = vectors.iterator().next().getQuantitationType();
+     scExpressionExperimentService.addSingleCellDataVectors( ee, qt, vectors );
+     ExperimentalFactor factor = scExpressionExperimentService.recreateCellTypeFactor( ee );
+     // literal values are asserted on purpose so the test does not depend on the Categories constant
+     assertThat( factor.getCategory() ).isNotNull().satisfies( f -> {
+         assertThat( f.getCategory() ).isEqualTo( "cell type" );
+         assertThat( f.getCategoryUri() ).isEqualTo( "http://www.ebi.ac.uk/efo/EFO_0000324" );
+     } );
}
private SingleCellDimension createSingleCellDimension() {
@@ -358,9 +426,14 @@ private SingleCellDimension createSingleCellDimension() {
for ( int i = 0; i < ct.length; i++ ) {
ct[i] = i < 50 ? 0 : 1;
}
- scd.setCellTypes( ct );
- scd.setCellTypeLabels( Arrays.asList( "A", "B" ) );
- scd.setNumberOfCellTypeLabels( 2 );
+ CellTypeLabelling labelling = new CellTypeLabelling();
+ labelling.setPreferred( true );
+ labelling.setCellTypes( ct );
+ labelling.setCellTypeLabels( Arrays.asList(
+ Characteristic.Factory.newInstance( Categories.CELL_TYPE, "A", null ),
+ Characteristic.Factory.newInstance( Categories.CELL_TYPE, "B", null ) ) );
+ labelling.setNumberOfCellTypeLabels( 2 );
+ scd.getCellTypeLabellings().add( labelling );
scd.getBioAssays().addAll( ee.getBioAssays() );
scd.setBioAssaysOffset( new int[] { 0, 25, 50, 75 } );
return scd;