diff --git a/gemma-core/src/main/java/ubic/gemma/model/common/description/Categories.java b/gemma-core/src/main/java/ubic/gemma/model/common/description/Categories.java new file mode 100644 index 0000000000..0ef5cf8b75 --- /dev/null +++ b/gemma-core/src/main/java/ubic/gemma/model/common/description/Categories.java @@ -0,0 +1,14 @@ +package ubic.gemma.model.common.description; + +import ubic.gemma.core.ontology.OntologyService; + +/** + * Enumeration of commonly used categories that can be referred to in the code. + *

+ * Entries here have corresponding declarations in {@code EFO.factor.categories.txt} and are also available via + * {@link OntologyService#getCategoryTerms()} in the form of ontology terms. + */ +public final class Categories { + + public static Category CELL_TYPE = new Category( "cell type", "http://www.ebi.ac.uk/efo/EFO_0000324" ); +} diff --git a/gemma-core/src/main/java/ubic/gemma/model/common/description/Category.java b/gemma-core/src/main/java/ubic/gemma/model/common/description/Category.java new file mode 100644 index 0000000000..8f353cf5c5 --- /dev/null +++ b/gemma-core/src/main/java/ubic/gemma/model/common/description/Category.java @@ -0,0 +1,20 @@ +package ubic.gemma.model.common.description; + +import lombok.Value; + +import javax.annotation.Nullable; + +/** + * Represents a category. + *

+ * We intend to revamp the characteristic hierarchy + * which will make categories persistent alongside {@link ubic.gemma.model.expression.experiment.Statement} and terms. + * @author poirigui + * @see Categories + */ +@Value +public class Category { + String category; + @Nullable + String categoryUri; +} diff --git a/gemma-core/src/main/java/ubic/gemma/model/common/description/Characteristic.java b/gemma-core/src/main/java/ubic/gemma/model/common/description/Characteristic.java index d82c265c0d..9671e77387 100644 --- a/gemma-core/src/main/java/ubic/gemma/model/common/description/Characteristic.java +++ b/gemma-core/src/main/java/ubic/gemma/model/common/description/Characteristic.java @@ -240,8 +240,11 @@ public boolean equals( Object object ) { * the fields; we can't just compare the hashcodes because they also look at the id, so comparing one transient * and one persistent would always fail... */ - return CharacteristicUtils.equals( category, categoryUri, that.category, that.categoryUri ) - && CharacteristicUtils.equals( value, valueUri, that.value, that.valueUri ); + if ( !CharacteristicUtils.equals( category, categoryUri, that.category, that.categoryUri ) ) return false; + if ( valueUri != null ^ that.valueUri != null ) { + return false; // free-text vs. ontology term, always false + } + return valueUri != null ? 
org.apache.commons.lang.StringUtils.equalsIgnoreCase( valueUri, that.valueUri ) : org.apache.commons.lang.StringUtils.equalsIgnoreCase( value, that.value ); } @Override @@ -291,6 +294,22 @@ public static Characteristic newInstance( String name, String description, Strin entity.setEvidenceCode( evidenceCode ); return entity; } + + public static Characteristic newInstance( Category category ) { + Characteristic entity = new Characteristic(); + entity.setCategory( category.getCategory() ); + entity.setCategoryUri( category.getCategoryUri() ); + return entity; + } + + public static Characteristic newInstance( Category category, String value, @Nullable String valueUri ) { + Characteristic entity = new Characteristic(); + entity.setCategory( category.getCategory() ); + entity.setCategoryUri( category.getCategoryUri() ); + entity.setValue( value ); + entity.setValueUri( StringUtils.stripToNull( valueUri ) ); + return entity; + } } } \ No newline at end of file diff --git a/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/CellTypeLabelling.java b/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/CellTypeLabelling.java new file mode 100644 index 0000000000..b67bf182e0 --- /dev/null +++ b/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/CellTypeLabelling.java @@ -0,0 +1,57 @@ +package ubic.gemma.model.expression.bioAssayData; + +import lombok.Getter; +import lombok.Setter; +import org.springframework.util.Assert; +import ubic.gemma.model.analysis.Analysis; +import ubic.gemma.model.common.description.Characteristic; + +import java.util.Arrays; +import java.util.List; +import java.util.Objects; + +/** + * Represents the labelling of cell types. + */ +@Getter +@Setter +public class CellTypeLabelling extends Analysis { + + /** + * Indicate if this labelling is the preferred one. + */ + private boolean preferred; + + /** + * Cell types assignment to individual cells from the {@link #cellTypeLabels} collections. 
+ */ + private int[] cellTypes; + + /** + * Cell type labels. + */ + private List cellTypeLabels; + + /** + * Number of distinct cell types. + *

+ * This must always be equal to number of distinct elements of {@link #cellTypeLabels}. + */ + private Integer numberOfCellTypeLabels; + + public Characteristic getCellTypeLabel( int index ) { + Assert.notNull( cellTypes, "No cell types have been assigned." ); + Assert.notNull( cellTypeLabels, "No cell labels exist." ); + return cellTypeLabels.get( cellTypes[index] ); + } + + @Override + public int hashCode() { + return Objects.hash( Arrays.hashCode( cellTypes ), cellTypeLabels ); + } + + @Override + public boolean equals( Object object ) { + return super.equals( object ); + } +} diff --git a/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/SingleCellDimension.java b/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/SingleCellDimension.java index 9321cf15af..ea6eeac693 100644 --- a/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/SingleCellDimension.java +++ b/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/SingleCellDimension.java @@ -2,7 +2,6 @@ import lombok.Getter; import lombok.Setter; -import org.springframework.util.Assert; import ubic.gemma.core.util.ListUtils; import ubic.gemma.model.common.Identifiable; import ubic.gemma.model.expression.bioAssay.BioAssay; @@ -45,30 +44,10 @@ public class SingleCellDimension implements Identifiable { private int numberOfCells = 0; /** - * Cell types assignment to individual cells from the {@link #cellTypeLabels} collections. - *

- * If supplied, its size must be equal to that of {@link #cellIds}. - */ - @Nullable - private int[] cellTypes; - - /** - * Cell type labels, or null if unknown. - *

- * Those are user-supplied cell type identifiers. Its size must be equal to that of {@link #cellIds}. - *

- * This is stored as a compressed, gzipped blob in the database. See {@link CompressedStringListType} for more details. - */ - @Nullable - private List cellTypeLabels; - - /** - * Number of distinct cell types. - *

- * This must always be equal to number of distinct elements of {@link #cellTypes}. + * Set of cell types assignment to individual cells. This is empty if no cell types have been assigned and should + * always contain a preferred labelling as per {@link CellTypeLabelling#preferred} if non-empty. */ - @Nullable - private Integer numberOfCellTypeLabels; + private Set cellTypeLabellings = new HashSet<>(); /** * List of bioassays that each cell belongs to. @@ -107,19 +86,6 @@ public BioAssay getBioAssayByCellId( String cellId ) { return getBioAssay( getCellIndex( cellId ) ); } - public String getCellTypeLabel( int index ) { - Assert.notNull( cellTypes, "No cell types have been assigned." ); - Assert.notNull( cellTypeLabels, "No cell labels exist." ); - return cellTypeLabels.get( cellTypes[index] ); - } - - /** - * Obtain a cell type label by cell ID. - */ - public String getCellTypeLabelByCellId( String cellId ) { - return getCellTypeLabel( getCellIndex( cellId ) ); - } - private int getCellIndex( String cellId ) { if ( cellIdToIndex == null ) { cellIdToIndex = ListUtils.indexOfElements( cellIds ); @@ -137,7 +103,7 @@ public int hashCode() { return Objects.hash( id ); } // no need to hash numberOfCells, it's derived from cellIds's size - return Objects.hash( cellIds, Arrays.hashCode( cellTypes ), cellTypeLabels, bioAssays, Arrays.hashCode( bioAssaysOffset ) ); + return Objects.hash( cellIds, bioAssays, Arrays.hashCode( bioAssaysOffset ) ); } @Override @@ -149,10 +115,8 @@ public boolean equals( Object obj ) { SingleCellDimension scd = ( SingleCellDimension ) obj; if ( id != null && scd.id != null ) return id.equals( scd.id ); - return Objects.equals( cellTypeLabels, scd.cellTypeLabels ) - && Objects.equals( bioAssays, scd.bioAssays ) + return Objects.equals( bioAssays, scd.bioAssays ) && Arrays.equals( bioAssaysOffset, scd.bioAssaysOffset ) - && Arrays.equals( cellTypes, scd.cellTypes ) && Objects.equals( cellIds, scd.cellIds ); // this is the most expensive to compare 
} diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDao.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDao.java index e65426c4a4..bf253e1820 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDao.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDao.java @@ -11,6 +11,7 @@ import ubic.gemma.model.expression.arrayDesign.TechnologyType; import ubic.gemma.model.expression.bioAssay.BioAssay; import ubic.gemma.model.expression.bioAssayData.BioAssayDimension; +import ubic.gemma.model.expression.bioAssayData.CellTypeLabelling; import ubic.gemma.model.expression.bioAssayData.MeanVarianceRelation; import ubic.gemma.model.expression.bioAssayData.SingleCellDimension; import ubic.gemma.model.expression.biomaterial.BioMaterial; @@ -312,12 +313,22 @@ Map> getSampleRemovalEvents( void deleteSingleCellDimension( ExpressionExperiment ee, SingleCellDimension singleCellDimension ); + List getCellTypeLabellings( ExpressionExperiment ee ); + /** - * Replace the SCD of a given dataset. - * @param ee an expression experiment; its vectors will be refreshed - * @param dimension the existing dimension - * @param newDimension the new dimension - * @return the number of updated vectors as a result + * Obtain the preferred labelling of the preferred single-cell vectors. + * @throws org.springframework.dao.IncorrectResultSizeDataAccessException if there are multiple preferred cell-type + * labellings */ - int replaceSingleCellDimension( ExpressionExperiment ee, SingleCellDimension dimension, SingleCellDimension newDimension ); + @Nullable + CellTypeLabelling getPreferredCellTypeLabelling( ExpressionExperiment ee ); + + /** + * Add the given cell type labelling to the single-cell dimension. + *

+ * If the new labelling is preferred, any existing one is marked as non-preferred. + */ + void addCellTypeLabelling( ExpressionExperiment ee, SingleCellDimension singleCellDimension, CellTypeLabelling cellTypeLabelling ); + + List getCellTypes( ExpressionExperiment ee ); } diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java index 9425aa54f4..ecc8156b9b 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java @@ -45,6 +45,7 @@ import ubic.gemma.model.expression.arrayDesign.TechnologyType; import ubic.gemma.model.expression.bioAssay.BioAssay; import ubic.gemma.model.expression.bioAssayData.BioAssayDimension; +import ubic.gemma.model.expression.bioAssayData.CellTypeLabelling; import ubic.gemma.model.expression.bioAssayData.MeanVarianceRelation; import ubic.gemma.model.expression.bioAssayData.SingleCellDimension; import ubic.gemma.model.expression.biomaterial.BioMaterial; @@ -1949,9 +1950,8 @@ public void thawForFrontEnd( final ExpressionExperiment expressionExperiment ) { public List getSingleCellDimensions( ExpressionExperiment ee ) { //noinspection unchecked return getSessionFactory().getCurrentSession() - .createQuery( "select scedv.singleCellDimension from SingleCellExpressionDataVector scedv " - + "where scedv.expressionExperiment = :ee " - + "group by scedv.singleCellDimension" ) + .createQuery( "select distinct scedv.singleCellDimension from SingleCellExpressionDataVector scedv " + + "where scedv.expressionExperiment = :ee" ) .setParameter( "ee", ee ) .list(); } @@ -1967,18 +1967,55 @@ public void deleteSingleCellDimension( ExpressionExperiment ee, SingleCellDimens } @Override - public int 
replaceSingleCellDimension( ExpressionExperiment ee, SingleCellDimension dimension, SingleCellDimension newDimension ) { - int updatedVectors = getSessionFactory().getCurrentSession() - .createQuery( "update SingleCellExpressionDataVector scd set scd.singleCellDimension = :newDimension where scd.singleCellDimension = :dim" ) - .setParameter( "dim", dimension ) - .setParameter( "newDimension", newDimension ) - .executeUpdate(); - if ( updatedVectors > 0 && Hibernate.isInitialized( ee.getSingleCellExpressionDataVectors() ) ) { - // will reload vectors with the updated SCD - // if the vectors are not initialized, they will be loaded with the updated SCD when they are accessed - getSessionFactory().getCurrentSession().refresh( ee ); + public List getCellTypeLabellings( ExpressionExperiment ee ) { + //noinspection unchecked + return getSessionFactory().getCurrentSession() + .createQuery( "select distinct ctl from SingleCellExpressionDataVector scedv " + + "join scedv.singleCellDimension scd " + + "join scd.cellTypeLabellings ctl " + + "where scedv.expressionExperiment = :ee" ) + .setParameter( "ee", ee ) + .list(); + } + + @Nullable + @Override + public CellTypeLabelling getPreferredCellTypeLabelling( ExpressionExperiment ee ) { + return ( CellTypeLabelling ) getSessionFactory().getCurrentSession() + .createQuery( "select distinct ctl from SingleCellExpressionDataVector scedv " + + "join scedv.singleCellDimension scd " + + "join scd.cellTypeLabellings ctl " + + "where scedv.quantitationType.isPreferred = true and ctl.preferred = true and scedv.expressionExperiment = :ee" ) + .setParameter( "ee", ee ) + .uniqueResult(); + } + + @Override + public void addCellTypeLabelling( ExpressionExperiment ee, SingleCellDimension dimension, CellTypeLabelling labelling ) { + if ( labelling.isPreferred() ) { + for ( CellTypeLabelling l : dimension.getCellTypeLabellings() ) { + if ( l.isPreferred() ) { + log.info( "Marking existing cell type labelling as non-preferred, a new preferred 
labelling will be added." ); + l.setPreferred( false ); + break; + } + } } - return updatedVectors; + getSessionFactory().getCurrentSession().persist( labelling ); + dimension.getCellTypeLabellings().add( labelling ); + } + + @Override + public List getCellTypes( ExpressionExperiment ee ) { + //noinspection unchecked + return getSessionFactory().getCurrentSession() + .createQuery( "select distinct ct from SingleCellExpressionDataVector scedv " + + "join scedv.singleCellDimension scd " + + "join scd.cellTypeLabellings ctl " + + "join ctl.cellTypeLabels ct " + + "where scedv.expressionExperiment = :ee and scedv.quantitationType.isPreferred = true and ctl.preferred = true" ) + .setParameter( "ee", ee ) + .list(); } @Override diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/SingleCellExpressionExperimentService.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/SingleCellExpressionExperimentService.java index b683e42bb9..0388b076ed 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/SingleCellExpressionExperimentService.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/SingleCellExpressionExperimentService.java @@ -1,11 +1,16 @@ package ubic.gemma.persistence.service.expression.experiment; import org.springframework.security.access.annotation.Secured; +import ubic.gemma.model.common.description.Characteristic; +import ubic.gemma.model.common.protocol.Protocol; import ubic.gemma.model.common.quantitationtype.QuantitationType; +import ubic.gemma.model.expression.bioAssayData.CellTypeLabelling; import ubic.gemma.model.expression.bioAssayData.SingleCellDimension; import ubic.gemma.model.expression.bioAssayData.SingleCellExpressionDataVector; +import ubic.gemma.model.expression.experiment.ExperimentalFactor; import ubic.gemma.model.expression.experiment.ExpressionExperiment; +import javax.annotation.Nullable; import 
java.util.Collection; import java.util.List; @@ -39,8 +44,51 @@ void replaceSingleCellDataVectors( ExpressionExperiment ee, QuantitationType qua /** * Relabel the cell types of an existing set of single-cell vectors. - * @return a new dimension with the relabeled cell types, the original one is deleted + * @param newCellTypeLabels the new cell type labels, must match the number of cells + * @param labellingProtocol the protocol used to generate the new labelling, or null if unknown + * @return a new, preferred cell type labelling */ @Secured({ "GROUP_USER", "ACL_SECURABLE_READ" }) - SingleCellDimension relabelCellTypes( ExpressionExperiment ee, SingleCellDimension dimension, List newCellTypeLabels ); + CellTypeLabelling relabelCellTypes( ExpressionExperiment ee, SingleCellDimension dimension, List newCellTypeLabels, @Nullable Protocol labellingProtocol, @Nullable String description ); + + /** + * Remove the given cell type labelling. + * + * If the cell type labelling is preferred and applies to the preferred vectors as per {@link #getPreferredCellTypeLabelling(ExpressionExperiment)}, the cell type factor will be removed. + */ + @Secured({ "GROUP_USER", "ACL_SECURABLE_READ" }) + void removeCellTypeLabels( ExpressionExperiment ee, SingleCellDimension scd, CellTypeLabelling cellTypeLabelling ); + + /** + * Obtain all the cell type labellings from all single-cell vectors. + */ + @Secured({ "GROUP_USER", "ACL_SECURABLE_READ" }) + List getCellTypeLabellings( ExpressionExperiment ee ); + + /** + * Obtain the preferred cell type labelling from the preferred single-cell vectors. + */ + @Nullable + @Secured({ "GROUP_USER", "ACL_SECURABLE_READ" }) + CellTypeLabelling getPreferredCellTypeLabelling( ExpressionExperiment ee ); + + /** + * Obtain the cell types of a given single-cell dataset. + *

+ * Only the cell types applicable to the preferred single-cell vectors and labelling are returned. + */ + @Secured({ "GROUP_USER", "ACL_SECURABLE_READ" }) + List getCellTypes( ExpressionExperiment ee ); + + /** + * Recreate the cell type factor based on the preferred labelling of the preferred single-cell vectors. + *

+ * Analyses involving the factor are removed and samples mentioning the factor values are updated as per + * {@link ExperimentalFactorService#remove(ExperimentalFactor)}. + * @return the created cell type factor + * @throws IllegalStateException if the dataset does not have a preferred cell type labelling for its preferred set + * of single-cell vectors + */ + @Secured({ "GROUP_USER", "ACL_SECURABLE_READ" }) + ExperimentalFactor recreateCellTypeFactor( ExpressionExperiment ee ); } diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/SingleCellExpressionExperimentServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/SingleCellExpressionExperimentServiceImpl.java index 0dfc8db3d3..618e64e3d4 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/SingleCellExpressionExperimentServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/SingleCellExpressionExperimentServiceImpl.java @@ -9,13 +9,18 @@ import ubic.gemma.model.common.auditAndSecurity.eventType.DataAddedEvent; import ubic.gemma.model.common.auditAndSecurity.eventType.DataRemovedEvent; import ubic.gemma.model.common.auditAndSecurity.eventType.DataReplacedEvent; +import ubic.gemma.model.common.description.Categories; +import ubic.gemma.model.common.description.Characteristic; +import ubic.gemma.model.common.description.CharacteristicUtils; +import ubic.gemma.model.common.protocol.Protocol; import ubic.gemma.model.common.quantitationtype.PrimitiveType; import ubic.gemma.model.common.quantitationtype.QuantitationType; import ubic.gemma.model.expression.arrayDesign.ArrayDesign; +import ubic.gemma.model.expression.bioAssayData.CellTypeLabelling; import ubic.gemma.model.expression.bioAssayData.SingleCellDimension; import ubic.gemma.model.expression.bioAssayData.SingleCellExpressionDataVector; import 
ubic.gemma.model.expression.designElement.CompositeSequence; -import ubic.gemma.model.expression.experiment.ExpressionExperiment; +import ubic.gemma.model.expression.experiment.*; import ubic.gemma.persistence.service.common.auditAndSecurity.AuditTrailService; import javax.annotation.Nullable; @@ -34,6 +39,9 @@ public class SingleCellExpressionExperimentServiceImpl implements SingleCellExpr @Autowired private ExpressionExperimentDao expressionExperimentDao; + @Autowired + private ExperimentalFactorService experimentalFactorService; + @Autowired private AuditTrailService auditTrailService; @@ -54,9 +62,11 @@ public void addSingleCellDataVectors( ExpressionExperiment ee, QuantitationType ee, quantitationType ) ); validateSingleCellDataVectors( ee, quantitationType, vectors ); SingleCellDimension scd = vectors.iterator().next().getSingleCellDimension(); + boolean scdCreated = false; if ( scd.getId() == null ) { log.info( "Creating a new single-cell dimension for " + ee + ": " + scd ); expressionExperimentDao.createSingleCellDimension( ee, scd ); + scdCreated = true; } for ( SingleCellExpressionDataVector v : vectors ) { v.setExpressionExperiment( ee ); @@ -77,6 +87,16 @@ public void addSingleCellDataVectors( ExpressionExperiment ee, QuantitationType } ee.getQuantitationTypes().add( quantitationType ); expressionExperimentDao.update( ee ); // will take care of creating vectors + if ( quantitationType.getIsPreferred() && scdCreated ) { + CellTypeLabelling preferredLabelling = scd.getCellTypeLabellings().stream().filter( CellTypeLabelling::isPreferred ).findFirst().orElse( null ); + if ( preferredLabelling != null ) { + log.info( "New single-cell preferred vectors were added, recreating the cell type factor." ); + recreateCellTypeFactor( ee, preferredLabelling ); + } else { + log.info( "New single-cell preferred vectors do not have cell type labelling, removing any existing cell type factor..." 
); + removeCellTypeFactorIfExists( ee ); + } + } auditTrailService.addUpdateEvent( ee, DataAddedEvent.class, String.format( "Added %d vectors for %s with dimension %s", numVectorsAdded, quantitationType, scd ) ); } @@ -114,6 +134,16 @@ public void replaceSingleCellDataVectors( ExpressionExperiment ee, QuantitationT ee.getSingleCellExpressionDataVectors().addAll( vectors ); int numVectorsAdded = ee.getSingleCellExpressionDataVectors().size() - ( previousSize - numVectorsRemoved ); expressionExperimentDao.update( ee ); + if ( quantitationType.getIsPreferred() && scdCreated ) { + CellTypeLabelling preferredLabelling = scd.getCellTypeLabellings().stream().filter( CellTypeLabelling::isPreferred ).findFirst().orElse( null ); + if ( preferredLabelling != null ) { + log.info( "Preferred single-cell vectors were replaced, recreating the cell type factor." ); + recreateCellTypeFactor( ee, preferredLabelling ); + } else { + log.info( "Preferred single-cell vectors do not have cell type labelling, removing any existing cell type factor..." ); + removeCellTypeFactorIfExists( ee ); + } + } auditTrailService.addUpdateEvent( ee, DataReplacedEvent.class, String.format( "Replaced %d vectors with %d vectors for %s with dimension %s.", numVectorsRemoved, numVectorsAdded, quantitationType, scd ) ); } @@ -224,31 +254,69 @@ public List getSingleCellDimensions( ExpressionExperiment e @Override @Transactional - public SingleCellDimension relabelCellTypes( ExpressionExperiment ee, SingleCellDimension dimension, List newCellTypeLabels ) { + public CellTypeLabelling relabelCellTypes( ExpressionExperiment ee, SingleCellDimension dimension, List newCellTypeLabels, Protocol protocol, String description ) { Assert.notNull( ee.getId(), "Dataset must be persistent." ); Assert.notNull( dimension.getId(), "Single-cell dimension must be persistent." 
); - SingleCellDimension newDimension = new SingleCellDimension(); - newDimension.getCellIds().addAll( dimension.getCellIds() ); - newDimension.setNumberOfCells( dimension.getNumberOfCells() ); - newDimension.getBioAssays().addAll( dimension.getBioAssays() ); - newDimension.setBioAssaysOffset( dimension.getBioAssaysOffset() ); + Assert.isTrue( ee.getBioAssays().containsAll( dimension.getBioAssays() ), "Single-cell dimension does not belong to the dataset." ); + CellTypeLabelling labelling = new CellTypeLabelling(); + labelling.setPreferred( true ); + labelling.setProtocol( protocol ); + labelling.setDescription( description ); int[] ct = new int[dimension.getCellIds().size()]; List labels = newCellTypeLabels.stream().sorted().distinct().collect( Collectors.toList() ); for ( int i = 0; i < ct.length; i++ ) { ct[i] = Collections.binarySearch( labels, newCellTypeLabels.get( i ) ); } - newDimension.setCellTypes( ct ); - newDimension.setCellTypeLabels( labels ); - newDimension.setNumberOfCellTypeLabels( labels.size() ); - validateSingleCellDimension( ee, newDimension ); - expressionExperimentDao.createSingleCellDimension( ee, newDimension ); - int updatedVectors = expressionExperimentDao.replaceSingleCellDimension( ee, dimension, newDimension ); - if ( updatedVectors == 0 ) { - throw new IllegalStateException( "There are no vectors with the dimension: " + dimension + ", cannot relabel cell types." 
); + labelling.setCellTypes( ct ); + labelling.setCellTypeLabels( labels.stream() + .map( l -> Characteristic.Factory.newInstance( Categories.CELL_TYPE, l, null ) ) + .collect( Collectors.toList() ) ); + labelling.setNumberOfCellTypeLabels( labels.size() ); + expressionExperimentDao.addCellTypeLabelling( ee, dimension, labelling ); + validateSingleCellDimension( ee, dimension ); + log.info( "Relabelled single-cell vectors for " + ee + " with: " + labelling ); + + // checking labelling.isPreferred() is not enough, the labelling might apply to non-preferred vectors + if ( labelling.equals( getPreferredCellTypeLabelling( ee ) ) ) { + log.info( "New labels are preferred and also apply to preferred single-cell vectors, recreating the cell type factor..." ); + recreateCellTypeFactor( ee, labelling ); + } + + return labelling; + } + + @Override + @Transactional + public void removeCellTypeLabels( ExpressionExperiment ee, SingleCellDimension dimension, CellTypeLabelling cellTypeLabelling ) { + Assert.notNull( ee.getId(), "Dataset must be persistent." ); + Assert.notNull( dimension.getId(), "Single-cell dimension must be persistent." ); + Assert.isTrue( ee.getBioAssays().containsAll( dimension.getBioAssays() ), "Single-cell dimension does not belong to the dataset." ); + Assert.isTrue( dimension.getCellTypeLabellings().contains( cellTypeLabelling ), + "The supplied labelling does not belong to the dimension." ); + boolean alsoRemoveFactor = cellTypeLabelling.equals( getPreferredCellTypeLabelling( ee ) ); + dimension.getCellTypeLabellings().remove( cellTypeLabelling ); + if ( alsoRemoveFactor ) { + log.info( "The preferred cell type labels have been removed, removing the cell type factor..." 
); + removeCellTypeFactorIfExists( ee ); } - expressionExperimentDao.deleteSingleCellDimension( ee, dimension ); - log.info( "Relabelled " + updatedVectors + " single-cell vectors for " + ee + " with new dimension: " + newDimension ); - return newDimension; + } + + @Override + @Transactional(readOnly = true) + public List getCellTypeLabellings( ExpressionExperiment ee ) { + return expressionExperimentDao.getCellTypeLabellings( ee ); + } + + @Override + @Transactional(readOnly = true) + public CellTypeLabelling getPreferredCellTypeLabelling( ExpressionExperiment ee ) { + return expressionExperimentDao.getPreferredCellTypeLabelling( ee ); + } + + @Override + @Transactional(readOnly = true) + public List getCellTypes( ExpressionExperiment ee ) { + return expressionExperimentDao.getCellTypes( ee ); } /** @@ -260,28 +328,75 @@ private void validateSingleCellDimension( ExpressionExperiment ee, SingleCellDim "Cell IDs must be unique." ); Assert.isTrue( scbad.getCellIds().size() == scbad.getNumberOfCells(), "The number of cell IDs must match the number of cells." ); - if ( scbad.getCellTypes() != null ) { - Assert.notNull( scbad.getNumberOfCellTypeLabels() ); - Assert.notNull( scbad.getCellTypeLabels() ); - Assert.isTrue( scbad.getCellTypes().length == scbad.getCellIds().size(), + Assert.isTrue( scbad.getCellTypeLabellings().stream().filter( CellTypeLabelling::isPreferred ).count() <= 1, + "There must be at most one preferred cell type labelling." ); + for ( CellTypeLabelling labelling : scbad.getCellTypeLabellings() ) { + Assert.notNull( labelling.getNumberOfCellTypeLabels() ); + Assert.notNull( labelling.getCellTypeLabels() ); + Assert.isTrue( labelling.getCellTypes().length == scbad.getCellIds().size(), "The number of cell types must match the number of cell IDs." 
); - int numberOfCellTypeLabels = scbad.getCellTypeLabels().size(); + int numberOfCellTypeLabels = labelling.getCellTypeLabels().size(); Assert.isTrue( numberOfCellTypeLabels > 0, "There must be at least one cell type label declared in the cellTypeLabels collection." ); - Assert.isTrue( scbad.getCellTypeLabels().stream().distinct().count() == scbad.getCellTypeLabels().size(), + Assert.isTrue( labelling.getCellTypeLabels().stream().distinct().count() == labelling.getCellTypeLabels().size(), "Cell type labels must be unique." ); - Assert.isTrue( numberOfCellTypeLabels == scbad.getNumberOfCellTypeLabels(), + Assert.isTrue( numberOfCellTypeLabels == labelling.getNumberOfCellTypeLabels(), "The number of cell types must match the number of values the cellTypeLabels collection." ); - for ( int k : scbad.getCellTypes() ) { + for ( int k : labelling.getCellTypes() ) { Assert.isTrue( k >= 0 && k < numberOfCellTypeLabels, String.format( "Cell type vector values must be within the [%d, %d[ range.", 0, numberOfCellTypeLabels ) ); } - } else { - Assert.isNull( scbad.getCellTypeLabels() ); - Assert.isNull( scbad.getNumberOfCellTypeLabels(), "There is no cell types assigned, the number of cell types must be null." ); } Assert.isTrue( !scbad.getBioAssays().isEmpty(), "There must be at least one BioAssay." ); Assert.isTrue( ee.getBioAssays().containsAll( scbad.getBioAssays() ), "Not all supplied BioAssays belong to " + ee ); validateSparseRangeArray( scbad.getBioAssays(), scbad.getBioAssaysOffset(), scbad.getNumberOfCells() ); } + + + @Override + @Transactional + public ExperimentalFactor recreateCellTypeFactor( ExpressionExperiment ee ) { + CellTypeLabelling ctl = getPreferredCellTypeLabelling( ee ); + Assert.notNull( ctl, "There must be a preferred cell type labelling for " + ee + " to update the cell type factor." 
); + return recreateCellTypeFactor( ee, ctl ); + } + + private ExperimentalFactor recreateCellTypeFactor( ExpressionExperiment ee, CellTypeLabelling ctl ) { + removeCellTypeFactorIfExists( ee ); + // create a new cell type factor + ExperimentalFactor cellTypeFactor = ExperimentalFactor.Factory.newInstance(); + cellTypeFactor.setType( FactorType.CATEGORICAL ); + cellTypeFactor.setCategory( Characteristic.Factory.newInstance( Categories.CELL_TYPE ) ); + cellTypeFactor.setExperimentalDesign( ee.getExperimentalDesign() ); + ee.getExperimentalDesign().getExperimentalFactors().add( cellTypeFactor ); + for ( Characteristic ct : ctl.getCellTypeLabels() ) { + FactorValue fv = new FactorValue(); + Statement s = new Statement(); + s.setCategory( ct.getCategory() ); + s.setCategoryUri( ct.getCategoryUri() ); + s.setSubject( ct.getValue() ); + s.setSubjectUri( ct.getValueUri() ); + fv.getCharacteristics().add( s ); + fv.setExperimentalFactor( cellTypeFactor ); + cellTypeFactor.getFactorValues().add( fv ); + } + + return experimentalFactorService.create( cellTypeFactor ); + } + + private void removeCellTypeFactorIfExists( ExpressionExperiment ee ) { + ExperimentalFactor existingCellTypeFactor = ee.getExperimentalDesign().getExperimentalFactors().stream() + .filter( ef -> ef.getCategory() != null ) + .filter( ef -> CharacteristicUtils.equals( ef.getCategory().getCategory(), ef.getCategory().getCategoryUri(), + Categories.CELL_TYPE.getCategory(), Categories.CELL_TYPE.getCategoryUri() ) ) + .findFirst() + .orElse( null ); + if ( existingCellTypeFactor != null ) { + // this will remove analysis involving the factor and also sample-fv associations + log.info( "Removing existing cell type factor for " + ee ); + experimentalFactorService.remove( existingCellTypeFactor ); + } else { + log.info( "There's no cell type factor for " + ee ); + } + } } diff --git a/gemma-core/src/main/resources/ubic/gemma/model/analysis/Analysis.hbm.xml 
b/gemma-core/src/main/resources/ubic/gemma/model/analysis/Analysis.hbm.xml index d8b840107e..51f001613f 100644 --- a/gemma-core/src/main/resources/ubic/gemma/model/analysis/Analysis.hbm.xml +++ b/gemma-core/src/main/resources/ubic/gemma/model/analysis/Analysis.hbm.xml @@ -142,5 +142,28 @@ + + + + + + + + int + + + + + + + + + + + + + \ No newline at end of file diff --git a/gemma-core/src/main/resources/ubic/gemma/model/expression/bioAssayData/SingleCellDimension.hbm.xml b/gemma-core/src/main/resources/ubic/gemma/model/expression/bioAssayData/SingleCellDimension.hbm.xml index 3afd549558..f673b92e86 100644 --- a/gemma-core/src/main/resources/ubic/gemma/model/expression/bioAssayData/SingleCellDimension.hbm.xml +++ b/gemma-core/src/main/resources/ubic/gemma/model/expression/bioAssayData/SingleCellDimension.hbm.xml @@ -20,24 +20,12 @@ - - - - int - - - - + + - - - - - - - - + + diff --git a/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/SingleCellExpressionExperimentServiceTest.java b/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/SingleCellExpressionExperimentServiceTest.java index 5d415405c7..79728e4a46 100644 --- a/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/SingleCellExpressionExperimentServiceTest.java +++ b/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/SingleCellExpressionExperimentServiceTest.java @@ -2,6 +2,7 @@ import gemma.gsec.SecurityService; import org.apache.commons.lang3.RandomStringUtils; +import org.hibernate.NonUniqueResultException; import org.hibernate.SessionFactory; import org.junit.After; import org.junit.Before; @@ -18,13 +19,18 @@ import ubic.gemma.model.common.auditAndSecurity.eventType.DataAddedEvent; import ubic.gemma.model.common.auditAndSecurity.eventType.DataRemovedEvent; import ubic.gemma.model.common.auditAndSecurity.eventType.DataReplacedEvent; +import ubic.gemma.model.common.description.Categories; +import 
ubic.gemma.model.common.description.Characteristic; import ubic.gemma.model.common.quantitationtype.*; import ubic.gemma.model.expression.arrayDesign.ArrayDesign; import ubic.gemma.model.expression.bioAssay.BioAssay; +import ubic.gemma.model.expression.bioAssayData.CellTypeLabelling; import ubic.gemma.model.expression.bioAssayData.SingleCellDimension; import ubic.gemma.model.expression.bioAssayData.SingleCellExpressionDataVector; import ubic.gemma.model.expression.biomaterial.BioMaterial; import ubic.gemma.model.expression.designElement.CompositeSequence; +import ubic.gemma.model.expression.experiment.ExperimentalDesign; +import ubic.gemma.model.expression.experiment.ExperimentalFactor; import ubic.gemma.model.expression.experiment.ExpressionExperiment; import ubic.gemma.model.genome.Taxon; import ubic.gemma.persistence.service.analysis.expression.coexpression.CoexpressionAnalysisService; @@ -187,6 +193,7 @@ public void setUp() { } sessionFactory.getCurrentSession().persist( ad ); ee = new ExpressionExperiment(); + ee.setExperimentalDesign( new ExperimentalDesign() ); ee.setTaxon( taxon ); // TODO: model bioassays against sub-biomaterial to represent cell aggregates BioMaterial bm = BioMaterial.Factory.newInstance( "a", taxon ); @@ -218,8 +225,8 @@ public void testAddSingleCellDataVectors() { assertThat( scExpressionExperimentService.getSingleCellDimensions( ee ) ) .hasSize( 1 ) .allSatisfy( scd -> { - assertThat( scd.getCellTypeLabel( 0 ) ).isEqualTo( "A" ); - assertThat( scd.getCellTypeLabel( 50 ) ).isEqualTo( "B" ); + assertThat( scd.getCellTypeLabellings().iterator().next().getCellTypeLabel( 0 ).getValue() ).isEqualTo( "A" ); + assertThat( scd.getCellTypeLabellings().iterator().next().getCellTypeLabel( 50 ).getValue() ).isEqualTo( "B" ); } ); Collection vectors2 = createSingleCellVectors( true ); @@ -339,15 +346,76 @@ public void testRelabelCellTypes() { SingleCellDimension scd = vectors.iterator().next().getSingleCellDimension(); 
scExpressionExperimentService.addSingleCellDataVectors( ee, qt, vectors ); sessionFactory.getCurrentSession().flush(); + assertThat( scExpressionExperimentService.getCellTypeLabellings( ee ) ) + .hasSize( 1 ); + assertThat( scExpressionExperimentService.getPreferredCellTypeLabelling( ee ) ).isNotNull(); + assertThat( scExpressionExperimentService.getCellTypes( ee ) ).hasSize( 2 ) + .extracting( Characteristic::getValue ) + .containsExactlyInAnyOrder( "A", "B" ); String[] ct = new String[100]; for ( int i = 0; i < ct.length; i++ ) { ct[i] = i < 75 ? "A" : "B"; } - SingleCellDimension newScd = scExpressionExperimentService.relabelCellTypes( ee, scd, Arrays.asList( ct ) ); - assertThat( newScd.getId() ).isNotNull(); + CellTypeLabelling newLabelling = scExpressionExperimentService.relabelCellTypes( ee, scd, Arrays.asList( ct ), null, null ); + assertThat( newLabelling ).satisfies( l -> { + assertThat( l.getId() ).isNotNull(); + assertThat( l.isPreferred() ).isTrue(); + } ); assertThat( ee.getSingleCellExpressionDataVectors() ) .hasSize( 10 ) - .allSatisfy( v -> assertThat( v.getSingleCellDimension() ).isEqualTo( newScd ) ); + .allSatisfy( v -> assertThat( v.getSingleCellDimension().getCellTypeLabellings() ).contains( newLabelling ) ); + assertThat( scExpressionExperimentService.getCellTypeLabellings( ee ) ) + .hasSize( 1 ) + .contains( newLabelling ); + assertThat( scExpressionExperimentService.getPreferredCellTypeLabelling( ee ) ).isEqualTo( newLabelling ); + assertThat( scExpressionExperimentService.getCellTypes( ee ) ).hasSize( 2 ) + .extracting( Characteristic::getValue ) + .containsExactlyInAnyOrder( "A", "B" ); + + scExpressionExperimentService.removeCellTypeLabels( ee, scd, newLabelling ); + assertThat( scExpressionExperimentService.getPreferredCellTypeLabelling( ee ) ).isNull(); + + // FIXME: add proper assertions for the created factor, but the ExperimentalFactorService is mocked + verify( experimentalFactorService, times( 2 ) ).create( any( 
ExperimentalFactor.class ) ); + verify( experimentalFactorService, times( 2 ) ).remove( any( ExperimentalFactor.class ) ); + } + + /** + * This is a behaviour test for when the labelling is not unique. This can be caused by multiple preferred single-cell QTs + * or multiple preferred cell type labellings. + */ + @Test + public void testGetPreferredCellTypeLabellingWhenNonUnique() { + Collection vectors = createSingleCellVectors( true ); + QuantitationType qt = vectors.iterator().next().getQuantitationType(); + scExpressionExperimentService.addSingleCellDataVectors( ee, qt, vectors ); + + Collection vectors2 = createSingleCellVectors( true ); + QuantitationType qt2 = vectors2.iterator().next().getQuantitationType(); + scExpressionExperimentService.addSingleCellDataVectors( ee, qt2, vectors2 ); + assertThat( qt.getIsPreferred() ).isFalse(); + assertThat( qt2.getIsPreferred() ).isTrue(); + + // now we're going to do something really bad... + qt.setIsPreferred( true ); + + assertThatThrownBy( () -> scExpressionExperimentService.getPreferredCellTypeLabelling( ee ) ) + .isInstanceOf( NonUniqueResultException.class ); + } + + @Autowired + private ExperimentalFactorService experimentalFactorService; + + @Test + public void testRecreateCellTypeFactor() { + when( experimentalFactorService.create( any( ExperimentalFactor.class ) ) ).thenAnswer( a -> a.getArgument( 0 ) ); + Collection vectors = createSingleCellVectors( true ); + scExpressionExperimentService.addSingleCellDataVectors( ee, vectors.iterator().next().getQuantitationType(), vectors ); + ExperimentalFactor factor = scExpressionExperimentService.recreateCellTypeFactor( ee ); + assertThat( factor.getCategory() ).isNotNull().satisfies( f -> { + assertThat( f.getCategory() ).isEqualTo( "cell type" ); + assertThat( f.getCategoryUri() ).isEqualTo( "http://www.ebi.ac.uk/efo/EFO_0000324" ); + } ); } private SingleCellDimension createSingleCellDimension() { @@ -358,9 +426,14 @@ private SingleCellDimension 
createSingleCellDimension() { for ( int i = 0; i < ct.length; i++ ) { ct[i] = i < 50 ? 0 : 1; } - scd.setCellTypes( ct ); - scd.setCellTypeLabels( Arrays.asList( "A", "B" ) ); - scd.setNumberOfCellTypeLabels( 2 ); + CellTypeLabelling labelling = new CellTypeLabelling(); + labelling.setPreferred( true ); + labelling.setCellTypes( ct ); + labelling.setCellTypeLabels( Arrays.asList( + Characteristic.Factory.newInstance( Categories.CELL_TYPE, "A", null ), + Characteristic.Factory.newInstance( Categories.CELL_TYPE, "B", null ) ) ); + labelling.setNumberOfCellTypeLabels( 2 ); + scd.getCellTypeLabellings().add( labelling ); scd.getBioAssays().addAll( ee.getBioAssays() ); scd.setBioAssaysOffset( new int[] { 0, 25, 50, 75 } ); return scd;