From d992d87d0687e460c095a32458b75f4753abb351 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Mon, 22 Apr 2024 16:29:17 -0700 Subject: [PATCH 01/19] Update versions for hotfix --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index d2d59c4b..4afe4b2e 100644 --- a/pom.xml +++ b/pom.xml @@ -5,7 +5,7 @@ baseCode baseCode baseCode - 1.1.21 + 1.1.22-SNAPSHOT 2003 From 762d28b3cc29e07a6cb91ac13abf3853c7f466fb Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Mon, 22 Apr 2024 15:17:25 -0700 Subject: [PATCH 02/19] Few performance optimization based on Gemma profiling Avoid using getLabel() when hashing or comparing ontology resources unless both URIs are null. Lazily cache a label in getLabel(). Ensure that parents or children are collected in a set. --- .../jena/AbstractOntologyResource.java | 25 +++++++++---- .../ontology/jena/AnnotationPropertyImpl.java | 37 +++++++++---------- .../basecode/ontology/jena/JenaUtils.java | 12 +++--- .../jena/RestrictionWithValuesFromFilter.java | 6 +-- 4 files changed, 44 insertions(+), 36 deletions(-) diff --git a/src/ubic/basecode/ontology/jena/AbstractOntologyResource.java b/src/ubic/basecode/ontology/jena/AbstractOntologyResource.java index 98ccca73..bd06b312 100644 --- a/src/ubic/basecode/ontology/jena/AbstractOntologyResource.java +++ b/src/ubic/basecode/ontology/jena/AbstractOntologyResource.java @@ -45,6 +45,9 @@ abstract class AbstractOntologyResource implements OntologyResource { @Nullable private final Double score; + private String _label; + private boolean _isLabelNull = false; + protected AbstractOntologyResource( OntResource resource ) { this.res = resource; this.score = null; @@ -67,10 +70,15 @@ public String getLocalName() { @Override public String getLabel() { + if ( _label != null || _isLabelNull ) { + return _label; + } String label = res.getLabel( "EN" ); if ( label == null ) { label = res.getLabel( null ); } + _label = label; + _isLabelNull = 
label == null; return label; } @@ -104,19 +112,20 @@ public int compareTo( OntologyResource other ) { public boolean equals( Object obj ) { if ( this == obj ) return true; if ( obj == null ) return false; - if ( getClass() != obj.getClass() ) return false; + if ( !( obj instanceof OntologyResource ) ) { + return false; + } final OntologyResource other = ( OntologyResource ) obj; - if ( getLabel() == null ) { - if ( other.getLabel() != null ) return false; - } else if ( !getLabel().equals( other.getLabel() ) ) return false; - if ( getUri() == null ) { - return other.getUri() == null; - } else return getUri().equals( other.getUri() ); + if ( getUri() == null && other.getUri() == null ) { + return Objects.equals( getLabel(), other.getLabel() ); + } else { + return Objects.equals( getUri(), other.getUri() ); + } } @Override public int hashCode() { - return Objects.hash( getLabel(), getUri() ); + return Objects.hash( getUri() ); } @Override diff --git a/src/ubic/basecode/ontology/jena/AnnotationPropertyImpl.java b/src/ubic/basecode/ontology/jena/AnnotationPropertyImpl.java index e920ec21..2e008f0d 100644 --- a/src/ubic/basecode/ontology/jena/AnnotationPropertyImpl.java +++ b/src/ubic/basecode/ontology/jena/AnnotationPropertyImpl.java @@ -25,6 +25,7 @@ import ubic.basecode.ontology.model.AnnotationProperty; import javax.annotation.Nullable; +import java.util.Objects; /** * Note that this is a concrete instance of the annotation. 
@@ -47,20 +48,6 @@ public AnnotationPropertyImpl( com.hp.hpl.jena.ontology.AnnotationProperty prop, this.object = object; } - @Override - public boolean equals( @Nullable Object obj ) { - if ( this == obj ) return true; - if ( obj == null ) return false; - if ( getClass() != obj.getClass() ) return false; - final AnnotationPropertyImpl other = ( AnnotationPropertyImpl ) obj; - if ( object == null ) { - if ( other.object != null ) return false; - } else if ( !object.equals( other.object ) ) return false; - if ( property == null ) { - return other.property == null; - } else return property.equals( other.property ); - } - @Override public String getProperty() { if ( property.getLabel( null ) != null ) { @@ -95,13 +82,25 @@ public boolean isObsolete() { return super.isObsolete() || property.hasSuperProperty( OBO.ObsoleteProperty, false ); } + @Override + public boolean equals( @Nullable Object obj ) { + if ( this == obj ) return true; + if ( obj == null ) return false; + if ( obj instanceof AnnotationPropertyImpl ) { + final AnnotationPropertyImpl other = ( AnnotationPropertyImpl ) obj; + return super.equals( other ) + && Objects.equals( object, other.object ); + } else if ( obj instanceof AnnotationProperty ) { + final AnnotationProperty other = ( AnnotationProperty ) obj; + return super.equals( other ) + && Objects.equals( getContents(), other.getContents() ); + } + return false; + } + @Override public int hashCode() { - final int PRIME = 31; - int result = 1; - result = PRIME * result + ( ( object == null ) ? 0 : object.hashCode() ); - result = PRIME * result + ( ( property == null ) ? 
0 : property.hashCode() ); - return result; + return Objects.hash( super.hashCode(), object ); } @Override diff --git a/src/ubic/basecode/ontology/jena/JenaUtils.java b/src/ubic/basecode/ontology/jena/JenaUtils.java index 85284ca4..11edc116 100644 --- a/src/ubic/basecode/ontology/jena/JenaUtils.java +++ b/src/ubic/basecode/ontology/jena/JenaUtils.java @@ -46,7 +46,7 @@ private static Collection getParentsInternal( OntModel model, Collecti .map( t -> as( t, OntClass.class ) ) .filter( Optional::isPresent ) .map( Optional::get ) - .collect( Collectors.toList() ); + .collect( Collectors.toSet() ); if ( ontClasses.isEmpty() ) { return Collections.emptySet(); } @@ -104,18 +104,18 @@ public static Collection getChildren( OntModel model, Collection getChildrenInternal( OntModel model, Collection terms, boolean direct, @Nullable Set additionalRestrictions ) { - terms = terms.stream() + Set termsSet = terms.stream() .map( t -> t.inModel( model ) ) .filter( t -> t.canAs( OntClass.class ) ) .map( t -> as( t, OntClass.class ) ) .filter( Optional::isPresent ) .map( Optional::get ) - .collect( Collectors.toList() ); - if ( terms.isEmpty() ) { + .collect( Collectors.toSet() ); + if ( termsSet.isEmpty() ) { return Collections.emptySet(); } StopWatch timer = StopWatch.createStarted(); - Iterator it = terms.iterator(); + Iterator it = termsSet.iterator(); ExtendedIterator iterator = it.next().listSubClasses( direct ); while ( it.hasNext() ) { iterator = iterator.andThen( it.next().listSubClasses( direct ) ); @@ -132,7 +132,7 @@ public static Collection getChildrenInternal( OntModel model, Collecti subClassOf = ResourceFactory.createProperty( makeDirect( subClassOf.getURI() ) ); } Set restrictions = UniqueExtendedIterator.create( additionalRestrictions.iterator() ) - .filterKeep( new RestrictionWithValuesFromFilter( terms ) ) + .filterKeep( new RestrictionWithValuesFromFilter( termsSet ) ) .toSet(); for ( Restriction r : restrictions ) { result.addAll( 
model.listResourcesWithProperty( subClassOf, r ) diff --git a/src/ubic/basecode/ontology/jena/RestrictionWithValuesFromFilter.java b/src/ubic/basecode/ontology/jena/RestrictionWithValuesFromFilter.java index 1eafd03c..4b3e645c 100644 --- a/src/ubic/basecode/ontology/jena/RestrictionWithValuesFromFilter.java +++ b/src/ubic/basecode/ontology/jena/RestrictionWithValuesFromFilter.java @@ -4,16 +4,16 @@ import com.hp.hpl.jena.rdf.model.Resource; import com.hp.hpl.jena.util.iterator.Filter; -import java.util.Collection; +import java.util.Set; /** * Match {@link Restriction} with values from any of the given resources. */ class RestrictionWithValuesFromFilter extends Filter { - private final Collection resource; + private final Set resource; - public RestrictionWithValuesFromFilter( Collection resource ) { + public RestrictionWithValuesFromFilter( Set resource ) { this.resource = resource; } From c438cc1bf03285201922baab5d2305b2b0829b2f Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Tue, 23 Apr 2024 15:55:50 -0700 Subject: [PATCH 03/19] Remove rdfs:comment from list of indexed statements --- src/ubic/basecode/ontology/jena/IndexerSelector.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/ubic/basecode/ontology/jena/IndexerSelector.java b/src/ubic/basecode/ontology/jena/IndexerSelector.java index cf52a0ba..2be1be42 100644 --- a/src/ubic/basecode/ontology/jena/IndexerSelector.java +++ b/src/ubic/basecode/ontology/jena/IndexerSelector.java @@ -43,7 +43,6 @@ class IndexerSelector implements Selector { static { wantedForIndexing = new HashSet<>(); wantedForIndexing.add( RDFS.label ); - wantedForIndexing.add( RDFS.comment ); wantedForIndexing.add( OBO.id ); wantedForIndexing.add( OBO.hasDbXref ); From 0e73768b09388e94bab012e44fdf752d567fc855 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Wed, 24 Apr 2024 16:03:01 -0700 Subject: [PATCH 04/19] Update commons-io to 2.16.1 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/pom.xml b/pom.xml index 4afe4b2e..0f5bee2f 100644 --- a/pom.xml +++ b/pom.xml @@ -81,7 +81,7 @@ commons-io commons-io - 2.15.1 + 2.16.1 org.apache.commons From 5a7f0b47fbe7078a22f309c707860276e1d531e3 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Thu, 25 Apr 2024 13:59:35 -0700 Subject: [PATCH 05/19] Simplify how baseCode is configured Only read properties from 'basecode.properties', system properties starting with 'basecode.' and properties set at runtime. --- src/ubic/basecode/util/Configuration.java | 135 ++++++++---------- src/ubic/basecode/util/r/AbstractRClient.java | 2 +- src/ubic/basecode/util/r/RServeClient.java | 37 ++--- 3 files changed, 68 insertions(+), 106 deletions(-) diff --git a/src/ubic/basecode/util/Configuration.java b/src/ubic/basecode/util/Configuration.java index 3f5c030b..19641797 100644 --- a/src/ubic/basecode/util/Configuration.java +++ b/src/ubic/basecode/util/Configuration.java @@ -18,112 +18,95 @@ */ package ubic.basecode.util; -import java.util.Iterator; - -import org.apache.commons.configuration2.CompositeConfiguration; -import org.apache.commons.configuration2.PropertiesConfiguration; -import org.apache.commons.configuration2.SystemConfiguration; -import org.apache.commons.configuration2.ex.ConfigurationException; -import org.apache.commons.configuration2.io.FileHandler; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import javax.annotation.Nullable; +import java.io.IOException; +import java.io.InputStream; +import java.util.Properties; + /** * Configuration of ontology services and other things. + *
<p>
+ * Configurations are retrieved from three locations: properties set at runtime with {@link #setString(String, String)}, + * system properties and a default properties file named {@code basecode.properties} at the root of the classpath in + * that order. + *
<p>
+ * Properties set via system properties must be prefixed with {@code basecode.} to be considered. + *
<p>
+ * Properties set at runtime can be reset with {@link #reset()} and {@link #reset(String)}. * * @author paul - * */ public class Configuration { - private static CompositeConfiguration config; - - /** - * Name of the resource containing defaults - */ - private static final String DEFAULT_CONFIGURATION = "ontology.properties"; - - private static Logger log = LoggerFactory.getLogger( Configuration.class ); + private static final Logger log = LoggerFactory.getLogger( Configuration.class ); - /** - * The name of the file users can use to customize. - */ - private static final String USER_CONFIGURATION = "basecode.properties"; + private static final String SYSTEM_PROPERTY_PREFIX = "basecode."; + private static final Properties defaultProps = new Properties(); + private static final Properties props = new Properties(); static { - - config = new CompositeConfiguration(); - config.addConfiguration( new SystemConfiguration() ); - - /* - * the order matters - first come, first serve. Items added later do not overwrite items defined earlier. Thus - * the user configuration has to be listed first. - */ - - try { - // purely for backwards compatibility, if the user hasn't set up ontology.properties. - PropertiesConfiguration pc = new PropertiesConfiguration(); - FileHandler handler = new FileHandler( pc ); - handler.setFileName( "Gemma.properties" ); - handler.load(); - config.addConfiguration( pc ); - } catch ( ConfigurationException e ) { + try ( InputStream is = Configuration.class.getResourceAsStream( "/basecode.properties" ) ) { + if ( is != null ) { + defaultProps.load( is ); + } else { + log.warn( "No basecode.properties was found in the classpath, only system and manually set properties will be considered." 
); + } + } catch ( IOException e ) { + throw new RuntimeException( e ); } + } - try { - PropertiesConfiguration pc = new PropertiesConfiguration(); - FileHandler handler = new FileHandler( pc ); - handler.setFileName( USER_CONFIGURATION ); - handler.load(); - config.addConfiguration( pc ); - } catch ( ConfigurationException e ) { + /** + * Obtain a configuration value by key. + */ + @Nullable + public static String getString( String key ) { + String val = props.getProperty( key ); + if ( val == null ) { + val = System.getProperty( SYSTEM_PROPERTY_PREFIX + key ); } - - try { - PropertiesConfiguration pc = new PropertiesConfiguration(); - FileHandler handler = new FileHandler( pc ); - handler.setFileName( DEFAULT_CONFIGURATION ); - handler.load(); - config.addConfiguration( pc ); - } catch ( ConfigurationException e ) { - log.error( DEFAULT_CONFIGURATION + " is missing, ontology loading may fail" ); + if ( val == null ) { + val = defaultProps.getProperty( key ); } + return val; + } - // step through the result and do a final round of variable substitution - for ( Iterator it = config.getKeys(); it.hasNext(); ) { - String key = it.next(); - String property = config.getString( key ); - if ( property != null && property.startsWith( "${" ) && property.endsWith( "}" ) ) { - String keyToSubstitute = property.substring( 2, property.length() - 1 ); - String valueToSubstitute = config.getString( keyToSubstitute ); - log.debug( key + "=" + property + " -> " + valueToSubstitute ); - config.setProperty( key, valueToSubstitute ); - } + /** + * Obtain a boolean configuration value by key. + * + * @see Boolean#parseBoolean(String) + */ + @Nullable + public static Boolean getBoolean( String key ) { + String val = getString( key ); + if ( val != null ) { + return Boolean.parseBoolean( val ); + } else { + return null; } - } /** - * @param key - * @return + * Set a configuration by key. 
*/ - public static boolean getBoolean( String key ) { - return config.getBoolean( key, false ); + public static void setString( String key, String value ) { + props.setProperty( key, value ); } /** - * @param key - * @return + * Reset all configurations set at runtime. */ - public static String getString( String key ) { - return config.getString( key ); + public static void reset() { + props.clear(); } /** - * @param key - * @return + * Reset a specific configuration by key. */ - public static void setString( String key, Object value ) { - config.setProperty( key, value ); + public static void reset( String key ) { + props.remove( key ); } } diff --git a/src/ubic/basecode/util/r/AbstractRClient.java b/src/ubic/basecode/util/r/AbstractRClient.java index 65efc7b1..645659d9 100644 --- a/src/ubic/basecode/util/r/AbstractRClient.java +++ b/src/ubic/basecode/util/r/AbstractRClient.java @@ -622,7 +622,7 @@ public List listEval( Class listEntryType, String command ) { public boolean loadLibrary( String libraryName ) { try { - String userLibPath = Configuration.getString( "basecode.rlibpath" ); + String userLibPath = Configuration.getString( "rlibpath" ); if ( StringUtils.isNotBlank( userLibPath ) ) { voidEval( ".libPaths(" + userLibPath + ")" ); } diff --git a/src/ubic/basecode/util/r/RServeClient.java b/src/ubic/basecode/util/r/RServeClient.java index 0f823515..7fe5f4d9 100644 --- a/src/ubic/basecode/util/r/RServeClient.java +++ b/src/ubic/basecode/util/r/RServeClient.java @@ -18,15 +18,6 @@ */ package ubic.basecode.util.r; -import java.io.File; -import java.io.IOException; -import java.net.URL; -import java.util.Iterator; -import java.util.List; - -import org.apache.commons.configuration2.PropertiesConfiguration; -import org.apache.commons.configuration2.ex.ConfigurationException; -import org.apache.commons.configuration2.io.FileHandler; import org.apache.commons.lang3.StringUtils; import org.rosuda.REngine.REXP; import org.rosuda.REngine.REXPMismatchException; @@ -34,10 
+25,14 @@ import org.rosuda.REngine.RList; import org.rosuda.REngine.Rserve.RConnection; import org.rosuda.REngine.Rserve.RserveException; - import ubic.basecode.dataStructure.matrix.DenseDoubleMatrix; import ubic.basecode.dataStructure.matrix.DoubleMatrix; -import ubic.basecode.util.ConfigUtils; +import ubic.basecode.util.Configuration; + +import java.io.File; +import java.io.IOException; +import java.util.Iterator; +import java.util.List; /** * @author pavlidis @@ -56,24 +51,8 @@ public class RServeClient extends AbstractRClient { private final static String os = System.getProperty( "os.name" ).toLowerCase(); - /** - * @return - * @throws ConfigurationException - */ - protected static String findRserveCommand() throws ConfigurationException { - URL userSpecificConfigFileLocation = ConfigUtils.locate( "local.properties" ); - - PropertiesConfiguration userConfig = null; - if ( userSpecificConfigFileLocation != null ) { - userConfig = new PropertiesConfiguration(); - FileHandler handler = new FileHandler( userConfig ); - handler.setFileName( "local.properties" ); - handler.load(); - } - String rserveExecutable = null; - if ( userConfig != null ) { - rserveExecutable = userConfig.getString( "rserve.start.command" ); - } + protected static String findRserveCommand() { + String rserveExecutable = Configuration.getString( "rserve.start.command" ); if ( StringUtils.isBlank( rserveExecutable ) ) { log.info( "Rserve command not configured? Trying fallbacks" ); if ( os.startsWith( "windows" ) ) { // lower cased From 1afc021ba0ae80e35c9e03de0c07f41f104924af Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Wed, 24 Apr 2024 14:01:09 -0700 Subject: [PATCH 06/19] Few improvements and refactor for ontologies Remove jena-arq and jena-larq dependencies by reimplementing the indexing of statements. Update jena to 2.13.0. Wrap search results in a OntologySearchResult class so that we don't have to add an optional score field in the OntologyResource interface. 
Add capabilities for excluding words from stemming. --- pom.xml | 45 +-- ...ntology.properties => basecode.properties} | 0 .../jena/AbstractOntologyResource.java | 26 -- .../jena/AbstractOntologyService.java | 282 +++++++---------- src/ubic/basecode/ontology/jena/BFO.java | 8 - .../ontology/jena/IndexerSelector.java | 121 ------- .../basecode/ontology/jena/JenaUtils.java | 2 +- .../ontology/jena/ObjectPropertyImpl.java | 2 +- .../ontology/jena/OntologyIndexer.java | 295 ++++++++++++------ .../ontology/jena/OntologyIndividualImpl.java | 6 - .../ontology/jena/OntologyLoader.java | 14 +- .../ontology/jena/OntologySearch.java | 148 --------- .../jena/OntologySearchJenaException.java | 22 -- .../ontology/jena/OntologyTermImpl.java | 6 - src/ubic/basecode/ontology/jena/RO.java | 4 + .../basecode/ontology/jena/SearchIndex.java | 117 ++++--- .../ontology/model/OntologyResource.java | 8 +- .../ontology/model/OntologyTermSimple.java | 12 - .../ontology/providers/OntologyService.java | 26 +- .../ontology/search/OntologySearch.java | 32 -- .../ontology/search/OntologySearchResult.java | 51 +++ .../ontology/AbstractOntologyTest.java | 7 +- .../ontology/jena/OntologyLoaderTest.java | 3 +- .../ontology/jena/OntologySearchTest.java | 155 +++++---- .../jena/UberonOntologySearchTest.java | 15 +- .../providers/GenericOntologyServiceTest.java | 3 +- .../ontology/providers/ObiServiceTest.java | 7 +- .../providers/UberonOntologyServiceTest.java | 10 +- 28 files changed, 564 insertions(+), 863 deletions(-) rename src/{ontology.properties => basecode.properties} (100%) delete mode 100644 src/ubic/basecode/ontology/jena/BFO.java delete mode 100644 src/ubic/basecode/ontology/jena/IndexerSelector.java delete mode 100644 src/ubic/basecode/ontology/jena/OntologySearch.java delete mode 100644 src/ubic/basecode/ontology/jena/OntologySearchJenaException.java delete mode 100644 src/ubic/basecode/ontology/search/OntologySearch.java create mode 100644 
src/ubic/basecode/ontology/search/OntologySearchResult.java diff --git a/pom.xml b/pom.xml index 0f5bee2f..d319b159 100644 --- a/pom.xml +++ b/pom.xml @@ -144,45 +144,7 @@ org.apache.jena jena-core - 2.7.4 - - - log4j - log4j - - - org.slf4j - slf4j-log4j12 - - - - - - - - org.apache.jena - jena-larq - 1.0.0-incubating - - - log4j - log4j - - - org.slf4j - slf4j-log4j12 - - - icu4j - com.ibm.icu - - - - - org.apache.jena - jena-arq - 2.9.4 + 2.13.0 log4j @@ -264,6 +226,11 @@ 4.11.0 test + + org.assertj + assertj-core + 3.25.3 + org.apache.logging.log4j log4j-core diff --git a/src/ontology.properties b/src/basecode.properties similarity index 100% rename from src/ontology.properties rename to src/basecode.properties diff --git a/src/ubic/basecode/ontology/jena/AbstractOntologyResource.java b/src/ubic/basecode/ontology/jena/AbstractOntologyResource.java index bd06b312..2873264e 100644 --- a/src/ubic/basecode/ontology/jena/AbstractOntologyResource.java +++ b/src/ubic/basecode/ontology/jena/AbstractOntologyResource.java @@ -25,11 +25,8 @@ import ubic.basecode.ontology.model.OntologyResource; import javax.annotation.Nullable; -import java.util.Comparator; import java.util.Objects; -import static java.util.Comparator.*; - /** * @author pavlidis */ @@ -37,25 +34,13 @@ abstract class AbstractOntologyResource implements OntologyResource { protected static final Logger log = LoggerFactory.getLogger( AbstractOntologyResource.class ); - private static final Comparator comparator = Comparator - .comparing( OntologyResource::getScore, nullsLast( reverseOrder() ) ) - .thenComparing( OntologyResource::getUri, nullsLast( naturalOrder() ) ); - private final OntResource res; - @Nullable - private final Double score; private String _label; private boolean _isLabelNull = false; protected AbstractOntologyResource( OntResource resource ) { this.res = resource; - this.score = null; - } - - public AbstractOntologyResource( OntResource resource, double score ) { - this.res = resource; - 
this.score = score; } @Override @@ -97,17 +82,6 @@ public boolean isObsolete() { return res.hasLiteral( OWL2.deprecated, true ); } - @Override - @Nullable - public Double getScore() { - return score; - } - - @Override - public int compareTo( OntologyResource other ) { - return Objects.compare( this, other, comparator ); - } - @Override public boolean equals( Object obj ) { if ( this == obj ) return true; diff --git a/src/ubic/basecode/ontology/jena/AbstractOntologyService.java b/src/ubic/basecode/ontology/jena/AbstractOntologyService.java index 4caa2851..7a430f9c 100644 --- a/src/ubic/basecode/ontology/jena/AbstractOntologyService.java +++ b/src/ubic/basecode/ontology/jena/AbstractOntologyService.java @@ -20,12 +20,12 @@ package ubic.basecode.ontology.jena; import com.hp.hpl.jena.ontology.*; -import com.hp.hpl.jena.rdf.arp.ARPErrorNumbers; -import com.hp.hpl.jena.rdf.arp.ParseException; import com.hp.hpl.jena.rdf.model.NodeIterator; import com.hp.hpl.jena.rdf.model.Property; import com.hp.hpl.jena.rdf.model.Resource; import com.hp.hpl.jena.rdf.model.ResourceFactory; +import com.hp.hpl.jena.rdfxml.xmlinput.ARPErrorNumbers; +import com.hp.hpl.jena.rdfxml.xmlinput.ParseException; import com.hp.hpl.jena.util.iterator.ExtendedIterator; import com.hp.hpl.jena.vocabulary.DC_11; import org.apache.commons.lang3.RandomStringUtils; @@ -39,6 +39,7 @@ import ubic.basecode.ontology.model.OntologyTerm; import ubic.basecode.ontology.providers.OntologyService; import ubic.basecode.ontology.search.OntologySearchException; +import ubic.basecode.ontology.search.OntologySearchResult; import ubic.basecode.util.Configuration; import javax.annotation.Nullable; @@ -47,9 +48,6 @@ import java.io.InterruptedIOException; import java.nio.channels.ClosedByInterruptException; import java.util.*; -import java.util.concurrent.locks.Lock; -import java.util.concurrent.locks.ReadWriteLock; -import java.util.concurrent.locks.ReentrantReadWriteLock; import java.util.function.Predicate; import 
java.util.stream.Collectors; @@ -72,26 +70,22 @@ public abstract class AbstractOntologyService implements OntologyService { static { DEFAULT_ADDITIONAL_PROPERTIES = new HashSet<>(); - DEFAULT_ADDITIONAL_PROPERTIES.add( BFO.partOf.getURI() ); + DEFAULT_ADDITIONAL_PROPERTIES.add( RO.partOf.getURI() ); DEFAULT_ADDITIONAL_PROPERTIES.add( RO.properPartOf.getURI() ); } /** - * Lock used to prevent reads while the ontology is being initialized. - */ - private final ReadWriteLock rwLock = new ReentrantReadWriteLock(); - - /** - * Internal state protected by {@link #rwLock}. + * Internal state. */ @Nullable - private State state = null; + private volatile State state = null; /* settings (applicable for next initialization) */ private LanguageLevel languageLevel = LanguageLevel.FULL; private InferenceMode inferenceMode = InferenceMode.TRANSITIVE; private boolean processImports = true; private boolean searchEnabled = true; + private Set excludedWordsFromStemming = Collections.emptySet(); private Set additionalPropertyUris = DEFAULT_ADDITIONAL_PROPERTIES; @Override @@ -150,6 +144,16 @@ public void setSearchEnabled( boolean searchEnabled ) { this.searchEnabled = searchEnabled; } + @Override + public Set getExcludedWordsFromStemming() { + return getState().map( state -> state.excludedWordsFromStemming ).orElse( excludedWordsFromStemming ); + } + + @Override + public void setExcludedWordsFromStemming( Set excludedWordsFromStemming ) { + this.excludedWordsFromStemming = excludedWordsFromStemming; + } + @Override public Set getAdditionalPropertyUris() { return getState().map( state -> state.additionalPropertyUris ).orElse( additionalPropertyUris ); @@ -168,7 +172,7 @@ public void initialize( InputStream stream, boolean forceIndexing ) { initialize( stream, true, forceIndexing ); } - private void initialize( @Nullable InputStream stream, boolean forceLoad, boolean forceIndexing ) { + private synchronized void initialize( @Nullable InputStream stream, boolean forceLoad, boolean 
forceIndexing ) { if ( !forceLoad && state != null ) { log.warn( "{} is already loaded, and force=false, not restarting", this ); return; @@ -184,6 +188,7 @@ private void initialize( @Nullable InputStream stream, boolean forceLoad, boolea InferenceMode inferenceMode = this.inferenceMode; boolean processImports = this.processImports; boolean searchEnabled = this.searchEnabled; + Set excludedWordsFromStemming = this.excludedWordsFromStemming; // Detect configuration problems. if ( StringUtils.isBlank( ontologyUrl ) ) { @@ -211,7 +216,7 @@ private void initialize( @Nullable InputStream stream, boolean forceLoad, boolea SearchIndex index; // loading the model from disk or URL is lengthy - if ( checkIfInterrupted() ) + if ( Thread.currentThread().isInterrupted() ) return; try { @@ -223,6 +228,8 @@ private void initialize( @Nullable InputStream stream, boolean forceLoad, boolea } } catch ( Exception e ) { if ( isCausedByInterrupt( e ) ) { + // make sure that the thread is interrupted + Thread.currentThread().interrupt(); return; } else { throw new RuntimeException( String.format( "Failed to load ontology model for %s.", this ), e ); @@ -230,7 +237,7 @@ private void initialize( @Nullable InputStream stream, boolean forceLoad, boolea } // retrieving restrictions is lengthy - if ( checkIfInterrupted() ) + if ( Thread.currentThread().isInterrupted() ) return; // compute additional restrictions @@ -239,18 +246,17 @@ private void initialize( @Nullable InputStream stream, boolean forceLoad, boolea .toSet(); // indexing is lengthy, don't bother if we're interrupted - if ( checkIfInterrupted() ) + if ( Thread.currentThread().isInterrupted() ) return; if ( searchEnabled && cacheName != null ) { //Checks if the current ontology has changed since it was last loaded. 
boolean changed = OntologyLoader.hasChanged( cacheName ); - boolean indexExists = OntologyIndexer.getSubjectIndex( cacheName ) != null; + boolean indexExists = OntologyIndexer.getSubjectIndex( cacheName, excludedWordsFromStemming ) != null; boolean forceReindexing = forceLoad && forceIndexing; // indexing is slow, don't do it if we don't have to. try { - index = OntologyIndexer.indexOntology( cacheName, model, - forceReindexing || changed || !indexExists ); + index = OntologyIndexer.indexOntology( cacheName, model, excludedWordsFromStemming, forceReindexing || changed || !indexExists ); } catch ( Exception e ) { if ( isCausedByInterrupt( e ) ) { return; @@ -263,23 +269,17 @@ private void initialize( @Nullable InputStream stream, boolean forceLoad, boolea } // if interrupted, we don't need to replace the model and clear the *old* cache - if ( checkIfInterrupted() ) + if ( Thread.currentThread().isInterrupted() ) return; - Lock lock = rwLock.writeLock(); - try { - lock.lock(); - this.state = new State( model, index, additionalRestrictions, languageLevel, inferenceMode, processImports, additionalProperties.stream().map( Property::getURI ).collect( Collectors.toSet() ) ); - if ( cacheName != null ) { - // now that the terms have been replaced, we can clear old caches - try { - OntologyLoader.deleteOldCache( cacheName ); - } catch ( IOException e ) { - log.error( String.format( String.format( "Failed to delete old cache directory for %s.", this ), e ) ); - } + this.state = new State( model, index, excludedWordsFromStemming, additionalRestrictions, languageLevel, inferenceMode, processImports, additionalProperties.stream().map( Property::getURI ).collect( Collectors.toSet() ), null ); + if ( cacheName != null ) { + // now that the terms have been replaced, we can clear old caches + try { + OntologyLoader.deleteOldCache( cacheName ); + } catch ( IOException e ) { + log.error( String.format( String.format( "Failed to delete old cache directory for %s.", this ), e ) ); } - 
} finally { - lock.unlock(); } loadTime.stop(); @@ -287,14 +287,6 @@ private void initialize( @Nullable InputStream stream, boolean forceLoad, boolea log.info( "Finished loading {} in {}s", this, String.format( "%.2f", loadTime.getTime() / 1000.0 ) ); } - private boolean checkIfInterrupted() { - if ( Thread.interrupted() ) { - log.warn( "The current thread is interrupted, initialization of {} will be stop.", this ); - return true; - } - return false; - } - private static boolean isCausedByInterrupt( Exception e ) { return hasCauseMatching( e, cause -> ( ( cause instanceof ParseException ) && ( ( ParseException ) cause ).getErrorNumber() == ARPErrorNumbers.ERR_INTERRUPTED ) ) || hasCause( e, InterruptedException.class ) || @@ -311,107 +303,87 @@ private static boolean hasCauseMatching( Throwable t, Predicate predi } @Override - public Collection findIndividuals( String search, boolean keepObsoletes ) throws + public Set> findIndividuals( String search, boolean keepObsoletes ) throws OntologySearchException { - Lock lock = rwLock.readLock(); - try { - lock.lock(); - if ( state == null ) { - log.warn( "Ontology {} is not ready, no individuals will be returned.", this ); - return Collections.emptySet(); - } - if ( state.index == null ) { - log.warn( "Attempt to search {} when index is null, no results will be returned.", this ); - return Collections.emptySet(); - } - return OntologySearch.matchIndividuals( state.model, state.index, search ) - .mapWith( i -> ( OntologyIndividual ) new OntologyIndividualImpl( i.result, state.additionalRestrictions, i.score ) ) - .filterKeep( where( ontologyTerm -> keepObsoletes || !ontologyTerm.isObsolete() ) ) - .toSet(); - } finally { - lock.unlock(); + State state = this.state; + if ( state == null ) { + log.warn( "Ontology {} is not ready, no individuals will be returned.", this ); + return Collections.emptySet(); } + if ( state.index == null ) { + log.warn( "Attempt to search {} when index is null, no results will be returned.", 
this ); + return Collections.emptySet(); + } + return state.index.searchIndividuals( state.model, search ) + .mapWith( i -> new OntologySearchResult<>( ( OntologyIndividual ) new OntologyIndividualImpl( i.result.as( Individual.class ), state.additionalRestrictions ), i.score ) ) + .filterKeep( where( ontologyTerm -> keepObsoletes || !ontologyTerm.getResult().isObsolete() ) ) + .toSet(); } @Override - public Collection findResources( String searchString, boolean keepObsoletes ) throws + public Collection> findResources( String searchString, boolean keepObsoletes ) throws OntologySearchException { - Lock lock = rwLock.readLock(); - try { - lock.lock(); - if ( state == null ) { - log.warn( "Ontology {} is not ready, no resources will be returned.", this ); - return Collections.emptySet(); - } - if ( state.index == null ) { - log.warn( "Attempt to search {} when index is null, no results will be returned.", this ); - return Collections.emptySet(); - } - return OntologySearch.matchResources( state.model, state.index, searchString ) - .filterKeep( where( r -> r.result.canAs( OntClass.class ) || r.result.canAs( Individual.class ) ) ) - .mapWith( r -> { - try { - if ( r.result.canAs( OntClass.class ) ) { - return new OntologyTermImpl( r.result.as( OntClass.class ), state.additionalRestrictions, r.score ); - } else if ( r.result.canAs( Individual.class ) ) { - return new OntologyIndividualImpl( r.result.as( Individual.class ), state.additionalRestrictions, r.score ); - } else { - return ( OntologyResource ) null; - } - } catch ( ConversionException e ) { - log.warn( "Conversion failed for " + r, e ); + State state = this.state; + if ( state == null ) { + log.warn( "Ontology {} is not ready, no resources will be returned.", this ); + return Collections.emptySet(); + } + if ( state.index == null ) { + log.warn( "Attempt to search {} when index is null, no results will be returned.", this ); + return Collections.emptySet(); + } + return state.index.searchResources( 
state.model, searchString ) + .filterKeep( where( r -> r.result.canAs( OntClass.class ) || r.result.canAs( Individual.class ) ) ) + .mapWith( r -> { + try { + if ( r.result.canAs( OntClass.class ) ) { + return new OntologySearchResult<>( ( OntologyResource ) new OntologyTermImpl( r.result.as( OntClass.class ), state.additionalRestrictions ), r.score ); + } else if ( r.result.canAs( Individual.class ) ) { + return new OntologySearchResult<>( ( OntologyResource ) new OntologyIndividualImpl( r.result.as( Individual.class ), state.additionalRestrictions ), r.score ); + } else { return null; } - } ) - .filterKeep( where( Objects::nonNull ) ) - .filterKeep( where( ontologyTerm -> keepObsoletes || !ontologyTerm.isObsolete() ) ) - .toSet(); - } finally { - lock.unlock(); - } + } catch ( ConversionException e ) { + log.warn( "Conversion failed for {}", r, e ); + return null; + } + } ) + .filterKeep( where( Objects::nonNull ) ) + .filterKeep( where( ontologyTerm -> keepObsoletes || !ontologyTerm.getResult().isObsolete() ) ) + .toSet(); } @Override - public Collection findTerm( String search, boolean keepObsoletes ) throws OntologySearchException { - if ( log.isDebugEnabled() ) log.debug( "Searching " + this + " for '" + search + "'" ); - Lock lock = rwLock.readLock(); - try { - lock.lock(); - if ( state == null ) { - log.warn( "Ontology {} is not ready, no terms will be returned.", this ); - return Collections.emptySet(); - } - if ( state.index == null ) { - log.warn( "Attempt to search {} when index is null, no results will be returned.", this ); - return Collections.emptySet(); - } - return OntologySearch.matchClasses( state.model, state.index, search ) - .mapWith( r -> ( OntologyTerm ) new OntologyTermImpl( r.result, state.additionalRestrictions, r.score ) ) - .filterKeep( where( ontologyTerm -> keepObsoletes || !ontologyTerm.isObsolete() ) ) - .toSet(); - } finally { - lock.unlock(); + public Collection> findTerm( String search, boolean keepObsoletes ) throws 
OntologySearchException { + State state = this.state; + if ( state == null ) { + log.warn( "Ontology {} is not ready, no terms will be returned.", this ); + return Collections.emptySet(); } + if ( state.index == null ) { + log.warn( "Attempt to search {} when index is null, no results will be returned.", this ); + return Collections.emptySet(); + } + return state.index.searchClasses( state.model, search ) + .mapWith( r -> new OntologySearchResult<>( ( OntologyTerm ) new OntologyTermImpl( r.result.as( OntClass.class ), state.additionalRestrictions ), r.score ) ) + .filterKeep( where( ontologyTerm -> keepObsoletes || !ontologyTerm.getResult().isObsolete() ) ) + .toSet(); } @Override public OntologyTerm findUsingAlternativeId( String alternativeId ) { - Lock lock = state != null && state.alternativeIDs != null ? rwLock.readLock() : rwLock.writeLock(); - try { - lock.lock(); - if ( state == null ) { - log.warn( "Ontology {} is not ready, null will be returned for alternative ID match.", this ); - return null; - } - if ( state.alternativeIDs == null ) { - log.info( "init search by alternativeID" ); - initSearchByAlternativeId( state ); - } - String termUri = state.alternativeIDs.get( alternativeId ); - return termUri != null ? getTerm( termUri ) : null; - } finally { - lock.unlock(); + State state = this.state; + if ( state == null ) { + log.warn( "Ontology {} is not ready, null will be returned for alternative ID match.", this ); + return null; } + if ( state.alternativeIDs == null ) { + log.info( "init search by alternativeID" ); + state = this.state = initSearchByAlternativeId( state ); /* State is immutable: the local must follow the newly published instance, the old one still has alternativeIDs == null */ + } + assert state.alternativeIDs != null; + String termUri = state.alternativeIDs.get( alternativeId ); + return termUri != null ? getTerm( termUri ) : null; } @Override @@ -502,7 +474,7 @@ public Set getChildren( Collection terms, boolean di public boolean isEnabled() { // quick path: just lookup the configuration String configParameter = "load."
+ getOntologyName(); - if ( Configuration.getBoolean( configParameter ) ) { + if ( Boolean.TRUE.equals( Configuration.getBoolean( configParameter ) ) ) { return true; } // could have forced, without setting config @@ -515,7 +487,7 @@ public boolean isOntologyLoaded() { return state != null; } - private Thread initializationThread = null; + private volatile Thread initializationThread = null; @Override public synchronized void startInitializationThread( boolean forceLoad, boolean forceIndexing ) { @@ -599,7 +571,7 @@ protected String getCacheName() { } @Override - public void index( boolean force ) { + public synchronized void index( boolean force ) { String cacheName = getCacheName(); if ( cacheName == null ) { log.warn( "This ontology does not support indexing; assign a cache name to be used." ); @@ -609,29 +581,20 @@ public void index( boolean force ) { log.warn( "Search is not enabled for this ontology." ); return; } + State state = this.state; + if ( state == null ) { + log.warn( "Ontology {} is not initialized, cannot index it.", this ); + return; + } SearchIndex index; - Lock lock = rwLock.readLock(); try { - lock.lock(); - if ( state == null ) { - log.warn( "Ontology {} is not initialized, cannot index it.", this ); - return; - } - index = OntologyIndexer.indexOntology( getCacheName(), state.model, force ); + index = OntologyIndexer.indexOntology( cacheName, state.model, state.excludedWordsFromStemming, force ); } catch ( IOException e ) { log.error( "Failed to generate index for {}.", this, e ); return; - } finally { - lock.unlock(); } // now we replace the index - lock = rwLock.writeLock(); - try { - lock.lock(); - this.state.index = index; - } finally { - lock.unlock(); - } + this.state = new State( state.model, index, state.excludedWordsFromStemming, state.additionalRestrictions, state.languageLevel, state.inferenceMode, state.processImports, state.additionalPropertyUris, state.alternativeIDs ); } /** @@ -648,8 +611,8 @@ public void index( boolean force 
) { * trying HP_0001453ibrary.org/obo/HP_0001453 -----> * HP_0000005 */ - private void initSearchByAlternativeId( State state ) { - state.alternativeIDs = new HashMap<>(); + private State initSearchByAlternativeId( State state ) { + Map alternativeIDs = new HashMap<>(); // for all Ontology terms that exist in the tree ExtendedIterator iterator = state.model.listClasses(); while ( iterator.hasNext() ) { @@ -661,12 +624,13 @@ private void initSearchByAlternativeId( State state ) { String baseOntologyUri = ontologyTerm.getUri().substring( 0, ontologyTerm.getUri().lastIndexOf( "/" ) + 1 ); for ( String alternativeId : ontologyTerm.getAlternativeIds() ) { // first way - state.alternativeIDs.put( alternativeId, ontologyTerm.getUri() ); + alternativeIDs.put( alternativeId, ontologyTerm.getUri() ); // second way String alternativeIdModified = alternativeId.replace( ':', '_' ); - state.alternativeIDs.put( baseOntologyUri + alternativeIdModified, ontologyTerm.getUri() ); + alternativeIDs.put( baseOntologyUri + alternativeIdModified, ontologyTerm.getUri() ); } } + return new State( state.model, state.index, state.excludedWordsFromStemming, state.additionalRestrictions, state.languageLevel, state.inferenceMode, state.processImports, state.additionalPropertyUris, alternativeIDs ); } @Override @@ -681,12 +645,11 @@ public void loadTermsInNameSpace( InputStream is, boolean forceIndex ) { while ( initializationThread.isAlive() ) { try { initializationThread.join( 5000 ); - log.warn( "Waiting for auto-initialization to stop so manual initialization can begin ..." ); } catch ( InterruptedException e ) { Thread.currentThread().interrupt(); - log.warn( "Got interrupted while waiting for the initialization thread of {} to finish.", this ); return; } + log.warn( "Waiting for auto-initialization to stop so manual initialization can begin ..." 
); ++wait; if ( wait >= maxWait && !initializationThread.isAlive() ) { throw new RuntimeException( String.format( "Got tired of waiting for %s's initialization thread.", this ) ); @@ -703,13 +666,7 @@ public String toString() { } private Optional getState() { - Lock lock = this.rwLock.readLock(); - try { - lock.lock(); - return Optional.ofNullable( state ); - } finally { - lock.unlock(); - } + return Optional.ofNullable( state ); } private Set getOntClassesFromTerms( OntModel model, Collection terms ) { @@ -728,23 +685,26 @@ private Set getOntClassesFromTerms( OntModel model, Collection excludedWordsFromStemming; private final Set additionalRestrictions; private final LanguageLevel languageLevel; private final InferenceMode inferenceMode; private final boolean processImports; private final Set additionalPropertyUris; @Nullable - private Map alternativeIDs; + private final Map alternativeIDs; - private State( OntModel model, @Nullable SearchIndex index, Set additionalRestrictions, @Nullable LanguageLevel languageLevel, InferenceMode inferenceMode, boolean processImports, Set additionalPropertyUris ) { + private State( OntModel model, @Nullable SearchIndex index, Set excludedWordsFromStemming, Set additionalRestrictions, @Nullable LanguageLevel languageLevel, InferenceMode inferenceMode, boolean processImports, Set additionalPropertyUris, @Nullable Map alternativeIDs ) { this.model = model; this.index = index; + this.excludedWordsFromStemming = excludedWordsFromStemming; this.additionalRestrictions = additionalRestrictions; this.languageLevel = languageLevel; this.inferenceMode = inferenceMode; this.processImports = processImports; this.additionalPropertyUris = additionalPropertyUris; + this.alternativeIDs = alternativeIDs; } } } \ No newline at end of file diff --git a/src/ubic/basecode/ontology/jena/BFO.java b/src/ubic/basecode/ontology/jena/BFO.java deleted file mode 100644 index 274636df..00000000 --- a/src/ubic/basecode/ontology/jena/BFO.java +++ /dev/null @@ 
-1,8 +0,0 @@ -package ubic.basecode.ontology.jena; - -import com.hp.hpl.jena.rdf.model.Property; -import com.hp.hpl.jena.rdf.model.ResourceFactory; - -class BFO { - public static final Property partOf = ResourceFactory.createProperty( "http://purl.obolibrary.org/obo/BFO_0000050" ); -} diff --git a/src/ubic/basecode/ontology/jena/IndexerSelector.java b/src/ubic/basecode/ontology/jena/IndexerSelector.java deleted file mode 100644 index 2be1be42..00000000 --- a/src/ubic/basecode/ontology/jena/IndexerSelector.java +++ /dev/null @@ -1,121 +0,0 @@ -/* - * The baseCode project - * - * Copyright (c) 2008-2019 University of British Columbia - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ -package ubic.basecode.ontology.jena; - -import com.hp.hpl.jena.ontology.ConversionException; -import com.hp.hpl.jena.rdf.model.*; -import com.hp.hpl.jena.vocabulary.OWL2; -import com.hp.hpl.jena.vocabulary.RDFS; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.Collection; -import java.util.HashSet; - -/** - * Used to limit which parts of ontologies get indexed for searching. This avoids indexing some parts of ontologies such - * as "examples" and "definitions" but this is set up in a partly ontology-specific way (that is, hard-coded). 
- * - * @author paul - */ -class IndexerSelector implements Selector { - - private static final Logger log = LoggerFactory.getLogger( IndexerSelector.class ); - - private static final Collection wantedForIndexing; - - static { - wantedForIndexing = new HashSet<>(); - wantedForIndexing.add( RDFS.label ); - - wantedForIndexing.add( OBO.id ); - wantedForIndexing.add( OBO.hasDbXref ); - wantedForIndexing.add( OBO.hasSynonym ); - wantedForIndexing.add( OBO.hasExactSynonym ); - wantedForIndexing.add( OBO.hasBroadSynonym ); - wantedForIndexing.add( OBO.hasNarrowSynonym ); - wantedForIndexing.add( OBO.hasRelatedSynonym ); - wantedForIndexing.add( OBO.alternativeLabel ); - } - - /* - * (non-Javadoc) - * - * @see com.hp.hpl.jena.rdf.model.Selector#getObject() - */ - @Override - public RDFNode getObject() { - return null; - } - - /* - * (non-Javadoc) - * - * @see com.hp.hpl.jena.rdf.model.Selector#getPredicate() - */ - @Override - public Property getPredicate() { - return null; - } - - /* - * (non-Javadoc) - * - * @see com.hp.hpl.jena.rdf.model.Selector#getSubject() - */ - @Override - public Resource getSubject() { - return null; - } - - /* - * (non-Javadoc) - * - * @see com.hp.hpl.jena.rdf.model.Selector#isSimple() - */ - @Override - public boolean isSimple() { - return false; - } - - /* - * (non-Javadoc) - * - * @see com.hp.hpl.jena.rdf.model.Selector#test(com.hp.hpl.jena.rdf.model.Statement) - */ - @Override - public boolean test( Statement s ) { - if ( s.getSubject().getURI() == null ) { - return false; - } - - boolean retain = wantedForIndexing.contains( s.getPredicate() ); - - // bit of a special case ... 
- if ( s.getPredicate().equals( OWL2.annotatedProperty ) && s.getObject().canAs( Property.class ) ) { - try { - retain = wantedForIndexing.contains( s.getObject().as( Property.class ) ); - } catch ( ConversionException e ) { - log.warn( "Conversion of " + s.getObject() + " to " + Property.class.getName() + " failed.", e ); - } - } - - return retain; - } -} diff --git a/src/ubic/basecode/ontology/jena/JenaUtils.java b/src/ubic/basecode/ontology/jena/JenaUtils.java index 11edc116..b7d96758 100644 --- a/src/ubic/basecode/ontology/jena/JenaUtils.java +++ b/src/ubic/basecode/ontology/jena/JenaUtils.java @@ -228,7 +228,7 @@ public static Optional as( RDFNode resource, Class cla try { return Optional.of( resource.as( clazz ) ); } catch ( ConversionException e ) { - log.warn( "Conversion of " + resource + " to " + clazz.getName() + " failed." ); + log.warn( "Conversion of {} to {} failed.", resource, clazz.getName() ); return Optional.empty(); } } diff --git a/src/ubic/basecode/ontology/jena/ObjectPropertyImpl.java b/src/ubic/basecode/ontology/jena/ObjectPropertyImpl.java index eb9dcf4f..4230727a 100644 --- a/src/ubic/basecode/ontology/jena/ObjectPropertyImpl.java +++ b/src/ubic/basecode/ontology/jena/ObjectPropertyImpl.java @@ -53,7 +53,7 @@ public Collection getRange() { OntClass class1 = r.asClass(); result.add( new OntologyTermImpl( class1, additionalRestrictions ) ); } else { - log.warn( "Don't know how to deal with " + r ); + log.warn( "Don't know how to deal with {}", r ); } } return result; diff --git a/src/ubic/basecode/ontology/jena/OntologyIndexer.java b/src/ubic/basecode/ontology/jena/OntologyIndexer.java index 281e30ae..59e6ca70 100644 --- a/src/ubic/basecode/ontology/jena/OntologyIndexer.java +++ b/src/ubic/basecode/ontology/jena/OntologyIndexer.java @@ -18,31 +18,48 @@ */ package ubic.basecode.ontology.jena; +import com.hp.hpl.jena.graph.NodeFactory; import com.hp.hpl.jena.ontology.OntModel; -import com.hp.hpl.jena.rdf.model.StmtIterator; +import 
com.hp.hpl.jena.rdf.model.*; import com.hp.hpl.jena.shared.JenaException; +import com.hp.hpl.jena.util.iterator.ExtendedIterator; +import com.hp.hpl.jena.util.iterator.WrappedIterator; +import com.hp.hpl.jena.vocabulary.RDFS; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.time.StopWatch; -import org.apache.jena.larq.IndexBuilderSubject; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.en.EnglishAnalyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.MultiReader; +import org.apache.lucene.queryParser.MultiFieldQueryParser; +import org.apache.lucene.queryParser.ParseException; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import ubic.basecode.ontology.search.OntologySearchException; import ubic.basecode.util.Configuration; -import java.io.File; +import javax.annotation.Nullable; import java.io.IOException; +import java.nio.file.Path; import java.nio.file.Paths; +import java.util.*; +import java.util.stream.Collectors; +import java.util.stream.Stream; /** + * A Lucene-based ontology indexer. + * * @author pavlidis */ class OntologyIndexer { @@ -50,139 +67,219 @@ class OntologyIndexer { private static final Logger log = LoggerFactory.getLogger( OntologyIndexer.class ); /** - * @return indexlarq with default analyzer (English), or null if no index is available. DOES not create the - * index if it doesn't exist. + * Those are built-in fields that are always indexed.
*/ - public static SearchIndex getSubjectIndex( String name ) { - Analyzer analyzer = new EnglishAnalyzer( Version.LUCENE_36 ); - return getSubjectIndex( name, analyzer ); - } + private static final String + ID_FIELD = "_ID", + LOCAL_NAME_FIELD = "_LOCAL_NAME"; - /** - * Loads or creates an index from an existing OntModel. Any existing index will loaded unless force=true. It will be - * created if there isn't one already, or if force=true. - */ - public static SearchIndex indexOntology( String name, OntModel model, boolean force ) throws JenaException, IOException { + public static class IndexableProperty { + private final Property property; + private final boolean analyzed; - if ( force ) { - return index( name, model ); + public IndexableProperty( Property property, boolean analyzed ) { + this.property = property; + this.analyzed = analyzed; } - SearchIndex index = getSubjectIndex( name ); - if ( index == null ) { - log.warn( "Index not found, or there was an error, re-indexing " + name ); - return index( name, model ); + public Property getProperty() { + return property; } - log.info( "A valid index for " + name + " already exists, using" ); - return index; + public boolean isAnalyzed() { + return analyzed; + } } - private static File getIndexPath( String name ) { - if ( StringUtils.isBlank( name ) ) { - throw new IllegalArgumentException( "The ontology must have a suitable name for being indexed." 
); - } - String ontologyDir = Configuration.getString( "ontology.index.dir" ); // e.g., /something/gemmaData/compass - if ( StringUtils.isBlank( ontologyDir ) ) { - return Paths.get( System.getProperty( "java.io.tmpdir" ), "searchIndices", "ontology", name ).toFile(); - } - return Paths.get( ontologyDir, "ontology", name ).toFile(); + public static final Collection DEFAULT_INDEXABLE_PROPERTIES; + + static { + DEFAULT_INDEXABLE_PROPERTIES = new HashSet<>(); + DEFAULT_INDEXABLE_PROPERTIES.add( new IndexableProperty( RDFS.label, true ) ); + DEFAULT_INDEXABLE_PROPERTIES.add( new IndexableProperty( OBO.id, true ) ); + DEFAULT_INDEXABLE_PROPERTIES.add( new IndexableProperty( OBO.hasDbXref, true ) ); + DEFAULT_INDEXABLE_PROPERTIES.add( new IndexableProperty( OBO.hasSynonym, true ) ); + DEFAULT_INDEXABLE_PROPERTIES.add( new IndexableProperty( OBO.hasExactSynonym, true ) ); + DEFAULT_INDEXABLE_PROPERTIES.add( new IndexableProperty( OBO.hasBroadSynonym, true ) ); + DEFAULT_INDEXABLE_PROPERTIES.add( new IndexableProperty( OBO.hasNarrowSynonym, true ) ); + DEFAULT_INDEXABLE_PROPERTIES.add( new IndexableProperty( OBO.hasRelatedSynonym, true ) ); + DEFAULT_INDEXABLE_PROPERTIES.add( new IndexableProperty( OBO.alternativeLabel, true ) ); } /** - * Find the search index (will not create it) - * - * @return Index, or null if there is no index. + * Obtain an ontology index with the default indexable properties. */ - private static SearchIndex getSubjectIndex( String name, Analyzer analyzer ) { - log.debug( "Loading index: " + name ); - File indexdir = getIndexPath( name ); - File indexdirstd = getIndexPath( name + ".std" ); + @Nullable + public static SearchIndex getSubjectIndex( String name, Set excludedFromStemming ) { + return getSubjectIndex( name, DEFAULT_INDEXABLE_PROPERTIES, excludedFromStemming ); + } + + /** + * Obtain an index with default analyzer (English), or null if no index is available. + *

+ * DOES not create the index if it doesn't exist. + */ + @Nullable + public static SearchIndex getSubjectIndex( String name, Collection indexableProperties, Set excludedFromStemming ) { + log.debug( "Loading index: {}", name ); try { // we do not put this in the try-with-open because we want these to *stay* open - FSDirectory directory = FSDirectory.open( indexdir ); - FSDirectory directorystd = FSDirectory.open( indexdirstd ); - + FSDirectory directory = FSDirectory.open( getIndexPath( name ).toFile() ); + FSDirectory directoryStd = FSDirectory.open( getIndexPath( name + ".std" ).toFile() ); if ( !IndexReader.indexExists( directory ) ) { return null; } - if ( !IndexReader.indexExists( directorystd ) ) { + if ( !IndexReader.indexExists( directoryStd ) ) { return null; } - - IndexReader reader = IndexReader.open( directory ); - IndexReader readerstd = IndexReader.open( directorystd ); - MultiReader r = new MultiReader( reader, readerstd ); - return new SearchIndex( r, analyzer ); - + log.info( "Loading index at {} and {}", directory, directoryStd ); + return openIndex( directory, directoryStd, indexableProperties, excludedFromStemming ); } catch ( IOException e ) { - log.warn( "Index for " + name + " could not be read: " + e.getMessage() ); + log.warn( "Index for {} could not be read: {}", name, e.getMessage(), e ); return null; } } /** - * Create an on-disk index from an existing OntModel. Any existing index will be deleted/overwritten. + * Index an ontology with the default indexable properties. 
*/ - private static SearchIndex index( String name, OntModel model ) throws JenaException, IOException { - - File indexdir = getIndexPath( name ); + public static SearchIndex indexOntology( String name, OntModel model, Set excludedFromStemming, boolean force ) throws JenaException, IOException { + return indexOntology( name, model, DEFAULT_INDEXABLE_PROPERTIES, excludedFromStemming, force ); + } - StopWatch timer = new StopWatch(); - timer.start(); - FSDirectory dir = FSDirectory.open( indexdir ); - log.info( "Indexing " + name + " to: " + indexdir ); + /** + * Loads or creates an index from an existing OntModel. Any existing index will be loaded unless force=true. It will be + * created if there isn't one already, or if force=true. + */ + public static SearchIndex indexOntology( String name, OntModel model, Collection indexableProperties, Set excludedFromStemming, boolean force ) throws JenaException, IOException { + if ( force ) { + return index( name, model, indexableProperties, excludedFromStemming ); + } + SearchIndex index = getSubjectIndex( name, indexableProperties, excludedFromStemming ); /* reopen with the caller's properties, not the defaults, so the searchable fields match what index() would build */ + if ( index == null ) { + log.warn( "Index not found, or there was an error, re-indexing {}...", name ); + return index( name, model, indexableProperties, excludedFromStemming ); + } + log.info( "A valid index for {} already exists, using", name ); + return index; + } - /* - * adjust the analyzer ... - */ - Analyzer analyzer = new EnglishAnalyzer( Version.LUCENE_36 ); - IndexWriterConfig config = new IndexWriterConfig( Version.LUCENE_36, analyzer ); - IndexWriter indexWriter = new IndexWriter( dir, config ); - indexWriter.deleteAll(); // start with clean slate. - assert 0 == indexWriter.numDocs(); + private static Path getIndexPath( String name ) { + if ( StringUtils.isBlank( name ) ) { + throw new IllegalArgumentException( "The ontology must have a suitable name for being indexed."
); + } + String ontologyDir = Configuration.getString( "ontology.index.dir" ); // e.g., /something/gemmaData/compass + if ( StringUtils.isBlank( ontologyDir ) ) { + return Paths.get( System.getProperty( "java.io.tmpdir" ), "searchIndices", "ontology", name ); + } + return Paths.get( ontologyDir, "ontology", name ); + } - IndexBuilderSubject larqSubjectBuilder = new IndexBuilderSubject( indexWriter ); - StmtIterator listStatements = model.listStatements( new IndexerSelector() ); - larqSubjectBuilder.indexStatements( listStatements ); - indexWriter.commit(); - log.info( indexWriter.numDocs() + " Statements indexed..." ); - indexWriter.close(); + /** + * Create an on-disk index from an existing OntModel. Any existing index will be deleted/overwritten. + */ + private static SearchIndex index( String name, OntModel model, Collection indexableProperties, Set excludedFromStemming ) throws JenaException, IOException { + Directory dir = index( name, model, new EnglishAnalyzer( Version.LUCENE_36, EnglishAnalyzer.getDefaultStopSet(), excludedFromStemming ), getIndexPath( name ), indexableProperties ); + // we need to also analyze using the Standard analyzer, which doesn't do stemming and allows wildcard. + Directory dirStd = index( name, model, new StandardAnalyzer( Version.LUCENE_36 ), getIndexPath( name + ".std" ), indexableProperties ); + return openIndex( dir, dirStd, indexableProperties, excludedFromStemming ); + } - Directory dirstd = indexStd( name, model ); + private static Directory index( String name, OntModel model, Analyzer analyzer, Path indexDir, Collection indexableProperties ) throws IOException { + StopWatch timer = StopWatch.createStarted(); + FSDirectory dir = FSDirectory.open( indexDir.toFile() ); + log.info( "Indexing {} to: {}...", name, indexDir ); + IndexWriterConfig config = new IndexWriterConfig( Version.LUCENE_36, analyzer ); + try ( IndexWriter indexWriter = new IndexWriter( dir, config ) ) { + indexWriter.deleteAll(); // start with clean slate. 
+ assert 0 == indexWriter.numDocs(); + Map indexablePropertiesByField = indexableProperties.stream() + .collect( Collectors.toMap( p -> p.getProperty().getURI(), p -> p ) ); + ExtendedIterator subjects = model.listSubjects() + .filterDrop( new BnodeFilter<>() ); + while ( subjects.hasNext() ) { + Resource subject = subjects.next(); + String id = subject.getURI(); + Document doc = new Document(); + doc.add( new Field( ID_FIELD, id, Field.Store.YES, Field.Index.NOT_ANALYZED ) ); + doc.add( new Field( LOCAL_NAME_FIELD, subject.getLocalName(), Field.Store.NO, Field.Index.NOT_ANALYZED ) ); + for ( IndexableProperty prop : indexableProperties ) { + StmtIterator listStatements = subject.listProperties( prop.property ); + while ( listStatements.hasNext() ) { + Statement s = listStatements.next(); + String field = s.getPredicate().getURI(); + String value = JenaUtils.asString( s.getObject() ); + doc.add( new Field( field, value, Field.Store.NO, indexablePropertiesByField.get( field ).isAnalyzed() ? 
Field.Index.ANALYZED : Field.Index.NOT_ANALYZED ) ); + } + } + indexWriter.addDocument( doc ); + } + indexWriter.commit(); + log.info( "Done indexing {} subjects of {} in {} s.", indexWriter.numDocs(), name, String.format( "%.2f", timer.getTime() / 1000.0 ) ); + } + return dir; + } - MultiReader r = new MultiReader( IndexReader.open( dir ), IndexReader.open( dirstd ) ); + private static SearchIndex openIndex( Directory dir, Directory dirStd, Collection indexableProperties, Set excludedFromStemming ) throws IOException { + String[] searchableFields = Stream.concat( Stream.of( ID_FIELD, LOCAL_NAME_FIELD ), indexableProperties.stream().map( p -> p.property ).map( Resource::getURI ) ) + .distinct() + .toArray( String[]::new ); + return new LuceneSearchIndex( searchableFields, new MultiReader( IndexReader.open( dir ), IndexReader.open( dirStd ) ), new EnglishAnalyzer( Version.LUCENE_36, EnglishAnalyzer.getDefaultStopSet(), excludedFromStemming ) ); + } - // workaround to get the EnglishAnalyzer. - SearchIndex index = new SearchIndex( r, new EnglishAnalyzer( Version.LUCENE_36 ) ); - // larqSubjectBuilder.getIndex(); // always returns a StandardAnalyazer - assert index.getLuceneQueryParser().getAnalyzer() instanceof EnglishAnalyzer; + private static class LuceneSearchIndex implements SearchIndex { - log.info( "Done indexing of " + name + " in " + String.format( "%.2f", timer.getTime() / 1000.0 ) + "s" ); + private static final Logger log = LoggerFactory.getLogger( LuceneSearchIndex.class ); - return index; - } + private final String[] searchableFields; + private final IndexReader index; + private final Analyzer analyzer; - /** - * We need to also analyze using the Standard analyzer, which doesn't do stemming and allows wildcard. 
- */ - private static Directory indexStd( String name, OntModel model ) throws JenaException, IOException { + public LuceneSearchIndex( String[] searchableFields, IndexReader index, Analyzer analyzer ) { + this.searchableFields = searchableFields; + this.index = index; + this.analyzer = analyzer; + } - File file = getIndexPath( name + ".std" ); + @Override + public ExtendedIterator search( OntModel model, String queryString ) throws OntologySearchException { + if ( StringUtils.isBlank( queryString ) ) { + throw new IllegalArgumentException( "Query cannot be blank" ); + } + StopWatch timer = StopWatch.createStarted(); + try { + Query query = new MultiFieldQueryParser( Version.LUCENE_36, searchableFields, analyzer ).parse( queryString ); + TopDocs hits = new IndexSearcher( index ).search( query, 500 ); + // in general, results are found in both regular and std index, so we divide by 2 the initial capacity + Set seenIds = new HashSet<>( hits.scoreDocs.length / 2 ); + List resources = new ArrayList<>( hits.scoreDocs.length / 2 ); + for ( int i = 0; i < hits.scoreDocs.length; i++ ) { /* scoreDocs holds at most the 500 requested hits, whereas totalHits counts every match */ + Document doc = index.document( hits.scoreDocs[i].doc ); + String id = doc.get( ID_FIELD ); + if ( seenIds.contains( id ) ) { + continue; + } + RDFNode node = model.getRDFNode( NodeFactory.createURI( id ) ); + resources.add( new JenaSearchResult( node, hits.scoreDocs[i].score ) ); + seenIds.add( id ); + } + return WrappedIterator.create( resources.iterator() ); + } catch ( ParseException e ) { + throw new OntologySearchException( "Failed to parse search query.", queryString, e ); + } catch ( IOException e ) { + throw new OntologySearchException( "An I/O error occured while searching.", queryString, e ); + } finally { + timer.stop(); + if ( timer.getTime() > 100 ) { + log.warn( "Ontology resource search for: {} took {} ms.", queryString, timer.getTime() ); + } + } + } - FSDirectory dir = FSDirectory.open( file ); - dir.getLockFactory().clearLock( dir.getLockID() ); - log.info( "Index to: " + file ); - Analyzer
analyzer = new StandardAnalyzer( Version.LUCENE_36 ); - IndexWriterConfig config = new IndexWriterConfig( Version.LUCENE_36, analyzer ); - IndexWriter indexWriter = new IndexWriter( dir, config ); - indexWriter.deleteAll(); - IndexBuilderSubject larqSubjectBuilder = new IndexBuilderSubject( indexWriter ); - StmtIterator listStatements = model.listStatements( new IndexerSelector() ); - larqSubjectBuilder.indexStatements( listStatements ); - indexWriter.commit(); - log.info( indexWriter.numDocs() + " Statements indexed..." ); - indexWriter.close(); - return dir; + @Override + public void close() throws IOException { + index.close(); + } } + } diff --git a/src/ubic/basecode/ontology/jena/OntologyIndividualImpl.java b/src/ubic/basecode/ontology/jena/OntologyIndividualImpl.java index 61731377..e888875f 100644 --- a/src/ubic/basecode/ontology/jena/OntologyIndividualImpl.java +++ b/src/ubic/basecode/ontology/jena/OntologyIndividualImpl.java @@ -44,12 +44,6 @@ public OntologyIndividualImpl( Individual ind, Set additionalRestri this.additionalRestrictions = additionalRestrictions; } - public OntologyIndividualImpl( Individual ind, Set additionalRestrictions, double score ) { - super( ind, score ); - this.ind = ind; - this.additionalRestrictions = additionalRestrictions; - } - @Override public OntologyTerm getInstanceOf() { Resource type = ind.getRDFType(); diff --git a/src/ubic/basecode/ontology/jena/OntologyLoader.java b/src/ubic/basecode/ontology/jena/OntologyLoader.java index 38c27a7c..97cb8f3b 100644 --- a/src/ubic/basecode/ontology/jena/OntologyLoader.java +++ b/src/ubic/basecode/ontology/jena/OntologyLoader.java @@ -107,7 +107,7 @@ public static OntModel loadMemoryModel( String url, @Nullable String cacheName, } catch ( ClosedByInterruptException e ) { throw e; } catch ( IOException e ) { - log.error( "Failed to load ontology model for " + url + ", will attempt to load from disk.", e ); + log.error( "Failed to load ontology model for {}, will attempt to load from 
disk.", url, e ); attemptToLoadFromDisk = true; } finally { if ( urlc instanceof HttpURLConnection ) { @@ -129,7 +129,7 @@ public static OntModel loadMemoryModel( String url, @Nullable String cacheName, // the ontology. FileUtils.createParentDirectories( oldFile ); Files.copy( f.toPath(), oldFile.toPath(), StandardCopyOption.REPLACE_EXISTING ); - log.info( "Load model from disk: " + timer.getTime() + "ms" ); + log.info( "Load model from disk took {} ms", timer.getTime() ); } } else { throw new RuntimeException( @@ -138,7 +138,7 @@ public static OntModel loadMemoryModel( String url, @Nullable String cacheName, } else if ( tempFile.exists() ) { // Model was successfully loaded into memory from URL with given cacheName // Save cache to disk (rename temp file) - log.info( "Caching ontology to disk: " + cacheName + " under " + f.getAbsolutePath() ); + log.info( "Caching ontology to disk: {} under {}", cacheName, f.getAbsolutePath() ); try { // Need to compare previous to current so instead of overwriting we'll move the old file if ( f.exists() ) { @@ -149,12 +149,12 @@ public static OntModel loadMemoryModel( String url, @Nullable String cacheName, } Files.move( tempFile.toPath(), f.toPath(), StandardCopyOption.REPLACE_EXISTING ); } catch ( IOException e ) { - log.error( "Failed to cache ontology " + url + " to disk.", e ); + log.error( "Failed to cache ontology {} to disk.", url, e ); } } } - log.info( "Loading ontology model for " + url + " took " + timer.getTime() + "ms" ); + log.info( "Loading ontology model for {} took {} ms", url, timer.getTime() ); return model; } @@ -236,7 +236,7 @@ public static URLConnection openConnection( String url ) throws IOException { if ( StringUtils.isBlank( newUrl ) ) { throw new RuntimeException( String.format( "Redirect response for %s is lacking a 'Location' header.", url ) ); } - log.debug( "Redirect to " + newUrl + " from " + url ); + log.debug( "Redirect to {} from {}", newUrl, url ); urlc = openConnectionInternal( newUrl ); } } 
@@ -251,7 +251,7 @@ private static URLConnection openConnectionInternal( String url ) throws IOExcep if ( urlc instanceof HttpURLConnection ) { ( ( HttpURLConnection ) urlc ).setInstanceFollowRedirects( true ); } - log.debug( "Connecting to " + url ); + log.debug( "Connecting to {}", url ); urlc.connect(); // Will error here on bad URL return urlc; } diff --git a/src/ubic/basecode/ontology/jena/OntologySearch.java b/src/ubic/basecode/ontology/jena/OntologySearch.java deleted file mode 100644 index a51b9974..00000000 --- a/src/ubic/basecode/ontology/jena/OntologySearch.java +++ /dev/null @@ -1,148 +0,0 @@ -/* - * The basecode project - * - * Copyright (c) 2007-2019 University of British Columbia - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * - */ -package ubic.basecode.ontology.jena; - -import com.hp.hpl.jena.ontology.ConversionException; -import com.hp.hpl.jena.ontology.Individual; -import com.hp.hpl.jena.ontology.OntClass; -import com.hp.hpl.jena.ontology.OntModel; -import com.hp.hpl.jena.rdf.model.Model; -import com.hp.hpl.jena.rdf.model.RDFNode; -import com.hp.hpl.jena.rdf.model.Resource; -import com.hp.hpl.jena.shared.JenaException; -import com.hp.hpl.jena.util.iterator.ExtendedIterator; -import com.hp.hpl.jena.util.iterator.Map1Iterator; -import org.apache.commons.lang3.StringUtils; -import org.apache.commons.lang3.time.StopWatch; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import ubic.basecode.ontology.search.OntologySearchException; - -import java.util.Objects; -import java.util.Optional; - -import static com.hp.hpl.jena.sparql.util.ModelUtils.convertGraphNodeToRDFNode; -import static ubic.basecode.ontology.jena.JenaUtils.where; - -/** - * @author pavlidis - */ -class OntologySearch { - - private static final Logger log = LoggerFactory.getLogger( OntologySearch.class ); - - /** - * Find classes that match the query string. 
- * - * @param model that goes with the index - * @param index to search - * @return Collection of OntologyTerm objects - */ - public static ExtendedIterator> matchClasses( OntModel model, SearchIndex index, String queryString ) throws OntologySearchException { - return runSearch( model, index, queryString ) - .filterKeep( where( r -> r.result.isURIResource() && r.result.canAs( OntClass.class ) ) ) - .mapWith( r -> r.as( OntClass.class ) ) - .filterKeep( where( Objects::nonNull ) ); - } - - /** - * Find individuals that match the query string - * - * @param model that goes with the index - * @param index to search - * @return Collection of OntologyTerm objects - */ - public static ExtendedIterator> matchIndividuals( OntModel model, SearchIndex index, String queryString ) throws OntologySearchException { - return runSearch( model, index, queryString ) - .filterKeep( where( r -> r.result.isURIResource() && r.result.canAs( Individual.class ) ) ) - .mapWith( r -> r.as( Individual.class ) ) - .filterKeep( where( Objects::nonNull ) ); - } - - /** - * Find OntologyIndividuals and OntologyTerms that match the query string. Search with a wildcard is attempted - * whenever possible. 
- * - * @param model that goes with the index - * @param index to search - * @return Collection of OntologyResource objects - */ - public static ExtendedIterator> matchResources( OntModel model, SearchIndex index, String queryString ) throws OntologySearchException { - return runSearch( model, index, queryString ) - .filterKeep( where( o -> o.result.isURIResource() && o.result.isResource() ) ) - .mapWith( r -> r.as( Resource.class ) ) - .filterKeep( where( Objects::nonNull ) ); - } - - private static ExtendedIterator> runSearch( Model model, SearchIndex index, String queryString ) throws OntologySearchJenaException { - if ( StringUtils.isBlank( queryString ) ) { - throw new IllegalArgumentException( "Query cannot be blank" ); - } - StopWatch timer = StopWatch.createStarted(); - try { - return new Map1Iterator<>( o -> new SearchResult<>( o.getLuceneDocId(), convertGraphNodeToRDFNode( o.getNode(), model ), o.getScore() ), index.search( queryString ) ); - } catch ( JenaException e ) { - throw new OntologySearchJenaException( "Failed to search with query.", queryString, e ); - } finally { - timer.stop(); - if ( timer.getTime() > 100 ) { - log.warn( "Ontology resource search for: {} took {} ms.", queryString, timer.getTime() ); - } - } - } - - public static class SearchResult { - public final int docId; - public final T result; - public final double score; - - private SearchResult( int docId, T result, double score ) { - this.docId = docId; - this.result = result; - this.score = score; - } - - @Override - public boolean equals( Object obj ) { - if ( obj instanceof SearchResult ) { - return Objects.equals( result, ( ( SearchResult ) obj ).result ); - } - return false; - } - - @Override - public int hashCode() { - return Objects.hash( result ); - } - - @Override - public String toString() { - return String.format( "%s [docId = %d, score = %f]", result, docId, score ); - } - - private SearchResult as( Class clazz ) { - try { - return new SearchResult<>( docId, result.as( 
clazz ), score ); - } catch ( ConversionException e ) { - log.warn( "Conversion of " + result + " to " + clazz.getName() + " failed.", e ); - return null; - } - } - } -} diff --git a/src/ubic/basecode/ontology/jena/OntologySearchJenaException.java b/src/ubic/basecode/ontology/jena/OntologySearchJenaException.java deleted file mode 100644 index bcb4cc71..00000000 --- a/src/ubic/basecode/ontology/jena/OntologySearchJenaException.java +++ /dev/null @@ -1,22 +0,0 @@ -package ubic.basecode.ontology.jena; - -import com.hp.hpl.jena.shared.JenaException; -import ubic.basecode.ontology.search.OntologySearchException; - -/** - * Base class for Jena-related ontology search exceptions. - */ -class OntologySearchJenaException extends OntologySearchException { - - private final JenaException cause; - - public OntologySearchJenaException( String message, String query, JenaException cause ) { - super( message, query, cause ); - this.cause = cause; - } - - @Override - public JenaException getCause() { - return cause; - } -} diff --git a/src/ubic/basecode/ontology/jena/OntologyTermImpl.java b/src/ubic/basecode/ontology/jena/OntologyTermImpl.java index 04e9edb5..fba48075 100644 --- a/src/ubic/basecode/ontology/jena/OntologyTermImpl.java +++ b/src/ubic/basecode/ontology/jena/OntologyTermImpl.java @@ -61,12 +61,6 @@ public OntologyTermImpl( OntClass resource, Set additionalRestricti this.additionalRestrictions = additionalRestrictions; } - public OntologyTermImpl( OntClass resource, Set additionalRestrictions, double score ) { - super( resource, score ); - this.ontResource = resource; - this.additionalRestrictions = additionalRestrictions; - } - @Override public Collection getAlternativeIds() { return getAnnotations( HAS_ALTERNATE_ID ).stream().map( AnnotationProperty::getContents ).collect( Collectors.toSet() ); diff --git a/src/ubic/basecode/ontology/jena/RO.java b/src/ubic/basecode/ontology/jena/RO.java index 86d5ec73..340579e2 100644 --- a/src/ubic/basecode/ontology/jena/RO.java 
+++ b/src/ubic/basecode/ontology/jena/RO.java @@ -5,5 +5,9 @@ class RO { + /** + * This is actually part of RO, see + */ + public static final Property partOf = ResourceFactory.createProperty( "http://purl.obolibrary.org/obo/BFO_0000050" ); public static final Property properPartOf = ResourceFactory.createProperty( "http://www.obofoundry.org/ro/ro.owl#proper_part_of" ); } diff --git a/src/ubic/basecode/ontology/jena/SearchIndex.java b/src/ubic/basecode/ontology/jena/SearchIndex.java index 0157263d..cf868858 100644 --- a/src/ubic/basecode/ontology/jena/SearchIndex.java +++ b/src/ubic/basecode/ontology/jena/SearchIndex.java @@ -1,74 +1,73 @@ -/* - * The baseCode project - * - * Copyright (c) 2013 University of British Columbia - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on - * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the - * specific language governing permissions and limitations under the License. 
- */ package ubic.basecode.ontology.jena; -import com.hp.hpl.jena.util.iterator.Map1Iterator; -import org.apache.jena.larq.ARQLuceneException; -import org.apache.jena.larq.HitLARQ; -import org.apache.jena.larq.IndexLARQ; -import org.apache.jena.larq.LARQ; -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.MultiReader; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.TopDocs; +import com.hp.hpl.jena.ontology.Individual; +import com.hp.hpl.jena.ontology.OntClass; +import com.hp.hpl.jena.ontology.OntModel; +import com.hp.hpl.jena.rdf.model.RDFNode; +import com.hp.hpl.jena.util.iterator.ExtendedIterator; +import ubic.basecode.ontology.search.OntologySearchException; -import java.io.IOException; -import java.util.Arrays; -import java.util.Iterator; +import java.util.Objects; -/** - * Customization to deal with MultiReader and 'open' while indexing is going on ...? Might not be needed. - * - * @author Paul - */ -class SearchIndex extends IndexLARQ { +import static ubic.basecode.ontology.jena.JenaUtils.where; - public SearchIndex( MultiReader r, Analyzer a ) { - super( r, a ); - } +interface SearchIndex extends AutoCloseable { - @Override - public Iterator search( String queryString ) { - try { - final IndexSearcher s = getIndexSearcher(); - Query query = getLuceneQueryParser().parse( queryString ); + /** + * Find RDF nodes matching the given query string. + */ + ExtendedIterator search( OntModel model, String queryString ) throws OntologySearchException; - TopDocs topDocs = s.search( query, null, LARQ.NUM_RESULTS ); + /** + * Find classes that match the query string. 
+ * + * @param model that goes with the index + * @return iterator over the search results whose node is a URI resource that can be viewed as an {@link OntClass} + */ + default ExtendedIterator searchClasses( OntModel model, String queryString ) throws OntologySearchException { + return search( model, queryString ) + .filterKeep( where( r -> r.result.isURIResource() && r.result.canAs( OntClass.class ) ) ) + .filterKeep( where( Objects::nonNull ) ); + } - return new Map1Iterator<>( object -> new HitLARQ( s, object ), Arrays.asList( topDocs.scoreDocs ).iterator() ); - } catch ( Exception e ) { - throw new ARQLuceneException( "Error during search for '" + queryString + ";", e ); - } + /** + * Find individuals that match the query string. + * + * @param model that goes with the index + * @return iterator over the search results whose node is a URI resource that can be viewed as an {@link Individual} + */ + default ExtendedIterator searchIndividuals( OntModel model, String queryString ) throws OntologySearchException { + return search( model, queryString ) + .filterKeep( where( r -> r.result.isURIResource() && r.result.canAs( Individual.class ) ) ) + .filterKeep( where( Objects::nonNull ) ); + } + + /** + * Find OntologyIndividuals and OntologyTerms that match the query string. Search with a wildcard is attempted + * whenever possible. + * + * @param model that goes with the index + * @return iterator over the search results whose node is a URI resource + */ + default ExtendedIterator searchResources( OntModel model, String queryString ) throws OntologySearchException { + return search( model, queryString ) + .filterKeep( where( o -> o.result.isURIResource() && o.result.isResource() ) ) + .filterKeep( where( Objects::nonNull ) ); } - private synchronized IndexSearcher getIndexSearcher() throws IOException { - if ( !reader.isCurrent() ) { - // this is the problematic line ...
multireader cannot be reopened; was IndexReader newReader = - // IndexReader.openIfChanged(reader, true) ; + class JenaSearchResult { + + public final RDFNode result; + public final double score; - IndexReader newReader = IndexReader.openIfChanged( reader ); - if ( newReader != null ) { - reader.close(); - reader = newReader; - searcher = new IndexSearcher( reader ); - } + JenaSearchResult( RDFNode result, double score ) { + this.result = result; + this.score = score; } - return searcher; + @Override + public String toString() { + return String.format( "%s score=%f", result, score ); + } } - } diff --git a/src/ubic/basecode/ontology/model/OntologyResource.java b/src/ubic/basecode/ontology/model/OntologyResource.java index 2284ebc0..f7fecbb1 100644 --- a/src/ubic/basecode/ontology/model/OntologyResource.java +++ b/src/ubic/basecode/ontology/model/OntologyResource.java @@ -23,7 +23,7 @@ /** * @author pavlidis */ -public interface OntologyResource extends Comparable { +public interface OntologyResource { /** * A URI if known, otherwise null. @@ -52,10 +52,4 @@ public interface OntologyResource extends Comparable { * Whether the resource is marked as obsolete. */ boolean isObsolete(); - - /** - * If this is result from a free-text search, a corresponding score, otherwise null. 
- */ - @Nullable - Double getScore(); } diff --git a/src/ubic/basecode/ontology/model/OntologyTermSimple.java b/src/ubic/basecode/ontology/model/OntologyTermSimple.java index d22bb703..5aadf4da 100644 --- a/src/ubic/basecode/ontology/model/OntologyTermSimple.java +++ b/src/ubic/basecode/ontology/model/OntologyTermSimple.java @@ -17,7 +17,6 @@ import javax.annotation.Nullable; import java.io.Serializable; import java.util.Collection; -import java.util.Comparator; import java.util.Objects; /** @@ -131,17 +130,6 @@ public boolean isObsolete() { return obsolete; } - @Nullable - @Override - public Double getScore() { - return null; - } - - @Override - public int compareTo( OntologyResource other ) { - return Objects.compare( getUri(), other.getUri(), Comparator.nullsLast( Comparator.naturalOrder() ) ); - } - @Override public boolean equals( Object obj ) { if ( this == obj ) return true; diff --git a/src/ubic/basecode/ontology/providers/OntologyService.java b/src/ubic/basecode/ontology/providers/OntologyService.java index fb8970ce..0729d850 100644 --- a/src/ubic/basecode/ontology/providers/OntologyService.java +++ b/src/ubic/basecode/ontology/providers/OntologyService.java @@ -4,6 +4,7 @@ import ubic.basecode.ontology.model.OntologyResource; import ubic.basecode.ontology.model.OntologyTerm; import ubic.basecode.ontology.search.OntologySearchException; +import ubic.basecode.ontology.search.OntologySearchResult; import javax.annotation.Nullable; import java.io.InputStream; @@ -129,6 +130,19 @@ enum InferenceMode { */ void setSearchEnabled( boolean searchEnabled ); + /** + * Obtain the words that should be excluded from stemming. + *

+ * By default, all words are subject to stemming. The exact implementation of stemming depends on the actual search + * implementation. + */ + Set getExcludedWordsFromStemming(); + + /** + * Set words that should be excluded from stemming when searching. + */ + void setExcludedWordsFromStemming( Set excludedWordsFromStemming ); + /** * Obtain the URIs used as additional properties when inferring parents and children. *

@@ -169,7 +183,7 @@ enum InferenceMode { *

* Obsolete terms are filtered out. */ - default Collection findIndividuals( String search ) throws OntologySearchException { + default Collection> findIndividuals( String search ) throws OntologySearchException { return findIndividuals( search, false ); } @@ -179,7 +193,7 @@ default Collection findIndividuals( String search ) throws O * @param search search query * @param keepObsoletes retain obsolete terms */ - Collection findIndividuals( String search, boolean keepObsoletes ) throws OntologySearchException; + Set> findIndividuals( String search, boolean keepObsoletes ) throws OntologySearchException; /** * Looks for any resources (terms or individuals) that match the given search string @@ -189,7 +203,7 @@ default Collection findIndividuals( String search ) throws O * @return results, or an empty collection if the results are empty OR the ontology is not available to be * searched. */ - default Collection findResources( String searchString ) throws OntologySearchException { + default Collection> findResources( String searchString ) throws OntologySearchException { return findResources( searchString, false ); } @@ -199,14 +213,14 @@ default Collection findResources( String searchString ) throws * @param search search query * @param keepObsoletes retain obsolete terms */ - Collection findResources( String search, boolean keepObsoletes ) throws OntologySearchException; + Collection> findResources( String search, boolean keepObsoletes ) throws OntologySearchException; /** * Looks for any terms that match the given search string. *

* Obsolete terms are filtered out. */ - default Collection findTerm( String search ) throws OntologySearchException { + default Collection> findTerm( String search ) throws OntologySearchException { return findTerm( search, false ); } @@ -217,7 +231,7 @@ default Collection findTerm( String search ) throws OntologySearch * @param search search query * @param keepObsoletes retain obsolete terms */ - Collection findTerm( String search, boolean keepObsoletes ) throws OntologySearchException; + Collection> findTerm( String search, boolean keepObsoletes ) throws OntologySearchException; /** * Find a term using an alternative ID. diff --git a/src/ubic/basecode/ontology/search/OntologySearch.java b/src/ubic/basecode/ontology/search/OntologySearch.java deleted file mode 100644 index bf16d97f..00000000 --- a/src/ubic/basecode/ontology/search/OntologySearch.java +++ /dev/null @@ -1,32 +0,0 @@ -package ubic.basecode.ontology.search; - -import org.apache.commons.lang3.StringUtils; -import org.apache.commons.text.StringEscapeUtils; - -public class OntologySearch { - - // Lucene cannot properly parse these characters... gives a query parse error. - // OntologyTerms don't contain them anyway - private final static char[] INVALID_CHARS = {':', '(', ')', '?', '^', '[', ']', '{', '}', '!', '~', '"', '\''}; - - /** - * Will remove characters that jena is unable to parse. Will also escape and remove leading and trailing white space - * (which also causes jena to die) - * - * @param toStrip the string to clean - * @return - */ - public static String stripInvalidCharacters( String toStrip ) { - String result = StringUtils.strip( toStrip ); - for ( char badChar : INVALID_CHARS ) { - result = StringUtils.remove( result, badChar ); - } - /* - * Queries cannot start with '*' or ? 
- */ - result = result.replaceAll( "^\\**", "" ); - result = result.replaceAll( "^\\?*", "" ); - - return StringEscapeUtils.escapeJava( result ).trim(); - } -} diff --git a/src/ubic/basecode/ontology/search/OntologySearchResult.java b/src/ubic/basecode/ontology/search/OntologySearchResult.java new file mode 100644 index 00000000..f50af3e5 --- /dev/null +++ b/src/ubic/basecode/ontology/search/OntologySearchResult.java @@ -0,0 +1,51 @@ +package ubic.basecode.ontology.search; + +import ubic.basecode.ontology.model.OntologyResource; + +import java.util.Comparator; +import java.util.Objects; + +/** + * Represents a search result from an ontology. + * + * @author poirigui + */ +public class OntologySearchResult implements Comparator> { + + private final T result; + private final double score; + + public OntologySearchResult( T result, double score ) { + this.result = result; + this.score = score; + } + + public T getResult() { + return result; + } + + public double getScore() { + return score; + } + + @Override + public int hashCode() { + return Objects.hash( result ); + } + + @Override + public boolean equals( Object obj ) { + if ( this == obj ) { + return true; + } + if ( !( obj instanceof OntologySearchResult ) ) { + return false; + } + return Objects.equals( result, ( ( OntologySearchResult ) obj ).result ); + } + + @Override + public int compare( OntologySearchResult searchResult, OntologySearchResult t1 ) { + return Double.compare( searchResult.score, t1.score ); + } +} diff --git a/test/ubic/basecode/ontology/AbstractOntologyTest.java b/test/ubic/basecode/ontology/AbstractOntologyTest.java index b9eb42c4..8face625 100644 --- a/test/ubic/basecode/ontology/AbstractOntologyTest.java +++ b/test/ubic/basecode/ontology/AbstractOntologyTest.java @@ -17,12 +17,9 @@ public class AbstractOntologyTest { protected static Path tempDir; - private static String prevCacheDir, prevIndexDir; @BeforeClass public static void setUpOntologyCacheDir() throws IOException { - prevCacheDir 
= Configuration.getString( "ontology.cache.dir" ); - prevIndexDir = Configuration.getString( "ontology.index.dir" ); tempDir = Files.createTempDirectory( "baseCode" ); Configuration.setString( "ontology.cache.dir", tempDir.resolve( "ontologyCache" ).toAbsolutePath().toString() ); Configuration.setString( "ontology.index.dir", tempDir.resolve( "searchIndices" ).toAbsolutePath().toString() ); @@ -33,8 +30,8 @@ public static void clearOntologyCacheDir() throws IOException { try { PathUtils.deleteDirectory( tempDir ); } finally { - Configuration.setString( "ontology.cache.dir", prevCacheDir ); - Configuration.setString( "ontology.index.dir", prevIndexDir ); + Configuration.reset( "ontology.cache.dir" ); + Configuration.reset( "ontology.index.dir" ); } } } diff --git a/test/ubic/basecode/ontology/jena/OntologyLoaderTest.java b/test/ubic/basecode/ontology/jena/OntologyLoaderTest.java index 7430ce0c..3153017c 100644 --- a/test/ubic/basecode/ontology/jena/OntologyLoaderTest.java +++ b/test/ubic/basecode/ontology/jena/OntologyLoaderTest.java @@ -19,6 +19,7 @@ import ubic.basecode.ontology.AbstractOntologyTest; import ubic.basecode.ontology.model.OntologyTerm; import ubic.basecode.ontology.providers.GenericOntologyService; +import ubic.basecode.ontology.search.OntologySearchResult; import java.io.File; import java.io.InputStream; @@ -62,7 +63,7 @@ public void testCacheOntologyToDisk() throws Exception { assertFalse( OntologyLoader.getTmpDiskCachePath( name ).exists() ); assertFalse( OntologyLoader.getOldDiskCachePath( name ).exists() ); - Collection r = s.findTerm( "Mouse" ); + Collection> r = s.findTerm( "Mouse" ); assertFalse( r.isEmpty() ); // Recreate OntologyService using this cache file diff --git a/test/ubic/basecode/ontology/jena/OntologySearchTest.java b/test/ubic/basecode/ontology/jena/OntologySearchTest.java index 20bca5da..f04e316b 100644 --- a/test/ubic/basecode/ontology/jena/OntologySearchTest.java +++ b/test/ubic/basecode/ontology/jena/OntologySearchTest.java 
@@ -16,20 +16,21 @@ import com.hp.hpl.jena.ontology.OntClass; import com.hp.hpl.jena.ontology.OntModel; -import com.hp.hpl.jena.shared.JenaException; import com.hp.hpl.jena.vocabulary.OWL2; +import com.hp.hpl.jena.vocabulary.RDFS; +import org.assertj.core.api.Assertions; import org.junit.Test; import ubic.basecode.ontology.AbstractOntologyTest; import java.io.InputStream; import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; import java.util.Set; import java.util.zip.GZIPInputStream; import static java.util.Objects.requireNonNull; import static org.junit.Assert.*; -import static org.mockito.ArgumentMatchers.any; -import static org.mockito.Mockito.*; /** * Most of these tests were moved over from Gemma. @@ -43,17 +44,32 @@ public final void testIndexing() throws Exception { InputStream is = new GZIPInputStream( requireNonNull( this.getClass().getResourceAsStream( "/data/mged.owl.gz" ) ) ); OntModel model = OntologyLoader.loadMemoryModel( is, "owl-test", false ); - SearchIndex index = OntologyIndexer.indexOntology( "MGEDTEST", model, true ); + HashSet indexableProperties = new HashSet<>( OntologyIndexer.DEFAULT_INDEXABLE_PROPERTIES ); + indexableProperties.add( new OntologyIndexer.IndexableProperty( RDFS.comment, true ) ); + SearchIndex index = OntologyIndexer.indexOntology( "MGEDTEST", model, indexableProperties, Collections.emptySet(), true ); - Collection> name = OntologySearch.matchClasses( model, index, "Bedding" ).toSet(); + Collection name = index.searchClasses( model, "Bedding" ).toSet(); - assertEquals( 2, name.size() ); + assertEquals( 1, name.size() ); index.close(); - index = OntologyIndexer.indexOntology( "MGEDTEST", model, true ); - name = OntologySearch.matchClasses( model, index, "Bedding" ).toSet(); + index = OntologyIndexer.indexOntology( "MGEDTEST", model, indexableProperties, Collections.emptySet(), true ); + name = index.searchClasses( model, "Bedding" ).toSet(); - assertEquals( 2, name.size() ); + assertEquals( 
1, name.size() ); + index.close(); + } + + @Test + public void testStemming() throws Exception { + InputStream is = new GZIPInputStream( requireNonNull( this.getClass().getResourceAsStream( "/data/mged.owl.gz" ) ) ); + OntModel model = OntologyLoader.loadMemoryModel( is, "owl-test", false ); + HashSet indexableProperties = new HashSet<>( OntologyIndexer.DEFAULT_INDEXABLE_PROPERTIES ); + indexableProperties.add( new OntologyIndexer.IndexableProperty( RDFS.comment, true ) ); + SearchIndex index = OntologyIndexer.indexOntology( "MGEDTEST", model, indexableProperties, Collections.emptySet(), true ); + Set results = index.searchClasses( model, "bed" ).toSet(); + Assertions.assertThat( results ).extracting( sr -> sr.result.as( OntClass.class ).getURI() ) + .containsExactly( "http://mged.sourceforge.net/ontologies/MGEDOntology.owl#Bedding" ); index.close(); } @@ -69,22 +85,24 @@ public final void testOmitBadPredicates() throws Exception { model = OntologyLoader.loadMemoryModel( is, "NIFTEST" ); } - SearchIndex index = OntologyIndexer.indexOntology( "NIFTEST", model, true ); + HashSet indexableProperties = new HashSet<>( OntologyIndexer.DEFAULT_INDEXABLE_PROPERTIES ); + indexableProperties.add( new OntologyIndexer.IndexableProperty( RDFS.comment, true ) ); + SearchIndex index = OntologyIndexer.indexOntology( "NIFTEST", model, indexableProperties, Collections.emptySet(), true ); - Collection> name = OntologySearch.matchClasses( model, index, "Organ" ).toSet(); + Collection name = index.searchClasses( model, "Organ" ).toSet(); // for ( OntClass ontologyTerm : name ) { // log.debug( ontologyTerm.toString() ); // } // should get : Organ, Human Tissue and Organ Resource for Research, United Network for Organ Sharing assertEquals( 3, name.size() ); - name = OntologySearch.matchClasses( model, index, "Anatomical entity" ).toSet(); + name = index.searchClasses( model, "Anatomical entity" ).toSet(); // for ( OntClass ontologyTerm : name ) { // log.debug( ontologyTerm.toString() ); 
// } assertEquals( 1, name.size() ); - name = OntologySearch.matchClasses( model, index, "liver" ).toSet(); // this is an "example" that we want to avoid + name = index.searchClasses( model, "liver" ).toSet(); // this is an "example" that we want to avoid // leading to "Organ". // for ( OntClass ontologyTerm : name ) { @@ -106,16 +124,16 @@ public final void testOmitBadPredicates2() throws Exception { model = OntologyLoader.loadMemoryModel( is, "EFTEST" ); } - SearchIndex index = OntologyIndexer.indexOntology( "EFTEST", model, true ); + SearchIndex index = OntologyIndexer.indexOntology( "EFTEST", model, Collections.emptySet(), true ); // positive control - Collection> searchResults = OntologySearch.matchClasses( model, index, "monocyte" ).toSet(); + Collection searchResults = index.searchClasses( model, "monocyte" ).toSet(); assertFalse( "Should have found something for 'monocyte'", searchResults.isEmpty() ); assertEquals( 1, searchResults.size() ); // this is a "definition" that we want to avoid leading to "Monocyte". 
- searchResults = OntologySearch.matchClasses( model, index, "liver" ).toSet(); - for ( OntologySearch.SearchResult ontologyTerm : searchResults ) { + searchResults = index.searchClasses( model, "liver" ).toSet(); + for ( SearchIndex.JenaSearchResult ontologyTerm : searchResults ) { fail( "Should not have found " + ontologyTerm.toString() ); } @@ -130,15 +148,15 @@ public final void testOmitDefinitions() throws Exception { model = OntologyLoader.loadMemoryModel( is, "DO_TEST" ); } - SearchIndex index = OntologyIndexer.indexOntology( "DO_TEST", model, true ); + SearchIndex index = OntologyIndexer.indexOntology( "DO_TEST", model, Collections.emptySet(), true ); // positive control - Set> searchResults = OntologySearch.matchClasses( model, index, "acute leukemia" ).toSet(); + Set searchResults = index.searchClasses( model, "acute leukemia" ).toSet(); assertFalse( "Should have found something for 'acute leukemia'", searchResults.isEmpty() ); // this is a "definition" that we want to avoid leading to "acute leukemia". 
- searchResults = OntologySearch.matchClasses( model, index, "liver" ).toSet(); - for ( OntologySearch.SearchResult ontologyTerm : searchResults ) { + searchResults = index.searchClasses( model, "liver" ).toSet(); + for ( SearchIndex.JenaSearchResult ontologyTerm : searchResults ) { fail( "Should not have found " + ontologyTerm.toString() ); } @@ -153,33 +171,36 @@ public final void testOmitDefinitions2() throws Exception { model = OntologyLoader.loadMemoryModel( is, "NIFORG_TEST", false ); } - SearchIndex index = OntologyIndexer.indexOntology( "NIFORG_TEST", model, true ); + HashSet indexableProperties = new HashSet<>( OntologyIndexer.DEFAULT_INDEXABLE_PROPERTIES ); + indexableProperties.add( new OntologyIndexer.IndexableProperty( RDFS.comment, true ) ); + SearchIndex index = OntologyIndexer.indexOntology( "NIFORG_TEST", model, indexableProperties, Collections.emptySet(), true ); // positive control - Collection> searchResults = OntologySearch.matchClasses( model, index, "Mammal" ).toSet(); + Collection searchResults = index.searchClasses( model, "Mammal" ).toSet(); assertFalse( "Should have found something for 'Mammal'", searchResults.isEmpty() ); // this is a "definition" that we want to avoid leading to "acute leukemia". 
- searchResults = OntologySearch.matchClasses( model, index, "skin" ).toSet(); - for ( OntologySearch.SearchResult ontologyTerm : searchResults ) { + searchResults = index.searchClasses( model, "skin" ).toSet(); + for ( SearchIndex.JenaSearchResult ontologyTerm : searchResults ) { fail( "Should not have found " + ontologyTerm.toString() + " for 'skin'" ); } - searchResults = OntologySearch.matchClasses( model, index, "approximate" ).toSet(); - for ( OntologySearch.SearchResult ontologyTerm : searchResults ) { + searchResults = index.searchClasses( model, "approximate" ).toSet(); + for ( SearchIndex.JenaSearchResult ontologyTerm : searchResults ) { fail( "Should not have found " + ontologyTerm.toString() + " for 'approximate'" ); } - searchResults = OntologySearch.matchClasses( model, index, "Bug" ).toSet(); - for ( OntologySearch.SearchResult ontologyTerm : searchResults ) { + searchResults = index.searchClasses( model, "Bug" ).toSet(); + for ( SearchIndex.JenaSearchResult ontologyTerm : searchResults ) { fail( "Should not have found " + ontologyTerm.toString() + " for 'Bug'" ); } - searchResults = OntologySearch.matchClasses( model, index, "birnlex_2" ) + searchResults = index.searchClasses( model, "birnlex_2" ) .toSet(); - assertEquals( 1, searchResults.size() ); - assertTrue( searchResults.iterator().next().result.hasLiteral( OWL2.deprecated, true ) ); - + Assertions.assertThat( searchResults ).hasSize( 1 ).extracting( sr -> sr.result ) + .satisfiesOnlyOnce( r -> { + assertTrue( r.as( OntClass.class ).hasLiteral( OWL2.deprecated, true ) ); + } ); index.close(); } @@ -191,15 +212,15 @@ public final void testOmitDefinitions3() throws Exception { model = OntologyLoader.loadMemoryModel( is, "OBI_TEST" ); } - SearchIndex index = OntologyIndexer.indexOntology( "OBI_TEST", model, true ); + SearchIndex index = OntologyIndexer.indexOntology( "OBI_TEST", model, Collections.emptySet(), true ); // positive control - Set> searchResults = OntologySearch.matchClasses( model, 
index, "irradiation" ).toSet(); + Set searchResults = index.searchClasses( model, "irradiation" ).toSet(); assertFalse( "Should have found something for 'irradiation'", searchResults.isEmpty() ); // this is a "definition" that we want to avoid leading to "acute leukemia". - searchResults = OntologySearch.matchClasses( model, index, "skin" ).toSet(); - for ( OntologySearch.SearchResult ontologyTerm : searchResults ) { + searchResults = index.searchClasses( model, "skin" ).toSet(); + for ( SearchIndex.JenaSearchResult ontologyTerm : searchResults ) { fail( "Should not have found " + ontologyTerm.toString() + " for 'skin'" ); } @@ -213,15 +234,15 @@ public final void testOmitDefinitions4() throws Exception { OntModel model = OntologyLoader.loadMemoryModel( is, "NIFAN_TEST2", false ); is.close(); - SearchIndex index = OntologyIndexer.indexOntology( "NIFAN_TEST2", model, true ); + SearchIndex index = OntologyIndexer.indexOntology( "NIFAN_TEST2", model, Collections.emptySet(), true ); // positive control - Collection> searchResults = OntologySearch.matchClasses( model, index, "eye" ).toSet(); + Collection searchResults = index.searchClasses( model, "eye" ).toSet(); assertFalse( "Should have found something for 'eye'", searchResults.isEmpty() ); // this is a "definition" that we want to avoid leading to "brain" - searchResults = OntologySearch.matchClasses( model, index, "muscle" ).toSet(); - for ( OntologySearch.SearchResult ontologyTerm : searchResults ) { + searchResults = index.searchClasses( model, "muscle" ).toSet(); + for ( SearchIndex.JenaSearchResult ontologyTerm : searchResults ) { fail( "Should not have found " + ontologyTerm.toString() + " for 'muscle'" ); } @@ -234,60 +255,30 @@ public final void testPersistence() throws Exception { InputStream is = new GZIPInputStream( requireNonNull( this.getClass().getResourceAsStream( "/data/mged.owl.gz" ) ) ); OntModel model = OntologyLoader.loadMemoryModel( is, "owl-test", false ); - SearchIndex index = 
OntologyIndexer.indexOntology( "MGEDTEST", model, false ); + HashSet indexableProperties = new HashSet<>( OntologyIndexer.DEFAULT_INDEXABLE_PROPERTIES ); + indexableProperties.add( new OntologyIndexer.IndexableProperty( RDFS.comment, true ) ); + + SearchIndex index = OntologyIndexer.indexOntology( "MGEDTEST", model, indexableProperties, Collections.emptySet(), false ); index.close(); // now load it off disk - index = OntologyIndexer.getSubjectIndex( "MGEDTEST" ); + index = OntologyIndexer.getSubjectIndex( "MGEDTEST", indexableProperties, Collections.emptySet() ); assertNotNull( index ); - Collection> name = OntologySearch.matchClasses( model, index, "bedding" ).toSet(); - assertEquals( 2, name.size() ); + Collection name = index.searchClasses( model, "bedding" ).toSet(); + assertEquals( 1, name.size() ); // test wildcard. Works with stemmed term, wild card doesn't do anything - name = OntologySearch.matchClasses( model, index, "bed*" ).toSet(); + name = index.searchClasses( model, "bed*" ).toSet(); assertEquals( 2, name.size() ); // stemmed term. - name = OntologySearch.matchClasses( model, index, "bed" ).toSet(); - assertEquals( 2, name.size() ); + name = index.searchClasses( model, "bed" ).toSet(); + assertEquals( 1, name.size() ); - name = OntologySearch.matchClasses( model, index, "beddin*" ).toSet(); + name = index.searchClasses( model, "beddin*" ).toSet(); assertEquals( 2, name.size() ); index.close(); } - - @Test - public final void matchClasses_whenIndexRaisesJenaException_thenWrapItWithOntologyJenaSearchException() { - OntModel model = mock( OntModel.class ); - SearchIndex index = mock( SearchIndex.class ); - when( index.search( any() ) ).thenThrow( new JenaException( "Some random exception raised by Jena." 
) ); - OntologySearchJenaException e = assertThrows( OntologySearchJenaException.class, () -> OntologySearch.matchClasses( model, index, "test" ) ); - assertEquals( "test", e.getQuery() ); - assertEquals( "Some random exception raised by Jena.", e.getCause().getMessage() ); - verify( index ).search( "test" ); - } - - @Test - public final void matchIndividuals_whenIndexRaisesJenaException_thenWrapItWithOntologyJenaSearchException() { - OntModel model = mock( OntModel.class ); - SearchIndex index = mock( SearchIndex.class ); - when( index.search( any() ) ).thenThrow( new JenaException( "Some random exception raised by Jena." ) ); - OntologySearchJenaException e = assertThrows( OntologySearchJenaException.class, () -> OntologySearch.matchIndividuals( model, index, "test" ) ); - assertEquals( "test", e.getQuery() ); - assertEquals( "Some random exception raised by Jena.", e.getCause().getMessage() ); - verify( index ).search( "test" ); - } - - @Test - public final void matchResources_whenIndexRaisesJenaException_thenWrapItWithOntologyJenaSearchException() { - OntModel model = mock( OntModel.class ); - SearchIndex index = mock( SearchIndex.class ); - when( index.search( any() ) ).thenThrow( new JenaException( "Some random exception raised by Jena." 
) ); - OntologySearchJenaException e = assertThrows( OntologySearchJenaException.class, () -> OntologySearch.matchIndividuals( model, index, "test" ) ); - assertEquals( "test", e.getQuery() ); - assertEquals( "Some random exception raised by Jena.", e.getCause().getMessage() ); - verify( index ).search( "test" ); - } } diff --git a/test/ubic/basecode/ontology/jena/UberonOntologySearchTest.java b/test/ubic/basecode/ontology/jena/UberonOntologySearchTest.java index f0c41fc5..c0782332 100644 --- a/test/ubic/basecode/ontology/jena/UberonOntologySearchTest.java +++ b/test/ubic/basecode/ontology/jena/UberonOntologySearchTest.java @@ -3,12 +3,14 @@ import com.hp.hpl.jena.ontology.OntClass; import com.hp.hpl.jena.ontology.OntModel; import com.hp.hpl.jena.ontology.OntModelSpec; +import com.hp.hpl.jena.vocabulary.RDFS; import org.junit.BeforeClass; import org.junit.Test; import ubic.basecode.ontology.search.OntologySearchException; import java.io.IOException; import java.io.InputStream; +import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Set; @@ -26,7 +28,9 @@ public class UberonOntologySearchTest { public static void setUpUberon() throws IOException { try ( InputStream is = new GZIPInputStream( requireNonNull( OntologySearchTest.class.getResourceAsStream( "/data/uberon.owl.gz" ) ) ) ) { uberon = OntologyLoader.loadMemoryModel( is, "UBERON_TEST2", true, OntModelSpec.OWL_MEM ); - uberonIndex = OntologyIndexer.indexOntology( "UBERON_TEST2", uberon, false ); + HashSet indexableProperties = new HashSet<>( OntologyIndexer.DEFAULT_INDEXABLE_PROPERTIES ); + indexableProperties.add( new OntologyIndexer.IndexableProperty( RDFS.comment, true ) ); + uberonIndex = OntologyIndexer.indexOntology( "UBERON_TEST2", uberon, indexableProperties, Collections.emptySet(), true ); } } @@ -34,7 +38,7 @@ public static void setUpUberon() throws IOException { public void testOmitDefinition() throws OntologySearchException { OntClass brain = 
uberon.getOntClass( "http://purl.obolibrary.org/obo/UBERON_0000955" ); assertNotNull( brain ); - Set> searchResults = OntologySearch.matchClasses( uberon, uberonIndex, "brain" ).toSet(); + Set searchResults = uberonIndex.searchClasses( uberon, "brain" ).toSet(); assertEquals( 128, searchResults.size() ); } @@ -42,9 +46,8 @@ public void testOmitDefinition() throws OntologySearchException { public void testScore() throws OntologySearchException { OntClass brain = uberon.getOntClass( "http://purl.obolibrary.org/obo/UBERON_0000955" ); assertNotNull( brain ); - List> searchResults = OntologySearch.matchClasses( uberon, uberonIndex, "brain" ).toList(); - assertEquals( 446, searchResults.size() ); - assertEquals( 3.33, searchResults.get( 0 ).score, 0.01 ); - assertEquals( 128, new HashSet<>( searchResults ).size() ); + List searchResults = uberonIndex.searchClasses( uberon, "brain" ).toList(); + assertEquals( 128, searchResults.size() ); + assertEquals( 3.85, searchResults.get( 0 ).score, 0.01 ); } } diff --git a/test/ubic/basecode/ontology/providers/GenericOntologyServiceTest.java b/test/ubic/basecode/ontology/providers/GenericOntologyServiceTest.java index 09f25e7d..52e23fc0 100644 --- a/test/ubic/basecode/ontology/providers/GenericOntologyServiceTest.java +++ b/test/ubic/basecode/ontology/providers/GenericOntologyServiceTest.java @@ -22,6 +22,7 @@ import org.junit.Test; import ubic.basecode.ontology.AbstractOntologyTest; import ubic.basecode.ontology.model.OntologyTerm; +import ubic.basecode.ontology.search.OntologySearchResult; import ubic.basecode.util.Configuration; import java.net.URL; @@ -44,7 +45,7 @@ public void testGenericOntologyServiceMem() throws Exception { s1.initialize( true, false ); GenericOntologyService s = s1; - Collection r = s.findTerm( "Mouse" ); + Collection> r = s.findTerm( "Mouse" ); assertFalse( r.isEmpty() ); } diff --git a/test/ubic/basecode/ontology/providers/ObiServiceTest.java b/test/ubic/basecode/ontology/providers/ObiServiceTest.java 
index 6dd8752d..2ae4f95e 100644 --- a/test/ubic/basecode/ontology/providers/ObiServiceTest.java +++ b/test/ubic/basecode/ontology/providers/ObiServiceTest.java @@ -19,6 +19,7 @@ import ubic.basecode.ontology.model.OntologyIndividual; import ubic.basecode.ontology.model.OntologyResource; import ubic.basecode.ontology.model.OntologyTerm; +import ubic.basecode.ontology.search.OntologySearchResult; import java.util.Collection; @@ -38,13 +39,13 @@ public void testLoadAndSearch() throws Exception { assertTrue( m.isOntologyLoaded() ); - Collection hits = m.findTerm( "batch" ); + Collection> hits = m.findTerm( "batch" ); assertFalse( hits.isEmpty() ); - Collection ihits = m.findIndividuals( "batch" ); + Collection> ihits = m.findIndividuals( "batch" ); assertFalse( ihits.isEmpty() ); - Collection rhits = m.findResources( "batch" ); + Collection> rhits = m.findResources( "batch" ); assertFalse( rhits.isEmpty() ); } } diff --git a/test/ubic/basecode/ontology/providers/UberonOntologyServiceTest.java b/test/ubic/basecode/ontology/providers/UberonOntologyServiceTest.java index 9ae9754e..709d79f5 100644 --- a/test/ubic/basecode/ontology/providers/UberonOntologyServiceTest.java +++ b/test/ubic/basecode/ontology/providers/UberonOntologyServiceTest.java @@ -7,11 +7,13 @@ import ubic.basecode.ontology.OntologyTermTest; import ubic.basecode.ontology.model.OntologyTerm; import ubic.basecode.ontology.search.OntologySearchException; +import ubic.basecode.ontology.search.OntologySearchResult; import java.io.IOException; import java.io.InputStream; import java.util.Arrays; import java.util.Collection; +import java.util.stream.Collectors; import java.util.zip.GZIPInputStream; import static java.util.Objects.requireNonNull; @@ -100,8 +102,8 @@ public void testGetChildrenFromMultipleTerms() { @Test public void testGetChildrenFromMultipleTermsWithSearch() throws OntologySearchException { - Collection terms = uberon.findTerm( "brain" ); - Collection matches = uberon.getChildren( terms, false, 
true ); + Collection> terms = uberon.findTerm( "brain" ); + Collection matches = uberon.getChildren( terms.stream().map( OntologySearchResult::getResult ).collect( Collectors.toSet() ), false, true ); assertEquals( 1870, matches.size() ); } @@ -109,8 +111,8 @@ public void testGetChildrenFromMultipleTermsWithSearch() throws OntologySearchEx public void testFindTerm() throws OntologySearchException { assertEquals( 123, uberon.findTerm( "brain" ).size() ); assertEquals( 128, uberon.findTerm( "brain", true ).size() ); - OntologyTerm firstResult = uberon.findTerm( "brain" ).iterator().next(); - assertNotNull( firstResult.getScore() ); + OntologySearchResult firstResult = uberon.findTerm( "brain" ).iterator().next(); + assertNotNull( firstResult ); assertEquals( 2.8577, firstResult.getScore(), 0.0001 ); } } From 975cd6122ca3a19096823cea22b33024859a48ec Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Thu, 25 Apr 2024 14:24:29 -0700 Subject: [PATCH 07/19] ontology: Add an option to specify the maximum number of search results --- .../jena/AbstractOntologyService.java | 12 ++--- .../ontology/jena/OntologyIndexer.java | 54 +++++++++++++++---- .../basecode/ontology/jena/SearchIndex.java | 39 +++----------- .../ontology/providers/OntologyService.java | 18 +++---- .../ontology/jena/OntologyLoaderTest.java | 4 +- .../ontology/jena/OntologySearchTest.java | 46 ++++++++-------- .../jena/UberonOntologySearchTest.java | 4 +- .../providers/GenericOntologyServiceTest.java | 2 +- .../ontology/providers/ObiServiceTest.java | 6 +-- .../providers/UberonOntologyServiceTest.java | 8 +-- 10 files changed, 99 insertions(+), 94 deletions(-) diff --git a/src/ubic/basecode/ontology/jena/AbstractOntologyService.java b/src/ubic/basecode/ontology/jena/AbstractOntologyService.java index 7a430f9c..0cd59145 100644 --- a/src/ubic/basecode/ontology/jena/AbstractOntologyService.java +++ b/src/ubic/basecode/ontology/jena/AbstractOntologyService.java @@ -303,7 +303,7 @@ private static 
boolean hasCauseMatching( Throwable t, Predicate predi } @Override - public Set> findIndividuals( String search, boolean keepObsoletes ) throws + public Set> findIndividuals( String search, int maxResults, boolean keepObsoletes ) throws OntologySearchException { State state = this.state; if ( state == null ) { @@ -314,14 +314,14 @@ public Set> findIndividuals( String sea log.warn( "Attempt to search {} when index is null, no results will be returned.", this ); return Collections.emptySet(); } - return state.index.searchIndividuals( state.model, search ) + return state.index.searchIndividuals( state.model, search, maxResults ) .mapWith( i -> new OntologySearchResult<>( ( OntologyIndividual ) new OntologyIndividualImpl( i.result.as( Individual.class ), state.additionalRestrictions ), i.score ) ) .filterKeep( where( ontologyTerm -> keepObsoletes || !ontologyTerm.getResult().isObsolete() ) ) .toSet(); } @Override - public Collection> findResources( String searchString, boolean keepObsoletes ) throws + public Collection> findResources( String searchString, int maxResults, boolean keepObsoletes ) throws OntologySearchException { State state = this.state; if ( state == null ) { @@ -332,7 +332,7 @@ public Collection> findResources( String log.warn( "Attempt to search {} when index is null, no results will be returned.", this ); return Collections.emptySet(); } - return state.index.searchResources( state.model, searchString ) + return state.index.search( state.model, searchString, maxResults ) .filterKeep( where( r -> r.result.canAs( OntClass.class ) || r.result.canAs( Individual.class ) ) ) .mapWith( r -> { try { @@ -354,7 +354,7 @@ public Collection> findResources( String } @Override - public Collection> findTerm( String search, boolean keepObsoletes ) throws OntologySearchException { + public Collection> findTerm( String search, int maxResults, boolean keepObsoletes ) throws OntologySearchException { State state = this.state; if ( state == null ) { log.warn( "Ontology {} 
is not ready, no terms will be returned.", this ); @@ -364,7 +364,7 @@ public Collection> findTerm( String search, b log.warn( "Attempt to search {} when index is null, no results will be returned.", this ); return Collections.emptySet(); } - return state.index.searchClasses( state.model, search ) + return state.index.searchClasses( state.model, search, maxResults ) .mapWith( r -> new OntologySearchResult<>( ( OntologyTerm ) new OntologyTermImpl( r.result.as( OntClass.class ), state.additionalRestrictions ), r.score ) ) .filterKeep( where( ontologyTerm -> keepObsoletes || !ontologyTerm.getResult().isObsolete() ) ) .toSet(); diff --git a/src/ubic/basecode/ontology/jena/OntologyIndexer.java b/src/ubic/basecode/ontology/jena/OntologyIndexer.java index 59e6ca70..9eadb5af 100644 --- a/src/ubic/basecode/ontology/jena/OntologyIndexer.java +++ b/src/ubic/basecode/ontology/jena/OntologyIndexer.java @@ -18,9 +18,12 @@ */ package ubic.basecode.ontology.jena; -import com.hp.hpl.jena.graph.NodeFactory; import com.hp.hpl.jena.ontology.OntModel; -import com.hp.hpl.jena.rdf.model.*; +import com.hp.hpl.jena.ontology.OntResource; +import com.hp.hpl.jena.rdf.model.Property; +import com.hp.hpl.jena.rdf.model.Resource; +import com.hp.hpl.jena.rdf.model.Statement; +import com.hp.hpl.jena.rdf.model.StmtIterator; import com.hp.hpl.jena.shared.JenaException; import com.hp.hpl.jena.util.iterator.ExtendedIterator; import com.hp.hpl.jena.util.iterator.WrappedIterator; @@ -32,15 +35,14 @@ import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.NumericField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.MultiReader; import org.apache.lucene.queryParser.MultiFieldQueryParser; import org.apache.lucene.queryParser.ParseException; -import 
org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.*; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; @@ -71,7 +73,9 @@ class OntologyIndexer { */ private static final String ID_FIELD = "_ID", - LOCAL_NAME_FIELD = "_LOCAL_NAME"; + LOCAL_NAME_FIELD = "_LOCAL_NAME", + IS_CLASS_FIELD = "_IS_CLASS", + IS_INDIVIDUAL_FIELD = "_IS_INDIVIDUAL"; public static class IndexableProperty { private final Property property; @@ -203,6 +207,16 @@ private static Directory index( String name, OntModel model, Analyzer analyzer, Document doc = new Document(); doc.add( new Field( ID_FIELD, id, Field.Store.YES, Field.Index.NOT_ANALYZED ) ); doc.add( new Field( LOCAL_NAME_FIELD, subject.getLocalName(), Field.Store.NO, Field.Index.NOT_ANALYZED ) ); + boolean isClass, isIndividual; + if ( subject.canAs( OntResource.class ) ) { + isClass = subject.as( OntResource.class ).isClass(); + isIndividual = subject.as( OntResource.class ).isIndividual(); + } else { + isClass = false; + isIndividual = false; + } + doc.add( new NumericField( IS_CLASS_FIELD ).setIntValue( isClass ? 1 : 0 ) ); + doc.add( new NumericField( IS_INDIVIDUAL_FIELD ).setIntValue( isIndividual ? 
1 : 0 ) ); for ( IndexableProperty prop : indexableProperties ) { StmtIterator listStatements = subject.listProperties( prop.property ); while ( listStatements.hasNext() ) { @@ -242,26 +256,44 @@ public LuceneSearchIndex( String[] searchableFields, IndexReader index, Analyzer } @Override - public ExtendedIterator search( OntModel model, String queryString ) throws OntologySearchException { + public ExtendedIterator search( OntModel model, String queryString, int maxResults ) throws OntologySearchException { + return search( model, queryString, null, maxResults ); + } + + @Override + public ExtendedIterator searchClasses( OntModel model, String queryString, int maxResults ) throws OntologySearchException { + return search( model, queryString, NumericRangeFilter.newIntRange( IS_CLASS_FIELD, 1, 1, true, true ), maxResults ); + } + + @Override + public ExtendedIterator searchIndividuals( OntModel model, String queryString, int maxResults ) throws OntologySearchException { + return search( model, queryString, NumericRangeFilter.newIntRange( IS_INDIVIDUAL_FIELD, 1, 1, true, true ), maxResults ); + } + + private ExtendedIterator search( OntModel model, String queryString, @Nullable Filter filter, int maxResults ) throws OntologySearchException { if ( StringUtils.isBlank( queryString ) ) { throw new IllegalArgumentException( "Query cannot be blank" ); } StopWatch timer = StopWatch.createStarted(); try { Query query = new MultiFieldQueryParser( Version.LUCENE_36, searchableFields, analyzer ).parse( queryString ); - TopDocs hits = new IndexSearcher( index ).search( query, 500 ); // in general, results are found in both regular and std index, so we divide by 2 the initial capacity + // we also have to double the number of hits to account for duplicates + TopDocs hits = new IndexSearcher( index ).search( query, filter, maxResults * 3 ); Set seenIds = new HashSet<>( hits.totalHits / 2 ); List resources = new ArrayList<>( hits.totalHits / 2 ); - for ( int i = 0; i < 
hits.totalHits; i++ ) { + for ( int i = 0; i < hits.scoreDocs.length; i++ ) { Document doc = index.document( hits.scoreDocs[i].doc ); String id = doc.get( ID_FIELD ); if ( seenIds.contains( id ) ) { continue; } - RDFNode node = model.getRDFNode( NodeFactory.createURI( id ) ); - resources.add( new JenaSearchResult( node, hits.scoreDocs[i].score ) ); + Resource res = model.getResource( id ); + resources.add( new JenaSearchResult( res, hits.scoreDocs[i].score ) ); seenIds.add( id ); + if ( seenIds.size() >= maxResults ) { + break; + } } return WrappedIterator.create( resources.iterator() ); } catch ( ParseException e ) { diff --git a/src/ubic/basecode/ontology/jena/SearchIndex.java b/src/ubic/basecode/ontology/jena/SearchIndex.java index cf868858..7b6a3b73 100644 --- a/src/ubic/basecode/ontology/jena/SearchIndex.java +++ b/src/ubic/basecode/ontology/jena/SearchIndex.java @@ -1,22 +1,16 @@ package ubic.basecode.ontology.jena; -import com.hp.hpl.jena.ontology.Individual; -import com.hp.hpl.jena.ontology.OntClass; import com.hp.hpl.jena.ontology.OntModel; -import com.hp.hpl.jena.rdf.model.RDFNode; +import com.hp.hpl.jena.rdf.model.Resource; import com.hp.hpl.jena.util.iterator.ExtendedIterator; import ubic.basecode.ontology.search.OntologySearchException; -import java.util.Objects; - -import static ubic.basecode.ontology.jena.JenaUtils.where; - interface SearchIndex extends AutoCloseable { /** * Find RDF nodes matching the given query string. */ - ExtendedIterator search( OntModel model, String queryString ) throws OntologySearchException; + ExtendedIterator search( OntModel model, String queryString, int maxResults ) throws OntologySearchException; /** * Find classes that match the query string. 
@@ -24,11 +18,7 @@ interface SearchIndex extends AutoCloseable { * @param model that goes with the index * @return Collection of OntologyTerm objects */ - default ExtendedIterator searchClasses( OntModel model, String queryString ) throws OntologySearchException { - return search( model, queryString ) - .filterKeep( where( r -> r.result.isURIResource() && r.result.canAs( OntClass.class ) ) ) - .filterKeep( where( Objects::nonNull ) ); - } + ExtendedIterator searchClasses( OntModel model, String queryString, int maxResults ) throws OntologySearchException; /** * Find individuals that match the query string @@ -36,31 +26,14 @@ default ExtendedIterator searchClasses( OntModel model, String * @param model that goes with the index * @return Collection of OntologyTerm objects */ - default ExtendedIterator searchIndividuals( OntModel model, String queryString ) throws OntologySearchException { - return search( model, queryString ) - .filterKeep( where( r -> r.result.isURIResource() && r.result.canAs( Individual.class ) ) ) - .filterKeep( where( Objects::nonNull ) ); - } - - /** - * Find OntologyIndividuals and OntologyTerms that match the query string. Search with a wildcard is attempted - * whenever possible. 
- * - * @param model that goes with the index - * @return Collection of OntologyResource objects - */ - default ExtendedIterator searchResources( OntModel model, String queryString ) throws OntologySearchException { - return search( model, queryString ) - .filterKeep( where( o -> o.result.isURIResource() && o.result.isResource() ) ) - .filterKeep( where( Objects::nonNull ) ); - } + ExtendedIterator searchIndividuals( OntModel model, String queryString, int maxResults ) throws OntologySearchException; class JenaSearchResult { - public final RDFNode result; + public final Resource result; public final double score; - JenaSearchResult( RDFNode result, double score ) { + JenaSearchResult( Resource result, double score ) { this.result = result; this.score = score; } diff --git a/src/ubic/basecode/ontology/providers/OntologyService.java b/src/ubic/basecode/ontology/providers/OntologyService.java index 0729d850..7dbe4386 100644 --- a/src/ubic/basecode/ontology/providers/OntologyService.java +++ b/src/ubic/basecode/ontology/providers/OntologyService.java @@ -183,8 +183,8 @@ enum InferenceMode { *

* Obsolete terms are filtered out. */ - default Collection> findIndividuals( String search ) throws OntologySearchException { - return findIndividuals( search, false ); + default Collection> findIndividuals( String search, int maxResults ) throws OntologySearchException { + return findIndividuals( search, maxResults, false ); } /** @@ -193,7 +193,7 @@ default Collection> findIndividuals( St * @param search search query * @param keepObsoletes retain obsolete terms */ - Set> findIndividuals( String search, boolean keepObsoletes ) throws OntologySearchException; + Set> findIndividuals( String search, int maxResults, boolean keepObsoletes ) throws OntologySearchException; /** * Looks for any resources (terms or individuals) that match the given search string @@ -203,8 +203,8 @@ default Collection> findIndividuals( St * @return results, or an empty collection if the results are empty OR the ontology is not available to be * searched. */ - default Collection> findResources( String searchString ) throws OntologySearchException { - return findResources( searchString, false ); + default Collection> findResources( String searchString, int maxResults ) throws OntologySearchException { + return findResources( searchString, maxResults, false ); } /** @@ -213,15 +213,15 @@ default Collection> findResources( String * @param search search query * @param keepObsoletes retain obsolete terms */ - Collection> findResources( String search, boolean keepObsoletes ) throws OntologySearchException; + Collection> findResources( String search, int maxResults, boolean keepObsoletes ) throws OntologySearchException; /** * Looks for any terms that match the given search string. *

* Obsolete terms are filtered out. */ - default Collection> findTerm( String search ) throws OntologySearchException { - return findTerm( search, false ); + default Collection> findTerm( String search, int maxResults ) throws OntologySearchException { + return findTerm( search, maxResults, false ); } @@ -231,7 +231,7 @@ default Collection> findTerm( String search ) * @param search search query * @param keepObsoletes retain obsolete terms */ - Collection> findTerm( String search, boolean keepObsoletes ) throws OntologySearchException; + Collection> findTerm( String search, int maxResults, boolean keepObsoletes ) throws OntologySearchException; /** * Find a term using an alternative ID. diff --git a/test/ubic/basecode/ontology/jena/OntologyLoaderTest.java b/test/ubic/basecode/ontology/jena/OntologyLoaderTest.java index 3153017c..5260bdbf 100644 --- a/test/ubic/basecode/ontology/jena/OntologyLoaderTest.java +++ b/test/ubic/basecode/ontology/jena/OntologyLoaderTest.java @@ -63,7 +63,7 @@ public void testCacheOntologyToDisk() throws Exception { assertFalse( OntologyLoader.getTmpDiskCachePath( name ).exists() ); assertFalse( OntologyLoader.getOldDiskCachePath( name ).exists() ); - Collection> r = s.findTerm( "Mouse" ); + Collection> r = s.findTerm( "Mouse", 500 ); assertFalse( r.isEmpty() ); // Recreate OntologyService using this cache file @@ -74,7 +74,7 @@ public void testCacheOntologyToDisk() throws Exception { assertFalse( OntologyLoader.getTmpDiskCachePath( name ).exists() ); assertFalse( OntologyLoader.getOldDiskCachePath( name ).exists() ); - r = s.findTerm( "Mouse" ); + r = s.findTerm( "Mouse", 500 ); assertFalse( r.isEmpty() ); // Recreate OntologyService with bad URL and no cache diff --git a/test/ubic/basecode/ontology/jena/OntologySearchTest.java b/test/ubic/basecode/ontology/jena/OntologySearchTest.java index f04e316b..4152b1e1 100644 --- a/test/ubic/basecode/ontology/jena/OntologySearchTest.java +++ b/test/ubic/basecode/ontology/jena/OntologySearchTest.java 
@@ -48,13 +48,13 @@ public final void testIndexing() throws Exception { indexableProperties.add( new OntologyIndexer.IndexableProperty( RDFS.comment, true ) ); SearchIndex index = OntologyIndexer.indexOntology( "MGEDTEST", model, indexableProperties, Collections.emptySet(), true ); - Collection name = index.searchClasses( model, "Bedding" ).toSet(); + Collection name = index.searchClasses( model, "Bedding", 500 ).toSet(); assertEquals( 1, name.size() ); index.close(); index = OntologyIndexer.indexOntology( "MGEDTEST", model, indexableProperties, Collections.emptySet(), true ); - name = index.searchClasses( model, "Bedding" ).toSet(); + name = index.searchClasses( model, "Bedding", 500 ).toSet(); assertEquals( 1, name.size() ); index.close(); @@ -67,7 +67,7 @@ public void testStemming() throws Exception { HashSet indexableProperties = new HashSet<>( OntologyIndexer.DEFAULT_INDEXABLE_PROPERTIES ); indexableProperties.add( new OntologyIndexer.IndexableProperty( RDFS.comment, true ) ); SearchIndex index = OntologyIndexer.indexOntology( "MGEDTEST", model, indexableProperties, Collections.emptySet(), true ); - Set results = index.searchClasses( model, "bed" ).toSet(); + Set results = index.searchClasses( model, "bed", 500 ).toSet(); Assertions.assertThat( results ).extracting( sr -> sr.result.as( OntClass.class ).getURI() ) .containsExactly( "http://mged.sourceforge.net/ontologies/MGEDOntology.owl#Bedding" ); index.close(); @@ -89,20 +89,20 @@ public final void testOmitBadPredicates() throws Exception { indexableProperties.add( new OntologyIndexer.IndexableProperty( RDFS.comment, true ) ); SearchIndex index = OntologyIndexer.indexOntology( "NIFTEST", model, indexableProperties, Collections.emptySet(), true ); - Collection name = index.searchClasses( model, "Organ" ).toSet(); + Collection name = index.searchClasses( model, "Organ", 500 ).toSet(); // for ( OntClass ontologyTerm : name ) { // log.debug( ontologyTerm.toString() ); // } // should get : Organ, Human Tissue and 
Organ Resource for Research, United Network for Organ Sharing assertEquals( 3, name.size() ); - name = index.searchClasses( model, "Anatomical entity" ).toSet(); + name = index.searchClasses( model, "Anatomical entity", 500 ).toSet(); // for ( OntClass ontologyTerm : name ) { // log.debug( ontologyTerm.toString() ); // } assertEquals( 1, name.size() ); - name = index.searchClasses( model, "liver" ).toSet(); // this is an "example" that we want to avoid + name = index.searchClasses( model, "liver", 500 ).toSet(); // this is an "example" that we want to avoid // leading to "Organ". // for ( OntClass ontologyTerm : name ) { @@ -127,12 +127,12 @@ public final void testOmitBadPredicates2() throws Exception { SearchIndex index = OntologyIndexer.indexOntology( "EFTEST", model, Collections.emptySet(), true ); // positive control - Collection searchResults = index.searchClasses( model, "monocyte" ).toSet(); + Collection searchResults = index.searchClasses( model, "monocyte", 500 ).toSet(); assertFalse( "Should have found something for 'monocyte'", searchResults.isEmpty() ); assertEquals( 1, searchResults.size() ); // this is a "definition" that we want to avoid leading to "Monocyte". - searchResults = index.searchClasses( model, "liver" ).toSet(); + searchResults = index.searchClasses( model, "liver", 500 ).toSet(); for ( SearchIndex.JenaSearchResult ontologyTerm : searchResults ) { fail( "Should not have found " + ontologyTerm.toString() ); } @@ -151,11 +151,11 @@ public final void testOmitDefinitions() throws Exception { SearchIndex index = OntologyIndexer.indexOntology( "DO_TEST", model, Collections.emptySet(), true ); // positive control - Set searchResults = index.searchClasses( model, "acute leukemia" ).toSet(); + Set searchResults = index.searchClasses( model, "acute leukemia", 500 ).toSet(); assertFalse( "Should have found something for 'acute leukemia'", searchResults.isEmpty() ); // this is a "definition" that we want to avoid leading to "acute leukemia". 
- searchResults = index.searchClasses( model, "liver" ).toSet(); + searchResults = index.searchClasses( model, "liver", 500 ).toSet(); for ( SearchIndex.JenaSearchResult ontologyTerm : searchResults ) { fail( "Should not have found " + ontologyTerm.toString() ); } @@ -176,26 +176,26 @@ public final void testOmitDefinitions2() throws Exception { SearchIndex index = OntologyIndexer.indexOntology( "NIFORG_TEST", model, indexableProperties, Collections.emptySet(), true ); // positive control - Collection searchResults = index.searchClasses( model, "Mammal" ).toSet(); + Collection searchResults = index.searchClasses( model, "Mammal", 500 ).toSet(); assertFalse( "Should have found something for 'Mammal'", searchResults.isEmpty() ); // this is a "definition" that we want to avoid leading to "acute leukemia". - searchResults = index.searchClasses( model, "skin" ).toSet(); + searchResults = index.searchClasses( model, "skin", 500 ).toSet(); for ( SearchIndex.JenaSearchResult ontologyTerm : searchResults ) { fail( "Should not have found " + ontologyTerm.toString() + " for 'skin'" ); } - searchResults = index.searchClasses( model, "approximate" ).toSet(); + searchResults = index.searchClasses( model, "approximate", 500 ).toSet(); for ( SearchIndex.JenaSearchResult ontologyTerm : searchResults ) { fail( "Should not have found " + ontologyTerm.toString() + " for 'approximate'" ); } - searchResults = index.searchClasses( model, "Bug" ).toSet(); + searchResults = index.searchClasses( model, "Bug", 500 ).toSet(); for ( SearchIndex.JenaSearchResult ontologyTerm : searchResults ) { fail( "Should not have found " + ontologyTerm.toString() + " for 'Bug'" ); } - searchResults = index.searchClasses( model, "birnlex_2" ) + searchResults = index.searchClasses( model, "birnlex_2", 500 ) .toSet(); Assertions.assertThat( searchResults ).hasSize( 1 ).extracting( sr -> sr.result ) .satisfiesOnlyOnce( r -> { @@ -215,11 +215,11 @@ public final void testOmitDefinitions3() throws Exception { 
SearchIndex index = OntologyIndexer.indexOntology( "OBI_TEST", model, Collections.emptySet(), true ); // positive control - Set searchResults = index.searchClasses( model, "irradiation" ).toSet(); + Set searchResults = index.searchClasses( model, "irradiation", 500 ).toSet(); assertFalse( "Should have found something for 'irradiation'", searchResults.isEmpty() ); // this is a "definition" that we want to avoid leading to "acute leukemia". - searchResults = index.searchClasses( model, "skin" ).toSet(); + searchResults = index.searchClasses( model, "skin", 500 ).toSet(); for ( SearchIndex.JenaSearchResult ontologyTerm : searchResults ) { fail( "Should not have found " + ontologyTerm.toString() + " for 'skin'" ); } @@ -237,11 +237,11 @@ public final void testOmitDefinitions4() throws Exception { SearchIndex index = OntologyIndexer.indexOntology( "NIFAN_TEST2", model, Collections.emptySet(), true ); // positive control - Collection searchResults = index.searchClasses( model, "eye" ).toSet(); + Collection searchResults = index.searchClasses( model, "eye", 500 ).toSet(); assertFalse( "Should have found something for 'eye'", searchResults.isEmpty() ); // this is a "definition" that we want to avoid leading to "brain" - searchResults = index.searchClasses( model, "muscle" ).toSet(); + searchResults = index.searchClasses( model, "muscle", 500 ).toSet(); for ( SearchIndex.JenaSearchResult ontologyTerm : searchResults ) { fail( "Should not have found " + ontologyTerm.toString() + " for 'muscle'" ); } @@ -266,18 +266,18 @@ public final void testPersistence() throws Exception { assertNotNull( index ); - Collection name = index.searchClasses( model, "bedding" ).toSet(); + Collection name = index.searchClasses( model, "bedding", 500 ).toSet(); assertEquals( 1, name.size() ); // test wildcard. 
Works with stemmed term, wild card doesn't do anything - name = index.searchClasses( model, "bed*" ).toSet(); + name = index.searchClasses( model, "bed*", 500 ).toSet(); assertEquals( 2, name.size() ); // stemmed term. - name = index.searchClasses( model, "bed" ).toSet(); + name = index.searchClasses( model, "bed", 500 ).toSet(); assertEquals( 1, name.size() ); - name = index.searchClasses( model, "beddin*" ).toSet(); + name = index.searchClasses( model, "beddin*", 500 ).toSet(); assertEquals( 2, name.size() ); index.close(); } diff --git a/test/ubic/basecode/ontology/jena/UberonOntologySearchTest.java b/test/ubic/basecode/ontology/jena/UberonOntologySearchTest.java index c0782332..7c325c5d 100644 --- a/test/ubic/basecode/ontology/jena/UberonOntologySearchTest.java +++ b/test/ubic/basecode/ontology/jena/UberonOntologySearchTest.java @@ -38,7 +38,7 @@ public static void setUpUberon() throws IOException { public void testOmitDefinition() throws OntologySearchException { OntClass brain = uberon.getOntClass( "http://purl.obolibrary.org/obo/UBERON_0000955" ); assertNotNull( brain ); - Set searchResults = uberonIndex.searchClasses( uberon, "brain" ).toSet(); + Set searchResults = uberonIndex.searchClasses( uberon, "brain", 500 ).toSet(); assertEquals( 128, searchResults.size() ); } @@ -46,7 +46,7 @@ public void testOmitDefinition() throws OntologySearchException { public void testScore() throws OntologySearchException { OntClass brain = uberon.getOntClass( "http://purl.obolibrary.org/obo/UBERON_0000955" ); assertNotNull( brain ); - List searchResults = uberonIndex.searchClasses( uberon, "brain" ).toList(); + List searchResults = uberonIndex.searchClasses( uberon, "brain", 500 ).toList(); assertEquals( 128, searchResults.size() ); assertEquals( 3.85, searchResults.get( 0 ).score, 0.01 ); } diff --git a/test/ubic/basecode/ontology/providers/GenericOntologyServiceTest.java b/test/ubic/basecode/ontology/providers/GenericOntologyServiceTest.java index 52e23fc0..2b9f51ae 
100644 --- a/test/ubic/basecode/ontology/providers/GenericOntologyServiceTest.java +++ b/test/ubic/basecode/ontology/providers/GenericOntologyServiceTest.java @@ -45,7 +45,7 @@ public void testGenericOntologyServiceMem() throws Exception { s1.initialize( true, false ); GenericOntologyService s = s1; - Collection> r = s.findTerm( "Mouse" ); + Collection> r = s.findTerm( "Mouse", 500 ); assertFalse( r.isEmpty() ); } diff --git a/test/ubic/basecode/ontology/providers/ObiServiceTest.java b/test/ubic/basecode/ontology/providers/ObiServiceTest.java index 2ae4f95e..032df53e 100644 --- a/test/ubic/basecode/ontology/providers/ObiServiceTest.java +++ b/test/ubic/basecode/ontology/providers/ObiServiceTest.java @@ -39,13 +39,13 @@ public void testLoadAndSearch() throws Exception { assertTrue( m.isOntologyLoaded() ); - Collection> hits = m.findTerm( "batch" ); + Collection> hits = m.findTerm( "batch", 500 ); assertFalse( hits.isEmpty() ); - Collection> ihits = m.findIndividuals( "batch" ); + Collection> ihits = m.findIndividuals( "batch", 500 ); assertFalse( ihits.isEmpty() ); - Collection> rhits = m.findResources( "batch" ); + Collection> rhits = m.findResources( "batch", 500 ); assertFalse( rhits.isEmpty() ); } } diff --git a/test/ubic/basecode/ontology/providers/UberonOntologyServiceTest.java b/test/ubic/basecode/ontology/providers/UberonOntologyServiceTest.java index 709d79f5..ee415186 100644 --- a/test/ubic/basecode/ontology/providers/UberonOntologyServiceTest.java +++ b/test/ubic/basecode/ontology/providers/UberonOntologyServiceTest.java @@ -102,16 +102,16 @@ public void testGetChildrenFromMultipleTerms() { @Test public void testGetChildrenFromMultipleTermsWithSearch() throws OntologySearchException { - Collection> terms = uberon.findTerm( "brain" ); + Collection> terms = uberon.findTerm( "brain", 500 ); Collection matches = uberon.getChildren( terms.stream().map( OntologySearchResult::getResult ).collect( Collectors.toSet() ), false, true ); assertEquals( 1870, 
matches.size() ); } @Test public void testFindTerm() throws OntologySearchException { - assertEquals( 123, uberon.findTerm( "brain" ).size() ); - assertEquals( 128, uberon.findTerm( "brain", true ).size() ); - OntologySearchResult firstResult = uberon.findTerm( "brain" ).iterator().next(); + assertEquals( 123, uberon.findTerm( "brain", 500 ).size() ); + assertEquals( 128, uberon.findTerm( "brain", 500, true ).size() ); + OntologySearchResult firstResult = uberon.findTerm( "brain", 500 ).iterator().next(); assertNotNull( firstResult ); assertEquals( 2.8577, firstResult.getScore(), 0.0001 ); } From 5a31f6f51b971626d1fa5805302cc0bc86c0d2d5 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Fri, 26 Apr 2024 11:40:59 -0700 Subject: [PATCH 08/19] More cleanups Eliminate AbstractOntologyMemoryBackedService and introduce two abstract class in the providers package: one for built-in baseCode ontologies and another for general use. --- .../AbstractOntologyMemoryBackedService.java | 91 ------------------- .../jena/AbstractOntologyService.java | 84 ++++++++++++----- .../ontology/jena/OntologyLoader.java | 6 +- .../AbstractBaseCodeOntologyService.java | 48 ++++++++++ .../providers/AbstractOntologyService.java | 9 ++ .../providers/CellLineOntologyService.java | 26 ++---- .../providers/CellTypeOntologyService.java | 28 ++---- .../providers/ChebiOntologyService.java | 27 ++---- .../providers/DiseaseOntologyService.java | 26 ++---- .../ExperimentalFactorOntologyService.java | 24 ++--- .../providers/FMAOntologyService.java | 29 ++---- .../providers/GenericOntologyService.java | 42 +++++++-- .../HumanDevelopmentOntologyService.java | 27 ++---- .../HumanPhenotypeOntologyService.java | 27 ++---- .../MammalianPhenotypeOntologyService.java | 27 ++---- .../providers/MedicOntologyService.java | 11 +-- .../MouseDevelopmentOntologyService.java | 25 ++--- .../providers/NIFSTDOntologyService.java | 10 +- .../ontology/providers/ObiService.java | 25 ++--- 
.../ontology/providers/OntologyService.java | 7 +- .../providers/SequenceOntologyService.java | 27 ++---- .../providers/UberonOntologyService.java | 27 ++---- .../providers/UnitsOntologyService.java | 27 ++---- .../ontology/providers/package-info.java | 6 ++ .../ontology/jena/OntologySearchTest.java | 13 +++ 25 files changed, 291 insertions(+), 408 deletions(-) delete mode 100644 src/ubic/basecode/ontology/jena/AbstractOntologyMemoryBackedService.java create mode 100644 src/ubic/basecode/ontology/providers/AbstractBaseCodeOntologyService.java create mode 100644 src/ubic/basecode/ontology/providers/AbstractOntologyService.java diff --git a/src/ubic/basecode/ontology/jena/AbstractOntologyMemoryBackedService.java b/src/ubic/basecode/ontology/jena/AbstractOntologyMemoryBackedService.java deleted file mode 100644 index 32b96f3b..00000000 --- a/src/ubic/basecode/ontology/jena/AbstractOntologyMemoryBackedService.java +++ /dev/null @@ -1,91 +0,0 @@ -/* - * The baseCode project - * - * Copyright (c) 2013 University of British Columbia - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on - * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the - * specific language governing permissions and limitations under the License. 
- */ -package ubic.basecode.ontology.jena; - -import com.hp.hpl.jena.ontology.OntModelSpec; -import com.hp.hpl.jena.ontology.ProfileRegistry; -import com.hp.hpl.jena.rdf.model.ModelFactory; -import com.hp.hpl.jena.reasoner.ReasonerFactory; -import com.hp.hpl.jena.reasoner.rulesys.OWLFBRuleReasonerFactory; -import com.hp.hpl.jena.reasoner.rulesys.OWLMicroReasonerFactory; -import com.hp.hpl.jena.reasoner.rulesys.OWLMiniReasonerFactory; -import com.hp.hpl.jena.reasoner.transitiveReasoner.TransitiveReasonerFactory; -import ubic.basecode.ontology.model.OntologyModel; -import ubic.basecode.util.Configuration; - -import java.io.IOException; -import java.io.InputStream; - -/** - * This class has some stuff that's specific to in-memory ontologies. Unlike database backed ontologies we don't use a - * pool keeping only one instance of model in memory. - * - * @author paul - */ -public abstract class AbstractOntologyMemoryBackedService extends AbstractOntologyService { - - @Override - protected String getOntologyUrl() { - return Configuration.getString( "url." 
+ getOntologyName() ); - } - - @Override - protected OntologyModel loadModel( boolean processImports, LanguageLevel languageLevel, InferenceMode inferenceMode ) throws IOException { - return new OntologyModelImpl( OntologyLoader.loadMemoryModel( this.getOntologyUrl(), this.getCacheName(), processImports, this.getSpec( languageLevel, inferenceMode ) ) ); - } - - @Override - protected OntologyModel loadModelFromStream( InputStream is, boolean processImports, LanguageLevel languageLevel, InferenceMode inferenceMode ) throws IOException { - return new OntologyModelImpl( OntologyLoader.loadMemoryModel( is, this.getOntologyUrl(), processImports, this.getSpec( languageLevel, inferenceMode ) ) ); - } - - private OntModelSpec getSpec( LanguageLevel languageLevel, InferenceMode inferenceMode ) { - String profile; - switch ( languageLevel ) { - case FULL: - profile = ProfileRegistry.OWL_LANG; - break; - case DL: - profile = ProfileRegistry.OWL_DL_LANG; - break; - case LITE: - profile = ProfileRegistry.OWL_LITE_LANG; - break; - default: - throw new UnsupportedOperationException( String.format( "Unsupported OWL language level %s.", languageLevel ) ); - } - ReasonerFactory reasonerFactory; - switch ( inferenceMode ) { - case FULL: - reasonerFactory = OWLFBRuleReasonerFactory.theInstance(); - break; - case MINI: - reasonerFactory = OWLMiniReasonerFactory.theInstance(); - break; - case MICRO: - reasonerFactory = OWLMicroReasonerFactory.theInstance(); - break; - case TRANSITIVE: - reasonerFactory = TransitiveReasonerFactory.theInstance(); - break; - case NONE: - reasonerFactory = null; - break; - default: - throw new UnsupportedOperationException( String.format( "Unsupported inference level %s.", inferenceMode ) ); - } - return new OntModelSpec( ModelFactory.createMemModelMaker(), null, reasonerFactory, profile ); - } -} diff --git a/src/ubic/basecode/ontology/jena/AbstractOntologyService.java b/src/ubic/basecode/ontology/jena/AbstractOntologyService.java index 0cd59145..778eb014 
100644 --- a/src/ubic/basecode/ontology/jena/AbstractOntologyService.java +++ b/src/ubic/basecode/ontology/jena/AbstractOntologyService.java @@ -20,12 +20,14 @@ package ubic.basecode.ontology.jena; import com.hp.hpl.jena.ontology.*; -import com.hp.hpl.jena.rdf.model.NodeIterator; -import com.hp.hpl.jena.rdf.model.Property; -import com.hp.hpl.jena.rdf.model.Resource; -import com.hp.hpl.jena.rdf.model.ResourceFactory; +import com.hp.hpl.jena.rdf.model.*; import com.hp.hpl.jena.rdfxml.xmlinput.ARPErrorNumbers; import com.hp.hpl.jena.rdfxml.xmlinput.ParseException; +import com.hp.hpl.jena.reasoner.ReasonerFactory; +import com.hp.hpl.jena.reasoner.rulesys.OWLFBRuleReasonerFactory; +import com.hp.hpl.jena.reasoner.rulesys.OWLMicroReasonerFactory; +import com.hp.hpl.jena.reasoner.rulesys.OWLMiniReasonerFactory; +import com.hp.hpl.jena.reasoner.transitiveReasoner.TransitiveReasonerFactory; import com.hp.hpl.jena.util.iterator.ExtendedIterator; import com.hp.hpl.jena.vocabulary.DC_11; import org.apache.commons.lang3.RandomStringUtils; @@ -40,7 +42,6 @@ import ubic.basecode.ontology.providers.OntologyService; import ubic.basecode.ontology.search.OntologySearchException; import ubic.basecode.ontology.search.OntologySearchResult; -import ubic.basecode.util.Configuration; import javax.annotation.Nullable; import java.io.IOException; @@ -472,13 +473,8 @@ public Set getChildren( Collection terms, boolean di @Override public boolean isEnabled() { - // quick path: just lookup the configuration - String configParameter = "load." 
+ getOntologyName(); - if ( Boolean.TRUE.equals( Configuration.getBoolean( configParameter ) ) ) { - return true; - } // could have forced, without setting config - return getState().isPresent(); + return isOntologyEnabled() || isOntologyLoaded() || isInitializationThreadAlive(); } @Override @@ -549,16 +545,9 @@ public void waitForInitializationThread() throws InterruptedException { protected abstract String getOntologyUrl(); /** - * Delegates the call as to load the model into memory or leave it on disk. Simply delegates to either - * OntologyLoader.loadMemoryModel( url ); OR OntologyLoader.loadPersistentModel( url, spec ); - */ - protected abstract OntologyModel loadModel( boolean processImports, LanguageLevel languageLevel, InferenceMode inferenceMode ) throws IOException; - - - /** - * Load a model from a given input stream. + * Indicate if this ontology is enabled. */ - protected abstract OntologyModel loadModelFromStream( InputStream stream, boolean processImports, LanguageLevel languageLevel, InferenceMode inferenceMode ) throws IOException; + protected abstract boolean isOntologyEnabled(); /** * A name for caching this ontology, or null to disable caching. @@ -566,8 +555,59 @@ public void waitForInitializationThread() throws InterruptedException { * Note that if null is returned, the ontology will not have full-text search capabilities. */ @Nullable - protected String getCacheName() { - return getOntologyName(); + protected abstract String getCacheName(); + + /** + * Delegates the call as to load the model into memory or leave it on disk. 
Simply delegates to either + * OntologyLoader.loadMemoryModel( url ); OR OntologyLoader.loadPersistentModel( url, spec ); + */ + protected OntologyModel loadModel( boolean processImports, LanguageLevel languageLevel, InferenceMode inferenceMode ) throws IOException { + return new OntologyModelImpl( OntologyLoader.loadMemoryModel( this.getOntologyUrl(), this.getCacheName(), processImports, this.getSpec( languageLevel, inferenceMode ) ) ); + } + + /** + * Load a model from a given input stream. + */ + protected OntologyModel loadModelFromStream( InputStream is, boolean processImports, LanguageLevel languageLevel, InferenceMode inferenceMode ) throws IOException { + return new OntologyModelImpl( OntologyLoader.loadMemoryModel( is, this.getOntologyUrl(), processImports, this.getSpec( languageLevel, inferenceMode ) ) ); + } + + private OntModelSpec getSpec( LanguageLevel languageLevel, InferenceMode inferenceMode ) { + String profile; + switch ( languageLevel ) { + case FULL: + profile = ProfileRegistry.OWL_LANG; + break; + case DL: + profile = ProfileRegistry.OWL_DL_LANG; + break; + case LITE: + profile = ProfileRegistry.OWL_LITE_LANG; + break; + default: + throw new UnsupportedOperationException( String.format( "Unsupported OWL language level %s.", languageLevel ) ); + } + ReasonerFactory reasonerFactory; + switch ( inferenceMode ) { + case FULL: + reasonerFactory = OWLFBRuleReasonerFactory.theInstance(); + break; + case MINI: + reasonerFactory = OWLMiniReasonerFactory.theInstance(); + break; + case MICRO: + reasonerFactory = OWLMicroReasonerFactory.theInstance(); + break; + case TRANSITIVE: + reasonerFactory = TransitiveReasonerFactory.theInstance(); + break; + case NONE: + reasonerFactory = null; + break; + default: + throw new UnsupportedOperationException( String.format( "Unsupported inference level %s.", inferenceMode ) ); + } + return new OntModelSpec( ModelFactory.createMemModelMaker(), null, reasonerFactory, profile ); } @Override diff --git 
a/src/ubic/basecode/ontology/jena/OntologyLoader.java b/src/ubic/basecode/ontology/jena/OntologyLoader.java index 97cb8f3b..a5381ef2 100644 --- a/src/ubic/basecode/ontology/jena/OntologyLoader.java +++ b/src/ubic/basecode/ontology/jena/OntologyLoader.java @@ -52,18 +52,18 @@ class OntologyLoader { private static final String OLD_CACHE_SUFFIX = ".old"; private static final String TMP_CACHE_SUFFIX = ".tmp"; - public static OntModel loadMemoryModel( InputStream is, String url ) throws JenaException { + public static OntModel loadMemoryModel( InputStream is, String url ) throws JenaException, IOException { return loadMemoryModel( is, url, true ); } /** * Load an ontology into memory. Use this type of model when fast access is critical and memory is available. */ - public static OntModel loadMemoryModel( InputStream is, String url, boolean processImports ) throws JenaException { + public static OntModel loadMemoryModel( InputStream is, String url, boolean processImports ) throws JenaException, IOException { return loadMemoryModel( is, url, processImports, OntModelSpec.OWL_MEM_TRANS_INF ); } - public static OntModel loadMemoryModel( InputStream is, String url, boolean processImports, OntModelSpec spec ) throws JenaException { + public static OntModel loadMemoryModel( InputStream is, String url, boolean processImports, OntModelSpec spec ) throws JenaException, IOException { OntModel model = getMemoryModel( url, processImports, spec ); model.read( is, null ); return model; diff --git a/src/ubic/basecode/ontology/providers/AbstractBaseCodeOntologyService.java b/src/ubic/basecode/ontology/providers/AbstractBaseCodeOntologyService.java new file mode 100644 index 00000000..5c4b0c3b --- /dev/null +++ b/src/ubic/basecode/ontology/providers/AbstractBaseCodeOntologyService.java @@ -0,0 +1,48 @@ +package ubic.basecode.ontology.providers; + +import ubic.basecode.util.Configuration; + +import javax.annotation.Nullable; + +/** + * Base class for all ontologies built-in to the baseCode 
project. + *

+ * The ontologies that subclass this will honor settings in the {@code basecode.properties} file for loading and + * locating the ontology. + * + * @author poirigui + */ +public abstract class AbstractBaseCodeOntologyService extends AbstractOntologyService { + + private final String name; + private final String cacheName; + + /** + * Intentionally package-private constructor. + */ + AbstractBaseCodeOntologyService( String name, String cacheName ) { + this.name = name; + this.cacheName = cacheName; + } + + @Override + protected String getOntologyName() { + return name; + } + + @Override + protected String getOntologyUrl() { + return Configuration.getString( "url." + cacheName ); + } + + @Override + protected boolean isOntologyEnabled() { + return Boolean.TRUE.equals( Configuration.getBoolean( "load." + cacheName ) ); + } + + @Nullable + @Override + public String getCacheName() { + return cacheName; + } +} diff --git a/src/ubic/basecode/ontology/providers/AbstractOntologyService.java b/src/ubic/basecode/ontology/providers/AbstractOntologyService.java new file mode 100644 index 00000000..7549a348 --- /dev/null +++ b/src/ubic/basecode/ontology/providers/AbstractOntologyService.java @@ -0,0 +1,9 @@ +package ubic.basecode.ontology.providers; + +/** + * Base class for all ontology services. + *

+ * The actual implementation is provided by the {@code ubic.basecode.ontology.jena} package. + */ +public abstract class AbstractOntologyService extends ubic.basecode.ontology.jena.AbstractOntologyService { +} diff --git a/src/ubic/basecode/ontology/providers/CellLineOntologyService.java b/src/ubic/basecode/ontology/providers/CellLineOntologyService.java index 9c60ff24..e77be6a9 100644 --- a/src/ubic/basecode/ontology/providers/CellLineOntologyService.java +++ b/src/ubic/basecode/ontology/providers/CellLineOntologyService.java @@ -1,8 +1,8 @@ /* * The baseCode project - * + * * Copyright (c) 2010 University of British Columbia - * + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at @@ -19,26 +19,14 @@ package ubic.basecode.ontology.providers; -import ubic.basecode.ontology.jena.AbstractOntologyMemoryBackedService; -import ubic.basecode.util.Configuration; - /** - * See http://www.obofoundry.org/cgi-bin/detail.cgi?id=CLO - * + * Cell Line Ontology + * * @author paul - * */ -public class CellLineOntologyService extends AbstractOntologyMemoryBackedService { - - private static final String ONTOLOGY_URL = "url.cellLineOntology"; - - @Override - protected String getOntologyName() { - return "cellLineOntology"; - } +public class CellLineOntologyService extends AbstractBaseCodeOntologyService { - @Override - protected String getOntologyUrl() { - return Configuration.getString( ONTOLOGY_URL ); + public CellLineOntologyService() { + super( "Cell Line Ontology", "cellLineOntology" ); } } diff --git a/src/ubic/basecode/ontology/providers/CellTypeOntologyService.java b/src/ubic/basecode/ontology/providers/CellTypeOntologyService.java index 6ebb09e8..bfbcf55b 100644 --- a/src/ubic/basecode/ontology/providers/CellTypeOntologyService.java +++ b/src/ubic/basecode/ontology/providers/CellTypeOntologyService.java @@ -1,8 +1,8 @@ /* * The baseCode project - * 
+ * * Copyright (c) 2010 University of British Columbia - * + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at @@ -19,26 +19,14 @@ package ubic.basecode.ontology.providers; -import ubic.basecode.ontology.jena.AbstractOntologyMemoryBackedService; -import ubic.basecode.util.Configuration; - /** - * See http://www.obofoundry.org/cgi-bin/detail.cgi?id=cell - * + * Cell Ontology + * * @author paul - * */ -public class CellTypeOntologyService extends AbstractOntologyMemoryBackedService { - - private static final String ONTOLOGY_URL = "url.cellTypeOntology"; - - @Override - protected String getOntologyName() { - return "cellTypeOntology"; - } +public class CellTypeOntologyService extends AbstractBaseCodeOntologyService { - @Override - protected String getOntologyUrl() { - return Configuration.getString( ONTOLOGY_URL ); + public CellTypeOntologyService() { + super( "Cell Ontology", "cellTypeOntology" ); } -} +} \ No newline at end of file diff --git a/src/ubic/basecode/ontology/providers/ChebiOntologyService.java b/src/ubic/basecode/ontology/providers/ChebiOntologyService.java index d6659c6f..59b87ee2 100644 --- a/src/ubic/basecode/ontology/providers/ChebiOntologyService.java +++ b/src/ubic/basecode/ontology/providers/ChebiOntologyService.java @@ -1,8 +1,8 @@ /* * The basecode project - * + * * Copyright (c) 2007-2019 University of British Columbia - * + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at @@ -19,27 +19,14 @@ package ubic.basecode.ontology.providers; -import ubic.basecode.ontology.jena.AbstractOntologyMemoryBackedService; -import ubic.basecode.util.Configuration; - /** - * Loads the CHEBI Ontology at startup in its own thread. 
Controlled in build.properties by load.chebiOntology - * + * Chemical Entities of Biological Interest + * * @author klc - * */ -public class ChebiOntologyService extends AbstractOntologyMemoryBackedService { +public class ChebiOntologyService extends AbstractBaseCodeOntologyService { - private static final String CHEBI_ONTOLOGY_URL = "url.chebiOntology"; - - @Override - protected String getOntologyName() { - return "chebiOntology"; - } - - @Override - protected String getOntologyUrl() { - return Configuration.getString( CHEBI_ONTOLOGY_URL ); + public ChebiOntologyService() { + super( "CHEBI", "chebiOntology" ); } - } diff --git a/src/ubic/basecode/ontology/providers/DiseaseOntologyService.java b/src/ubic/basecode/ontology/providers/DiseaseOntologyService.java index 7ade3f5b..11ec6808 100644 --- a/src/ubic/basecode/ontology/providers/DiseaseOntologyService.java +++ b/src/ubic/basecode/ontology/providers/DiseaseOntologyService.java @@ -1,8 +1,8 @@ /* * The Gemma21 project - * + * * Copyright (c) 2007-2019 University of British Columbia - * + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at @@ -18,26 +18,16 @@ */ package ubic.basecode.ontology.providers; -import ubic.basecode.ontology.jena.AbstractOntologyMemoryBackedService; -import ubic.basecode.util.Configuration; - /** * Holds a copy of the Disease Ontology. 
- * + * * @author klc + * @deprecated use MONDO instead */ -public class DiseaseOntologyService extends AbstractOntologyMemoryBackedService { +@Deprecated +public class DiseaseOntologyService extends AbstractBaseCodeOntologyService { - private static final String DISEASE_ONTOLOGY_URL = "url.diseaseOntology"; - - @Override - protected String getOntologyName() { - return "diseaseOntology"; - } - - @Override - protected String getOntologyUrl() { - return Configuration.getString( DISEASE_ONTOLOGY_URL ); + public DiseaseOntologyService() { + super( "Disease Ontology", "diseaseOntology" ); } - } diff --git a/src/ubic/basecode/ontology/providers/ExperimentalFactorOntologyService.java b/src/ubic/basecode/ontology/providers/ExperimentalFactorOntologyService.java index cdde9564..3ebc37a0 100644 --- a/src/ubic/basecode/ontology/providers/ExperimentalFactorOntologyService.java +++ b/src/ubic/basecode/ontology/providers/ExperimentalFactorOntologyService.java @@ -1,38 +1,34 @@ /* * The baseCode project - * + * * Copyright (c) 2012 University of British Columbia - * + * * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the * specific language governing permissions and limitations under the License. 
*/ package ubic.basecode.ontology.providers; -import ubic.basecode.ontology.jena.AbstractOntologyMemoryBackedService; import ubic.basecode.util.Configuration; /** + * Experimental Factor Ontology + * * @author Paul - * */ -public class ExperimentalFactorOntologyService extends AbstractOntologyMemoryBackedService { +public class ExperimentalFactorOntologyService extends AbstractBaseCodeOntologyService { - private static final String EF_ONTOLOGY_URL = "url.efOntology"; - - @Override - protected String getOntologyName() { - return "experimentalFactorOntology"; + public ExperimentalFactorOntologyService() { + super( "Experimental Factor Ontology", "experimentalFactorOntology" ); } @Override protected String getOntologyUrl() { - return Configuration.getString( EF_ONTOLOGY_URL ); + return Configuration.getString( "url.efOntology" ); } - } diff --git a/src/ubic/basecode/ontology/providers/FMAOntologyService.java b/src/ubic/basecode/ontology/providers/FMAOntologyService.java index 1f80f650..b12b43d8 100644 --- a/src/ubic/basecode/ontology/providers/FMAOntologyService.java +++ b/src/ubic/basecode/ontology/providers/FMAOntologyService.java @@ -1,8 +1,8 @@ /* * The Gemma21 project - * + * * Copyright (c) 2007-2019 University of British Columbia - * + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at @@ -19,27 +19,16 @@ package ubic.basecode.ontology.providers; -import ubic.basecode.ontology.jena.AbstractOntologyMemoryBackedService; -import ubic.basecode.util.Configuration; - /** - * Holds a copy of the FMA Ontology on disk. This gets loaded on startup. 
- * + * Foundational Model of Anatomy Ontology (subset) + * * @author klc - * + * @deprecated this ontology is inactive, use UBERON instead */ -public class FMAOntologyService extends AbstractOntologyMemoryBackedService { +@Deprecated +public class FMAOntologyService extends AbstractBaseCodeOntologyService { - private static final String FMA_ONTOLOGY_URL = "url.fmaOntology"; - - @Override - protected String getOntologyName() { - return "fmaOntology"; - } - - @Override - protected String getOntologyUrl() { - return Configuration.getString( FMA_ONTOLOGY_URL ); + public FMAOntologyService() { + super( "Foundational Model of Anatomy Ontology (subset)", "fmaOntology" ); } - } diff --git a/src/ubic/basecode/ontology/providers/GenericOntologyService.java b/src/ubic/basecode/ontology/providers/GenericOntologyService.java index 2747c235..d83762ee 100644 --- a/src/ubic/basecode/ontology/providers/GenericOntologyService.java +++ b/src/ubic/basecode/ontology/providers/GenericOntologyService.java @@ -14,31 +14,47 @@ */ package ubic.basecode.ontology.providers; -import ubic.basecode.ontology.jena.AbstractOntologyMemoryBackedService; +import org.apache.commons.lang3.StringUtils; + +import javax.annotation.Nullable; /** - * A way to create ad hoc ontology services (in memory) for testing + * A way to create ad-hoc in-memory ontology services. 
* * @author Paul */ -public class GenericOntologyService extends AbstractOntologyMemoryBackedService { +public class GenericOntologyService extends AbstractOntologyService { private final String url; private final String name; - private final boolean cache; + @Nullable + private final String cacheName; + + public GenericOntologyService( String name, String url, @Nullable String cacheName ) { + this.name = name; + this.url = url; + this.cacheName = cacheName; + } public GenericOntologyService( String name, String url ) { - this( name, url, false ); + this( name, url, null ); } + /** + * @deprecated use {@link #GenericOntologyService(String, String, String)} with an explicit cache name instead + */ + @Deprecated public GenericOntologyService( String name, String url, boolean cache ) { - this( name, url, cache, true ); + this( name, url, cache ? StringUtils.deleteWhitespace( name ) : null ); } + /** + * @deprecated use {@link #GenericOntologyService(String, String, String)} with an explicit cache name instead and + * {@link #setProcessImports(boolean)} + */ + @Deprecated public GenericOntologyService( String name, String url, boolean cache, boolean processImports ) { - this.name = name; - this.url = url; - this.cache = cache; + this( name, url, cache ); setProcessImports( processImports ); } @@ -53,7 +69,13 @@ protected String getOntologyUrl() { } @Override + protected boolean isOntologyEnabled() { + return true; + } + + @Override + @Nullable protected String getCacheName() { - return this.cache ? 
this.name : null; + return cacheName; } } diff --git a/src/ubic/basecode/ontology/providers/HumanDevelopmentOntologyService.java b/src/ubic/basecode/ontology/providers/HumanDevelopmentOntologyService.java index 171a029f..8f69b197 100644 --- a/src/ubic/basecode/ontology/providers/HumanDevelopmentOntologyService.java +++ b/src/ubic/basecode/ontology/providers/HumanDevelopmentOntologyService.java @@ -1,8 +1,8 @@ /* * The baseCode project - * + * * Copyright (c) 2010 University of British Columbia - * + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at @@ -19,25 +19,16 @@ package ubic.basecode.ontology.providers; -import ubic.basecode.ontology.jena.AbstractOntologyMemoryBackedService; -import ubic.basecode.util.Configuration; - /** + * Human developmental anatomy, abstract + * * @author paul - * + * @deprecated this ontology has been unmaintained since 2013 */ -public class HumanDevelopmentOntologyService extends AbstractOntologyMemoryBackedService { +@Deprecated +public class HumanDevelopmentOntologyService extends AbstractBaseCodeOntologyService { - private static final String ONTOLOGY_URL = "url.humanDevelOntology"; - - @Override - protected String getOntologyName() { - return "humanDevelOntology"; - } - - @Override - protected String getOntologyUrl() { - return Configuration.getString( ONTOLOGY_URL ); + public HumanDevelopmentOntologyService() { + super( "Human Development Ontology", "humanDevelOntology" ); } - } diff --git a/src/ubic/basecode/ontology/providers/HumanPhenotypeOntologyService.java b/src/ubic/basecode/ontology/providers/HumanPhenotypeOntologyService.java index 2b7a201d..b8b24e65 100644 --- a/src/ubic/basecode/ontology/providers/HumanPhenotypeOntologyService.java +++ b/src/ubic/basecode/ontology/providers/HumanPhenotypeOntologyService.java @@ -1,8 +1,8 @@ /* * The baseCode project - * + * * Copyright (c) 2011 
University of British Columbia - * + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at @@ -19,27 +19,14 @@ package ubic.basecode.ontology.providers; -import ubic.basecode.ontology.jena.AbstractOntologyMemoryBackedService; -import ubic.basecode.util.Configuration; - /** - * See http://bioportal.bioontology.org/ontologies/45774?p=terms&conceptid=HP%3A0001264 for example. - * + * Human Phenotype Ontology + * * @author paul - * */ -public class HumanPhenotypeOntologyService extends AbstractOntologyMemoryBackedService { +public class HumanPhenotypeOntologyService extends AbstractBaseCodeOntologyService { - private static final String ONTOLOGY_URL = "url.humanPhenotypeOntology"; - - @Override - protected String getOntologyName() { - return "humanPhenotypeOntology"; - } - - @Override - protected String getOntologyUrl() { - return Configuration.getString( ONTOLOGY_URL ); + public HumanPhenotypeOntologyService() { + super( "Human Phenotype Ontology", "humanPhenotypeOntology" ); } - } diff --git a/src/ubic/basecode/ontology/providers/MammalianPhenotypeOntologyService.java b/src/ubic/basecode/ontology/providers/MammalianPhenotypeOntologyService.java index ffa43d00..9acb5d6f 100644 --- a/src/ubic/basecode/ontology/providers/MammalianPhenotypeOntologyService.java +++ b/src/ubic/basecode/ontology/providers/MammalianPhenotypeOntologyService.java @@ -1,8 +1,8 @@ /* * The basecode project - * + * * Copyright (c) 2007-2019 University of British Columbia - * + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at @@ -19,27 +19,14 @@ package ubic.basecode.ontology.providers; -import ubic.basecode.ontology.jena.AbstractOntologyMemoryBackedService; -import ubic.basecode.util.Configuration; - /** + * Mammalian Phenotype Ontology + * * @author klc - * */ -public class MammalianPhenotypeOntologyService extends AbstractOntologyMemoryBackedService { - - private static final String ONTOLOGY_URL = "url.mammalPhenotypeOntology"; +public class MammalianPhenotypeOntologyService extends AbstractBaseCodeOntologyService { - @Override - protected String getOntologyName() { - - return "mammalPhenotypeOntology"; + public MammalianPhenotypeOntologyService() { + super( "Mammalian Phenotype Ontology", "mammalPhenotypeOntology" ); } - - @Override - protected String getOntologyUrl() { - return Configuration.getString( ONTOLOGY_URL ); - - } - } diff --git a/src/ubic/basecode/ontology/providers/MedicOntologyService.java b/src/ubic/basecode/ontology/providers/MedicOntologyService.java index 96d56e49..8a4d9820 100644 --- a/src/ubic/basecode/ontology/providers/MedicOntologyService.java +++ b/src/ubic/basecode/ontology/providers/MedicOntologyService.java @@ -18,7 +18,6 @@ */ package ubic.basecode.ontology.providers; -import ubic.basecode.ontology.jena.AbstractOntologyMemoryBackedService; import ubic.basecode.ontology.model.OntologyModel; import java.io.IOException; @@ -29,21 +28,21 @@ * MEDIC ONTOLOGY USED BY PHENOCARTA, its represents MESH terms as a tree so with can use the parent structure that a * normal mesh term doesnt have *

- * MEDIC comes from the CTD folks. See http://ctd.mdibl.org/voc.go?type=disease. Unfortunately I do not know where our + * MEDIC comes from the CTD folks. See .... Unfortunately I do not know where our * medic.owl file came from (PP) * * @author Nicolas */ -public class MedicOntologyService extends AbstractOntologyMemoryBackedService { +@Deprecated +public class MedicOntologyService extends AbstractBaseCodeOntologyService { /** * FIXME this shouldn't be hard-coded like this, we should load it like any other ontology service. */ private static final String MEDIC_ONTOLOGY_FILE = "/data/loader/ontology/medic.owl.gz"; - @Override - protected String getOntologyName() { - return "medicOntology"; + public MedicOntologyService() { + super( "Medic Ontology", "medicOntology" ); } @Override diff --git a/src/ubic/basecode/ontology/providers/MouseDevelopmentOntologyService.java b/src/ubic/basecode/ontology/providers/MouseDevelopmentOntologyService.java index 2dcf1826..cbcc54f0 100644 --- a/src/ubic/basecode/ontology/providers/MouseDevelopmentOntologyService.java +++ b/src/ubic/basecode/ontology/providers/MouseDevelopmentOntologyService.java @@ -1,8 +1,8 @@ /* * The baseCode project - * + * * Copyright (c) 2010 University of British Columbia - * + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at @@ -19,25 +19,14 @@ package ubic.basecode.ontology.providers; -import ubic.basecode.ontology.jena.AbstractOntologyMemoryBackedService; -import ubic.basecode.util.Configuration; - /** + * Mouse Developmental Anatomy Ontology + * * @author paul - * */ -public class MouseDevelopmentOntologyService extends AbstractOntologyMemoryBackedService { +public class MouseDevelopmentOntologyService extends AbstractBaseCodeOntologyService { - private static final String ONTOLOGY_URL = "url.mouseDevelOntology"; - - @Override - protected String getOntologyName() { - return "mouseDevelOntology"; - } - - @Override - protected String getOntologyUrl() { - return Configuration.getString( ONTOLOGY_URL ); + public MouseDevelopmentOntologyService() { + super( "Mouse Development Anatomy Ontology", "mouseDevelOntology" ); } - } diff --git a/src/ubic/basecode/ontology/providers/NIFSTDOntologyService.java b/src/ubic/basecode/ontology/providers/NIFSTDOntologyService.java index ad2ec55a..1b133b10 100644 --- a/src/ubic/basecode/ontology/providers/NIFSTDOntologyService.java +++ b/src/ubic/basecode/ontology/providers/NIFSTDOntologyService.java @@ -14,7 +14,6 @@ */ package ubic.basecode.ontology.providers; -import ubic.basecode.ontology.jena.AbstractOntologyMemoryBackedService; import ubic.basecode.ontology.model.OntologyModel; import java.io.IOException; @@ -24,19 +23,16 @@ /** * @author paul */ -public class NIFSTDOntologyService extends AbstractOntologyMemoryBackedService { +@Deprecated +public class NIFSTDOntologyService extends AbstractBaseCodeOntologyService { private static final String NIFSTD_ONTOLOGY_FILE = "/data/loader/ontology/nif-gemma.owl.gz"; public NIFSTDOntologyService() { + super( "NIFSTD", "nifstdOntology" ); setProcessImports( false ); } - @Override - protected String getOntologyName() { - return "nifstdOntology"; - } - @Override protected String getOntologyUrl() { return "classpath:" + NIFSTD_ONTOLOGY_FILE; diff --git 
a/src/ubic/basecode/ontology/providers/ObiService.java b/src/ubic/basecode/ontology/providers/ObiService.java index f8e4ce9f..910b9ed4 100644 --- a/src/ubic/basecode/ontology/providers/ObiService.java +++ b/src/ubic/basecode/ontology/providers/ObiService.java @@ -1,8 +1,8 @@ /* * The baseCode project - * + * * Copyright (c) 2010 University of British Columbia - * + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at @@ -19,25 +19,14 @@ package ubic.basecode.ontology.providers; -import ubic.basecode.ontology.jena.AbstractOntologyMemoryBackedService; -import ubic.basecode.util.Configuration; - /** + * Ontology for Biomedical Investigations + * * @author paul - * */ -public class ObiService extends AbstractOntologyMemoryBackedService { +public class ObiService extends AbstractBaseCodeOntologyService { - private static final String ONTOLOGY_URL = "url.obiOntology"; - - @Override - protected String getOntologyName() { - return "obiOntology"; - } - - @Override - protected String getOntologyUrl() { - return Configuration.getString( ONTOLOGY_URL ); + public ObiService() { + super( "Ontology for Biomedical Investigations", "obiOntology" ); } - } diff --git a/src/ubic/basecode/ontology/providers/OntologyService.java b/src/ubic/basecode/ontology/providers/OntologyService.java index 7dbe4386..8cc3ccd1 100644 --- a/src/ubic/basecode/ontology/providers/OntologyService.java +++ b/src/ubic/basecode/ontology/providers/OntologyService.java @@ -117,9 +117,9 @@ enum InferenceMode { *

* Search is enabled by default. * - * @see #findTerm(String, boolean) - * @see #findIndividuals(String, boolean) - * @see #findResources(String, boolean) + * @see #findTerm(String, int, boolean) + * @see #findIndividuals(String, int, boolean) + * @see #findResources(String, int, boolean) */ boolean isSearchEnabled(); @@ -302,7 +302,6 @@ default Set getChildren( Collection terms, boolean d */ Set getChildren( Collection terms, boolean direct, boolean includeAdditionalProperties, boolean keepObsoletes ); - /** * Check if this ontology is enabled. */ diff --git a/src/ubic/basecode/ontology/providers/SequenceOntologyService.java b/src/ubic/basecode/ontology/providers/SequenceOntologyService.java index 6aa71d33..55b707b1 100644 --- a/src/ubic/basecode/ontology/providers/SequenceOntologyService.java +++ b/src/ubic/basecode/ontology/providers/SequenceOntologyService.java @@ -1,8 +1,8 @@ /* * The baseCode project - * + * * Copyright (c) 2013 University of British Columbia - * + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at @@ -19,27 +19,14 @@ package ubic.basecode.ontology.providers; -import ubic.basecode.ontology.jena.AbstractOntologyMemoryBackedService; -import ubic.basecode.util.Configuration; - /** - * Support for the Sequence Ontology - * + * Sequence types and features ontology + * * @author Paul - * */ -public class SequenceOntologyService extends AbstractOntologyMemoryBackedService { +public class SequenceOntologyService extends AbstractBaseCodeOntologyService { - private static final String SO_ONTOLOGY_URL = "url.seqOntology"; - - @Override - protected String getOntologyName() { - return "seqOntology"; - } - - @Override - protected String getOntologyUrl() { - return Configuration.getString( SO_ONTOLOGY_URL ); + public SequenceOntologyService() { + super( "Sequence types and features ontology", "seqOntology" ); } - } diff --git a/src/ubic/basecode/ontology/providers/UberonOntologyService.java b/src/ubic/basecode/ontology/providers/UberonOntologyService.java index 51967a98..d9e2c6e1 100644 --- a/src/ubic/basecode/ontology/providers/UberonOntologyService.java +++ b/src/ubic/basecode/ontology/providers/UberonOntologyService.java @@ -1,8 +1,8 @@ /* * The baseCode project - * + * * Copyright (c) 2015 University of British Columbia - * + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at @@ -19,27 +19,14 @@ package ubic.basecode.ontology.providers; -import ubic.basecode.ontology.jena.AbstractOntologyMemoryBackedService; -import ubic.basecode.util.Configuration; - /** - * Uberon - * + * Uberon multi-species anatomy ontology + * * @author paul - * */ -public class UberonOntologyService extends AbstractOntologyMemoryBackedService { +public class UberonOntologyService extends AbstractBaseCodeOntologyService { - private static final String ONTOLOGY_URL = "url.uberonOntology"; - - @Override - protected String getOntologyName() { - return "uberonOntology"; - } - - @Override - protected String getOntologyUrl() { - return Configuration.getString( ONTOLOGY_URL ); + public UberonOntologyService() { + super( "Uberon multi-species anatomy ontology", "uberonOntology" ); } - } diff --git a/src/ubic/basecode/ontology/providers/UnitsOntologyService.java b/src/ubic/basecode/ontology/providers/UnitsOntologyService.java index ed1df01c..1670b011 100644 --- a/src/ubic/basecode/ontology/providers/UnitsOntologyService.java +++ b/src/ubic/basecode/ontology/providers/UnitsOntologyService.java @@ -1,8 +1,8 @@ /* * The baseCode project - * + * * Copyright (c) 2013 University of British Columbia - * + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at @@ -19,27 +19,14 @@ package ubic.basecode.ontology.providers; -import ubic.basecode.ontology.jena.AbstractOntologyMemoryBackedService; -import ubic.basecode.util.Configuration; - /** - * Support for the units ontology - * + * Units of measurement ontology + * * @author Paul - * */ -public class UnitsOntologyService extends AbstractOntologyMemoryBackedService { +public class UnitsOntologyService extends AbstractBaseCodeOntologyService { - private static final String UNITs_ONTOLOGY_URL = "url.unitsOntology"; - - @Override - protected String getOntologyName() { - return "unitsOntology"; - } - - @Override - protected String getOntologyUrl() { - return Configuration.getString( UNITs_ONTOLOGY_URL ); + public UnitsOntologyService() { + super( "Units of measurement ontology", "unitsOntology" ); } - } diff --git a/src/ubic/basecode/ontology/providers/package-info.java b/src/ubic/basecode/ontology/providers/package-info.java index 46d274b3..c3735344 100644 --- a/src/ubic/basecode/ontology/providers/package-info.java +++ b/src/ubic/basecode/ontology/providers/package-info.java @@ -1,5 +1,11 @@ /** + * This package contains baseCode built-in ontologies and a {@link ubic.basecode.ontology.providers.GenericOntologyService} + * to implement your own ontologies. 
* + * @author klc + * @author nicolas + * @author paul + * @author poirigui */ @ParametersAreNonnullByDefault package ubic.basecode.ontology.providers; diff --git a/test/ubic/basecode/ontology/jena/OntologySearchTest.java b/test/ubic/basecode/ontology/jena/OntologySearchTest.java index 4152b1e1..ed0d0631 100644 --- a/test/ubic/basecode/ontology/jena/OntologySearchTest.java +++ b/test/ubic/basecode/ontology/jena/OntologySearchTest.java @@ -67,9 +67,22 @@ public void testStemming() throws Exception { HashSet indexableProperties = new HashSet<>( OntologyIndexer.DEFAULT_INDEXABLE_PROPERTIES ); indexableProperties.add( new OntologyIndexer.IndexableProperty( RDFS.comment, true ) ); SearchIndex index = OntologyIndexer.indexOntology( "MGEDTEST", model, indexableProperties, Collections.emptySet(), true ); + + // bedding is stemmed to bed Set results = index.searchClasses( model, "bed", 500 ).toSet(); Assertions.assertThat( results ).extracting( sr -> sr.result.as( OntClass.class ).getURI() ) .containsExactly( "http://mged.sourceforge.net/ontologies/MGEDOntology.owl#Bedding" ); + + // plural query + results = index.searchClasses( model, "beddings", 500 ).toSet(); + Assertions.assertThat( results ).extracting( sr -> sr.result.as( OntClass.class ).getURI() ) + .containsExactly( "http://mged.sourceforge.net/ontologies/MGEDOntology.owl#Bedding" ); + + // plural query + results = index.searchClasses( model, "beds", 500 ).toSet(); + Assertions.assertThat( results ).extracting( sr -> sr.result.as( OntClass.class ).getURI() ) + .containsExactly( "http://mged.sourceforge.net/ontologies/MGEDOntology.owl#Bedding" ); + index.close(); } From a08c6087a291d7c61154907b36321ecf4d354954 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Fri, 26 Apr 2024 12:46:52 -0700 Subject: [PATCH 09/19] Ensure that all mentioned configurations in the code are in basecode.properties --- src/basecode.properties | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git 
a/src/basecode.properties b/src/basecode.properties index 2610a9b1..61406ce7 100644 --- a/src/basecode.properties +++ b/src/basecode.properties @@ -37,4 +37,7 @@ url.fmaOntology=http://purl.obolibrary.org/obo/fma.owl ontology.index.dir= ontology.cache.dir= -ncbo.api.key= \ No newline at end of file +ncbo.api.key= + +rserve.start.command= +rlibpath= \ No newline at end of file From 045f306a9b3a223eff1eb00286540b07216a77ad Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Mon, 29 Apr 2024 11:40:53 -0700 Subject: [PATCH 10/19] Include sub-properties in the inference Ensure that sub-properties are included when matching terms via additional restrictions. Not all ontologies declare relevant sub-properties, so we enumerate the most common one from RO. --- .../jena/AbstractOntologyService.java | 40 ++++++++++++++----- src/ubic/basecode/ontology/jena/IAO.java | 9 +++++ .../basecode/ontology/jena/JenaUtils.java | 30 +++++++++++--- src/ubic/basecode/ontology/jena/OBO.java | 1 - .../ontology/jena/OntologyIndexer.java | 9 ++--- src/ubic/basecode/ontology/jena/RO.java | 28 ++++++++++++- .../jena/RestrictionWithOnPropertyFilter.java | 4 +- .../basecode/ontology/jena/package-info.java | 2 +- .../providers/GenericOntologyServiceTest.java | 7 ++-- .../providers/UberonOntologyServiceTest.java | 18 ++++----- 10 files changed, 110 insertions(+), 38 deletions(-) create mode 100644 src/ubic/basecode/ontology/jena/IAO.java diff --git a/src/ubic/basecode/ontology/jena/AbstractOntologyService.java b/src/ubic/basecode/ontology/jena/AbstractOntologyService.java index 778eb014..ef8e1d84 100644 --- a/src/ubic/basecode/ontology/jena/AbstractOntologyService.java +++ b/src/ubic/basecode/ontology/jena/AbstractOntologyService.java @@ -20,7 +20,10 @@ package ubic.basecode.ontology.jena; import com.hp.hpl.jena.ontology.*; -import com.hp.hpl.jena.rdf.model.*; +import com.hp.hpl.jena.rdf.model.ModelFactory; +import com.hp.hpl.jena.rdf.model.NodeIterator; +import 
com.hp.hpl.jena.rdf.model.Property; +import com.hp.hpl.jena.rdf.model.Resource; import com.hp.hpl.jena.rdfxml.xmlinput.ARPErrorNumbers; import com.hp.hpl.jena.rdfxml.xmlinput.ParseException; import com.hp.hpl.jena.reasoner.ReasonerFactory; @@ -67,12 +70,31 @@ public abstract class AbstractOntologyService implements OntologyService { /** * Properties through which propagation is allowed for {@link #getParents(Collection, boolean, boolean)}} */ - private static final Set DEFAULT_ADDITIONAL_PROPERTIES; + private static final Set DEFAULT_ADDITIONAL_PROPERTIES; static { DEFAULT_ADDITIONAL_PROPERTIES = new HashSet<>(); - DEFAULT_ADDITIONAL_PROPERTIES.add( RO.partOf.getURI() ); - DEFAULT_ADDITIONAL_PROPERTIES.add( RO.properPartOf.getURI() ); + DEFAULT_ADDITIONAL_PROPERTIES.add( RO.partOf ); + // all those are sub-properties of partOf, but some ontologies might not have them + DEFAULT_ADDITIONAL_PROPERTIES.add( RO.activeIngredientIn ); + DEFAULT_ADDITIONAL_PROPERTIES.add( RO.boundingLayerOf ); + DEFAULT_ADDITIONAL_PROPERTIES.add( RO.branchingPartOf ); + DEFAULT_ADDITIONAL_PROPERTIES.add( RO.determinedBy ); + DEFAULT_ADDITIONAL_PROPERTIES.add( RO.ends ); + DEFAULT_ADDITIONAL_PROPERTIES.add( RO.isSubsequenceOf ); + DEFAULT_ADDITIONAL_PROPERTIES.add( RO.isEndSequenceOf ); + DEFAULT_ADDITIONAL_PROPERTIES.add( RO.isStartSequenceOf ); + DEFAULT_ADDITIONAL_PROPERTIES.add( RO.lumenOf ); + DEFAULT_ADDITIONAL_PROPERTIES.add( RO.luminalSpaceOf ); + DEFAULT_ADDITIONAL_PROPERTIES.add( RO.mainStemOf ); + DEFAULT_ADDITIONAL_PROPERTIES.add( RO.memberOf ); + DEFAULT_ADDITIONAL_PROPERTIES.add( RO.occurrentPartOf ); + DEFAULT_ADDITIONAL_PROPERTIES.add( RO.skeletonOf ); + DEFAULT_ADDITIONAL_PROPERTIES.add( RO.starts ); + DEFAULT_ADDITIONAL_PROPERTIES.add( RO.subclusterOf ); + // used by some older ontologies + //noinspection deprecation + DEFAULT_ADDITIONAL_PROPERTIES.add( RO.properPartOf ); } /** @@ -87,7 +109,7 @@ public abstract class AbstractOntologyService implements OntologyService { 
private boolean processImports = true; private boolean searchEnabled = true; private Set excludedWordsFromStemming = Collections.emptySet(); - private Set additionalPropertyUris = DEFAULT_ADDITIONAL_PROPERTIES; + private Set additionalPropertyUris = DEFAULT_ADDITIONAL_PROPERTIES.stream().map( Property::getURI ).collect( Collectors.toSet() ); @Override public String getName() { @@ -183,8 +205,6 @@ private synchronized void initialize( @Nullable InputStream stream, boolean forc String ontologyUrl = getOntologyUrl(); String ontologyName = getOntologyName(); String cacheName = getCacheName(); - Set additionalProperties = this.additionalPropertyUris.stream() - .map( ResourceFactory::createProperty ).collect( Collectors.toSet() ); LanguageLevel languageLevel = this.languageLevel; InferenceMode inferenceMode = this.inferenceMode; boolean processImports = this.processImports; @@ -242,9 +262,9 @@ private synchronized void initialize( @Nullable InputStream stream, boolean forc return; // compute additional restrictions - Set additionalRestrictions = model.listRestrictions() - .filterKeep( new RestrictionWithOnPropertyFilter( additionalProperties ) ) - .toSet(); + Set additionalProperties = additionalPropertyUris.stream().map( model::getProperty ).collect( Collectors.toSet() ); + Set additionalRestrictions = JenaUtils.listRestrictionsOnProperties( model, additionalProperties, true ).toSet(); + // indexing is lengthy, don't bother if we're interrupted if ( Thread.currentThread().isInterrupted() ) diff --git a/src/ubic/basecode/ontology/jena/IAO.java b/src/ubic/basecode/ontology/jena/IAO.java new file mode 100644 index 00000000..8053c3b7 --- /dev/null +++ b/src/ubic/basecode/ontology/jena/IAO.java @@ -0,0 +1,9 @@ +package ubic.basecode.ontology.jena; + +import com.hp.hpl.jena.rdf.model.Property; +import com.hp.hpl.jena.rdf.model.ResourceFactory; + +public class IAO { + + public static final Property alternativeLabel = ResourceFactory.createProperty( 
"http://purl.obolibrary.org/obo/IAO_0000118" ); +} diff --git a/src/ubic/basecode/ontology/jena/JenaUtils.java b/src/ubic/basecode/ontology/jena/JenaUtils.java index b7d96758..8f9647fa 100644 --- a/src/ubic/basecode/ontology/jena/JenaUtils.java +++ b/src/ubic/basecode/ontology/jena/JenaUtils.java @@ -1,9 +1,6 @@ package ubic.basecode.ontology.jena; -import com.hp.hpl.jena.ontology.ConversionException; -import com.hp.hpl.jena.ontology.OntClass; -import com.hp.hpl.jena.ontology.OntModel; -import com.hp.hpl.jena.ontology.Restriction; +import com.hp.hpl.jena.ontology.*; import com.hp.hpl.jena.rdf.model.*; import com.hp.hpl.jena.util.iterator.ExtendedIterator; import com.hp.hpl.jena.util.iterator.Filter; @@ -193,7 +190,6 @@ public static Resource getRestrictionValue( Restriction r ) { } } - /** * Use to pretty-print a RDFNode */ @@ -232,4 +228,28 @@ public static Optional as( RDFNode resource, Class cla return Optional.empty(); } } + + /** + * List all restrictions in the given model on any of the given properties. 
+ */ + public static ExtendedIterator listRestrictionsOnProperties( OntModel model, Set props, boolean includeSubProperties ) { + if ( includeSubProperties ) { + Set allProps = new HashSet<>( props ); + for ( Property p : props ) { + Property property = p.inModel( model ); + // include sub-properties for inference + if ( property.canAs( OntProperty.class ) ) { + OntProperty op = property.as( OntProperty.class ); + ExtendedIterator it = op.listSubProperties( false ); + while ( it.hasNext() ) { + OntProperty sp = it.next(); + allProps.add( sp ); + log.info( "Inferred {} from {}", sp, property ); + } + } + } + props = allProps; + } + return model.listRestrictions().filterKeep( new RestrictionWithOnPropertyFilter( props ) ); + } } diff --git a/src/ubic/basecode/ontology/jena/OBO.java b/src/ubic/basecode/ontology/jena/OBO.java index 1be6bb71..0b596727 100644 --- a/src/ubic/basecode/ontology/jena/OBO.java +++ b/src/ubic/basecode/ontology/jena/OBO.java @@ -19,7 +19,6 @@ private static Property property( String name ) { public static final Property hasBroadSynonym = property( "hasBroadSynonm" ); public static final Property hasNarrowSynonym = property( "hasNarrowSynonym" ); public static final Property hasRelatedSynonym = property( "hasRelatedSynonym" ); - public static final Property alternativeLabel = ResourceFactory.createProperty( "http://purl.obolibrary.org/obo/IAO_0000118" ); public static final Resource ObsoleteClass = ResourceFactory.createResource( "http://www.geneontology.org/formats/oboInOwl#ObsoleteClass" ); public static final Property ObsoleteProperty = property( "ObsoleteProperty" ); } diff --git a/src/ubic/basecode/ontology/jena/OntologyIndexer.java b/src/ubic/basecode/ontology/jena/OntologyIndexer.java index 9eadb5af..f5b11e9f 100644 --- a/src/ubic/basecode/ontology/jena/OntologyIndexer.java +++ b/src/ubic/basecode/ontology/jena/OntologyIndexer.java @@ -69,7 +69,7 @@ class OntologyIndexer { private static final Logger log = LoggerFactory.getLogger( 
OntologyIndexer.class ); /** - * THose are build-in fields that are always indexed. + * Those are build-in fields that are always indexed. */ private static final String ID_FIELD = "_ID", @@ -107,7 +107,7 @@ public boolean isAnalyzed() { DEFAULT_INDEXABLE_PROPERTIES.add( new IndexableProperty( OBO.hasBroadSynonym, true ) ); DEFAULT_INDEXABLE_PROPERTIES.add( new IndexableProperty( OBO.hasNarrowSynonym, true ) ); DEFAULT_INDEXABLE_PROPERTIES.add( new IndexableProperty( OBO.hasRelatedSynonym, true ) ); - DEFAULT_INDEXABLE_PROPERTIES.add( new IndexableProperty( OBO.alternativeLabel, true ) ); + DEFAULT_INDEXABLE_PROPERTIES.add( new IndexableProperty( IAO.alternativeLabel, true ) ); } /** @@ -125,7 +125,7 @@ public static SearchIndex getSubjectIndex( String name, Set excludedFrom */ @Nullable public static SearchIndex getSubjectIndex( String name, Collection indexableProperties, Set excludedFromStemming ) { - log.debug( "Loading index: {}", name ); + log.debug( "Loading index for {}...", name ); try { // we do not put this in the try-with-open because we want these to *stay* open FSDirectory directory = FSDirectory.open( getIndexPath( name ).toFile() ); @@ -136,10 +136,9 @@ public static SearchIndex getSubjectIndex( String name, Collection { - private final Set properties; + private final Set properties; - public RestrictionWithOnPropertyFilter( Set properties ) { + public RestrictionWithOnPropertyFilter( Set properties ) { this.properties = properties; } diff --git a/src/ubic/basecode/ontology/jena/package-info.java b/src/ubic/basecode/ontology/jena/package-info.java index 5d1211e8..a10a5631 100644 --- a/src/ubic/basecode/ontology/jena/package-info.java +++ b/src/ubic/basecode/ontology/jena/package-info.java @@ -1,5 +1,5 @@ /** - * + * Implementation of {@link ubic.basecode.ontology.providers.OntologyService} using Apache Jena. 
*/ @ParametersAreNonnullByDefault package ubic.basecode.ontology.jena; diff --git a/test/ubic/basecode/ontology/providers/GenericOntologyServiceTest.java b/test/ubic/basecode/ontology/providers/GenericOntologyServiceTest.java index 2b9f51ae..a74cb90a 100644 --- a/test/ubic/basecode/ontology/providers/GenericOntologyServiceTest.java +++ b/test/ubic/basecode/ontology/providers/GenericOntologyServiceTest.java @@ -19,6 +19,7 @@ package ubic.basecode.ontology.providers; +import org.assertj.core.api.Assertions; import org.junit.Test; import ubic.basecode.ontology.AbstractOntologyTest; import ubic.basecode.ontology.model.OntologyTerm; @@ -80,10 +81,10 @@ public void testWithoutOntologyCacheDir() { new GenericOntologyService( "foo", resource.toString(), false, false ) .initialize( true, true ); } ); - assertTrue( e.getMessage().matches( "No cache directory is set for foo \\[file:.+], cannot force indexing." ) ); + Assertions.assertThat( e ) + .hasMessageMatching( "No cache directory is set for foo.+, cannot force indexing\\." 
); } finally { - Configuration.setString( "ontology.cache.dir", prevCacheDir ); - Configuration.setString( "ontology.index.dir", prevIndexDir ); + Configuration.reset(); } } } diff --git a/test/ubic/basecode/ontology/providers/UberonOntologyServiceTest.java b/test/ubic/basecode/ontology/providers/UberonOntologyServiceTest.java index ee415186..ad4d3810 100644 --- a/test/ubic/basecode/ontology/providers/UberonOntologyServiceTest.java +++ b/test/ubic/basecode/ontology/providers/UberonOntologyServiceTest.java @@ -57,7 +57,7 @@ public void testGetParentsFromMultipleTerms() { OntologyTerm brain = uberon.getTerm( "http://purl.obolibrary.org/obo/UBERON_0000955" ); OntologyTerm liver = uberon.getTerm( "http://purl.obolibrary.org/obo/UBERON_0002107" ); Collection children = uberon.getParents( Arrays.asList( brain, liver ), false, true ); - assertEquals( 30, children.size() ); + assertEquals( 41, children.size() ); assertFalse( children.contains( uberon.getTerm( OWL2.Nothing.getURI() ) ) ); } @@ -66,7 +66,7 @@ public void testGetParentsHasPart() { OntologyTerm t = uberon.getTerm( "http://purl.obolibrary.org/obo/UBERON_0000955" ); assertNotNull( t ); Collection parents = t.getParents( true ); - assertEquals( 3, parents.size() ); + assertEquals( 4, parents.size() ); // does not contain itself assertFalse( parents.contains( t ) ); // via subclass @@ -81,9 +81,9 @@ public void testGetParentsHasPart() { public void testGetChildrenHasPart() { OntologyTerm t = uberon.getTerm( "http://purl.obolibrary.org/obo/UBERON_0000955" ); assertNotNull( t ); - assertEquals( 76, t.getChildren( true ).size() ); + assertEquals( 81, t.getChildren( true ).size() ); Collection children = t.getChildren( false ); - assertEquals( 1496, children.size() ); + assertEquals( 1995, children.size() ); // via subclass of, insect adult brain assertTrue( children.contains( uberon.getTerm( "http://purl.obolibrary.org/obo/UBERON_6003624" ) ) ); // via part of, nucleus of brain @@ -97,22 +97,22 @@ public void 
testGetChildrenFromMultipleTerms() { OntologyTerm brain = uberon.getTerm( "http://purl.obolibrary.org/obo/UBERON_0000955" ); OntologyTerm liver = uberon.getTerm( "http://purl.obolibrary.org/obo/UBERON_0002107" ); Collection children = uberon.getChildren( Arrays.asList( brain, liver ), false, true ); - assertEquals( 1562, children.size() ); + assertEquals( 2077, children.size() ); } @Test public void testGetChildrenFromMultipleTermsWithSearch() throws OntologySearchException { Collection> terms = uberon.findTerm( "brain", 500 ); Collection matches = uberon.getChildren( terms.stream().map( OntologySearchResult::getResult ).collect( Collectors.toSet() ), false, true ); - assertEquals( 1870, matches.size() ); + assertEquals( 2684, matches.size() ); } @Test public void testFindTerm() throws OntologySearchException { - assertEquals( 123, uberon.findTerm( "brain", 500 ).size() ); - assertEquals( 128, uberon.findTerm( "brain", 500, true ).size() ); + assertEquals( 98, uberon.findTerm( "brain", 500 ).size() ); + assertEquals( 103, uberon.findTerm( "brain", 500, true ).size() ); OntologySearchResult firstResult = uberon.findTerm( "brain", 500 ).iterator().next(); assertNotNull( firstResult ); - assertEquals( 2.8577, firstResult.getScore(), 0.0001 ); + assertEquals( 1.5367, firstResult.getScore(), 0.0001 ); } } From ce3840689028f55482d0dce34579bdc35727bec2 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Mon, 29 Apr 2024 14:21:02 -0700 Subject: [PATCH 11/19] ontology: Use debug logs for indexer and loader logs --- src/ubic/basecode/ontology/jena/OntologyIndexer.java | 6 +++--- src/ubic/basecode/ontology/jena/OntologyLoader.java | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/ubic/basecode/ontology/jena/OntologyIndexer.java b/src/ubic/basecode/ontology/jena/OntologyIndexer.java index f5b11e9f..5d195efb 100644 --- a/src/ubic/basecode/ontology/jena/OntologyIndexer.java +++ b/src/ubic/basecode/ontology/jena/OntologyIndexer.java @@ -163,7 
+163,7 @@ public static SearchIndex indexOntology( String name, OntModel model, Collection log.warn( "Index not found, or there was an error, re-indexing {}...", name ); return index( name, model, indexableProperties, excludedFromStemming ); } - log.info( "A valid index for {} already exists, using", name ); + log.debug( "A valid index for {} already exists, using", name ); return index; } @@ -191,7 +191,7 @@ private static SearchIndex index( String name, OntModel model, Collection indexableProperties ) throws IOException { StopWatch timer = StopWatch.createStarted(); FSDirectory dir = FSDirectory.open( indexDir.toFile() ); - log.info( "Indexing {} to: {}...", name, indexDir ); + log.debug( "Indexing {} to: {}...", name, indexDir ); IndexWriterConfig config = new IndexWriterConfig( Version.LUCENE_36, analyzer ); try ( IndexWriter indexWriter = new IndexWriter( dir, config ) ) { indexWriter.deleteAll(); // start with clean slate. @@ -228,7 +228,7 @@ private static Directory index( String name, OntModel model, Analyzer analyzer, indexWriter.addDocument( doc ); } indexWriter.commit(); - log.info( "Done indexing {} subjects of {} in {} s.", indexWriter.numDocs(), name, String.format( "%.2f", timer.getTime() / 1000.0 ) ); + log.debug( "Done indexing {} subjects of {} in {} s.", indexWriter.numDocs(), name, String.format( "%.2f", timer.getTime() / 1000.0 ) ); } return dir; } diff --git a/src/ubic/basecode/ontology/jena/OntologyLoader.java b/src/ubic/basecode/ontology/jena/OntologyLoader.java index a5381ef2..f31278b4 100644 --- a/src/ubic/basecode/ontology/jena/OntologyLoader.java +++ b/src/ubic/basecode/ontology/jena/OntologyLoader.java @@ -129,7 +129,7 @@ public static OntModel loadMemoryModel( String url, @Nullable String cacheName, // the ontology. 
FileUtils.createParentDirectories( oldFile ); Files.copy( f.toPath(), oldFile.toPath(), StandardCopyOption.REPLACE_EXISTING ); - log.info( "Load model from disk took {} ms", timer.getTime() ); + log.debug( "Load model from disk took {} ms", timer.getTime() ); } } else { throw new RuntimeException( @@ -138,7 +138,7 @@ public static OntModel loadMemoryModel( String url, @Nullable String cacheName, } else if ( tempFile.exists() ) { // Model was successfully loaded into memory from URL with given cacheName // Save cache to disk (rename temp file) - log.info( "Caching ontology to disk: {} under {}", cacheName, f.getAbsolutePath() ); + log.debug( "Caching ontology to disk: {} under {}", cacheName, f.getAbsolutePath() ); try { // Need to compare previous to current so instead of overwriting we'll move the old file if ( f.exists() ) { @@ -154,7 +154,7 @@ public static OntModel loadMemoryModel( String url, @Nullable String cacheName, } } - log.info( "Loading ontology model for {} took {} ms", url, timer.getTime() ); + log.debug( "Loading ontology model for {} took {} ms", url, timer.getTime() ); return model; } From 5e6a22b6b1c5bf94e7d4b422ff9ceea092fa66db Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Mon, 29 Apr 2024 16:05:32 -0700 Subject: [PATCH 12/19] Better handle indexing of literals and URI resources Parse numeric literals and ignore blank nodes. 
--- .../basecode/ontology/jena/JenaUtils.java | 1 - .../ontology/jena/OntologyIndexer.java | 51 ++++++++++++++++--- .../ontology/providers/ObiServiceTest.java | 34 ++++++------- 3 files changed, 61 insertions(+), 25 deletions(-) diff --git a/src/ubic/basecode/ontology/jena/JenaUtils.java b/src/ubic/basecode/ontology/jena/JenaUtils.java index 8f9647fa..ba639f3b 100644 --- a/src/ubic/basecode/ontology/jena/JenaUtils.java +++ b/src/ubic/basecode/ontology/jena/JenaUtils.java @@ -244,7 +244,6 @@ public static ExtendedIterator listRestrictionsOnProperties( OntMod while ( it.hasNext() ) { OntProperty sp = it.next(); allProps.add( sp ); - log.info( "Inferred {} from {}", sp, property ); } } } diff --git a/src/ubic/basecode/ontology/jena/OntologyIndexer.java b/src/ubic/basecode/ontology/jena/OntologyIndexer.java index 5d195efb..a215a6f5 100644 --- a/src/ubic/basecode/ontology/jena/OntologyIndexer.java +++ b/src/ubic/basecode/ontology/jena/OntologyIndexer.java @@ -18,12 +18,10 @@ */ package ubic.basecode.ontology.jena; +import com.hp.hpl.jena.datatypes.xsd.XSDDateTime; import com.hp.hpl.jena.ontology.OntModel; import com.hp.hpl.jena.ontology.OntResource; -import com.hp.hpl.jena.rdf.model.Property; -import com.hp.hpl.jena.rdf.model.Resource; -import com.hp.hpl.jena.rdf.model.Statement; -import com.hp.hpl.jena.rdf.model.StmtIterator; +import com.hp.hpl.jena.rdf.model.*; import com.hp.hpl.jena.shared.JenaException; import com.hp.hpl.jena.util.iterator.ExtendedIterator; import com.hp.hpl.jena.util.iterator.WrappedIterator; @@ -35,6 +33,7 @@ import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.Fieldable; import org.apache.lucene.document.NumericField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; @@ -221,8 +220,46 @@ private static Directory index( String name, OntModel model, Analyzer analyzer, while ( 
listStatements.hasNext() ) { Statement s = listStatements.next(); String field = s.getPredicate().getURI(); - String value = JenaUtils.asString( s.getObject() ); - doc.add( new Field( field, value, Field.Store.NO, indexablePropertiesByField.get( field ).isAnalyzed() ? Field.Index.ANALYZED : Field.Index.NOT_ANALYZED ) ); + Fieldable f; + if ( s.getObject().isLiteral() ) { + Literal l = s.getObject().asLiteral(); + if ( l.getValue() instanceof String ) { + f = new Field( field, l.getString(), Field.Store.NO, indexablePropertiesByField.get( field ).isAnalyzed() ? Field.Index.ANALYZED : Field.Index.NOT_ANALYZED ); + } else if ( l.getValue() instanceof Number ) { + NumericField nf = new NumericField( field ); + if ( l.getValue() instanceof Integer ) { + nf.setIntValue( s.getInt() ); + } else if ( l.getValue() instanceof Long ) { + nf.setLongValue( s.getLong() ); + } else if ( l.getValue() instanceof Float ) { + nf.setFloatValue( s.getFloat() ); + } else if ( l.getValue() instanceof Double ) { + nf.setDoubleValue( s.getDouble() ); + } else { + log.warn( "Skipping numeric literal of unsupported type: {}", l ); + continue; + } + f = nf; + } else if ( l.getValue() instanceof XSDDateTime ) { + f = new NumericField( field ) + .setLongValue( ( ( XSDDateTime ) l.getValue() ).asCalendar().getTime().getTime() ); + } else if ( l.getValue() instanceof Boolean ) { + f = new NumericField( field ).setIntValue( Boolean.TRUE.equals( l.getValue() ) ? 
1 : 0 ); + } else { + log.warn( "Skipping literal of unsupported type: {}", l ); + continue; + } + } else if ( s.getObject().isURIResource() ) { + // index the URI + f = new Field( field, s.getObject().asResource().getURI(), Field.Store.NO, Field.Index.NOT_ANALYZED ); + } else { + // could be a blank node + continue; + } + if ( isIndividual ) { + System.out.println( doc ); + } + doc.add( f ); } } indexWriter.addDocument( doc ); @@ -278,7 +315,7 @@ private ExtendedIterator search( OntModel model, String queryS Query query = new MultiFieldQueryParser( Version.LUCENE_36, searchableFields, analyzer ).parse( queryString ); // in general, results are found in both regular and std index, so we divide by 2 the initial capacity // we also have to double the number of hits to account for duplicates - TopDocs hits = new IndexSearcher( index ).search( query, filter, maxResults * 3 ); + TopDocs hits = new IndexSearcher( index ).search( query, filter, maxResults * 2 ); Set seenIds = new HashSet<>( hits.totalHits / 2 ); List resources = new ArrayList<>( hits.totalHits / 2 ); for ( int i = 0; i < hits.scoreDocs.length; i++ ) { diff --git a/test/ubic/basecode/ontology/providers/ObiServiceTest.java b/test/ubic/basecode/ontology/providers/ObiServiceTest.java index 032df53e..b8aeb354 100644 --- a/test/ubic/basecode/ontology/providers/ObiServiceTest.java +++ b/test/ubic/basecode/ontology/providers/ObiServiceTest.java @@ -16,15 +16,10 @@ import org.junit.Test; import ubic.basecode.ontology.AbstractOntologyTest; -import ubic.basecode.ontology.model.OntologyIndividual; import ubic.basecode.ontology.model.OntologyResource; -import ubic.basecode.ontology.model.OntologyTerm; import ubic.basecode.ontology.search.OntologySearchResult; -import java.util.Collection; - -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; +import static org.assertj.core.api.Assertions.assertThat; /** * @author paul @@ -36,16 +31,21 @@ public void testLoadAndSearch() throws 
Exception { ObiService m = new ObiService(); m.setInferenceMode( OntologyService.InferenceMode.NONE ); m.initialize( true, false ); - - assertTrue( m.isOntologyLoaded() ); - - Collection> hits = m.findTerm( "batch", 500 ); - assertFalse( hits.isEmpty() ); - - Collection> ihits = m.findIndividuals( "batch", 500 ); - assertFalse( ihits.isEmpty() ); - - Collection> rhits = m.findResources( "batch", 500 ); - assertFalse( rhits.isEmpty() ); + assertThat( m.isOntologyLoaded() ).isTrue(); + + assertThat( m.findTerm( "batch", 500 ) ) + .extracting( OntologySearchResult::getResult ) + .extracting( OntologyResource::getUri ) + .contains( "http://purl.obolibrary.org/obo/IAO_0000132" ); + + assertThat( m.findIndividuals( "failed exploratory term", 500 ) ) + .extracting( OntologySearchResult::getResult ) + .extracting( OntologyResource::getUri ) + .contains( "http://purl.obolibrary.org/obo/IAO_0000103" ); + + assertThat( m.findResources( "batch", 500 ) ) + .extracting( OntologySearchResult::getResult ) + .extracting( OntologyResource::getUri ) + .contains( "http://purl.obolibrary.org/obo/IAO_0000132" ); } } From 1e7b56889fa3e820ab45e120f5b28966f4da2a00 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Tue, 30 Apr 2024 00:37:54 -0700 Subject: [PATCH 13/19] Handle invalid datatypes in literals --- .../ontology/jena/OntologyIndexer.java | 38 +++++++++++-------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/src/ubic/basecode/ontology/jena/OntologyIndexer.java b/src/ubic/basecode/ontology/jena/OntologyIndexer.java index a215a6f5..51f56472 100644 --- a/src/ubic/basecode/ontology/jena/OntologyIndexer.java +++ b/src/ubic/basecode/ontology/jena/OntologyIndexer.java @@ -18,6 +18,7 @@ */ package ubic.basecode.ontology.jena; +import com.hp.hpl.jena.datatypes.DatatypeFormatException; import com.hp.hpl.jena.datatypes.xsd.XSDDateTime; import com.hp.hpl.jena.ontology.OntModel; import com.hp.hpl.jena.ontology.OntResource; @@ -223,28 +224,35 @@ private static 
Directory index( String name, OntModel model, Analyzer analyzer, Fieldable f; if ( s.getObject().isLiteral() ) { Literal l = s.getObject().asLiteral(); - if ( l.getValue() instanceof String ) { - f = new Field( field, l.getString(), Field.Store.NO, indexablePropertiesByField.get( field ).isAnalyzed() ? Field.Index.ANALYZED : Field.Index.NOT_ANALYZED ); - } else if ( l.getValue() instanceof Number ) { + Object v; + try { + v = l.getValue(); + } catch ( DatatypeFormatException e ) { + log.warn( "Invalid datatype for literal: {}", l, e ); + continue; + } + if ( v instanceof String ) { + f = new Field( field, ( String ) v, Field.Store.NO, indexablePropertiesByField.get( field ).isAnalyzed() ? Field.Index.ANALYZED : Field.Index.NOT_ANALYZED ); + } else if ( v instanceof Number ) { NumericField nf = new NumericField( field ); - if ( l.getValue() instanceof Integer ) { - nf.setIntValue( s.getInt() ); - } else if ( l.getValue() instanceof Long ) { - nf.setLongValue( s.getLong() ); - } else if ( l.getValue() instanceof Float ) { - nf.setFloatValue( s.getFloat() ); - } else if ( l.getValue() instanceof Double ) { - nf.setDoubleValue( s.getDouble() ); + if ( v instanceof Integer ) { + nf.setIntValue( ( Integer ) v ); + } else if ( v instanceof Long ) { + nf.setLongValue( ( Long ) v ); + } else if ( v instanceof Float ) { + nf.setFloatValue( ( Float ) v ); + } else if ( v instanceof Double ) { + nf.setDoubleValue( ( Double ) v ); } else { log.warn( "Skipping numeric literal of unsupported type: {}", l ); continue; } f = nf; - } else if ( l.getValue() instanceof XSDDateTime ) { + } else if ( v instanceof XSDDateTime ) { f = new NumericField( field ) - .setLongValue( ( ( XSDDateTime ) l.getValue() ).asCalendar().getTime().getTime() ); - } else if ( l.getValue() instanceof Boolean ) { - f = new NumericField( field ).setIntValue( Boolean.TRUE.equals( l.getValue() ) ? 
1 : 0 ); + .setLongValue( ( ( XSDDateTime ) v ).asCalendar().getTime().getTime() ); + } else if ( v instanceof Boolean ) { + f = new NumericField( field ).setIntValue( Boolean.TRUE.equals( v ) ? 1 : 0 ); } else { log.warn( "Skipping literal of unsupported type: {}", l ); continue; From 4cbdc4b4d29f5db81aedf1855e08c86a0d85a6ee Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Tue, 30 Apr 2024 09:49:38 -0700 Subject: [PATCH 14/19] Update log4j to 2.23.1 --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index d319b159..986072c1 100644 --- a/pom.xml +++ b/pom.xml @@ -234,13 +234,13 @@ org.apache.logging.log4j log4j-core - 2.21.1 + 2.23.1 test org.apache.logging.log4j log4j-slf4j-impl - 2.21.1 + 2.23.1 test From 21ff5de4ca147ba5db95d2e46084443b9392ea7c Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Tue, 30 Apr 2024 10:05:43 -0700 Subject: [PATCH 15/19] Add missing log4j-api dependency --- pom.xml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pom.xml b/pom.xml index 986072c1..b25426fa 100644 --- a/pom.xml +++ b/pom.xml @@ -231,6 +231,12 @@ assertj-core 3.25.3 + + org.apache.logging.log4j + log4j-api + 2.23.1 + test + org.apache.logging.log4j log4j-core From 1d78b4248b9f898f5c544402e88258a1c4e02791 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Tue, 30 Apr 2024 10:06:05 -0700 Subject: [PATCH 16/19] Move assertj in the test scope --- pom.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/pom.xml b/pom.xml index b25426fa..7aee67a6 100644 --- a/pom.xml +++ b/pom.xml @@ -230,6 +230,7 @@ org.assertj assertj-core 3.25.3 + test org.apache.logging.log4j From 0e8665387e5cb44468d8d467ec93563f94e9a062 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Tue, 30 Apr 2024 10:09:40 -0700 Subject: [PATCH 17/19] Update Maven plugins --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 7aee67a6..04e47b19 100644 --- 
a/pom.xml +++ b/pom.xml @@ -315,7 +315,7 @@ com.amashchenko.maven.plugin gitflow-maven-plugin - 1.16.0 + 1.21.0 false @@ -418,7 +418,7 @@ org.apache.maven.plugins maven-project-info-reports-plugin - 3.1.2 + 3.2.2 From 88ddff4b408416a0f3cd03b462baa172e5919a46 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Tue, 30 Apr 2024 10:12:06 -0700 Subject: [PATCH 18/19] Update Maven metadata --- pom.xml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pom.xml b/pom.xml index 04e47b19..f08725d9 100644 --- a/pom.xml +++ b/pom.xml @@ -10,10 +10,10 @@ - https://github.com/pavlidisLab/basecode + https://github.com/PavlidisLab/baseCode - UBC Michael Smith Laboratories - http://www.msl.ubc.ca/ + Pavlidis Lab + https://pavlab.msl.ubc.ca/ jar @@ -27,7 +27,7 @@ The Apache Software License, Version 2.0 - http://www.apache.org/licenses/LICENSE-2.0.txt + https://www.apache.org/licenses/LICENSE-2.0.txt repo From 88e992fad2edc81c170294914d9e2c9258062bf5 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Tue, 30 Apr 2024 11:35:25 -0700 Subject: [PATCH 19/19] Update for next development version --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index f08725d9..f6ee496d 100644 --- a/pom.xml +++ b/pom.xml @@ -5,7 +5,7 @@ baseCode baseCode baseCode - 1.1.22-SNAPSHOT + 1.1.22 2003