diff --git a/.gitignore b/.gitignore index deeb8432d..807aaf614 100755 --- a/.gitignore +++ b/.gitignore @@ -58,3 +58,4 @@ data wikidata qendpoint-store/wdbench-indexes wdbench-results +wdbench-indexes diff --git a/qendpoint-backend/pom.xml b/qendpoint-backend/pom.xml index 331680f51..920f8c21d 100644 --- a/qendpoint-backend/pom.xml +++ b/qendpoint-backend/pom.xml @@ -43,7 +43,7 @@ 1.1.1 4.13.2 3.3.1 - 5.0.0-SNAPSHOT + 5.0.1 3.0.2 1.4.5 diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/sequence/SequenceLog64Map.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/sequence/SequenceLog64Map.java index e3ce9942d..2d5edb3e6 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/sequence/SequenceLog64Map.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/compact/sequence/SequenceLog64Map.java @@ -53,7 +53,10 @@ */ public class SequenceLog64Map implements Sequence, Closeable { private static final byte W = 64; - private static final long LONGS_PER_BUFFER = 128 * 1024 * 1024; // 128*8 = + public static final int W_LEFT_SHIFT = (W << 1); + private static final int LONGS_PER_BUFFER = 128 * 1024 * 1024; // 128*8 = + private static final int LOG2_LONGS_PER_BUFFER = Long.numberOfTrailingZeros(LONGS_PER_BUFFER); + // 1Gb per // chunk. private CloseMappedByteBuffer[] buffers; @@ -62,6 +65,8 @@ public class SequenceLog64Map implements Sequence, Closeable { private final long numentries; private long lastword; private final long numwords; + private final int W_numbits; + private final int W_LEFT_SHIFT_MINUS_NUMBITS; public SequenceLog64Map(File f) throws IOException { // Read from the beginning of the file @@ -80,6 +85,8 @@ private SequenceLog64Map(CountInputStream in, File f, boolean closeInput) throws throw new IllegalFormatException("Trying to read a LogArray but the data is not LogArray"); } numbits = crcin.read(); + W_numbits = W - numbits; + W_LEFT_SHIFT_MINUS_NUMBITS = W_LEFT_SHIFT - numbits; numentries = VByte.decode(crcin); if (!crcin.readCRCAndCheck()) { @@ -111,6 +118,8 @@ private SequenceLog64Map(CountInputStream in, File f, boolean closeInput) throws public SequenceLog64Map(int numbits, long numentries, File f) throws IOException { this.numbits = numbits; + this.W_numbits = W - numbits; + this.W_LEFT_SHIFT_MINUS_NUMBITS = W_LEFT_SHIFT - numbits; this.numentries = numentries; this.numwords = SequenceLog64.numWordsFor(numbits, numentries); @@ -178,32 +187,41 @@ private long getWord(long w) { return lastword; } - return buffers[(int) (w / LONGS_PER_BUFFER)].getLong((int) ((w % LONGS_PER_BUFFER) * 8)); + return buffers[(int) (w >> LOG2_LONGS_PER_BUFFER)].getLong((int) ((w & (LONGS_PER_BUFFER - 1)) << 3)); +// return buffers[(int) (w / LONGS_PER_BUFFER)].getLong((int) ((w % LONGS_PER_BUFFER) * 8)); } /* * (non-Javadoc) * @see hdt.triples.array.Stream#get(long) */ + @Override public long get(long index) { if (index < 0 || index >= numentries) { throw new IndexOutOfBoundsException(index + " < 0 || " + index + ">= " + numentries); } - if (numbits == 0) + if (numbits == 0) { return 0; + } long bitPos = index * numbits; - long i = bitPos / W; int j = (int) (bitPos % W); - long result; if (j + numbits <= W) { - result = (getWord(i) << (W - j - numbits)) >>> (W - numbits); + return extracted1(bitPos, j); } else { - result = getWord(i) >>> j; - result = result | (getWord(i + 1) << ((W << 1) - j - numbits)) >>> (W - numbits); + return extracted(bitPos, j); } - return result; + } + + private long extracted(long bitPos, int j) { + long i = bitPos / W; + return getWord(i) >>> j | (getWord(i + 1) << (W_LEFT_SHIFT_MINUS_NUMBITS - j)) >>> W_numbits; + } + + private long extracted1(long bitPos, int j) { + long i = bitPos / W; + return (getWord(i) << (W_numbits - j)) >>> (W_numbits); } /* diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/enums/TripleComponentOrder.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/enums/TripleComponentOrder.java index 9669f8885..ee7378d73 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/enums/TripleComponentOrder.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/enums/TripleComponentOrder.java @@ -20,6 +20,7 @@ package com.the_qa_company.qendpoint.core.enums; import java.util.ArrayList; +import java.util.EnumSet; import java.util.List; import java.util.Map; @@ -98,6 +99,8 @@ public static List fetchAllBestForCfg(int return ret; } + public static TripleComponentOrder preference; + /** * Search for an acceptable value in a map of orders * @@ -107,12 +110,43 @@ public static List fetchAllBestForCfg(int * @return find value, null for no matching value */ public static T fetchBestForCfg(int flags, Map map) { + + var tripleComponentOrders = EnumSet.noneOf(TripleComponentOrder.class); + for (Map.Entry e : map.entrySet()) { if ((e.getKey().mask & flags) != 0) { - return e.getValue(); + tripleComponentOrders.add(e.getKey()); + } + } + + if (tripleComponentOrders.isEmpty()) { + return null; + } + + if (preference != null) { + if (tripleComponentOrders.contains(preference)) { + return map.get(preference); } + throw new IllegalStateException("Preference not found in the list of acceptable orders"); } - return null; + + if (tripleComponentOrders.contains(SOP)) { + return map.get(SOP); + } + if (tripleComponentOrders.contains(OPS)) { + return map.get(OPS); + } + if (tripleComponentOrders.contains(OSP)) { + return map.get(OSP); + } + if (tripleComponentOrders.contains(POS)) { + return map.get(POS); + } + if (tripleComponentOrders.contains(PSO)) { + return map.get(PSO); + } + + return map.get(tripleComponentOrders.iterator().next()); } /** diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriples.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriples.java index 2b9afacdb..fb42bff73 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriples.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriples.java @@ -93,6 +93,7 @@ */ public class BitmapTriples implements TriplesPrivate, BitmapTriplesIndex { private static final Logger log = LoggerFactory.getLogger(BitmapTriples.class); + public static boolean useDefaultOrder = true; protected TripleComponentOrder order; @@ -317,7 +318,7 @@ public SuppliableIteratorTripleID search(TripleID pattern, int searchMask) { TripleOrderConvert.swapComponentOrder(reorderedPat, TripleComponentOrder.SPO, order); int flags = reorderedPat.getPatternOrderFlags(); - if ((flags & searchMask & this.order.mask) != 0) { + if (useDefaultOrder && (flags & searchMask & this.order.mask) != 0) { // we can use the default order, so we use it return new BitmapTriplesIterator(this, pattern); } @@ -1340,7 +1341,7 @@ public void syncOtherIndexes(Path fileLocation, HDTOptions spec, ProgressListene try (FileChannel channel = FileChannel.open(subIndexPath, StandardOpenOption.READ)) { // load from the path... - BitmapTriplesIndex idx = BitmapTriplesIndexFile.map(subIndexPath, channel, this); + BitmapTriplesIndex idx = BitmapTriplesIndexFile.map(subIndexPath, channel); BitmapTriplesIndex old = indexes.put(order, idx); indexesMask |= idx.getOrder().mask; if (old != null) { @@ -1357,7 +1358,7 @@ public void syncOtherIndexes(Path fileLocation, HDTOptions spec, ProgressListene BitmapTriplesIndexFile.generateIndex(this, subIndexPath, order, spec, mListener); try (FileChannel channel = FileChannel.open(subIndexPath, StandardOpenOption.READ)) { // load from the path... - BitmapTriplesIndex idx = BitmapTriplesIndexFile.map(subIndexPath, channel, this); + BitmapTriplesIndex idx = BitmapTriplesIndexFile.map(subIndexPath, channel); BitmapTriplesIndex old = indexes.put(order, idx); indexesMask |= order.mask; if (old != null) { diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIndexFile.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIndexFile.java index 597e011b4..49896901b 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIndexFile.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/BitmapTriplesIndexFile.java @@ -11,7 +11,6 @@ import com.the_qa_company.qendpoint.core.compact.sequence.SequenceLog64BigDisk; import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder; import com.the_qa_company.qendpoint.core.exceptions.IllegalFormatException; -import com.the_qa_company.qendpoint.core.exceptions.SignatureIOException; import com.the_qa_company.qendpoint.core.iterator.utils.AsyncIteratorFetcher; import com.the_qa_company.qendpoint.core.iterator.utils.ExceptionIterator; import com.the_qa_company.qendpoint.core.iterator.utils.MapIterator; @@ -34,7 +33,6 @@ import java.io.Closeable; import java.io.IOException; import java.io.InterruptedIOException; -import java.nio.ByteOrder; import java.nio.channels.Channels; import java.nio.channels.FileChannel; import java.nio.charset.StandardCharsets; @@ -61,30 +59,19 @@ public static Path getIndexPath(Path hdt, TripleComponentOrder order) { return hdt.resolveSibling(hdt.getFileName() + "." + order.name().toLowerCase() + ".idx"); } - /** - * Compute triples signature - * - * @param triples triples - * @return signature - */ - public static long signature(BitmapTriples triples) { - return 0x484454802020L ^ triples.getNumberOfElements(); - } - - public static final byte[] MAGIC = "$HDTIDX1".getBytes(StandardCharsets.US_ASCII); + public static final byte[] MAGIC = "$HDTIDX0".getBytes(StandardCharsets.US_ASCII); /** * Map a file from a file * * @param file file * @param channel channel - * @param triples triples * @return index * @throws IOException io */ - public static BitmapTriplesIndex map(Path file, FileChannel channel, BitmapTriples triples) throws IOException { + public static BitmapTriplesIndex map(Path file, FileChannel channel) throws IOException { try (CloseMappedByteBuffer header = IOUtil.mapChannel(file, channel, FileChannel.MapMode.READ_ONLY, 0, - MAGIC.length + 8)) { + MAGIC.length)) { byte[] magicRead = new byte[MAGIC.length]; header.get(magicRead); @@ -92,18 +79,10 @@ public static BitmapTriplesIndex map(Path file, FileChannel channel, BitmapTripl if (!Arrays.equals(magicRead, MAGIC)) { throw new IOException(format("Can't read %s magic", file)); } - - long signature = header.order(ByteOrder.LITTLE_ENDIAN).getLong(magicRead.length); - - long currentSignature = signature(triples); - if (signature != currentSignature) { - throw new SignatureIOException( - format("Wrong signature for file 0x%x != 0x%x", signature, currentSignature)); - } } CountInputStream stream = new CountInputStream(new BufferedInputStream(Channels.newInputStream(channel))); - stream.skipNBytes(MAGIC.length + 8); + stream.skipNBytes(MAGIC.length); String orderCfg = IOUtil.readSizedString(stream, ProgressListener.ignore()); @@ -289,7 +268,6 @@ public static void generateIndex(BitmapTriples triples, Path destination, Triple // saving the index try (BufferedOutputStream output = new BufferedOutputStream(Files.newOutputStream(destination))) { output.write(MAGIC); - IOUtil.writeLong(output, signature(triples)); IOUtil.writeSizedString(output, order.name(), listener); diff --git a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TripleOrderConvert.java b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TripleOrderConvert.java index 35deaa52b..2f9cdcc6e 100644 --- a/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TripleOrderConvert.java +++ b/qendpoint-core/src/main/java/com/the_qa_company/qendpoint/core/triples/impl/TripleOrderConvert.java @@ -67,24 +67,33 @@ public static void swapComponentOrder(TripleID triple, TripleComponentOrder from if (from == TripleComponentOrder.Unknown || to == TripleComponentOrder.Unknown) { throw new IllegalArgumentException("Cannot swap Unknown Orders"); } - boolean swap1 = swap1tab[from.ordinal() - 1][to.ordinal() - 1]; - boolean swap2 = swap2tab[from.ordinal() - 1][to.ordinal() - 1]; - boolean swap3 = swap3tab[from.ordinal() - 1][to.ordinal() - 1]; - if (swap1) { - long tmp = triple.getSubject(); - triple.setSubject(triple.getPredicate()); - triple.setPredicate(tmp); + swap1(triple, from, to); + swap2(triple, from, to); + swap3(triple, from, to); + } + + private static void swap3(TripleID triple, TripleComponentOrder from, TripleComponentOrder to) { + if (swap3tab[from.ordinal() - 1][to.ordinal() - 1]) { + long tmp = triple.getPredicate(); + triple.setPredicate(triple.getObject()); + triple.setObject(tmp); } - if (swap2) { + } + + private static void swap2(TripleID triple, TripleComponentOrder from, TripleComponentOrder to) { + if (swap2tab[from.ordinal() - 1][to.ordinal() - 1]) { long tmp = triple.getSubject(); triple.setSubject(triple.getObject()); triple.setObject(tmp); } - if (swap3) { - long tmp = triple.getPredicate(); - triple.setPredicate(triple.getObject()); - triple.setObject(tmp); + } + + private static void swap1(TripleID triple, TripleComponentOrder from, TripleComponentOrder to) { + if (swap1tab[from.ordinal() - 1][to.ordinal() - 1]) { + long tmp = triple.getSubject(); + triple.setSubject(triple.getPredicate()); + triple.setPredicate(tmp); } } } diff --git a/qendpoint-store/pom.xml b/qendpoint-store/pom.xml index 87667ece3..cfd9309f7 100644 --- a/qendpoint-store/pom.xml +++ b/qendpoint-store/pom.xml @@ -41,7 +41,7 @@ 1.1.1 4.13.2 3.3.1 - 5.0.0-SNAPSHOT + 5.0.1 1.4.5 UTF-8 @@ -173,6 +173,18 @@ qendpoint-core ${project.parent.version} + + org.openjdk.jmh + jmh-core + 1.37 + test + + + org.openjdk.jmh + jmh-generator-annprocess + 1.37 + test + diff --git a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/model/EndpointStoreValueFactory.java b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/model/EndpointStoreValueFactory.java index f6dabec7d..052a48148 100644 --- a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/model/EndpointStoreValueFactory.java +++ b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/model/EndpointStoreValueFactory.java @@ -99,7 +99,7 @@ public IRI createIRI(String iri) { } } if (id != -1) { - return new SimpleIRIHDT(hdt, position, id); + return new SimpleIRIHDT(hdt.getDictionary(), position, id); } else { return super.createIRI(iri); } diff --git a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/model/SimpleIRIHDT.java b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/model/SimpleIRIHDT.java index 7ba529bf9..9a7664f0a 100644 --- a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/model/SimpleIRIHDT.java +++ b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/model/SimpleIRIHDT.java @@ -1,17 +1,16 @@ package com.the_qa_company.qendpoint.model; +import com.the_qa_company.qendpoint.core.dictionary.Dictionary; import com.the_qa_company.qendpoint.core.enums.DictionarySectionRole; import com.the_qa_company.qendpoint.core.enums.TripleComponentRole; import com.the_qa_company.qendpoint.core.exceptions.NotImplementedException; -import com.the_qa_company.qendpoint.core.hdt.HDT; import com.the_qa_company.qendpoint.store.exception.EndpointStoreException; import org.eclipse.rdf4j.model.IRI; -import org.eclipse.rdf4j.model.base.AbstractIRI; -import org.eclipse.rdf4j.model.util.URIUtil; +import org.eclipse.rdf4j.model.impl.SimpleValueFactory; import java.io.Serial; -public class SimpleIRIHDT extends AbstractIRI implements HDTValue { +public class SimpleIRIHDT implements HDTValue, IRI { @Serial private static final long serialVersionUID = -3220264926968931192L; @@ -31,31 +30,19 @@ public static byte getPos(DictionarySectionRole role) { }; } - private final HDT hdt; - private int postion; + private final Dictionary dict; + private final int position; private long id; - private String iriString; - // An index indicating the first character of the local name in the IRI - // string, -1 if not yet set. - private int localNameIdx; - private boolean delegate; - - public SimpleIRIHDT(HDT hdt, int position, long id) { - if (!(id > 0 && position >= SUBJECT_POS && position <= SHARED_POS)) { - throw new IllegalArgumentException("Bad argument %d > 0 / pos = %d".formatted(id, position)); - } - this.hdt = hdt; - this.postion = position; - this.id = id; - this.localNameIdx = -1; - } - public SimpleIRIHDT(HDT hdt, String iriString) { - assert iriString != null; - this.hdt = hdt; - this.iriString = iriString; - this.id = -1; - this.localNameIdx = -1; + private IRI delegate; + + public SimpleIRIHDT(Dictionary dict, int position, long id) { + this.dict = dict; + this.position = position; + this.id = id; + // if (!(id > 0 && position >= SUBJECT_POS && position <= SHARED_POS)) { +// throw new IllegalArgumentException("Bad argument %d > 0 / pos = %d".formatted(id, position)); +// } } @Override @@ -65,67 +52,56 @@ public long getHDTId() { @Override public int getHDTPosition() { - return postion; + return position; } public long getId() { return id; } - public int getPostion() { - return postion; - } - - @Override - public String toString() { - if (iriString == null) { - iriString = stringValue(); - } - return iriString; - } - - @Override - public String stringValue() { - if (this.iriString != null) { - return this.iriString; - } else { + private IRI getIRI() { + if (delegate == null) { CharSequence charSequence; - if (this.postion == SHARED_POS || this.postion == SUBJECT_POS) { - charSequence = hdt.getDictionary().idToString(this.id, TripleComponentRole.SUBJECT); - } else if (this.postion == OBJECT_POS) { - charSequence = hdt.getDictionary().idToString(this.id, TripleComponentRole.OBJECT); - } else if (this.postion == PREDICATE_POS) { - charSequence = hdt.getDictionary().idToString(this.id, TripleComponentRole.PREDICATE); - } else if (this.postion == GRAPH_POS) { - charSequence = hdt.getDictionary().idToString(this.id, TripleComponentRole.GRAPH); + if (this.position == SHARED_POS || this.position == SUBJECT_POS) { + charSequence = dict.idToString(this.id, TripleComponentRole.SUBJECT); + } else if (this.position == OBJECT_POS) { + charSequence = dict.idToString(this.id, TripleComponentRole.OBJECT); + } else if (this.position == PREDICATE_POS) { + charSequence = dict.idToString(this.id, TripleComponentRole.PREDICATE); + } else if (this.position == GRAPH_POS) { + charSequence = dict.idToString(this.id, TripleComponentRole.GRAPH); } else { - throw new EndpointStoreException("bad postion value: " + postion); + throw new EndpointStoreException("bad postion value: " + position); } if (charSequence == null) { throw new EndpointStoreException("Can't find HDT ID: " + id); } - return charSequence.toString(); + delegate = SimpleValueFactory.getInstance().createIRI(charSequence.toString()); } + return delegate; + } + + @Override + public String toString() { + return getIRI().toString(); + } + + @Override + public String stringValue() { + return getIRI().stringValue(); + } public String getNamespace() { - if (iriString == null) { - iriString = stringValue(); - } - if (localNameIdx < 0) { - localNameIdx = URIUtil.getLocalNameIndex(iriString); - } - return iriString.substring(0, localNameIdx); + return getIRI().getNamespace(); + } public String getLocalName() { - if (localNameIdx < 0) { - localNameIdx = URIUtil.getLocalNameIndex(iriString); - } + return getIRI().getLocalName(); - return iriString.substring(localNameIdx); } @Override @@ -149,50 +125,24 @@ public boolean equals(Object o) { @Override public int hashCode() { - if (id != -1 && !delegate) { - String prefix = "http://hdt.org/"; - if (this.postion == SHARED_POS) { - prefix += "SO"; - } else if (this.postion == SUBJECT_POS) { - prefix += "S"; - } else if (this.postion == PREDICATE_POS) { - prefix += "P"; - } else if (this.postion == OBJECT_POS) { - prefix += "O"; - } else if (this.postion == GRAPH_POS) { - prefix += "G"; - } else { - if (iriString != null) { - prefix = iriString; - } - return prefix.hashCode(); - } - prefix += id; - return prefix.hashCode(); - } else { - return toString().hashCode(); - } - } - - public String getIriString() { - return iriString; + return getIRI().hashCode(); } public void convertToNonHDTIRI() { - if (iriString == null) { - iriString = stringValue(); - } + this.id = -1; } @Override public void setDelegate(boolean delegate) { - this.delegate = delegate; + if (delegate) { + getIRI(); + } } @Override public boolean isDelegate() { - return delegate; + return delegate != null; } } diff --git a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/store/EndpointStoreTripleIterator.java b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/store/EndpointStoreTripleIterator.java index b655d36c5..5c0083dd9 100644 --- a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/store/EndpointStoreTripleIterator.java +++ b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/store/EndpointStoreTripleIterator.java @@ -10,6 +10,7 @@ import org.eclipse.rdf4j.model.Resource; import org.eclipse.rdf4j.model.Statement; import org.eclipse.rdf4j.model.Value; +import org.eclipse.rdf4j.model.impl.GenericStatement; import org.eclipse.rdf4j.query.QueryInterruptedException; import com.the_qa_company.qendpoint.core.triples.IteratorTripleID; import com.the_qa_company.qendpoint.core.triples.TripleID; @@ -22,21 +23,57 @@ public class EndpointStoreTripleIterator implements CloseableIteration, IndexReportingIterator { private static final Logger logger = LoggerFactory.getLogger(EndpointStoreTripleIterator.class); + public static boolean cache = true; + private final AtomicBoolean closed = new AtomicBoolean(); private final EndpointStore endpoint; private final EndpointStoreConnection connection; private final EndpointTripleSource endpointTripleSource; private final IteratorTripleID iterator; private final CloseableIteration repositoryResult; + + private final Resource subject; + private final IRI predicate; + private final Value object; + + private long objectID_cache; + private Value objectCache; + + private long subjectID_cache; + private Resource subjectCache; + + private long predicateID_cache; + private IRI predicateCache; + private Statement next; public EndpointStoreTripleIterator(EndpointStoreConnection connection, EndpointTripleSource endpointTripleSource, - IteratorTripleID iter, CloseableIteration repositoryResult) { + IteratorTripleID iter, CloseableIteration repositoryResult, long subjectID, + long predicateID, long objectID, boolean graph, long[] graphID) { this.connection = Objects.requireNonNull(connection, "connection can't be null!"); this.endpoint = Objects.requireNonNull(connection.getEndpoint(), "endpoint can't be null!"); this.endpointTripleSource = Objects.requireNonNull(endpointTripleSource, "endpointTripleSource can't be null!"); this.iterator = Objects.requireNonNull(iter, "iter can't be null!"); this.repositoryResult = Objects.requireNonNull(repositoryResult, "repositoryResult can't be null!"); + + if (subjectID > 0) { + subject = endpoint.getHdtConverter().idToSubjectHDTResource(subjectID); + } else { + subject = null; + } + + if (predicateID > 0) { + predicate = endpoint.getHdtConverter().idToPredicateHDTResource(predicateID); + } else { + predicate = null; + } + + if (objectID > 0) { + object = endpoint.getHdtConverter().idToObjectHDTResource(objectID); + } else { + object = null; + } + } @Override @@ -59,47 +96,102 @@ public boolean hasNext() { MultiLayerBitmapWrapper.MultiLayerModBitmapWrapper dbm = endpoint.getDeleteBitMap(order); if (endpoint.isDeleteDisabled() || dbm.getHandle().getMaxNumBits() == 0 || !dbm.access(tripleID.isQuad() ? tripleID.getGraph() - 1 : 0, iterator.getLastTriplePosition())) { - Resource subject = endpoint.getHdtConverter().idToSubjectHDTResource(tripleID.getSubject()); - IRI predicate = endpoint.getHdtConverter().idToPredicateHDTResource(tripleID.getPredicate()); - Value object = endpoint.getHdtConverter().idToObjectHDTResource(tripleID.getObject()); - if (logger.isTraceEnabled()) { - logger.trace("From HDT {} {} {} ", subject, predicate, object); - } + +// if (logger.isTraceEnabled()) { +// logger.trace("From HDT {} {} {} ", subject, predicate, object); +// } if (supportGraphs) { - Resource ctx = tripleID.isQuad() - ? endpoint.getHdtConverter().idToGraphHDTResource(tripleID.getGraph()) - : null; - next = endpointTripleSource.getValueFactory().createStatement(subject, predicate, object, ctx); + createStatementWithContext(tripleID); } else { - next = endpointTripleSource.getValueFactory().createStatement(subject, predicate, object); + createStatementWithoutContext(tripleID); } return true; } } // iterate over the result of rdf4j if (this.repositoryResult.hasNext()) { - Statement stm = repositoryResult.next(); - Resource newSubj = endpoint.getHdtConverter().rdf4jToHdtIDsubject(stm.getSubject()); - IRI newPred = endpoint.getHdtConverter().rdf4jToHdtIDpredicate(stm.getPredicate()); - Value newObject = endpoint.getHdtConverter().rdf4jToHdtIDobject(stm.getObject()); - Resource newContext = endpoint.getHdtConverter().rdf4jToHdtIDcontext(stm.getContext()); - - next = endpointTripleSource.getValueFactory().createStatement(newSubj, newPred, newObject, newContext); - if (logger.isTraceEnabled()) { - logger.trace("From RDF4j {} {} {}", next.getSubject(), next.getPredicate(), next.getObject()); - } + iterateOverResultsFromRDF4J(); return true; } return false; } + private void createStatementWithoutContext(TripleID tripleID) { + Resource subject = getSubject(tripleID); + + IRI predicate = getPredicate(tripleID); + + Value object = getObject(tripleID); + + next = new GenericStatement<>(subject, predicate, object, null); + +// next = endpointTripleSource.getValueFactory().createStatement(subject, predicate, object); + } + + private void createStatementWithContext(TripleID tripleID) { + Resource subject = endpoint.getHdtConverter().idToSubjectHDTResource(tripleID.getSubject()); + IRI predicate = endpoint.getHdtConverter().idToPredicateHDTResource(tripleID.getPredicate()); + Value object = endpoint.getHdtConverter().idToObjectHDTResource(tripleID.getObject()); + Resource ctx = tripleID.isQuad() ? endpoint.getHdtConverter().idToGraphHDTResource(tripleID.getGraph()) : null; + next = endpointTripleSource.getValueFactory().createStatement(subject, predicate, object, ctx); + } + + private Resource getSubject(TripleID tripleID) { + if (this.subject != null) { + return this.subject; + } + return endpoint.getHdtConverter().idToSubjectHDTResource(tripleID.getSubject()); + } + + private IRI getPredicate(TripleID tripleID) { + IRI predicate; + if (this.predicate != null) { + predicate = this.predicate; +// } else if (tripleID.getPredicate() == predicateID_cache) { +// predicate = predicateCache; + } else { + predicate = endpoint.getHdtConverter().idToPredicateHDTResource(tripleID.getPredicate()); +// this.predicateID_cache = tripleID.getPredicate(); +// this.predicateCache = predicate; + } + return predicate; + } + + private Value getObject(TripleID tripleID) { + Value object; + if (this.object != null) { + object = this.object; + } else if (cache && tripleID.getObject() == objectID_cache) { + object = objectCache; + } else { + object = endpoint.getHdtConverter().idToObjectHDTResource(tripleID.getObject()); + if (cache) { + this.objectID_cache = tripleID.getObject(); + this.objectCache = object; + } + } + return object; + } + + private void iterateOverResultsFromRDF4J() { + Statement stm = repositoryResult.next(); + Resource newSubj = endpoint.getHdtConverter().rdf4jToHdtIDsubject(stm.getSubject()); + IRI newPred = endpoint.getHdtConverter().rdf4jToHdtIDpredicate(stm.getPredicate()); + Value newObject = endpoint.getHdtConverter().rdf4jToHdtIDobject(stm.getObject()); + Resource newContext = endpoint.getHdtConverter().rdf4jToHdtIDcontext(stm.getContext()); + + next = endpointTripleSource.getValueFactory().createStatement(newSubj, newPred, newObject, newContext); + if (logger.isTraceEnabled()) { + logger.trace("From RDF4j {} {} {}", next.getSubject(), next.getPredicate(), next.getObject()); + } + } + @Override public Statement next() { if (!hasNext()) { return null; } - Statement stm = endpointTripleSource.getValueFactory().createStatement(next.getSubject(), next.getPredicate(), - next.getObject(), next.getContext()); + Statement stm = next; next = null; return stm; } diff --git a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/store/EndpointTripleSource.java b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/store/EndpointTripleSource.java index 8ddb03d23..99b0afb62 100644 --- a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/store/EndpointTripleSource.java +++ b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/store/EndpointTripleSource.java @@ -20,6 +20,7 @@ import org.eclipse.rdf4j.query.QueryEvaluationException; import org.eclipse.rdf4j.query.algebra.evaluation.TripleSource; import org.eclipse.rdf4j.sail.SailException; +import org.jetbrains.annotations.NotNull; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -33,6 +34,12 @@ import java.util.Set; import java.util.stream.Collectors; +import static com.the_qa_company.qendpoint.core.enums.TripleComponentOrder.OPS; +import static com.the_qa_company.qendpoint.core.enums.TripleComponentOrder.OSP; +import static com.the_qa_company.qendpoint.core.enums.TripleComponentOrder.POS; +import static com.the_qa_company.qendpoint.core.enums.TripleComponentOrder.PSO; +import static com.the_qa_company.qendpoint.core.enums.TripleComponentOrder.SOP; + // this is the main class telling how, given a triple pattern, to find the results in HDT and the current stores public class EndpointTripleSource implements TripleSource { @@ -95,44 +102,66 @@ public CloseableIteration getStatements(StatementOrder stat boolean graph = endpoint.getHdt().getDictionary().supportGraphs(); // convert uris into ids if needed - Resource newSubj; - IRI newPred; - Value newObj; - Resource[] newContextes; + long subjectID = this.endpoint.getHdtConverter().subjectToID(subj); long predicateID = this.endpoint.getHdtConverter().predicateToID(pred); long objectID = this.endpoint.getHdtConverter().objectToID(obj); long[] graphID; - if (subjectID == 0 || subjectID == -1) { - newSubj = subj; - } else { - newSubj = this.endpoint.getHdtConverter().subjectIdToIRI(subjectID); - } - if (predicateID == 0 || predicateID == -1) { - newPred = pred; - } else { - newPred = this.endpoint.getHdtConverter().predicateIdToIRI(predicateID); - } - if (objectID == 0 || objectID == -1) { - newObj = obj; - } else { - newObj = this.endpoint.getHdtConverter().objectIdToIRI(objectID); - } - if (graph) { graphID = new long[contexts.length]; - newContextes = this.endpoint.getHdtConverter().graphIdToIRI(contexts, graphID); } else { graphID = null; - newContextes = contexts; } // logger.debug("SEARCH {} {} {}", newSubj, newPred, newObj); // check if we need to search over the delta and if yes, search - CloseableIteration repositoryResult; + CloseableIteration repositoryResult = innerGetStatementsDelta(statementOrder, subj, pred, + obj, contexts, subjectID, predicateID, objectID, graph, graphID); + + // iterate over the HDT file + IteratorTripleID iterator = innerGetStatementsHDT(statementOrder, subj, pred, obj, contexts, subjectID, + predicateID, objectID, graph, graphID); + + // iterate over hdt result, delete the triples marked as deleted and add + // the triples from the delta + return new EndpointStoreTripleIterator(endpointStoreConnection, this, iterator, repositoryResult, subjectID, + predicateID, objectID, graph, graphID); + } + + private CloseableIteration innerGetStatementsDelta(StatementOrder statementOrder, + Resource subj, IRI pred, Value obj, Resource[] contexts, long subjectID, long predicateID, long objectID, + boolean graph, long[] graphID) { + CloseableIteration repositoryResult11; if (shouldSearchOverNativeStore(subjectID, predicateID, objectID)) { + + Resource newSubj; + IRI newPred; + Value newObj; + Resource[] newContextes; + if (subjectID == 0 || subjectID == -1) { + newSubj = subj; + } else { + newSubj = this.endpoint.getHdtConverter().subjectIdToIRI(subjectID); + } + if (predicateID == 0 || predicateID == -1) { + newPred = pred; + } else { + newPred = this.endpoint.getHdtConverter().predicateIdToIRI(predicateID); + } + if (objectID == 0 || objectID == -1) { + newObj = obj; + } else { + newObj = this.endpoint.getHdtConverter().objectIdToIRI(objectID); + } + + if (graph) { + newContextes = this.endpoint.getHdtConverter().graphIdToIRI(contexts, graphID); + } else { + newContextes = contexts; + } + if (statementOrder != null) { throw new UnsupportedOperationException( "Statement ordering is not supported when searching over the native store"); @@ -146,19 +175,23 @@ public CloseableIteration getStatements(StatementOrder stat .getStatements(newSubj, newPred, newObj, false, newContextes); CloseableIteration repositoryResult2 = this.endpointStoreConnection.getConnB_read() .getStatements(newSubj, newPred, newObj, false, newContextes); - repositoryResult = new CombinedNativeStoreResult(repositoryResult1, repositoryResult2); + repositoryResult11 = new CombinedNativeStoreResult(repositoryResult1, repositoryResult2); } else { logger.debug("Query only one RDF4j stores!"); - repositoryResult = this.endpointStoreConnection.getCurrentConnectionRead().getStatements(newSubj, + repositoryResult11 = this.endpointStoreConnection.getCurrentConnectionRead().getStatements(newSubj, newPred, newObj, false, newContextes); } } else { logger.debug("Not searching over native store"); - repositoryResult = new EmptyIteration<>(); + repositoryResult11 = new EmptyIteration<>(); } + CloseableIteration repositoryResult = repositoryResult11; + return repositoryResult; + } - // iterate over the HDT file + private IteratorTripleID innerGetStatementsHDT(StatementOrder statementOrder, Resource subj, IRI pred, Value obj, + Resource[] contexts, long subjectID, long predicateID, long objectID, boolean graph, long[] graphID) { IteratorTripleID iterator; if (subjectID != -1 && predicateID != -1 && objectID != -1) { // logger.debug("Searching over HDT {} {} {}", subjectID, @@ -166,19 +199,7 @@ public CloseableIteration getStatements(StatementOrder stat TripleID t = new TripleID(subjectID, predicateID, objectID); if (graph && contexts.length > 1) { - if (statementOrder != null) { - int indexMaskMatchingStatementOrder = getIndexMaskMatchingStatementOrder(statementOrder, subj, pred, - obj, t); - - // search with the ID to check if the triples has been - // deleted - iterator = new GraphFilteringTripleId( - this.endpoint.getHdt().getTriples().search(t, indexMaskMatchingStatementOrder), graphID); - } else { - // search with the ID to check if the triples has been - // deleted - iterator = new GraphFilteringTripleId(this.endpoint.getHdt().getTriples().search(t), graphID); - } + iterator = innerGetStatementsMultipleContexts(statementOrder, subj, pred, obj, t, graphID); } else { if (graph && contexts.length == 1) { t.setGraph(graphID[0]); @@ -200,10 +221,26 @@ public CloseableIteration getStatements(StatementOrder stat } else {// no need to search over hdt iterator = new EmptyTriplesIterator(TripleComponentOrder.SPO); } + return iterator; + } - // iterate over hdt result, delete the triples marked as deleted and add - // the triples from the delta - return new EndpointStoreTripleIterator(endpointStoreConnection, this, iterator, repositoryResult); + private @NotNull IteratorTripleID innerGetStatementsMultipleContexts(StatementOrder statementOrder, Resource subj, + IRI pred, Value obj, TripleID t, long[] graphID) { + IteratorTripleID iterator; + if (statementOrder != null) { + int indexMaskMatchingStatementOrder = getIndexMaskMatchingStatementOrder(statementOrder, subj, pred, obj, + t); + + // search with the ID to check if the triples has been + // deleted + iterator = new GraphFilteringTripleId( + this.endpoint.getHdt().getTriples().search(t, indexMaskMatchingStatementOrder), graphID); + } else { + // search with the ID to check if the triples has been + // deleted + iterator = new GraphFilteringTripleId(this.endpoint.getHdt().getTriples().search(t), graphID); + } + return iterator; } // this function determines if a triple pattern should be searched over the @@ -275,15 +312,34 @@ private int getIndexMaskMatchingStatementOrder(StatementOrder statementOrder, Re } } - Optional first = tripleComponentOrder.stream() + EnumSet tripleOrders = EnumSet.noneOf(TripleComponentOrder.class); + + tripleComponentOrder.stream() .filter(o -> getStatementOrder(o, subj != null, pred != null, obj != null).contains(statementOrder)) - .findFirst(); + .forEach(tripleOrders::add); + + if (tripleOrders.contains(SOP)) { + return SOP.mask; + } + if (tripleOrders.contains(OPS)) { + return OPS.mask; + } + if (tripleOrders.contains(OSP)) { + return OSP.mask; + } + if (tripleOrders.contains(POS)) { + return POS.mask; + } + if (tripleOrders.contains(PSO)) { + return PSO.mask; + } - if (first.isEmpty()) { + if (tripleOrders.isEmpty()) { throw new AssertionError( "Statement order " + statementOrder + " not supported for triple pattern " + t.getPatternString()); } - return first.get().mask; + + return tripleOrders.iterator().next().mask; } public static Set getStatementOrder(TripleComponentOrder tripleComponentOrder, boolean subject, diff --git a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/store/HDTConverter.java b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/store/HDTConverter.java index d5938c4dd..0697b2881 100644 --- a/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/store/HDTConverter.java +++ b/qendpoint-store/src/main/java/com/the_qa_company/qendpoint/store/HDTConverter.java @@ -4,6 +4,7 @@ import com.the_qa_company.qendpoint.core.enums.RDFNodeType; import com.the_qa_company.qendpoint.core.enums.TripleComponentRole; import com.the_qa_company.qendpoint.core.hdt.HDT; +import com.the_qa_company.qendpoint.core.util.concurrent.ExceptionSupplier; import com.the_qa_company.qendpoint.model.HDTValue; import com.the_qa_company.qendpoint.model.SimpleBNodeHDT; import com.the_qa_company.qendpoint.model.SimpleIRIHDT; @@ -15,6 +16,10 @@ import org.eclipse.rdf4j.model.Value; import org.eclipse.rdf4j.model.ValueFactory; import org.eclipse.rdf4j.model.impl.SimpleValueFactory; +import org.jetbrains.annotations.NotNull; + +import java.util.Objects; +import java.util.function.Supplier; // there are 4 types of resources: // resources coming from outside, @@ -28,10 +33,31 @@ public class HDTConverter { private final EndpointStore endpoint; private final HDT hdt; private final ValueFactory valueFactory = SimpleValueFactory.getInstance(); + private final Dictionary dict; + private final long startBlankShared; + private final long endBlankShared; + private final long startBlankSubjects; + private final long endBlankSubjects; + private final long endBlankObjects; + private final long startBlankObjects; + private final long startLiteral; + private final long endLiteral; + private final long nshared; public HDTConverter(EndpointStore endpoint) { this.endpoint = endpoint; this.hdt = endpoint.getHdt(); + this.dict = hdt.getDictionary(); + this.startBlankShared = endpoint.getHdtProps().getStartBlankShared(); + this.endBlankShared = endpoint.getHdtProps().getEndBlankShared(); + this.startBlankSubjects = endpoint.getHdtProps().getStartBlankSubjects(); + this.endBlankSubjects = endpoint.getHdtProps().getEndBlankSubjects(); + this.endBlankObjects = endpoint.getHdtProps().getEndBlankObjects(); + this.startBlankObjects = endpoint.getHdtProps().getStartBlankObjects(); + this.startLiteral = endpoint.getHdtProps().getStartLiteral(); + this.endLiteral = endpoint.getHdtProps().getEndLiteral(); + this.nshared = dict.getNshared(); + } // method to get the ID of a resource @@ -40,14 +66,14 @@ public long subjectToID(Resource subj) { return 0; } if (!(subj instanceof HDTValue hdtval)) { - return this.hdt.getDictionary().stringToId(subj.toString(), TripleComponentRole.SUBJECT); + return dict.stringToId(subj.toString(), TripleComponentRole.SUBJECT); } long id = hdtval.getHDTId(); return switch (hdtval.getHDTPosition()) { case SimpleIRIHDT.SUBJECT_POS, SimpleIRIHDT.SHARED_POS -> id; case SimpleIRIHDT.PREDICATE_POS, SimpleIRIHDT.GRAPH_POS -> - hdt.getDictionary().stringToId(subj.toString(), TripleComponentRole.SUBJECT); + dict.stringToId(subj.toString(), TripleComponentRole.SUBJECT); case SimpleIRIHDT.OBJECT_POS -> -1; // not shared default -> throw new IllegalArgumentException("Invalid HDT position: " + hdtval.getHDTPosition()); }; @@ -59,7 +85,7 @@ public long predicateToID(IRI pred) { } if (!(pred instanceof HDTValue hdtval && hdtval.getHDTPosition() == SimpleIRIHDT.PREDICATE_POS)) { - return this.hdt.getDictionary().stringToId(pred.toString(), TripleComponentRole.PREDICATE); + return dict.stringToId(pred.toString(), TripleComponentRole.PREDICATE); } return hdtval.getHDTId(); @@ -70,14 +96,14 @@ public long objectToID(Value obj) { return 0; } if (!(obj instanceof HDTValue hdtval)) { - return this.hdt.getDictionary().stringToId(obj.toString(), TripleComponentRole.OBJECT); + return dict.stringToId(obj.toString(), TripleComponentRole.OBJECT); } long id = hdtval.getHDTId(); return switch (hdtval.getHDTPosition()) { case SimpleIRIHDT.OBJECT_POS, SimpleIRIHDT.SHARED_POS -> id; case SimpleIRIHDT.PREDICATE_POS, SimpleIRIHDT.GRAPH_POS -> - hdt.getDictionary().stringToId(obj.toString(), TripleComponentRole.OBJECT); + dict.stringToId(obj.toString(), TripleComponentRole.OBJECT); case SimpleIRIHDT.SUBJECT_POS -> -1; // not shared default -> throw new IllegalArgumentException("Invalid HDT position: " + hdtval.getHDTPosition()); }; @@ -89,14 +115,14 @@ public long contextToID(Resource context) { } if (!(context instanceof HDTValue hdtval && hdtval.getHDTPosition() == SimpleIRIHDT.GRAPH_POS)) { - return this.hdt.getDictionary().stringToId(context.toString(), TripleComponentRole.GRAPH); + return dict.stringToId(context.toString(), TripleComponentRole.GRAPH); } return hdtval.getHDTId(); } public IRI subjectIdToIRI(long id) { - if (id <= this.hdt.getDictionary().getNshared()) { + if (id <= nshared) { return valueFactory.createIRI(HDT_URI + "SO" + id); } else { return valueFactory.createIRI(HDT_URI + "S" + id); @@ -108,7 +134,7 @@ public IRI predicateIdToIRI(long id) { } public IRI objectIdToIRI(long id) { - if (id <= this.hdt.getDictionary().getNshared()) { + if (id <= nshared) { return valueFactory.createIRI(HDT_URI + "SO" + id); } else { return valueFactory.createIRI(HDT_URI + "O" + id); @@ -163,7 +189,7 @@ public Value rdf4jToHdtIDobject(Value object) { } public Resource rdf4jToHdtIDcontext(Resource ctx) { - if (ctx == null || !hdt.getDictionary().supportGraphs()) { + if (ctx == null || !dict.supportGraphs()) { return ctx; } long id = rdf4jContextToHdtID(ctx); @@ -230,15 +256,13 @@ public long rdf4jContextToHdtID(Resource ctx) { } public Value idToValue(TripleComponentRole role, long id) { - Dictionary dict = hdt.getDictionary(); if (dict.supportsNodeTypeOfId()) { RDFNodeType nodeType = dict.nodeTypeOfId(role, id); - boolean shared = id <= dict.getNshared(); + boolean shared = id <= nshared; return switch (nodeType) { - case IRI -> - new SimpleIRIHDT(endpoint.getHdt(), SimpleIRIHDT.getPos(role.asDictionarySectionRole(shared)), id); + case IRI -> new SimpleIRIHDT(dict, SimpleIRIHDT.getPos(role.asDictionarySectionRole(shared)), id); case BLANK_NODE -> new SimpleBNodeHDT(hdt, SimpleIRIHDT.getPos(role.asDictionarySectionRole(shared)), id); - case LITERAL -> new SimpleLiteralHDT(endpoint.getHdt(), id, valueFactory); + case LITERAL -> new SimpleLiteralHDT(hdt, id, valueFactory); }; } return switch (role) { @@ -250,58 +274,100 @@ public Value idToValue(TripleComponentRole role, long id) { } public Resource idToSubjectHDTResource(long subjectID) { - return (Resource) idToValue(TripleComponentRole.SUBJECT, subjectID); - } - private Resource idToSubjectHDTResource0(long subjectID) { - if ((subjectID >= endpoint.getHdtProps().getStartBlankShared() - && subjectID <= endpoint.getHdtProps().getEndBlankShared()) - || (subjectID >= endpoint.getHdtProps().getStartBlankSubjects() - && subjectID <= endpoint.getHdtProps().getEndBlankSubjects())) { - if (subjectID <= hdt.getDictionary().getNshared()) { - return new SimpleBNodeHDT(hdt, SimpleIRIHDT.SHARED_POS, subjectID); + if (dict.supportsNodeTypeOfId()) { + RDFNodeType nodeType = dict.nodeTypeOfId(TripleComponentRole.SUBJECT, subjectID); + if (Objects.requireNonNull(nodeType) == RDFNodeType.IRI) { + return new SimpleIRIHDT(dict, + SimpleIRIHDT.getPos(TripleComponentRole.SUBJECT.asDictionarySectionRole(subjectID <= nshared)), + subjectID); + } else if (nodeType == RDFNodeType.BLANK_NODE) { + return new SimpleBNodeHDT(hdt, + SimpleIRIHDT.getPos(TripleComponentRole.SUBJECT.asDictionarySectionRole(subjectID <= nshared)), + subjectID); } else { - return new SimpleBNodeHDT(hdt, SimpleIRIHDT.SUBJECT_POS, subjectID); + throw new IllegalArgumentException(); } } else { - if (subjectID <= hdt.getDictionary().getNshared()) { - return new SimpleIRIHDT(hdt, SimpleIRIHDT.SHARED_POS, subjectID); - } else { - return new SimpleIRIHDT(hdt, SimpleIRIHDT.SUBJECT_POS, subjectID); - } + return idToSubjectHDTResource0(subjectID); + } + } + + private Resource idToSubjectHDTResource0(long subjectID) { + + if ((subjectID >= startBlankShared && subjectID <= endBlankShared) + || (subjectID >= startBlankSubjects && subjectID <= endBlankSubjects)) { + return getSimpleBNodeHDT(subjectID); + } else { + return getSimpleIRIHDT(subjectID); + } + } + + private @NotNull SimpleIRIHDT getSimpleIRIHDT(long subjectID) { + if (subjectID <= nshared) { + return new SimpleIRIHDT(dict, SimpleIRIHDT.SHARED_POS, subjectID); + } else { + return new SimpleIRIHDT(dict, SimpleIRIHDT.SUBJECT_POS, subjectID); + } + } + + private @NotNull SimpleBNodeHDT getSimpleBNodeHDT(long subjectID) { + if (subjectID <= nshared) { + return new SimpleBNodeHDT(hdt, SimpleIRIHDT.SHARED_POS, subjectID); + } else { + return new SimpleBNodeHDT(hdt, SimpleIRIHDT.SUBJECT_POS, subjectID); } } public IRI idToPredicateHDTResource(long predicateId) { - return new SimpleIRIHDT(endpoint.getHdt(), SimpleIRIHDT.PREDICATE_POS, predicateId); + return new SimpleIRIHDT(dict, SimpleIRIHDT.PREDICATE_POS, predicateId); } public Value idToObjectHDTResource(long objectID) { - return idToValue(TripleComponentRole.OBJECT, objectID); + if (dict.supportsNodeTypeOfId()) { + RDFNodeType nodeType = dict.nodeTypeOfId(TripleComponentRole.OBJECT, objectID); + boolean shared = objectID <= nshared; + return switch (nodeType) { + case IRI -> new SimpleIRIHDT(dict, + SimpleIRIHDT.getPos(TripleComponentRole.OBJECT.asDictionarySectionRole(shared)), objectID); + case BLANK_NODE -> new SimpleBNodeHDT(hdt, + SimpleIRIHDT.getPos(TripleComponentRole.OBJECT.asDictionarySectionRole(shared)), objectID); + case LITERAL -> new SimpleLiteralHDT(hdt, objectID, valueFactory); + }; + } + return idToObjectHDTResource0(objectID); } private Value idToObjectHDTResource0(long objectID) { - if (objectID >= endpoint.getHdtProps().getStartLiteral() - && objectID <= endpoint.getHdtProps().getEndLiteral()) { - return new SimpleLiteralHDT(endpoint.getHdt(), objectID, valueFactory); - } else if ((objectID >= endpoint.getHdtProps().getStartBlankObjects() - && objectID <= endpoint.getHdtProps().getEndBlankObjects()) - || (objectID >= endpoint.getHdtProps().getStartBlankShared() - && objectID <= endpoint.getHdtProps().getEndBlankShared())) { - if (objectID <= hdt.getDictionary().getNshared()) { - return new SimpleBNodeHDT(hdt, SimpleIRIHDT.SHARED_POS, objectID); - } else { - return new SimpleBNodeHDT(hdt, SimpleIRIHDT.OBJECT_POS, objectID); - } + + if (objectID >= startLiteral && objectID <= endLiteral) { + return new SimpleLiteralHDT(hdt, objectID, valueFactory); } else { - if (objectID <= endpoint.getHdt().getDictionary().getNshared()) { - return new SimpleIRIHDT(endpoint.getHdt(), SimpleIRIHDT.SHARED_POS, objectID); + if ((objectID >= startBlankObjects && objectID <= endBlankObjects) + || (objectID >= startBlankShared && objectID <= endBlankShared)) { + return getbNodeHDT(objectID); } else { - return new SimpleIRIHDT(endpoint.getHdt(), SimpleIRIHDT.OBJECT_POS, objectID); + return getIrihdt(objectID); } } } + private @NotNull SimpleIRIHDT getIrihdt(long objectID) { + if (objectID <= nshared) { + return new SimpleIRIHDT(dict, SimpleIRIHDT.SHARED_POS, objectID); + } else { + return new SimpleIRIHDT(dict, SimpleIRIHDT.OBJECT_POS, objectID); + } + } + + private @NotNull SimpleBNodeHDT getbNodeHDT(long objectID) { + if (objectID <= nshared) { + return new SimpleBNodeHDT(hdt, SimpleIRIHDT.SHARED_POS, objectID); + } else { + return new SimpleBNodeHDT(hdt, SimpleIRIHDT.OBJECT_POS, objectID); + } + } + public Resource idToGraphHDTResource(long graphID) { if (graphID == endpoint.getHdtProps().getDefaultGraph()) { return null; @@ -310,7 +376,7 @@ public Resource idToGraphHDTResource(long graphID) { && graphID <= endpoint.getHdtProps().getEndBlankGraph())) { return new SimpleBNodeHDT(hdt, SimpleIRIHDT.GRAPH_POS, graphID); } - return new SimpleIRIHDT(hdt, SimpleIRIHDT.GRAPH_POS, graphID); + return new SimpleIRIHDT(dict, SimpleIRIHDT.GRAPH_POS, graphID); } public Resource subjectHdtResourceToResource(Resource subject) { @@ -362,7 +428,7 @@ public Statement rdf4ToHdt(Statement statement) { Resource s = rdf4jToHdtIDsubject(statement.getSubject()); IRI p = rdf4jToHdtIDpredicate(statement.getPredicate()); Value o = rdf4jToHdtIDobject(statement.getObject()); - if (hdt.getDictionary().supportGraphs()) { + if (dict.supportGraphs()) { Resource g = rdf4jToHdtIDcontext(statement.getContext()); if (s == statement.getSubject() && p == statement.getPredicate() && o == statement.getObject() && g == statement.getContext()) { @@ -395,20 +461,20 @@ public Value convertValue(Value value) { return null; } String iriString = value.toString(); - long id = hdt.getDictionary().stringToId(iriString, TripleComponentRole.SUBJECT); + long id = dict.stringToId(iriString, TripleComponentRole.SUBJECT); int position; if (id != -1) { - if (id <= hdt.getDictionary().getNshared()) { + if (id <= nshared) { position = SimpleIRIHDT.SHARED_POS; } else { position = SimpleIRIHDT.SUBJECT_POS; } } else { - id = hdt.getDictionary().stringToId(iriString, TripleComponentRole.OBJECT); + id = dict.stringToId(iriString, TripleComponentRole.OBJECT); if (id != -1) { position = SimpleIRIHDT.OBJECT_POS; } else { - id = hdt.getDictionary().stringToId(iriString, TripleComponentRole.PREDICATE); + id = dict.stringToId(iriString, TripleComponentRole.PREDICATE); position = SimpleIRIHDT.PREDICATE_POS; } } diff --git a/qendpoint-store/src/test/java/com/the_qa_company/qendpoint/benchmark/README.md b/qendpoint-store/src/test/java/com/the_qa_company/qendpoint/benchmark/README.md new file mode 100644 index 000000000..20efa50f0 --- /dev/null +++ b/qendpoint-store/src/test/java/com/the_qa_company/qendpoint/benchmark/README.md @@ -0,0 +1,39 @@ +### 32 GB of memory, no caching + +``` +Benchmark (order) Mode Cnt Score Error Units +WikiDataDifferentIndexesBenchmark.testCount POS avgt 3 70.014 ± 3.764 ms/op +WikiDataDifferentIndexesBenchmark.testCount OSP avgt 3 63.579 ± 10.304 ms/op +WikiDataDifferentIndexesBenchmark.testCount PSO avgt 3 63.280 ± 3.144 ms/op +WikiDataDifferentIndexesBenchmark.testCount SOP avgt 3 63.525 ± 11.067 ms/op +WikiDataDifferentIndexesBenchmark.testCount OPS avgt 3 59.152 ± 3.904 ms/op +WikiDataDifferentIndexesBenchmark.testCount Unknown avgt 3 63.358 ± 8.699 ms/op +``` +Unknown is the "default order". + +### 4 GB of memory, no caching +``` +Benchmark (order) Mode Cnt Score Error Units +WikiDataDifferentIndexesBenchmark.testCount POS avgt 3 161.099 ± 137.932 ms/op +WikiDataDifferentIndexesBenchmark.testCount OSP avgt 3 142.316 ± 6.770 ms/op +WikiDataDifferentIndexesBenchmark.testCount PSO avgt 3 143.392 ± 51.168 ms/op +WikiDataDifferentIndexesBenchmark.testCount SOP avgt 3 94.574 ± 14.587 ms/op +WikiDataDifferentIndexesBenchmark.testCount OPS avgt 3 121.756 ± 59.288 ms/op +WikiDataDifferentIndexesBenchmark.testCount Unknown avgt 3 79.557 ± 21.136 ms/op +``` +Unknown is the "default order". + + +### 4 GB of memory, no caching, longer benchmark run (fork=3, warmup=10, iterations=10) +``` +Benchmark (order) Mode Cnt Score Error Units +WikiDataDifferentIndexesBenchmark.testCount POS avgt 30 161.733 ± 6.663 ms/op +WikiDataDifferentIndexesBenchmark.testCount OSP avgt 30 159.620 ± 7.252 ms/op +WikiDataDifferentIndexesBenchmark.testCount PSO avgt 30 147.293 ± 8.981 ms/op +WikiDataDifferentIndexesBenchmark.testCount SOP avgt 30 104.620 ± 19.548 ms/op +WikiDataDifferentIndexesBenchmark.testCount OPS avgt 30 130.582 ± 16.454 ms/op +WikiDataDifferentIndexesBenchmark.testCount Unknown avgt 30 77.321 ± 1.669 ms/op +``` +Unknown is the "default order". + + diff --git a/qendpoint-store/src/test/java/com/the_qa_company/qendpoint/benchmark/README2.md b/qendpoint-store/src/test/java/com/the_qa_company/qendpoint/benchmark/README2.md new file mode 100644 index 000000000..5d64e513e --- /dev/null +++ b/qendpoint-store/src/test/java/com/the_qa_company/qendpoint/benchmark/README2.md @@ -0,0 +1,51 @@ +# Before + +### @Fork(value = 1, jvmArgs = { "-Xms4G", "-Xmx4G", "-XX:+AlwaysPreTouch" }) +``` +Benchmark Mode Cnt Score Error Units +WikiDataBenchmark.testCountSimpleJoin avgt 10 1466.804 ± 10.966 ms/op +``` + +### @Fork(value = 1, jvmArgs = { "-Xms4G", "-Xmx4G", "-XX:+AlwaysPreTouch" , "-XX:+UseSerialGC"}) +``` +Benchmark Mode Cnt Score Error Units +WikiDataBenchmark.testCountSimpleJoin avgt 10 3315.039 ± 135.082 ms/op +``` + +### @Fork(value = 1, jvmArgs = { "-Xms32G", "-Xmx32G", "-XX:+AlwaysPreTouch" }) +``` +Benchmark Mode Cnt Score Error Units +WikiDataBenchmark.testCountSimpleJoin avgt 10 1319.262 ± 25.227 ms/op +``` + + + +# After + +### @Fork(value = 1, jvmArgs = { "-Xms4G", "-Xmx4G", "-XX:+AlwaysPreTouch" }) +``` +Benchmark Mode Cnt Score Error Units +WikiDataBenchmark.testCountSimpleJoin avgt 10 1220.977 ± 10.316 ms/op +WikiDataBenchmark.testCountSimpleJoin2 avgt 10 1877.909 ± 22.241 ms/op +``` + +### @Fork(value = 1, jvmArgs = { "-Xms4G", "-Xmx4G", "-XX:+AlwaysPreTouch" , "-XX:+UseSerialGC"}) +``` +Benchmark Mode Cnt Score Error Units +WikiDataBenchmark.testCountSimpleJoin avgt 10 2502.995 ± 89.774 ms/op +WikiDataBenchmark.testCountSimpleJoin2 avgt 10 3568.581 ± 665.337 ms/op +``` + +### @Fork(value = 1, jvmArgs = { "-Xms32G", "-Xmx32G", "-XX:+AlwaysPreTouch" }) +``` +Benchmark Mode Cnt Score Error Units +WikiDataBenchmark.testCountSimpleJoin avgt 10 958.359 ± 15.146 ms/op +WikiDataBenchmark.testCountSimpleJoin2 avgt 10 1764.693 ± 25.449 ms/op +``` + +### GraalVM - @Fork(value = 1, jvmArgs = { "-Xms32G", "-Xmx32G", "-XX:+AlwaysPreTouch" }) +``` +Benchmark Mode Cnt Score Error Units +WikiDataBenchmark.testCountSimpleJoin avgt 10 823.190 ± 11.428 ms/op +WikiDataBenchmark.testCountSimpleJoin2 avgt 10 1231.812 ± 14.117 ms/op +``` diff --git a/qendpoint-store/src/test/java/com/the_qa_company/qendpoint/benchmark/WikiDataBenchmark.java b/qendpoint-store/src/test/java/com/the_qa_company/qendpoint/benchmark/WikiDataBenchmark.java new file mode 100644 index 000000000..32b5b3a29 --- /dev/null +++ b/qendpoint-store/src/test/java/com/the_qa_company/qendpoint/benchmark/WikiDataBenchmark.java @@ -0,0 +1,228 @@ +package com.the_qa_company.qendpoint.benchmark; + +import com.the_qa_company.qendpoint.core.options.HDTOptions; +import com.the_qa_company.qendpoint.core.options.HDTOptionsKeys; +import com.the_qa_company.qendpoint.store.EndpointFiles; +import com.the_qa_company.qendpoint.store.EndpointStore; +import org.eclipse.rdf4j.model.Statement; +import org.eclipse.rdf4j.model.util.Values; +import org.eclipse.rdf4j.query.BindingSet; +import org.eclipse.rdf4j.query.TupleQueryResult; +import org.eclipse.rdf4j.query.algebra.evaluation.iterator.InnerMergeJoinIterator; +import org.eclipse.rdf4j.repository.RepositoryResult; +import org.eclipse.rdf4j.repository.sail.SailRepository; +import org.eclipse.rdf4j.repository.sail.SailRepositoryConnection; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.TearDown; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.RunnerException; +import org.openjdk.jmh.runner.options.Options; +import org.openjdk.jmh.runner.options.OptionsBuilder; + +import java.io.IOException; +import java.nio.file.Path; +import java.util.concurrent.TimeUnit; + +@State(Scope.Benchmark) +@Warmup(iterations = 0) +@BenchmarkMode({ Mode.AverageTime }) +//@Fork(value = 1, jvmArgs = { "-Xms96G", "-Xmx96G", "-XX:+UnlockExperimentalVMOptions","-XX:+UseEpsilonGC", "-XX:+AlwaysPreTouch" }) +@Fork(value = 1, jvmArgs = { "-Xms32G", "-Xmx32G", "-XX:+AlwaysPreTouch" }) +//@Fork(value = 1, jvmArgs = { "-Xms4G", "-Xmx4G", "-XX:+AlwaysPreTouch" }) +//@Fork(value = 3, jvmArgs = { "-Xms4G", "-Xmx4G", "-XX:+AlwaysPreTouch", "-XX:+PrintCompilation","-XX:+UnlockDiagnosticVMOptions","-XX:+PrintInlining" }) +//@Fork(value = 3, jvmArgs = { "-Xms4G", "-Xmx4G", "-XX:+EnableDynamicAgentLoading", "-XX:+AlwaysPreTouch", "-XX:+UnlockDiagnosticVMOptions", "-XX:+DebugNonSafepoints", "-XX:FlightRecorderOptions=stackdepth=2048" }) +//@Fork(value = 1, jvmArgs = { "-Xms32G", "-Xmx32G", "-XX:+EnableDynamicAgentLoading", "-XX:+AlwaysPreTouch","-XX:StartFlightRecording=delay=15s,dumponexit=true,filename=recording.jfr,method-profiling=max","-XX:FlightRecorderOptions=stackdepth=2048", "-XX:+UnlockDiagnosticVMOptions", "-XX:+DebugNonSafepoints" }) +//@Measurement(iterations = 1, time = 3, timeUnit = TimeUnit.MINUTES) +@Measurement(iterations = 10) +@OutputTimeUnit(TimeUnit.MILLISECONDS) +public class WikiDataBenchmark { + + private SailRepository endpointStore; + + @Setup(Level.Trial) + public void setUp() throws IOException { + + Path dir = Path.of(System.getProperty("user.dir") + "/wdbench-indexes/"); + System.out.println("Loading from: " + dir); + +// store options + HDTOptions options = HDTOptions.of( + // disable the default index (to use the custom indexes) + HDTOptionsKeys.BITMAPTRIPLES_INDEX_NO_FOQ, true, + // set the custom indexes we want + HDTOptionsKeys.BITMAPTRIPLES_INDEX_OTHERS, "sop,ops,osp,pso,pos"); + + EndpointStore store = new EndpointStore(new EndpointFiles(dir, "wdbench.hdt"), options); + store.init(); + + endpointStore = new SailRepository(store); + + } + + @TearDown(Level.Trial) + public void tearDown() { + if (endpointStore != null) { + endpointStore.shutDown(); + } + endpointStore = null; + } + + public static void main(String[] args) throws IOException { + Path dir = Path.of("/Users/havardottestad/Documents/Programming/qEndpoint2/qendpoint-store/wdbench-indexes"); + System.out.println("Loading from: " + dir); + + WikiDataBenchmark wikiDataBypassRDF4JBenchmark = new WikiDataBenchmark(); + HDTOptions options = HDTOptions.of( + // disable the default index (to use the custom indexes) + HDTOptionsKeys.BITMAPTRIPLES_INDEX_NO_FOQ, true, + // set the custom indexes we want + HDTOptionsKeys.BITMAPTRIPLES_INDEX_OTHERS, "sop,ops,osp,pso,pos"); + + wikiDataBypassRDF4JBenchmark.endpointStore = new SailRepository( + new EndpointStore(new EndpointFiles(dir, "wdbench.hdt"), options)); + wikiDataBypassRDF4JBenchmark.endpointStore.init(); + + wikiDataBypassRDF4JBenchmark.testCountSimpleJoin2(); + wikiDataBypassRDF4JBenchmark.tearDown(); + } + + @Benchmark + public long testCountSimpleJoin() { + try (SailRepositoryConnection connection = endpointStore.getConnection()) { + + String query = """ + PREFIX rdfs: + PREFIX wd: + PREFIX wdt: + SELECT (COUNT(?profession_id) AS ?count) WHERE { + ?person_id wdt:P31 wd:Q5 . + ?person_id wdt:P106 ?profession_id . + } + """; + + try (TupleQueryResult evaluate = connection.prepareTupleQuery(query).evaluate()) { + long i = 0; + while (evaluate.hasNext()) { + i++; + BindingSet next = evaluate.next(); + System.out.println(next); + if (!next.toString().equals("[count=\"8501245\"^^]")) { + throw new IllegalStateException("Unexpected result: " + next); + } + } + return i; + } + + } + } + + @Benchmark + public long testCountSimpleJoin2() { + + try (SailRepositoryConnection connection = endpointStore.getConnection()) { + + String query = """ + PREFIX wd: + PREFIX wdt: + SELECT (count(?s) as ?c) WHERE { + ?s wdt:P106 ?o . + ?s wdt:P31 wd:Q5 . + ?s wdt:P21 ?sex + } + """; + + try (TupleQueryResult evaluate = connection.prepareTupleQuery(query).evaluate()) { + long i = 0; + while (evaluate.hasNext()) { + i++; + BindingSet next = evaluate.next(); + System.out.println(next); + if (!next.toString().equals("[c=\"7011884\"^^]")) { + throw new IllegalStateException("Unexpected result: " + next); + } + } + return i; + } + + } + } + + @Benchmark + public long testCount() { + try (SailRepositoryConnection connection = endpointStore.getConnection()) { + + String query = """ + PREFIX rdfs: + PREFIX wd: + PREFIX wdt: + SELECT (COUNT(?profession_id) AS ?count) WHERE { + ?person_id wdt:P106 ?profession_id . + } + """; + + try (TupleQueryResult evaluate = connection.prepareTupleQuery(query).evaluate()) { + long i = 0; + while (evaluate.hasNext()) { + i++; + BindingSet next = evaluate.next(); + System.out.println(next); + } + return i; + } + + } + } + + @Benchmark + public long testCountWithoutCountInQuery() { + try (SailRepositoryConnection connection = endpointStore.getConnection()) { + + String query = """ + PREFIX rdfs: + PREFIX wd: + PREFIX wdt: + SELECT ?profession_id WHERE { + ?person_id wdt:P106 ?profession_id . + } + """; + + try (TupleQueryResult evaluate = connection.prepareTupleQuery(query).evaluate()) { + long i = 0; + while (evaluate.hasNext()) { + i++; + evaluate.next(); + } + return i; + } + + } + } + + @Benchmark + public long testCountGetStatements() { + try (SailRepositoryConnection connection = endpointStore.getConnection()) { + + try (RepositoryResult statements = connection.getStatements(null, + Values.iri("http://www.wikidata.org/prop/direct/P106"), null, false)) { + long i = 0; + while (statements.hasNext()) { + i++; + statements.next(); + } + return i; + } + + } + } + +} diff --git a/qendpoint-store/src/test/java/com/the_qa_company/qendpoint/benchmark/WikiDataBypassRDF4JBenchmark.java b/qendpoint-store/src/test/java/com/the_qa_company/qendpoint/benchmark/WikiDataBypassRDF4JBenchmark.java new file mode 100644 index 000000000..ee6266fff --- /dev/null +++ b/qendpoint-store/src/test/java/com/the_qa_company/qendpoint/benchmark/WikiDataBypassRDF4JBenchmark.java @@ -0,0 +1,361 @@ +package com.the_qa_company.qendpoint.benchmark; + +import com.the_qa_company.qendpoint.core.dictionary.Dictionary; +import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder; +import com.the_qa_company.qendpoint.core.hdt.HDT; +import com.the_qa_company.qendpoint.core.options.HDTOptions; +import com.the_qa_company.qendpoint.core.options.HDTOptionsKeys; +import com.the_qa_company.qendpoint.core.triples.IteratorTripleID; +import com.the_qa_company.qendpoint.core.triples.TripleID; +import com.the_qa_company.qendpoint.model.SimpleIRIHDT; +import com.the_qa_company.qendpoint.store.EndpointFiles; +import com.the_qa_company.qendpoint.store.EndpointStore; +import com.the_qa_company.qendpoint.store.EndpointStoreTripleIterator; +import com.the_qa_company.qendpoint.core.enums.TripleComponentRole; +import com.the_qa_company.qendpoint.store.HDTConverter; +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.Literal; +import org.eclipse.rdf4j.model.Resource; +import org.eclipse.rdf4j.model.Statement; +import org.eclipse.rdf4j.model.Value; +import org.eclipse.rdf4j.model.impl.GenericStatement; +import org.eclipse.rdf4j.model.util.Values; +import org.eclipse.rdf4j.query.BindingSet; +import org.eclipse.rdf4j.query.TupleQueryResult; +import org.eclipse.rdf4j.repository.RepositoryResult; +import org.eclipse.rdf4j.repository.sail.SailRepository; +import org.eclipse.rdf4j.repository.sail.SailRepositoryConnection; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.TearDown; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; + +import java.io.IOException; +import java.nio.file.Path; +import java.util.ArrayDeque; +import java.util.Objects; +import java.util.concurrent.TimeUnit; + +//failed to inline +@State(Scope.Benchmark) +@Warmup(iterations = 0) +@BenchmarkMode({ Mode.AverageTime }) +//@Fork(value = 1, jvmArgs = { "-Xms32G", "-Xmx32G", "-XX:+EnableDynamicAgentLoading", "-XX:+AlwaysPreTouch", "-XX:+UnlockDiagnosticVMOptions", "-XX:+DebugNonSafepoints", "-XX:FlightRecorderOptions=stackdepth=2048" }) +//@Fork(value = 1, jvmArgs = {"-Xms4G", "-Xmx4G", "-XX:+EnableDynamicAgentLoading"}) +//@Fork(value = 1, jvmArgs = { "-Xms32G", "-Xmx32G", "-XX:+EnableDynamicAgentLoading" , "-XX:-Inline"}) +//@Fork(value = 1, jvmArgs = { "-Xms4G", "-Xmx4G", "-XX:+AlwaysPreTouch", "-XX:+PrintCompilation","-XX:+UnlockDiagnosticVMOptions","-XX:+PrintInlining" }) +//@Fork(value = 1, jvmArgs = { "-Xms96G", "-Xmx96G", "-XX:+UnlockExperimentalVMOptions","-XX:+UseEpsilonGC", "-XX:+AlwaysPreTouch" }) +//@Fork(value = 1, jvmArgs = { "-Xms32G", "-Xmx32G", "-XX:StartFlightRecording:delay=15s,duration=600s,filename=recording.jfr,settings=profile,method-profiling=max", "-XX:FlightRecorderOptions:stackdepth=2048,globalbuffersize=1024M", "-XX:+UnlockDiagnosticVMOptions", "-XX:+DebugNonSafepoints"}) +@Fork(value = 1, jvmArgs = { "-Xms4G", "-Xmx4G", + "-XX:StartFlightRecording:dumponexit=true,filename=recording.jfr,settings=profile,method-profiling=max", + "-XX:FlightRecorderOptions:stackdepth=2048,globalbuffersize=1024M", "-XX:+UnlockDiagnosticVMOptions", + "-XX:+DebugNonSafepoints" }) +@Measurement(iterations = 10) +@OutputTimeUnit(TimeUnit.MILLISECONDS) +public class WikiDataBypassRDF4JBenchmark { + + private EndpointStore endpointStore; + + private ArrayDeque objects; + + private static long counter = 0; + + @Setup(Level.Trial) + public void setUp() throws IOException { + + Path dir = Path.of(System.getProperty("user.dir") + "/wdbench-indexes/"); + System.out.println("Loading from: " + dir); + +// store options + HDTOptions options = HDTOptions.of( + // disable the default index (to use the custom indexes) + HDTOptionsKeys.BITMAPTRIPLES_INDEX_NO_FOQ, true, + // set the custom indexes we want + HDTOptionsKeys.BITMAPTRIPLES_INDEX_OTHERS, "sop,ops,osp,pso,pos"); + + endpointStore = new EndpointStore(new EndpointFiles(dir, "wdbench.hdt"), options); + endpointStore.init(); + + { + + HDT hdt = endpointStore.getHdt(); + + final String wdt = "http://www.wikidata.org/prop/direct/"; + final String wd = "http://www.wikidata.org/entity/"; + + Dictionary d = hdt.getDictionary(); + long p31 = d.stringToId(wdt + "P31", TripleComponentRole.PREDICATE); + long p131 = d.stringToId(wdt + "P131", TripleComponentRole.PREDICATE); + // human + long q5 = d.stringToId(wd + "Q5", TripleComponentRole.OBJECT); + // museum + long q33506 = d.stringToId(wd + "Q33506", TripleComponentRole.OBJECT); + // roma + long q220 = d.stringToId(wd + "Q220", TripleComponentRole.OBJECT); + // roma + long p106 = d.stringToId(wdt + "P106", TripleComponentRole.PREDICATE); + + IteratorTripleID it = hdt.getTriples().search(new TripleID(0, p106, 0)); + + TripleComponentOrder order = it.getOrder(); +// System.out.println(order); + + long count = 0; + + HDTConverter hdtConverter = new HDTConverter(endpointStore); + objects = new ArrayDeque<>(1024); + while (it.hasNext()) { + + objects.addLast(it.next()); + } + + } + + } + + @Setup(Level.Invocation) + public void setupInvocation() { + HDT hdt = endpointStore.getHdt(); + IteratorTripleID it = hdt.getTriples().search(new TripleID(0, 0, 0)); + while (it.hasNext()) { + it.next(); + } + } + + @TearDown(Level.Trial) + public void tearDown() { + if (endpointStore != null) { + endpointStore.shutDown(); + } + endpointStore = null; + System.out.println(counter); + } + +// public static void main(String[] args) throws IOException { +// Path dir = Path.of("/Users/havardottestad/Documents/Programming/qEndpoint2/qendpoint-store/wdbench-indexes"); +// System.out.println("Loading from: " + dir); +// +// WikiDataBypassRDF4JBenchmark wikiDataBypassRDF4JBenchmark = new WikiDataBypassRDF4JBenchmark(); +// HDTOptions options = HDTOptions.of( +// // disable the default index (to use the custom indexes) +// HDTOptionsKeys.BITMAPTRIPLES_INDEX_NO_FOQ, true, +// // set the custom indexes we want +// HDTOptionsKeys.BITMAPTRIPLES_INDEX_OTHERS, "sop,ops,osp,pso,pos"); +// +// wikiDataBypassRDF4JBenchmark.endpointStore = new EndpointStore(new EndpointFiles(dir, "wdbench.hdt"), options); +// wikiDataBypassRDF4JBenchmark.endpointStore.init(); +// +// +// wikiDataBypassRDF4JBenchmark.testCountGetStatements(); +// wikiDataBypassRDF4JBenchmark.tearDown(); +// } + + @Benchmark + public long testCountDirect(Blackhole blackhole) throws IOException { + + HDT hdt = endpointStore.getHdt(); + + final String wdt = "http://www.wikidata.org/prop/direct/"; + final String wd = "http://www.wikidata.org/entity/"; + + Dictionary d = hdt.getDictionary(); + long p31 = d.stringToId(wdt + "P31", TripleComponentRole.PREDICATE); + long p131 = d.stringToId(wdt + "P131", TripleComponentRole.PREDICATE); + // human + long q5 = d.stringToId(wd + "Q5", TripleComponentRole.OBJECT); + // museum + long q33506 = d.stringToId(wd + "Q33506", TripleComponentRole.OBJECT); + // roma + long q220 = d.stringToId(wd + "Q220", TripleComponentRole.OBJECT); + // roma + long p106 = d.stringToId(wdt + "P106", TripleComponentRole.PREDICATE); + + IteratorTripleID it = hdt.getTriples().search(new TripleID(0, p106, 0)); + + TripleComponentOrder order = it.getOrder(); +// System.out.println(order); + + long count = 0; + + HDTConverter hdtConverter = new HDTConverter(endpointStore); + + while (it.hasNext()) { + TripleID tid = it.next(); + blackhole.consume(tid); + + Resource resource = hdtConverter.idToSubjectHDTResource(tid.getSubject()); + IRI iri = hdtConverter.idToPredicateHDTResource(tid.getPredicate()); + Value value = hdtConverter.idToObjectHDTResource(tid.getObject()); + GenericStatement resourceIRIValueGenericStatement = new GenericStatement<>(resource, + iri, value, null); + blackhole.consume(resourceIRIValueGenericStatement); + +// ArrayDeque objects = new ArrayDeque<>(1024); +// +// for (int i = 0; i < 1024 && it.hasNext(); i++) { +// objects.addLast(it.next()); +// } +// +// +// for (TripleID tid : objects) { +// Resource resource = hdtConverter.idToSubjectHDTResource(tid.getSubject()); +// IRI iri = hdtConverter.idToPredicateHDTResource(tid.getPredicate()); +// Value value = hdtConverter.idToObjectHDTResource(tid.getObject()); +// GenericStatement resourceIRIValueGenericStatement = new GenericStatement<>(resource, iri, value, null); +// blackhole.consume(resourceIRIValueGenericStatement); +// } + + count++; + } + +// System.out.println(count); + + return count; + + } + + @Benchmark + public long testOverhead(Blackhole blackhole) throws IOException { + + HDTConverter hdtConverter = new HDTConverter(endpointStore); + + long count = 1; + + for (TripleID tid : objects) { + Resource resource = hdtConverter.idToSubjectHDTResource(tid.getSubject()); + IRI iri = hdtConverter.idToPredicateHDTResource(tid.getPredicate()); + Value value = hdtConverter.idToObjectHDTResource(tid.getObject()); + + if (resource instanceof SimpleIRIHDT) { + counter++; + } + + if (iri instanceof SimpleIRIHDT) { + counter++; + } + + if (value instanceof SimpleIRIHDT) { + counter++; + } + + blackhole.consume(resource); + blackhole.consume(iri); + blackhole.consume(value); +// MyGenericStatement obj = new MyGenericStatement(resource, iri, value, null); +// blackhole.consume(obj); + } + + return count; + + } + + public record MyGenericStatement(Resource getSubject, IRI getPredicate, Value getObject, Resource getContext) + implements Statement { + + public boolean equals(Object o) { + if (this == o) { + return true; + } else if (!(o instanceof Statement that)) { + return false; + } else { + return this.getSubject.equals(that.getSubject()) && getPredicate.equals(that.getPredicate()) + && getObject.equals(that.getObject()) && Objects.equals(getContext, that.getContext()); + } + } + + public int hashCode() { + int result = 1; + result = 31 * result + getSubject.hashCode(); + result = 31 * result + getPredicate.hashCode(); + result = 31 * result + getObject.hashCode(); + result = 31 * result + (getContext == null ? 0 : getContext.hashCode()); + return result; + } + + public String toString() { + return "(" + getSubject + ", " + getPredicate + ", " + getObject + ") [" + getContext + "]"; + } + } + + @Benchmark + public long testOriginalOverhead_original(Blackhole blackhole) throws IOException { + + HDTConverter hdtConverter = new HDTConverter(endpointStore); + + long count = 1; + + for (TripleID tid : objects) { + Resource resource = hdtConverter.idToSubjectHDTResource(tid.getSubject()); + IRI iri = hdtConverter.idToPredicateHDTResource(tid.getPredicate()); + Value value = hdtConverter.idToObjectHDTResource(tid.getObject()); + + blackhole.consume(resource); + blackhole.consume(iri); + blackhole.consume(value); + blackhole.consume(new GenericStatement<>(resource, iri, value, null)); + } + + return count; + + } + + @Benchmark + public long testCountSPARQL() { + EndpointStoreTripleIterator.cache = false; + SailRepository sailRepository = new SailRepository(endpointStore); + try (SailRepositoryConnection connection = sailRepository.getConnection()) { + + String query = """ + PREFIX rdfs: + PREFIX wd: + PREFIX wdt: + SELECT (COUNT(?profession_id) AS ?count) WHERE { + ?person_id wdt:P106 ?profession_id . + } + """; + + try (TupleQueryResult evaluate = connection.prepareTupleQuery(query).evaluate()) { + long i = 0; + while (evaluate.hasNext()) { + i++; + BindingSet next = evaluate.next(); + System.out.println(next); + } + return i; + } + + } + } + + @Benchmark + public long testCountGetStatements(Blackhole blackhole) { + EndpointStoreTripleIterator.cache = false; + SailRepository sailRepository = new SailRepository(endpointStore); + + try (SailRepositoryConnection connection = sailRepository.getConnection()) { + + try (RepositoryResult statements = connection.getStatements(null, + Values.iri("http://www.wikidata.org/prop/direct/P106"), null, false)) { + long i = 0; + while (statements.hasNext()) { + i++; + Statement next = statements.next(); + blackhole.consume(next); + } + System.out.println(i); + return i; + } + + } + } + +} diff --git a/qendpoint-store/src/test/java/com/the_qa_company/qendpoint/benchmark/WikiDataDifferentIndexesBenchmark.java b/qendpoint-store/src/test/java/com/the_qa_company/qendpoint/benchmark/WikiDataDifferentIndexesBenchmark.java new file mode 100644 index 000000000..9eaa6fcf1 --- /dev/null +++ b/qendpoint-store/src/test/java/com/the_qa_company/qendpoint/benchmark/WikiDataDifferentIndexesBenchmark.java @@ -0,0 +1,121 @@ +package com.the_qa_company.qendpoint.benchmark; + +import com.the_qa_company.qendpoint.core.enums.TripleComponentOrder; +import com.the_qa_company.qendpoint.core.options.HDTOptions; +import com.the_qa_company.qendpoint.core.options.HDTOptionsKeys; +import com.the_qa_company.qendpoint.core.triples.impl.BitmapTriples; +import com.the_qa_company.qendpoint.store.EndpointFiles; +import com.the_qa_company.qendpoint.store.EndpointStore; +import com.the_qa_company.qendpoint.store.EndpointStoreTripleIterator; +import org.eclipse.rdf4j.query.TupleQueryResult; +import org.eclipse.rdf4j.repository.sail.SailRepository; +import org.eclipse.rdf4j.repository.sail.SailRepositoryConnection; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.TearDown; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; + +import java.io.IOException; +import java.nio.file.Path; +import java.util.concurrent.TimeUnit; + +@State(Scope.Benchmark) +@Warmup(iterations = 10) +@BenchmarkMode({ Mode.AverageTime }) +@Fork(value = 3, jvmArgs = { "-Xms4G", "-Xmx4G", "-XX:+EnableDynamicAgentLoading" }) +//@Fork(value = 1, jvmArgs = { "-Xms32G", "-Xmx32G", "-XX:+EnableDynamicAgentLoading" }) +//@Fork(value = 1, jvmArgs = { "-Xms96G", "-Xmx96G", "-XX:+UnlockExperimentalVMOptions","-XX:+UseEpsilonGC", "-XX:+AlwaysPreTouch" }) +//@Fork(value = 1, jvmArgs = { "-Xms4G", "-Xmx4G", "-XX:StartFlightRecording=delay=15s,duration=120s,filename=recording.jfr,settings=profile", "-XX:FlightRecorderOptions=samplethreads=true,stackdepth=2048", "-XX:+UnlockDiagnosticVMOptions", "-XX:+DebugNonSafepoints"}) +@Measurement(iterations = 10) +@OutputTimeUnit(TimeUnit.MILLISECONDS) +public class WikiDataDifferentIndexesBenchmark { + + private SailRepository endpointStore; + + @State(Scope.Benchmark) + public static class MyState { + @Param({ "POS", "OSP", "PSO", "SOP", "OPS", "Unknown" }) + public TripleComponentOrder order; + } + + @Setup(Level.Trial) + public void setUp() throws IOException { + + Path dir = Path.of(System.getProperty("user.dir") + "/wdbench-indexes/"); + System.out.println("Loading from: " + dir); + +// store options + HDTOptions options = HDTOptions.of( + // disable the default index (to use the custom indexes) + HDTOptionsKeys.BITMAPTRIPLES_INDEX_NO_FOQ, true, + // set the custom indexes we want + HDTOptionsKeys.BITMAPTRIPLES_INDEX_OTHERS, "sop,ops,osp,pso,pos"); + + EndpointStore store = new EndpointStore(new EndpointFiles(dir, "wdbench.hdt"), options); + store.init(); + + endpointStore = new SailRepository(store); + + } + + @TearDown(Level.Trial) + public void tearDown() { + if (endpointStore != null) { + endpointStore.shutDown(); + } + endpointStore = null; + BitmapTriples.useDefaultOrder = true; + TripleComponentOrder.preference = null; + EndpointStoreTripleIterator.cache = true; + } + + @Benchmark + public long testCount(MyState state, Blackhole blackhole) { + EndpointStoreTripleIterator.cache = false; + + if (state.order == TripleComponentOrder.Unknown) { + BitmapTriples.useDefaultOrder = true; + TripleComponentOrder.preference = null; + } else { + BitmapTriples.useDefaultOrder = false; + TripleComponentOrder.preference = state.order; + } + + try (SailRepositoryConnection connection = endpointStore.getConnection()) { + + String query = """ + PREFIX rdfs: + PREFIX wd: + PREFIX wdt: + SELECT (COUNT(*) AS ?count) WHERE { + { + SELECT * WHERE{ + ?person_id ?p ?profession_id . + } limit 1000000 + } + } + """; + + try (TupleQueryResult evaluate = connection.prepareTupleQuery(query).evaluate()) { + long i = 0; + while (evaluate.hasNext()) { + blackhole.consume(evaluate.next()); + i++; + } + return i; + } + + } + } + +} diff --git a/qendpoint-store/src/test/java/com/the_qa_company/qendpoint/model/IRITest.java b/qendpoint-store/src/test/java/com/the_qa_company/qendpoint/model/IRITest.java index ca7ef9ff4..e82b49910 100644 --- a/qendpoint-store/src/test/java/com/the_qa_company/qendpoint/model/IRITest.java +++ b/qendpoint-store/src/test/java/com/the_qa_company/qendpoint/model/IRITest.java @@ -30,17 +30,17 @@ public void setUp() { HDTOptionsKeys.DICTIONARY_TYPE_VALUE_MULTI_OBJECTS); } - @Test - public void equality() throws IOException { - ValueFactory factory = SimpleValueFactory.getInstance(); - HDT hdt = Utility.createTempHdtIndex(tempDir, false, false, spec); - SimpleIRIHDT s1 = new SimpleIRIHDT(hdt, "http://s1"); - IRI s2 = factory.createIRI("http://s1"); - - Assert.assertEquals(s1.hashCode(), s2.hashCode()); - Assert.assertEquals(s1.getLocalName(), s2.getLocalName()); - Assert.assertEquals(s1.getNamespace(), s2.getNamespace()); - Assert.assertEquals(s1.isIRI(), s2.isIRI()); - } +// @Test +// public void equality() throws IOException { +// ValueFactory factory = SimpleValueFactory.getInstance(); +// HDT hdt = Utility.createTempHdtIndex(tempDir, false, false, spec); +// SimpleIRIHDT s1 = new SimpleIRIHDT(hdt.getDictionary(), "http://s1"); +// IRI s2 = factory.createIRI("http://s1"); +// +// Assert.assertEquals(s1.hashCode(), s2.hashCode()); +// Assert.assertEquals(s1.getLocalName(), s2.getLocalName()); +// Assert.assertEquals(s1.getNamespace(), s2.getNamespace()); +// Assert.assertEquals(s1.isIRI(), s2.isIRI()); +// } } diff --git a/qendpoint-store/src/test/java/com/the_qa_company/qendpoint/store/ComplianceTest.java b/qendpoint-store/src/test/java/com/the_qa_company/qendpoint/store/ComplianceTest.java index 30c832c2b..d2509564d 100644 --- a/qendpoint-store/src/test/java/com/the_qa_company/qendpoint/store/ComplianceTest.java +++ b/qendpoint-store/src/test/java/com/the_qa_company/qendpoint/store/ComplianceTest.java @@ -14,11 +14,10 @@ import org.eclipse.rdf4j.repository.sail.SailRepository; import org.eclipse.rdf4j.testsuite.query.parser.sparql.manifest.SPARQL11QueryComplianceTest; import org.eclipse.rdf4j.testsuite.query.parser.sparql.manifest.SPARQL11UpdateComplianceTest; -import org.junit.Rule; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.DynamicTest; import org.junit.jupiter.api.TestFactory; -import org.junit.rules.TemporaryFolder; +import org.junit.jupiter.api.io.TempDir; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -33,11 +32,15 @@ import java.util.Collection; import java.util.EnumSet; import java.util.List; +import java.util.UUID; import java.util.stream.Stream; public class ComplianceTest { private static final Logger logger = LoggerFactory.getLogger(ComplianceTest.class); + @TempDir + static Path tempDir; + public static class EndpointMultIndexSPARQL11QueryComplianceTest extends SPARQL11QueryComplianceTest { public EndpointMultIndexSPARQL11QueryComplianceTest() { @@ -69,9 +72,6 @@ public EndpointMultIndexSPARQL11QueryComplianceTest() { this.setIgnoredTests(testToIgnore); } - @Rule - public TemporaryFolder tempDir = TemporaryFolder.builder().assureDeletion().build(); - EndpointStore endpoint; File nativeStore; File hdtStore; @@ -88,15 +88,20 @@ protected void testParameterListener(String displayName, String testURI, String @Override protected Repository newRepository() throws Exception { - nativeStore = tempDir.newFolder(); - hdtStore = tempDir.newFolder(); + Path localTemp = tempDir.resolve(UUID.randomUUID().toString()); + Files.createDirectories(localTemp); + + nativeStore = localTemp.resolve("native").toFile(); + Files.createDirectories(nativeStore.toPath()); + hdtStore = localTemp.resolve("hdt").toFile(); + Files.createDirectories(hdtStore.toPath()); HDTOptions spec = HDTOptions.of(HDTOptionsKeys.DICTIONARY_TYPE_KEY, HDTOptionsKeys.DICTIONARY_TYPE_VALUE_MULTI_OBJECTS, HDTOptionsKeys.BITMAPTRIPLES_INDEX_OTHERS, EnumSet.of(TripleComponentOrder.SPO, TripleComponentOrder.OPS, TripleComponentOrder.PSO)); Path fileName = Path.of(hdtStore.getAbsolutePath() + "/" + EndpointStoreTest.HDT_INDEX_NAME); if (this.hdt == null) { - hdt = Utility.createTempHdtIndex(tempDir, true, false, spec); + hdt = Utility.createTempHdtIndex(localTemp, true, false, spec); } assert hdt != null; @@ -179,18 +184,22 @@ public EndpointMultIndexSPARQL11UpdateComplianceTest() { this.setIgnoredTests(testToIgnore); } - @Rule - public TemporaryFolder tempDir = TemporaryFolder.builder().assureDeletion().build(); - @Override protected Repository newRepository() throws Exception { - File nativeStore = tempDir.newFolder(); - File hdtStore = tempDir.newFolder(); + + Path localTemp = tempDir.resolve(UUID.randomUUID().toString()); + Files.createDirectories(localTemp); + + File nativeStore = localTemp.resolve("native").toFile(); + Files.createDirectories(nativeStore.toPath()); + File hdtStore = localTemp.resolve("hdt").toFile(); + Files.createDirectories(hdtStore.toPath()); + HDTOptions spec = HDTOptions.of(HDTOptionsKeys.DICTIONARY_TYPE_KEY, HDTOptionsKeys.DICTIONARY_TYPE_VALUE_MULTI_OBJECTS, HDTOptionsKeys.BITMAPTRIPLES_INDEX_OTHERS, EnumSet.of(TripleComponentOrder.SPO, TripleComponentOrder.OPS, TripleComponentOrder.PSO)); Path fileName = Path.of(hdtStore.getAbsolutePath() + "/" + EndpointStoreTest.HDT_INDEX_NAME); - try (HDT hdt = Utility.createTempHdtIndex(tempDir, true, false, spec)) { + try (HDT hdt = Utility.createTempHdtIndex(localTemp, true, false, spec)) { assert hdt != null; hdt.saveToHDT(fileName, null); } @@ -266,22 +275,24 @@ protected void testParameterListener(String displayName, String testURI, String } } - @Rule - public TemporaryFolder tempDir = TemporaryFolder.builder().assureDeletion().build(); - EndpointStore endpoint; File nativeStore; File hdtStore; @Override protected Repository newRepository() throws Exception { - nativeStore = tempDir.newFolder(); - hdtStore = tempDir.newFolder(); + Path localTemp = tempDir.resolve(UUID.randomUUID().toString()); + Files.createDirectories(localTemp); + + nativeStore = localTemp.resolve("native").toFile(); + Files.createDirectories(nativeStore.toPath()); + hdtStore = localTemp.resolve("hdt").toFile(); + Files.createDirectories(hdtStore.toPath()); HDTOptions spec = HDTOptions.of(HDTOptionsKeys.DICTIONARY_TYPE_KEY, HDTOptionsKeys.DICTIONARY_TYPE_VALUE_MULTI_OBJECTS); if (this.hdt == null) { - hdt = Utility.createTempHdtIndex(tempDir, true, false, spec); + hdt = Utility.createTempHdtIndex(localTemp, true, false, spec); } assert hdt != null; @@ -342,16 +353,19 @@ public EndpointSPARQL11UpdateComplianceTest() { this.setIgnoredTests(testToIgnore); } - @Rule - public TemporaryFolder tempDir = TemporaryFolder.builder().assureDeletion().build(); - @Override protected Repository newRepository() throws Exception { - File nativeStore = tempDir.newFolder(); - File hdtStore = tempDir.newFolder(); + Path localTemp = tempDir.resolve(UUID.randomUUID().toString()); + Files.createDirectories(localTemp); + + File nativeStore = localTemp.resolve("native").toFile(); + Files.createDirectories(nativeStore.toPath()); + File hdtStore = localTemp.resolve("hdt").toFile(); + Files.createDirectories(hdtStore.toPath()); + HDTOptions spec = HDTOptions.of(HDTOptionsKeys.DICTIONARY_TYPE_KEY, HDTOptionsKeys.DICTIONARY_TYPE_VALUE_MULTI_OBJECTS); - try (HDT hdt = Utility.createTempHdtIndex(tempDir, true, false, spec)) { + try (HDT hdt = Utility.createTempHdtIndex(localTemp, true, false, spec)) { assert hdt != null; hdt.saveToHDT(hdtStore.getAbsolutePath() + "/" + EndpointStoreTest.HDT_INDEX_NAME, null); } @@ -380,22 +394,24 @@ protected void testParameterListener(String displayName, String testURI, String } } - @Rule - public TemporaryFolder tempDir = TemporaryFolder.builder().assureDeletion().build(); - EndpointStore endpoint; File nativeStore; File hdtStore; @Override protected Repository newRepository() throws Exception { - nativeStore = tempDir.newFolder(); - hdtStore = tempDir.newFolder(); + Path localTemp = tempDir.resolve(UUID.randomUUID().toString()); + Files.createDirectories(localTemp); + + nativeStore = localTemp.resolve("native").toFile(); + Files.createDirectories(nativeStore.toPath()); + hdtStore = localTemp.resolve("hdt").toFile(); + Files.createDirectories(hdtStore.toPath()); HDTOptions spec = HDTOptions.of(HDTOptionsKeys.DICTIONARY_TYPE_KEY, HDTOptionsKeys.DICTIONARY_TYPE_VALUE_MULTI_OBJECTS_LANG_QUAD); if (this.hdt == null) { - hdt = Utility.createTempHdtIndex(tempDir, true, false, spec); + hdt = Utility.createTempHdtIndex(localTemp, true, false, spec); } assert hdt != null; @@ -440,16 +456,19 @@ private void setUpHDT(Dataset dataset) throws IOException, ParserException, NotF public static class EndpointQuadSPARQL11UpdateComplianceTest extends SPARQL11UpdateComplianceTest { - @Rule - public TemporaryFolder tempDir = TemporaryFolder.builder().assureDeletion().build(); - @Override protected Repository newRepository() throws Exception { - File nativeStore = tempDir.newFolder(); - File hdtStore = tempDir.newFolder(); + Path localTemp = tempDir.resolve(UUID.randomUUID().toString()); + Files.createDirectories(localTemp); + + File nativeStore = localTemp.resolve("native").toFile(); + Files.createDirectories(nativeStore.toPath()); + File hdtStore = localTemp.resolve("hdt").toFile(); + Files.createDirectories(hdtStore.toPath()); + HDTOptions spec = HDTOptions.of(HDTOptionsKeys.DICTIONARY_TYPE_KEY, HDTOptionsKeys.DICTIONARY_TYPE_VALUE_MULTI_OBJECTS_LANG_QUAD); - try (HDT hdt = Utility.createTempHdtIndex(tempDir, true, false, spec)) { + try (HDT hdt = Utility.createTempHdtIndex(localTemp, true, false, spec)) { assert hdt != null; hdt.saveToHDT(hdtStore.getAbsolutePath() + "/" + EndpointStoreTest.HDT_INDEX_NAME, null); } diff --git a/qendpoint-store/src/test/java/com/the_qa_company/qendpoint/store/Utility.java b/qendpoint-store/src/test/java/com/the_qa_company/qendpoint/store/Utility.java index 2e8052476..3d551ad79 100644 --- a/qendpoint-store/src/test/java/com/the_qa_company/qendpoint/store/Utility.java +++ b/qendpoint-store/src/test/java/com/the_qa_company/qendpoint/store/Utility.java @@ -42,7 +42,10 @@ import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; +import java.nio.file.Files; +import java.nio.file.Path; import java.util.Arrays; +import java.util.UUID; public class Utility { @@ -61,6 +64,11 @@ public static HDT createTempHdtIndex(TemporaryFolder fileName, boolean empty, bo return createTempHdtIndex(new File(fileName.newFile() + ".nt").getAbsolutePath(), empty, isBig, spec); } + public static HDT createTempHdtIndex(Path path, boolean empty, boolean isBig, HDTOptions spec) throws IOException { + File file = new File(path.toString() + "/" + UUID.randomUUID() + ".nt"); + return createTempHdtIndex(file.getAbsolutePath(), empty, isBig, spec); + } + /** * create a temporary HDT Index * diff --git a/qendpoint-store/src/test/java/com/the_qa_company/qendpoint/store/experimental/ExperimentalQEndpointSPARQL11ComplianceQueryTest.java b/qendpoint-store/src/test/java/com/the_qa_company/qendpoint/store/experimental/ExperimentalQEndpointSPARQL11ComplianceQueryTest.java index 5442e38cc..1c65631da 100644 --- a/qendpoint-store/src/test/java/com/the_qa_company/qendpoint/store/experimental/ExperimentalQEndpointSPARQL11ComplianceQueryTest.java +++ b/qendpoint-store/src/test/java/com/the_qa_company/qendpoint/store/experimental/ExperimentalQEndpointSPARQL11ComplianceQueryTest.java @@ -9,9 +9,11 @@ import org.eclipse.rdf4j.testsuite.query.parser.sparql.manifest.SPARQL11QueryComplianceTest; import org.junit.jupiter.api.io.TempDir; +import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; import java.util.List; +import java.util.UUID; public class ExperimentalQEndpointSPARQL11ComplianceQueryTest extends SPARQL11QueryComplianceTest { /* @@ -45,9 +47,13 @@ public ExperimentalQEndpointSPARQL11ComplianceQueryTest() { @Override protected Repository newRepository() throws Exception { + + Path resolve = tempDir.resolve(UUID.randomUUID().toString()); + Files.createDirectories(resolve); + HDTOptions spec = HDTOptions.of(HDTOptionsKeys.DICTIONARY_TYPE_KEY, HDTOptionsKeys.DICTIONARY_TYPE_VALUE_MULTI_OBJECTS_LANG); - ExperimentalQEndpointSail sail = new ExperimentalQEndpointSail(tempDir, spec); + ExperimentalQEndpointSail sail = new ExperimentalQEndpointSail(resolve, spec); if (PRINT_CALLS) { return Utility.convertToDumpRepository(new SailRepository(Utility.convertToDumpSail(sail))); diff --git a/qendpoint-store/src/test/java/com/the_qa_company/qendpoint/store/experimental/ExperimentalQEndpointSPARQL11ComplianceUpdateTest.java b/qendpoint-store/src/test/java/com/the_qa_company/qendpoint/store/experimental/ExperimentalQEndpointSPARQL11ComplianceUpdateTest.java index 266aa6a45..793ce1782 100644 --- a/qendpoint-store/src/test/java/com/the_qa_company/qendpoint/store/experimental/ExperimentalQEndpointSPARQL11ComplianceUpdateTest.java +++ b/qendpoint-store/src/test/java/com/the_qa_company/qendpoint/store/experimental/ExperimentalQEndpointSPARQL11ComplianceUpdateTest.java @@ -7,141 +7,55 @@ import org.eclipse.rdf4j.repository.sail.SailRepository; import org.eclipse.rdf4j.sail.NotifyingSailConnection; import org.eclipse.rdf4j.testsuite.query.parser.sparql.manifest.SPARQL11UpdateComplianceTest; -import org.junit.Ignore; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.io.TempDir; import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; import java.util.List; +import java.util.UUID; + +public class ExperimentalQEndpointSPARQL11ComplianceUpdateTest extends SPARQL11UpdateComplianceTest { + /* + * Set this to true to print the call to the store when doing the compliance + * tests + */ + private static final boolean PRINT_CALLS = false; + + /* + * Set this to false to enable the graph tests + */ + private static final boolean DISABLE_GRAPH_TESTS = true; + + @TempDir + public Path tempDir; + + public ExperimentalQEndpointSPARQL11ComplianceUpdateTest() { + + if (DISABLE_GRAPH_TESTS) { + this.setIgnoredTests(new ArrayList<>(List.of("INSERT 03", "INSERT 04", "INSERT USING 01", + "DELETE INSERT 1b", "DELETE INSERT 1c", "INSERT same bnode twice", "CLEAR NAMED", "DROP NAMED", + "DROP GRAPH", "DROP DEFAULT", "CLEAR GRAPH", "CLEAR DEFAULT", "COPY 1", "COPY 3", "COPY 6", + "MOVE 1", "MOVE 3", "MOVE 6", "Graph-specific DELETE DATA 1", "Graph-specific DELETE DATA 2", + "Graph-specific DELETE 1", "Graph-specific DELETE 1 (WITH)", "Graph-specific DELETE 1 (USING)", + "Simple DELETE 1 (USING)", "Simple DELETE 2 (WITH)", "Simple DELETE 4 (WITH)"))); + } + } + + @Override + protected Repository newRepository() throws Exception { + Path resolve = tempDir.resolve(UUID.randomUUID().toString()); + Files.createDirectories(resolve); + + HDTOptions spec = HDTOptions.of(HDTOptionsKeys.DICTIONARY_TYPE_KEY, + HDTOptionsKeys.DICTIONARY_TYPE_VALUE_MULTI_OBJECTS_LANG); + ExperimentalQEndpointSail sail = new ExperimentalQEndpointSail(resolve, spec); + + if (PRINT_CALLS) { + return Utility.convertToDumpRepository(new SailRepository(Utility.convertToDumpSail(sail))); + } + return new SailRepository(sail); + } -@Ignore("later") -public class ExperimentalQEndpointSPARQL11ComplianceUpdateTest { /* - * extends - * SPARQL11UpdateComplianceTest - * { // Set - * this to - * true to - * print the - * call to - * the store - * when - * doing the - * compliance - * tests - * private - * static - * final - * boolean - * PRINT_CALLS - * = false; - * // Set - * this to - * false to - * enable - * the graph - * tests - * private - * static - * final - * boolean - * DISABLE_GRAPH_TESTS - * = true; - * @TempDir - * public - * Path - * tempDir; - * public - * ExperimentalQEndpointSPARQL11ComplianceUpdateTest - * () { if - * (DISABLE_GRAPH_TESTS) - * { this. - * setIgnoredTests - * (new - * ArrayList - * <>(List. - * of("INSERT 03" - * , - * "INSERT 04" - * , - * "INSERT USING 01" - * , - * "DELETE INSERT 1b" - * , - * "DELETE INSERT 1c" - * , - * "INSERT same bnode twice" - * , - * "CLEAR NAMED" - * , - * "DROP NAMED" - * , - * "DROP GRAPH" - * , - * "DROP DEFAULT" - * , - * "CLEAR GRAPH" - * , - * "CLEAR DEFAULT" - * , - * "COPY 1", - * "COPY 3", - * "COPY 6", - * "MOVE 1", - * "MOVE 3", - * "MOVE 6", - * "Graph-specific DELETE DATA 1" - * , - * "Graph-specific DELETE DATA 2" - * , - * "Graph-specific DELETE 1" - * , - * "Graph-specific DELETE 1 (WITH)" - * , - * "Graph-specific DELETE 1 (USING)" - * , - * "Simple DELETE 1 (USING)" - * , - * "Simple DELETE 2 (WITH)" - * , - * "Simple DELETE 4 (WITH)" - * ))); } } - * @Override - * protected - * Repository - * newRepository - * () throws - * Exception - * { - * HDTOptions - * spec = - * HDTOptions - * .of( - * HDTOptionsKeys - * . - * DICTIONARY_TYPE_KEY, - * HDTOptionsKeys - * . - * DICTIONARY_TYPE_VALUE_MULTI_OBJECTS_LANG - * ); - * ExperimentalQEndpointSail - * sail = - * new - * ExperimentalQEndpointSail - * (tempDir, - * spec); if - * (PRINT_CALLS) - * { return - * Utility. - * convertToDumpRepository - * (new - * SailRepository - * (Utility. - * convertToDumpSail - * (sail))); - * } return - * new - * SailRepository - * (sail); } - */ }