Skip to content

Commit e83340d

Browse files
committed
last round of addressing comments
1 parent dfc7c11 commit e83340d

File tree

15 files changed

+121
-81
lines changed

15 files changed

+121
-81
lines changed

fdb-extensions/src/main/java/com/apple/foundationdb/async/hnsw/AccessInfo.java

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,10 @@
2121
package com.apple.foundationdb.async.hnsw;
2222

2323
import com.apple.foundationdb.linear.RealVector;
24-
import com.google.common.base.Objects;
2524

2625
import javax.annotation.Nonnull;
2726
import javax.annotation.Nullable;
27+
import java.util.Objects;
2828

2929
/**
3030
* Class to capture the current state of this HNSW that cannot be expressed as metadata but that also is not the actual
@@ -46,17 +46,19 @@ class AccessInfo {
4646
private final long rotatorSeed;
4747

4848
/**
49-
* A centroid that is usually derived as an average over some vectors seen so far. It is used to create the
50-
* {@link StorageTransform}.
49+
* The negated centroid that is usually derived as an average over some vectors seen so far. It is used to create
50+
* the {@link StorageTransform}. The centroid is stored in its negated form (i.e. {@code centroid * (-1)}) as the
51+
* {@link com.apple.foundationdb.linear.AffineOperator} adds its translation vector but the centroid needs to be
52+
* subtracted.
5153
*/
5254
@Nullable
53-
private final RealVector centroid;
55+
private final RealVector negatedCentroid;
5456

5557
public AccessInfo(@Nonnull final EntryNodeReference entryNodeReference, final long rotatorSeed,
56-
@Nullable final RealVector centroid) {
58+
@Nullable final RealVector negatedCentroid) {
5759
this.entryNodeReference = entryNodeReference;
5860
this.rotatorSeed = rotatorSeed;
59-
this.centroid = centroid;
61+
this.negatedCentroid = negatedCentroid;
6062
}
6163

6264
@Nonnull
@@ -65,21 +67,21 @@ public EntryNodeReference getEntryNodeReference() {
6567
}
6668

6769
public boolean canUseRaBitQ() {
68-
return getCentroid() != null;
70+
return getNegatedCentroid() != null;
6971
}
7072

7173
public long getRotatorSeed() {
7274
return rotatorSeed;
7375
}
7476

7577
@Nullable
76-
public RealVector getCentroid() {
77-
return centroid;
78+
public RealVector getNegatedCentroid() {
79+
return negatedCentroid;
7880
}
7981

8082
@Nonnull
8183
public AccessInfo withNewEntryNodeReference(@Nonnull final EntryNodeReference entryNodeReference) {
82-
return new AccessInfo(entryNodeReference, getRotatorSeed(), getCentroid());
84+
return new AccessInfo(entryNodeReference, getRotatorSeed(), getNegatedCentroid());
8385
}
8486

8587
@Override
@@ -89,13 +91,13 @@ public boolean equals(final Object o) {
8991
}
9092
final AccessInfo that = (AccessInfo)o;
9193
return rotatorSeed == that.rotatorSeed &&
92-
Objects.equal(entryNodeReference, that.entryNodeReference) &&
93-
Objects.equal(centroid, that.centroid);
94+
Objects.equals(entryNodeReference, that.entryNodeReference) &&
95+
Objects.equals(negatedCentroid, that.negatedCentroid);
9496
}
9597

9698
@Override
9799
public int hashCode() {
98-
return Objects.hashCode(entryNodeReference, rotatorSeed, centroid);
100+
return Objects.hash(entryNodeReference, rotatorSeed, negatedCentroid);
99101
}
100102

101103
@Nonnull
@@ -104,6 +106,6 @@ public String toString() {
104106
return "AccessInfo[" +
105107
"entryNodeReference=" + entryNodeReference +
106108
", rotatorSeed=" + rotatorSeed +
107-
", centroid=" + centroid + "]";
109+
", centroid=" + negatedCentroid + "]";
108110
}
109111
}

fdb-extensions/src/main/java/com/apple/foundationdb/async/hnsw/AggregatedVector.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,9 @@
2222

2323
import com.apple.foundationdb.linear.RealVector;
2424
import com.apple.foundationdb.linear.Transformed;
25-
import com.google.common.base.Objects;
2625

2726
import javax.annotation.Nonnull;
27+
import java.util.Objects;
2828

2929
/**
3030
* A record-like class wrapping a {@link RealVector} and a count. This data structure is used to keep a running sum
@@ -55,12 +55,12 @@ public boolean equals(final Object o) {
5555
return false;
5656
}
5757
final AggregatedVector that = (AggregatedVector)o;
58-
return partialCount == that.partialCount && Objects.equal(partialVector, that.partialVector);
58+
return partialCount == that.partialCount && Objects.equals(partialVector, that.partialVector);
5959
}
6060

6161
@Override
6262
public int hashCode() {
63-
return Objects.hashCode(partialCount, partialVector);
63+
return Objects.hash(partialCount, partialVector);
6464
}
6565

6666
@Override

fdb-extensions/src/main/java/com/apple/foundationdb/async/hnsw/CompactStorageAdapter.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@ public void writeNodeInternal(@Nonnull final Transaction transaction, @Nonnull f
230230
nodeItems.add(NodeKind.COMPACT.getSerialized());
231231
final CompactNode compactNode = node.asCompactNode();
232232
// getting underlying vector is okay as it is only written to the database
233-
nodeItems.add(StorageAdapter.tupleFromVector(quantizer.encode(compactNode.getVector()).getUnderlyingVector()));
233+
nodeItems.add(StorageAdapter.tupleFromVector(quantizer.encode(compactNode.getVector())));
234234

235235
final Iterable<NodeReference> neighbors = neighborsChangeSet.merge();
236236

fdb-extensions/src/main/java/com/apple/foundationdb/async/hnsw/Config.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,10 @@
2121
package com.apple.foundationdb.async.hnsw;
2222

2323
import com.apple.foundationdb.linear.Metric;
24-
import com.google.common.base.Objects;
2524
import com.google.errorprone.annotations.CanIgnoreReturnValue;
2625

2726
import javax.annotation.Nonnull;
27+
import java.util.Objects;
2828

2929
/**
3030
* Configuration settings for a {@link HNSW}.
@@ -292,7 +292,7 @@ public boolean equals(final Object o) {
292292

293293
@Override
294294
public int hashCode() {
295-
return Objects.hashCode(randomSeed, metric, numDimensions, useInlining, m, mMax, mMax0, efConstruction,
295+
return Objects.hash(randomSeed, metric, numDimensions, useInlining, m, mMax, mMax0, efConstruction,
296296
extendCandidates, keepPrunedConnections, sampleVectorStatsProbability, maintainStatsProbability,
297297
statsThreshold, useRaBitQ, raBitQNumExBits, maxNumConcurrentNodeFetches, maxNumConcurrentNeighborhoodFetches);
298298
}

fdb-extensions/src/main/java/com/apple/foundationdb/async/hnsw/HNSW.java

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,7 @@ private AffineOperator storageTransform(@Nullable final AccessInfo accessInfo) {
217217
}
218218

219219
return new StorageTransform(accessInfo.getRotatorSeed(),
220-
getConfig().getNumDimensions(), Objects.requireNonNull(accessInfo.getCentroid()));
220+
getConfig().getNumDimensions(), Objects.requireNonNull(accessInfo.getNegatedCentroid()));
221221
}
222222

223223
@Nonnull
@@ -442,6 +442,8 @@ private Quantizer quantizer(@Nullable final AccessInfo accessInfo) {
442442
@Nonnull final Transformed<RealVector> queryVector) {
443443
final Set<Tuple> visited = Sets.newConcurrentHashSet(NodeReference.primaryKeys(nodeReferences));
444444
final Queue<NodeReferenceWithDistance> candidates =
445+
// This initial capacity is somewhat arbitrary as m is not necessarily a limit,
446+
// but it gives us a number that is better than the default.
445447
new PriorityQueue<>(config.getM(),
446448
Comparator.comparing(NodeReferenceWithDistance::getDistance));
447449
candidates.addAll(nodeReferences);
@@ -1189,17 +1191,19 @@ layer, getConfig().getM(), getConfig().isExtendCandidates(), nodeCache, newVecto
11891191
@Nonnull final AffineOperator storageTransform,
11901192
@Nonnull final Estimator estimator,
11911193
@Nonnull final NodeReferenceAndNode<N> selectedNeighbor,
1192-
int layer,
1193-
int mMax,
1194+
final int layer,
1195+
final int mMax,
11941196
@Nonnull final NeighborsChangeSet<N> neighborChangeSet,
11951197
@Nonnull final Map<Tuple, AbstractNode<N>> nodeCache) {
11961198
final AbstractNode<N> selectedNeighborNode = selectedNeighbor.getNode();
1197-
if (selectedNeighborNode.getNeighbors().size() < mMax) {
1199+
final int numNeighbors =
1200+
Iterables.size(neighborChangeSet.merge()); // this is a view over the iterable neighbors in the set
1201+
if (numNeighbors < mMax) {
11981202
return CompletableFuture.completedFuture(null);
11991203
} else {
12001204
if (logger.isTraceEnabled()) {
12011205
logger.trace("pruning neighborhood of key={} which has numNeighbors={} out of mMax={}",
1202-
selectedNeighborNode.getPrimaryKey(), selectedNeighborNode.getNeighbors().size(), mMax);
1206+
selectedNeighborNode.getPrimaryKey(), numNeighbors, mMax);
12031207
}
12041208
return fetchNeighborhood(storageAdapter, transaction, storageTransform, layer, neighborChangeSet.merge(), nodeCache)
12051209
.thenCompose(nodeReferenceWithVectors -> {

fdb-extensions/src/main/java/com/apple/foundationdb/async/hnsw/InliningStorageAdapter.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,13 @@ private NodeReferenceWithVector neighborFromRaw(@Nonnull final AffineOperator st
185185
private NodeReferenceWithVector neighborFromTuples(@Nonnull final AffineOperator storageTransform,
186186
@Nonnull final Tuple keyTuple, @Nonnull final Tuple valueTuple) {
187187
final Tuple neighborPrimaryKey = keyTuple.getNestedTuple(2); // neighbor primary key
188+
//
189+
// Transform the raw vector that was just fetched into the internal coordinate system. If we do not have
190+
// a need to transform coordinates, this transform is the identity transformation. Vectors are always stored
191+
// in the internal coordinate system in use at the time the vector is written. If that coordinate system changes
192+
// afterward, for instance RaBitQ by enabling RaBitQ, subsequent reads of vectors that were written prior to
193+
// the coordinate system change need to be transformed when they are read back.
194+
//
188195
final Transformed<RealVector> neighborVector =
189196
storageTransform.transform(
190197
StorageAdapter.vectorFromTuple(getConfig(), valueTuple)); // the entire value is the vector
@@ -255,7 +262,7 @@ public void writeNeighbor(@Nonnull final Transaction transaction, @Nonnull final
255262
// getting underlying vector is okay as it is only written to the database
256263
final byte[] value =
257264
StorageAdapter.tupleFromVector(
258-
quantizer.encode(neighbor.getVector()).getUnderlyingVector()).pack();
265+
quantizer.encode(neighbor.getVector())).pack();
259266
transaction.set(neighborKey, value);
260267
getOnWriteListener().onNeighborWritten(layer, node, neighbor);
261268
getOnWriteListener().onKeyValueWritten(layer, neighborKey, value);

fdb-extensions/src/main/java/com/apple/foundationdb/async/hnsw/NodeReferenceWithVector.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,9 @@
2323
import com.apple.foundationdb.linear.RealVector;
2424
import com.apple.foundationdb.linear.Transformed;
2525
import com.apple.foundationdb.tuple.Tuple;
26-
import com.google.common.base.Objects;
2726

2827
import javax.annotation.Nonnull;
28+
import java.util.Objects;
2929

3030
/**
3131
* Represents a reference to a node that includes an associated vector.
@@ -97,7 +97,7 @@ public boolean equals(final Object o) {
9797
if (!super.equals(o)) {
9898
return false;
9999
}
100-
return Objects.equal(vector, ((NodeReferenceWithVector)o).vector);
100+
return Objects.equals(vector, ((NodeReferenceWithVector)o).vector);
101101
}
102102

103103
/**
@@ -106,7 +106,7 @@ public boolean equals(final Object o) {
106106
*/
107107
@Override
108108
public int hashCode() {
109-
return Objects.hashCode(super.hashCode(), vector);
109+
return Objects.hash(super.hashCode(), vector);
110110
}
111111

112112
/**

fdb-extensions/src/main/java/com/apple/foundationdb/async/hnsw/ResultEntry.java

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,10 @@
2323
import com.apple.foundationdb.ReadTransaction;
2424
import com.apple.foundationdb.linear.RealVector;
2525
import com.apple.foundationdb.tuple.Tuple;
26-
import com.google.common.base.Objects;
2726

2827
import javax.annotation.Nonnull;
2928
import javax.annotation.Nullable;
29+
import java.util.Objects;
3030

3131
/**
3232
* Record-like class to wrap the results of a kNN-search.
@@ -39,16 +39,16 @@ public class ResultEntry {
3939
private final Tuple primaryKey;
4040

4141
/**
42-
* The vector that is stored with the item in the index. This vector is expressed in the client's coordinate
43-
* system and should not be of class {@link com.apple.foundationdb.linear.HalfRealVector},
42+
* The vector that is stored with the item in the structure. This vector is expressed in the client's coordinate
43+
* system and should be of class {@link com.apple.foundationdb.linear.HalfRealVector},
4444
* {@link com.apple.foundationdb.linear.FloatRealVector}, or {@link com.apple.foundationdb.linear.DoubleRealVector}.
4545
* This member is nullable. It is set to {@code null}, if the caller to
4646
* {@link HNSW#kNearestNeighborsSearch(ReadTransaction, int, int, boolean, RealVector)} requested to not return
4747
* vectors.
4848
* <p>
4949
* The vector, if set, may or may not be exactly equal to the vector that was originally inserted in the HNSW.
5050
* Depending on quantization settings (see {@link Config}, the vector that
51-
* is returned may only be an approximation of the original vector..
51+
* is returned may only be an approximation of the original vector.
5252
*/
5353
@Nullable
5454
private final RealVector vector;
@@ -97,13 +97,13 @@ public boolean equals(final Object o) {
9797
final ResultEntry that = (ResultEntry)o;
9898
return Double.compare(distance, that.distance) == 0 &&
9999
rankOrRowNumber == that.rankOrRowNumber &&
100-
Objects.equal(primaryKey, that.primaryKey) &&
101-
Objects.equal(vector, that.vector);
100+
Objects.equals(primaryKey, that.primaryKey) &&
101+
Objects.equals(vector, that.vector);
102102
}
103103

104104
@Override
105105
public int hashCode() {
106-
return Objects.hashCode(primaryKey, vector, distance, rankOrRowNumber);
106+
return Objects.hash(primaryKey, vector, distance, rankOrRowNumber);
107107
}
108108

109109
@Override

fdb-extensions/src/main/java/com/apple/foundationdb/async/hnsw/StorageAdapter.java

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
import com.apple.foundationdb.rabitq.EncodedRealVector;
3939
import com.apple.foundationdb.subspace.Subspace;
4040
import com.apple.foundationdb.tuple.Tuple;
41+
import com.google.common.annotations.VisibleForTesting;
4142
import com.google.common.base.Verify;
4243
import com.google.common.collect.ImmutableList;
4344

@@ -63,17 +64,17 @@ interface StorageAdapter<N extends NodeReference> {
6364
/**
6465
* Subspace for data.
6566
*/
66-
byte SUBSPACE_PREFIX_DATA = 0x00;
67+
long SUBSPACE_PREFIX_DATA = 0x00;
6768

6869
/**
6970
* Subspace for the access info; contains entry nodes; these are kept separately from the data.
7071
*/
71-
byte SUBSPACE_PREFIX_ACCESS_INFO = 0x01;
72+
long SUBSPACE_PREFIX_ACCESS_INFO = 0x01;
7273

7374
/**
7475
* Subspace for (mostly) statistical analysis (like finding a centroid, etc.). Contains samples of vectors.
7576
*/
76-
byte SUBSPACE_PREFIX_SAMPLES = 0x02;
77+
long SUBSPACE_PREFIX_SAMPLES = 0x02;
7778

7879
/**
7980
* Returns the configuration of the HNSW graph.
@@ -175,6 +176,7 @@ void writeNode(@Nonnull Transaction transaction, @Nonnull Quantizer quantizer, @
175176
* @param maxNumRead the maximum number of nodes to return in this scan
176177
* @return an {@link AsyncIterable} that provides the nodes found in the specified layer range
177178
*/
179+
@VisibleForTesting
178180
Iterable<AbstractNode<N>> scanLayer(@Nonnull ReadTransaction readTransaction, int layer,
179181
@Nullable Tuple lastPrimaryKey, int maxNumRead);
180182

@@ -225,6 +227,16 @@ static RealVector vectorFromBytes(@Nonnull final Config config, @Nonnull final b
225227
}
226228
}
227229

230+
/**
231+
* Converts a transformed vector into a tuple.
232+
* @param vector a transformed vector
233+
* @return a new, non-null {@code Tuple} instance representing the contents of the underlying vector.
234+
*/
235+
@Nonnull
236+
static Tuple tupleFromVector(@Nonnull final Transformed<RealVector> vector) {
237+
return tupleFromVector(vector.getUnderlyingVector());
238+
}
239+
228240
/**
229241
* Converts a {@link RealVector} into a {@link Tuple}.
230242
* <p>
@@ -295,12 +307,12 @@ static void writeAccessInfo(@Nonnull final Transaction transaction,
295307
@Nonnull final OnWriteListener onWriteListener) {
296308
final Subspace entryNodeSubspace = accessInfoSubspace(subspace);
297309
final EntryNodeReference entryNodeReference = accessInfo.getEntryNodeReference();
298-
final RealVector centroid = accessInfo.getCentroid();
310+
final RealVector centroid = accessInfo.getNegatedCentroid();
299311
final byte[] key = entryNodeSubspace.pack();
300312
final byte[] value = Tuple.from(entryNodeReference.getLayer(),
301313
entryNodeReference.getPrimaryKey(),
302314
// getting underlying is okay as it is only written to the database
303-
StorageAdapter.tupleFromVector(entryNodeReference.getVector().getUnderlyingVector()),
315+
StorageAdapter.tupleFromVector(entryNodeReference.getVector()),
304316
accessInfo.getRotatorSeed(),
305317
centroid == null ? null : StorageAdapter.tupleFromVector(centroid)).pack();
306318
transaction.set(key, value);
@@ -313,7 +325,6 @@ static CompletableFuture<List<AggregatedVector>> consumeSampledVectors(@Nonnull
313325
final int numMaxVectors,
314326
@Nonnull final OnReadListener onReadListener) {
315327
final Subspace prefixSubspace = samplesSubspace(subspace);
316-
317328
final byte[] prefixKey = prefixSubspace.pack();
318329
final ReadTransaction snapshot = transaction.snapshot();
319330
final Range range = Range.startsWith(prefixKey);

fdb-extensions/src/main/java/com/apple/foundationdb/async/hnsw/StorageTransform.java

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -65,17 +65,6 @@ public RealVector apply(@Nonnull final RealVector vector) {
6565
@Nonnull
6666
@Override
6767
public RealVector invertedApply(@Nonnull final RealVector vector) {
68-
//
69-
// Only transform the vector if it is needed. We make the decision based on whether the vector is encoded or
70-
// not. When we switch on encoding, we apply the new coordinate system from that point onwards meaning that all
71-
// vectors inserted before use the client coordinate system. For the inverted case, we only have to transform
72-
// the encoded vectors as they are expressed in the internal coordinate system, while regular non-encoded
73-
// vectors are already expressed in the client system.
74-
//
75-
if (!(vector instanceof EncodedRealVector)) {
76-
return vector;
77-
}
78-
7968
return super.invertedApply(vector);
8069
}
8170
}

0 commit comments

Comments
 (0)