Skip to content

Commit 0971796

Browse files
committed
last round of addressing comments
1 parent dfc7c11 commit 0971796

20 files changed

+338
-97
lines changed

fdb-extensions/src/main/java/com/apple/foundationdb/async/hnsw/AccessInfo.java

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,10 @@
2121
package com.apple.foundationdb.async.hnsw;
2222

2323
import com.apple.foundationdb.linear.RealVector;
24-
import com.google.common.base.Objects;
2524

2625
import javax.annotation.Nonnull;
2726
import javax.annotation.Nullable;
27+
import java.util.Objects;
2828

2929
/**
3030
* Class to capture the current state of this HNSW that cannot be expressed as metadata but that also is not the actual
@@ -46,17 +46,19 @@ class AccessInfo {
4646
private final long rotatorSeed;
4747

4848
/**
49-
* A centroid that is usually derived as an average over some vectors seen so far. It is used to create the
50-
* {@link StorageTransform}.
49+
* The negated centroid that is usually derived as an average over some vectors seen so far. It is used to create
50+
* the {@link StorageTransform}. The centroid is stored in its negated form (i.e. {@code centroid * (-1)}) as the
51+
* {@link com.apple.foundationdb.linear.AffineOperator} adds its translation vector but the centroid needs to be
52+
* subtracted.
5153
*/
5254
@Nullable
53-
private final RealVector centroid;
55+
private final RealVector negatedCentroid;
5456

5557
public AccessInfo(@Nonnull final EntryNodeReference entryNodeReference, final long rotatorSeed,
56-
@Nullable final RealVector centroid) {
58+
@Nullable final RealVector negatedCentroid) {
5759
this.entryNodeReference = entryNodeReference;
5860
this.rotatorSeed = rotatorSeed;
59-
this.centroid = centroid;
61+
this.negatedCentroid = negatedCentroid;
6062
}
6163

6264
@Nonnull
@@ -65,21 +67,21 @@ public EntryNodeReference getEntryNodeReference() {
6567
}
6668

6769
public boolean canUseRaBitQ() {
68-
return getCentroid() != null;
70+
return getNegatedCentroid() != null;
6971
}
7072

7173
public long getRotatorSeed() {
7274
return rotatorSeed;
7375
}
7476

7577
@Nullable
76-
public RealVector getCentroid() {
77-
return centroid;
78+
public RealVector getNegatedCentroid() {
79+
return negatedCentroid;
7880
}
7981

8082
@Nonnull
8183
public AccessInfo withNewEntryNodeReference(@Nonnull final EntryNodeReference entryNodeReference) {
82-
return new AccessInfo(entryNodeReference, getRotatorSeed(), getCentroid());
84+
return new AccessInfo(entryNodeReference, getRotatorSeed(), getNegatedCentroid());
8385
}
8486

8587
@Override
@@ -89,13 +91,13 @@ public boolean equals(final Object o) {
8991
}
9092
final AccessInfo that = (AccessInfo)o;
9193
return rotatorSeed == that.rotatorSeed &&
92-
Objects.equal(entryNodeReference, that.entryNodeReference) &&
93-
Objects.equal(centroid, that.centroid);
94+
Objects.equals(entryNodeReference, that.entryNodeReference) &&
95+
Objects.equals(negatedCentroid, that.negatedCentroid);
9496
}
9597

9698
@Override
9799
public int hashCode() {
98-
return Objects.hashCode(entryNodeReference, rotatorSeed, centroid);
100+
return Objects.hash(entryNodeReference, rotatorSeed, negatedCentroid);
99101
}
100102

101103
@Nonnull
@@ -104,6 +106,6 @@ public String toString() {
104106
return "AccessInfo[" +
105107
"entryNodeReference=" + entryNodeReference +
106108
", rotatorSeed=" + rotatorSeed +
107-
", centroid=" + centroid + "]";
109+
", centroid=" + negatedCentroid + "]";
108110
}
109111
}

fdb-extensions/src/main/java/com/apple/foundationdb/async/hnsw/AggregatedVector.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,9 @@
2222

2323
import com.apple.foundationdb.linear.RealVector;
2424
import com.apple.foundationdb.linear.Transformed;
25-
import com.google.common.base.Objects;
2625

2726
import javax.annotation.Nonnull;
27+
import java.util.Objects;
2828

2929
/**
3030
* A record-like class wrapping a {@link RealVector} and a count. This data structure is used to keep a running sum
@@ -55,12 +55,12 @@ public boolean equals(final Object o) {
5555
return false;
5656
}
5757
final AggregatedVector that = (AggregatedVector)o;
58-
return partialCount == that.partialCount && Objects.equal(partialVector, that.partialVector);
58+
return partialCount == that.partialCount && Objects.equals(partialVector, that.partialVector);
5959
}
6060

6161
@Override
6262
public int hashCode() {
63-
return Objects.hashCode(partialCount, partialVector);
63+
return Objects.hash(partialCount, partialVector);
6464
}
6565

6666
@Override

fdb-extensions/src/main/java/com/apple/foundationdb/async/hnsw/CompactStorageAdapter.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@ public void writeNodeInternal(@Nonnull final Transaction transaction, @Nonnull f
230230
nodeItems.add(NodeKind.COMPACT.getSerialized());
231231
final CompactNode compactNode = node.asCompactNode();
232232
// getting underlying vector is okay as it is only written to the database
233-
nodeItems.add(StorageAdapter.tupleFromVector(quantizer.encode(compactNode.getVector()).getUnderlyingVector()));
233+
nodeItems.add(StorageAdapter.tupleFromVector(quantizer.encode(compactNode.getVector())));
234234

235235
final Iterable<NodeReference> neighbors = neighborsChangeSet.merge();
236236

fdb-extensions/src/main/java/com/apple/foundationdb/async/hnsw/Config.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,10 @@
2121
package com.apple.foundationdb.async.hnsw;
2222

2323
import com.apple.foundationdb.linear.Metric;
24-
import com.google.common.base.Objects;
2524
import com.google.errorprone.annotations.CanIgnoreReturnValue;
2625

2726
import javax.annotation.Nonnull;
27+
import java.util.Objects;
2828

2929
/**
3030
* Configuration settings for a {@link HNSW}.
@@ -292,7 +292,7 @@ public boolean equals(final Object o) {
292292

293293
@Override
294294
public int hashCode() {
295-
return Objects.hashCode(randomSeed, metric, numDimensions, useInlining, m, mMax, mMax0, efConstruction,
295+
return Objects.hash(randomSeed, metric, numDimensions, useInlining, m, mMax, mMax0, efConstruction,
296296
extendCandidates, keepPrunedConnections, sampleVectorStatsProbability, maintainStatsProbability,
297297
statsThreshold, useRaBitQ, raBitQNumExBits, maxNumConcurrentNodeFetches, maxNumConcurrentNeighborhoodFetches);
298298
}

fdb-extensions/src/main/java/com/apple/foundationdb/async/hnsw/HNSW.java

Lines changed: 9 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -122,20 +122,6 @@ public static Config defaultConfig(int numDimensions) {
122122
return new Config.ConfigBuilder().build(numDimensions);
123123
}
124124

125-
/**
126-
* Creates a new {@code HNSW} instance using the default configuration, write listener, and read listener.
127-
* <p>
128-
* This constructor delegates to the main constructor, providing default values for configuration
129-
* and listeners, simplifying the instantiation process for common use cases.
130-
*
131-
* @param subspace the non-null {@link Subspace} to build the HNSW graph for.
132-
* @param executor the non-null {@link Executor} for concurrent operations, such as building the graph.
133-
* @param numDimensions the number of dimensions
134-
*/
135-
public HNSW(@Nonnull final Subspace subspace, @Nonnull final Executor executor, final int numDimensions) {
136-
this(subspace, executor, newConfigBuilder().build(numDimensions), OnWriteListener.NOOP, OnReadListener.NOOP);
137-
}
138-
139125
/**
140126
* Constructs a new HNSW graph instance.
141127
* <p>
@@ -217,7 +203,7 @@ private AffineOperator storageTransform(@Nullable final AccessInfo accessInfo) {
217203
}
218204

219205
return new StorageTransform(accessInfo.getRotatorSeed(),
220-
getConfig().getNumDimensions(), Objects.requireNonNull(accessInfo.getCentroid()));
206+
getConfig().getNumDimensions(), Objects.requireNonNull(accessInfo.getNegatedCentroid()));
221207
}
222208

223209
@Nonnull
@@ -442,6 +428,8 @@ private Quantizer quantizer(@Nullable final AccessInfo accessInfo) {
442428
@Nonnull final Transformed<RealVector> queryVector) {
443429
final Set<Tuple> visited = Sets.newConcurrentHashSet(NodeReference.primaryKeys(nodeReferences));
444430
final Queue<NodeReferenceWithDistance> candidates =
431+
// This initial capacity is somewhat arbitrary as m is not necessarily a limit,
432+
// but it gives us a number that is better than the default.
445433
new PriorityQueue<>(config.getM(),
446434
Comparator.comparing(NodeReferenceWithDistance::getDistance));
447435
candidates.addAll(nodeReferences);
@@ -1189,17 +1177,19 @@ layer, getConfig().getM(), getConfig().isExtendCandidates(), nodeCache, newVecto
11891177
@Nonnull final AffineOperator storageTransform,
11901178
@Nonnull final Estimator estimator,
11911179
@Nonnull final NodeReferenceAndNode<N> selectedNeighbor,
1192-
int layer,
1193-
int mMax,
1180+
final int layer,
1181+
final int mMax,
11941182
@Nonnull final NeighborsChangeSet<N> neighborChangeSet,
11951183
@Nonnull final Map<Tuple, AbstractNode<N>> nodeCache) {
11961184
final AbstractNode<N> selectedNeighborNode = selectedNeighbor.getNode();
1197-
if (selectedNeighborNode.getNeighbors().size() < mMax) {
1185+
final int numNeighbors =
1186+
Iterables.size(neighborChangeSet.merge()); // this is a view over the iterable neighbors in the set
1187+
if (numNeighbors < mMax) {
11981188
return CompletableFuture.completedFuture(null);
11991189
} else {
12001190
if (logger.isTraceEnabled()) {
12011191
logger.trace("pruning neighborhood of key={} which has numNeighbors={} out of mMax={}",
1202-
selectedNeighborNode.getPrimaryKey(), selectedNeighborNode.getNeighbors().size(), mMax);
1192+
selectedNeighborNode.getPrimaryKey(), numNeighbors, mMax);
12031193
}
12041194
return fetchNeighborhood(storageAdapter, transaction, storageTransform, layer, neighborChangeSet.merge(), nodeCache)
12051195
.thenCompose(nodeReferenceWithVectors -> {

fdb-extensions/src/main/java/com/apple/foundationdb/async/hnsw/InliningStorageAdapter.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,13 @@ private NodeReferenceWithVector neighborFromRaw(@Nonnull final AffineOperator st
185185
private NodeReferenceWithVector neighborFromTuples(@Nonnull final AffineOperator storageTransform,
186186
@Nonnull final Tuple keyTuple, @Nonnull final Tuple valueTuple) {
187187
final Tuple neighborPrimaryKey = keyTuple.getNestedTuple(2); // neighbor primary key
188+
//
189+
// Transform the raw vector that was just fetched into the internal coordinate system. If we do not have
190+
// a need to transform coordinates, this transform is the identity transformation. Vectors are always stored
191+
// in the internal coordinate system in use at the time the vector is written. If that coordinate system changes
192+
// afterward, for instance RaBitQ by enabling RaBitQ, subsequent reads of vectors that were written prior to
193+
// the coordinate system change need to be transformed when they are read back.
194+
//
188195
final Transformed<RealVector> neighborVector =
189196
storageTransform.transform(
190197
StorageAdapter.vectorFromTuple(getConfig(), valueTuple)); // the entire value is the vector
@@ -255,7 +262,7 @@ public void writeNeighbor(@Nonnull final Transaction transaction, @Nonnull final
255262
// getting underlying vector is okay as it is only written to the database
256263
final byte[] value =
257264
StorageAdapter.tupleFromVector(
258-
quantizer.encode(neighbor.getVector()).getUnderlyingVector()).pack();
265+
quantizer.encode(neighbor.getVector())).pack();
259266
transaction.set(neighborKey, value);
260267
getOnWriteListener().onNeighborWritten(layer, node, neighbor);
261268
getOnWriteListener().onKeyValueWritten(layer, neighborKey, value);

fdb-extensions/src/main/java/com/apple/foundationdb/async/hnsw/NodeReferenceWithVector.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,9 @@
2323
import com.apple.foundationdb.linear.RealVector;
2424
import com.apple.foundationdb.linear.Transformed;
2525
import com.apple.foundationdb.tuple.Tuple;
26-
import com.google.common.base.Objects;
2726

2827
import javax.annotation.Nonnull;
28+
import java.util.Objects;
2929

3030
/**
3131
* Represents a reference to a node that includes an associated vector.
@@ -97,7 +97,7 @@ public boolean equals(final Object o) {
9797
if (!super.equals(o)) {
9898
return false;
9999
}
100-
return Objects.equal(vector, ((NodeReferenceWithVector)o).vector);
100+
return Objects.equals(vector, ((NodeReferenceWithVector)o).vector);
101101
}
102102

103103
/**
@@ -106,7 +106,7 @@ public boolean equals(final Object o) {
106106
*/
107107
@Override
108108
public int hashCode() {
109-
return Objects.hashCode(super.hashCode(), vector);
109+
return Objects.hash(super.hashCode(), vector);
110110
}
111111

112112
/**

fdb-extensions/src/main/java/com/apple/foundationdb/async/hnsw/ResultEntry.java

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,10 @@
2323
import com.apple.foundationdb.ReadTransaction;
2424
import com.apple.foundationdb.linear.RealVector;
2525
import com.apple.foundationdb.tuple.Tuple;
26-
import com.google.common.base.Objects;
2726

2827
import javax.annotation.Nonnull;
2928
import javax.annotation.Nullable;
29+
import java.util.Objects;
3030

3131
/**
3232
* Record-like class to wrap the results of a kNN-search.
@@ -39,16 +39,16 @@ public class ResultEntry {
3939
private final Tuple primaryKey;
4040

4141
/**
42-
* The vector that is stored with the item in the index. This vector is expressed in the client's coordinate
43-
* system and should not be of class {@link com.apple.foundationdb.linear.HalfRealVector},
42+
* The vector that is stored with the item in the structure. This vector is expressed in the client's coordinate
43+
* system and should be of class {@link com.apple.foundationdb.linear.HalfRealVector},
4444
* {@link com.apple.foundationdb.linear.FloatRealVector}, or {@link com.apple.foundationdb.linear.DoubleRealVector}.
4545
* This member is nullable. It is set to {@code null}, if the caller to
4646
* {@link HNSW#kNearestNeighborsSearch(ReadTransaction, int, int, boolean, RealVector)} requested to not return
4747
* vectors.
4848
* <p>
4949
* The vector, if set, may or may not be exactly equal to the vector that was originally inserted in the HNSW.
5050
* Depending on quantization settings (see {@link Config}, the vector that
51-
* is returned may only be an approximation of the original vector..
51+
* is returned may only be an approximation of the original vector.
5252
*/
5353
@Nullable
5454
private final RealVector vector;
@@ -97,13 +97,13 @@ public boolean equals(final Object o) {
9797
final ResultEntry that = (ResultEntry)o;
9898
return Double.compare(distance, that.distance) == 0 &&
9999
rankOrRowNumber == that.rankOrRowNumber &&
100-
Objects.equal(primaryKey, that.primaryKey) &&
101-
Objects.equal(vector, that.vector);
100+
Objects.equals(primaryKey, that.primaryKey) &&
101+
Objects.equals(vector, that.vector);
102102
}
103103

104104
@Override
105105
public int hashCode() {
106-
return Objects.hashCode(primaryKey, vector, distance, rankOrRowNumber);
106+
return Objects.hash(primaryKey, vector, distance, rankOrRowNumber);
107107
}
108108

109109
@Override

fdb-extensions/src/main/java/com/apple/foundationdb/async/hnsw/StorageAdapter.java

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
import com.apple.foundationdb.rabitq.EncodedRealVector;
3939
import com.apple.foundationdb.subspace.Subspace;
4040
import com.apple.foundationdb.tuple.Tuple;
41+
import com.google.common.annotations.VisibleForTesting;
4142
import com.google.common.base.Verify;
4243
import com.google.common.collect.ImmutableList;
4344

@@ -63,17 +64,17 @@ interface StorageAdapter<N extends NodeReference> {
6364
/**
6465
* Subspace for data.
6566
*/
66-
byte SUBSPACE_PREFIX_DATA = 0x00;
67+
long SUBSPACE_PREFIX_DATA = 0x00;
6768

6869
/**
6970
* Subspace for the access info; contains entry nodes; these are kept separately from the data.
7071
*/
71-
byte SUBSPACE_PREFIX_ACCESS_INFO = 0x01;
72+
long SUBSPACE_PREFIX_ACCESS_INFO = 0x01;
7273

7374
/**
7475
* Subspace for (mostly) statistical analysis (like finding a centroid, etc.). Contains samples of vectors.
7576
*/
76-
byte SUBSPACE_PREFIX_SAMPLES = 0x02;
77+
long SUBSPACE_PREFIX_SAMPLES = 0x02;
7778

7879
/**
7980
* Returns the configuration of the HNSW graph.
@@ -175,6 +176,7 @@ void writeNode(@Nonnull Transaction transaction, @Nonnull Quantizer quantizer, @
175176
* @param maxNumRead the maximum number of nodes to return in this scan
176177
* @return an {@link AsyncIterable} that provides the nodes found in the specified layer range
177178
*/
179+
@VisibleForTesting
178180
Iterable<AbstractNode<N>> scanLayer(@Nonnull ReadTransaction readTransaction, int layer,
179181
@Nullable Tuple lastPrimaryKey, int maxNumRead);
180182

@@ -225,6 +227,16 @@ static RealVector vectorFromBytes(@Nonnull final Config config, @Nonnull final b
225227
}
226228
}
227229

230+
/**
231+
* Converts a transformed vector into a tuple.
232+
* @param vector a transformed vector
233+
* @return a new, non-null {@code Tuple} instance representing the contents of the underlying vector.
234+
*/
235+
@Nonnull
236+
static Tuple tupleFromVector(@Nonnull final Transformed<RealVector> vector) {
237+
return tupleFromVector(vector.getUnderlyingVector());
238+
}
239+
228240
/**
229241
* Converts a {@link RealVector} into a {@link Tuple}.
230242
* <p>
@@ -295,12 +307,12 @@ static void writeAccessInfo(@Nonnull final Transaction transaction,
295307
@Nonnull final OnWriteListener onWriteListener) {
296308
final Subspace entryNodeSubspace = accessInfoSubspace(subspace);
297309
final EntryNodeReference entryNodeReference = accessInfo.getEntryNodeReference();
298-
final RealVector centroid = accessInfo.getCentroid();
310+
final RealVector centroid = accessInfo.getNegatedCentroid();
299311
final byte[] key = entryNodeSubspace.pack();
300312
final byte[] value = Tuple.from(entryNodeReference.getLayer(),
301313
entryNodeReference.getPrimaryKey(),
302314
// getting underlying is okay as it is only written to the database
303-
StorageAdapter.tupleFromVector(entryNodeReference.getVector().getUnderlyingVector()),
315+
StorageAdapter.tupleFromVector(entryNodeReference.getVector()),
304316
accessInfo.getRotatorSeed(),
305317
centroid == null ? null : StorageAdapter.tupleFromVector(centroid)).pack();
306318
transaction.set(key, value);
@@ -313,7 +325,6 @@ static CompletableFuture<List<AggregatedVector>> consumeSampledVectors(@Nonnull
313325
final int numMaxVectors,
314326
@Nonnull final OnReadListener onReadListener) {
315327
final Subspace prefixSubspace = samplesSubspace(subspace);
316-
317328
final byte[] prefixKey = prefixSubspace.pack();
318329
final ReadTransaction snapshot = transaction.snapshot();
319330
final Range range = Range.startsWith(prefixKey);

0 commit comments

Comments
 (0)