Skip to content

Commit 69c8839

Browse files
authored
HNSW using the linear package (#3691)
This PR implements the [HNSW](https://arxiv.org/abs/1603.09320) paper using the recently introduced linear package together with RaBitQ.
1 parent f161cbc commit 69c8839

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

54 files changed

+7222
-145
lines changed
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
/*
2+
* AbstractNode.java
3+
*
4+
* This source file is part of the FoundationDB open source project
5+
*
6+
* Copyright 2015-2025 Apple Inc. and the FoundationDB project authors
7+
*
8+
* Licensed under the Apache License, Version 2.0 (the "License");
9+
* you may not use this file except in compliance with the License.
10+
* You may obtain a copy of the License at
11+
*
12+
* http://www.apache.org/licenses/LICENSE-2.0
13+
*
14+
* Unless required by applicable law or agreed to in writing, software
15+
* distributed under the License is distributed on an "AS IS" BASIS,
16+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17+
* See the License for the specific language governing permissions and
18+
* limitations under the License.
19+
*/
20+
21+
package com.apple.foundationdb.async.hnsw;
22+
23+
import com.apple.foundationdb.tuple.Tuple;
24+
import com.google.common.collect.ImmutableList;
25+
26+
import javax.annotation.Nonnull;
27+
import java.util.List;
28+
29+
/**
30+
* An abstract base class implementing the {@link Node} interface.
31+
* <p>
32+
* This class provides the fundamental structure for a node within the HNSW graph,
33+
* managing a unique {@link Tuple} primary key and an immutable list of its neighbors.
34+
* Subclasses are expected to provide concrete implementations, potentially adding
35+
* more state or behavior.
36+
*
37+
* @param <N> the type of the node reference used for neighbors, which must extend {@link NodeReference}
38+
*/
39+
abstract class AbstractNode<N extends NodeReference> implements Node<N> {
40+
@Nonnull
41+
private final Tuple primaryKey;
42+
43+
@Nonnull
44+
private final List<N> neighbors;
45+
46+
/**
47+
* Constructs a new {@code AbstractNode} with a specified primary key and a list of neighbors.
48+
*
49+
* @param primaryKey the unique identifier for this node; must not be {@code null}
50+
* @param neighbors the list of nodes connected to this node; must not be {@code null}
51+
*/
52+
protected AbstractNode(@Nonnull final Tuple primaryKey,
53+
@Nonnull final List<N> neighbors) {
54+
this.primaryKey = primaryKey;
55+
this.neighbors = ImmutableList.copyOf(neighbors);
56+
}
57+
58+
/**
59+
* Gets the primary key that uniquely identifies this object.
60+
* @return the primary key {@link Tuple}, which will never be {@code null}.
61+
*/
62+
@Nonnull
63+
@Override
64+
public Tuple getPrimaryKey() {
65+
return primaryKey;
66+
}
67+
68+
/**
69+
* Gets the list of neighbors connected to this node.
70+
* <p>
71+
* This method returns a direct reference to the internal list which is
72+
* immutable.
73+
* @return a non-null, possibly empty, list of neighbors.
74+
*/
75+
@Nonnull
76+
@Override
77+
public List<N> getNeighbors() {
78+
return neighbors;
79+
}
80+
81+
82+
/**
83+
* Converts this node into its {@link CompactNode} representation.
84+
* <p>
85+
* A {@code CompactNode} is a space-efficient implementation {@code Node}. This method provides the
86+
* conversion logic to transform the current object into that compact form.
87+
*
88+
* @return a non-null {@link CompactNode} representing the current node.
89+
*/
90+
@Nonnull
91+
public abstract CompactNode asCompactNode();
92+
93+
/**
94+
* Converts this node into its {@link InliningNode} representation.
95+
* @return this object cast to an {@link InliningNode}; never {@code null}.
96+
* @throws ClassCastException if this object is not actually an instance of
97+
* {@link InliningNode}.
98+
*/
99+
@Nonnull
100+
public abstract InliningNode asInliningNode();
101+
}
Lines changed: 236 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,236 @@
1+
/*
2+
* AbstractStorageAdapter.java
3+
*
4+
* This source file is part of the FoundationDB open source project
5+
*
6+
* Copyright 2015-2025 Apple Inc. and the FoundationDB project authors
7+
*
8+
* Licensed under the Apache License, Version 2.0 (the "License");
9+
* you may not use this file except in compliance with the License.
10+
* You may obtain a copy of the License at
11+
*
12+
* http://www.apache.org/licenses/LICENSE-2.0
13+
*
14+
* Unless required by applicable law or agreed to in writing, software
15+
* distributed under the License is distributed on an "AS IS" BASIS,
16+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17+
* See the License for the specific language governing permissions and
18+
* limitations under the License.
19+
*/
20+
21+
package com.apple.foundationdb.async.hnsw;
22+
23+
import com.apple.foundationdb.ReadTransaction;
24+
import com.apple.foundationdb.Transaction;
25+
import com.apple.foundationdb.linear.AffineOperator;
26+
import com.apple.foundationdb.linear.Quantizer;
27+
import com.apple.foundationdb.subspace.Subspace;
28+
import com.apple.foundationdb.tuple.Tuple;
29+
import org.slf4j.Logger;
30+
import org.slf4j.LoggerFactory;
31+
32+
import javax.annotation.Nonnull;
33+
import javax.annotation.Nullable;
34+
import java.util.concurrent.CompletableFuture;
35+
36+
/**
37+
* An abstract base class for {@link StorageAdapter} implementations.
38+
* <p>
39+
* This class provides the common infrastructure for managing HNSW graph data within a {@link Subspace}.
40+
* It handles the configuration, node creation, and listener management, while delegating the actual
41+
* storage-specific read and write operations to concrete subclasses through the {@code fetchNodeInternal}
42+
* and {@code writeNodeInternal} abstract methods.
43+
*
44+
* @param <N> the type of {@link NodeReference} used to reference nodes in the graph
45+
*/
46+
abstract class AbstractStorageAdapter<N extends NodeReference> implements StorageAdapter<N> {
47+
@Nonnull
48+
private static final Logger logger = LoggerFactory.getLogger(AbstractStorageAdapter.class);
49+
50+
@Nonnull
51+
private final Config config;
52+
@Nonnull
53+
private final NodeFactory<N> nodeFactory;
54+
@Nonnull
55+
private final Subspace subspace;
56+
@Nonnull
57+
private final OnWriteListener onWriteListener;
58+
@Nonnull
59+
private final OnReadListener onReadListener;
60+
61+
@Nonnull
62+
private final Subspace dataSubspace;
63+
64+
/**
65+
* Constructs a new {@code AbstractStorageAdapter}.
66+
* <p>
67+
* This constructor initializes the adapter with the necessary configuration,
68+
* factories, and listeners for managing an HNSW graph. It also sets up a
69+
* dedicated data subspace within the provided main subspace for storing node data.
70+
*
71+
* @param config the HNSW graph configuration
72+
* @param nodeFactory the factory to create new nodes of type {@code <N>}
73+
* @param subspace the primary subspace for storing all graph-related data
74+
* @param onWriteListener the listener to be called on write operations
75+
* @param onReadListener the listener to be called on read operations
76+
*/
77+
protected AbstractStorageAdapter(@Nonnull final Config config, @Nonnull final NodeFactory<N> nodeFactory,
78+
@Nonnull final Subspace subspace,
79+
@Nonnull final OnWriteListener onWriteListener,
80+
@Nonnull final OnReadListener onReadListener) {
81+
this.config = config;
82+
this.nodeFactory = nodeFactory;
83+
this.subspace = subspace;
84+
this.onWriteListener = onWriteListener;
85+
this.onReadListener = onReadListener;
86+
this.dataSubspace = subspace.subspace(Tuple.from(SUBSPACE_PREFIX_DATA));
87+
}
88+
89+
@Override
90+
@Nonnull
91+
public Config getConfig() {
92+
return config;
93+
}
94+
95+
@Nonnull
96+
@Override
97+
public NodeFactory<N> getNodeFactory() {
98+
return nodeFactory;
99+
}
100+
101+
@Override
102+
@Nonnull
103+
public Subspace getSubspace() {
104+
return subspace;
105+
}
106+
107+
/**
108+
* Gets the cached subspace for the data associated with this component.
109+
* <p>
110+
* The data subspace defines the portion of the directory space where the data
111+
* for this component is stored.
112+
*
113+
* @return the non-null {@link Subspace} for the data
114+
*/
115+
@Override
116+
@Nonnull
117+
public Subspace getDataSubspace() {
118+
return dataSubspace;
119+
}
120+
121+
@Override
122+
@Nonnull
123+
public OnWriteListener getOnWriteListener() {
124+
return onWriteListener;
125+
}
126+
127+
@Override
128+
@Nonnull
129+
public OnReadListener getOnReadListener() {
130+
return onReadListener;
131+
}
132+
133+
/**
134+
* Asynchronously fetches a node from a specific layer of the HNSW.
135+
* <p>
136+
* The node is identified by its {@code layer} and {@code primaryKey}. The entire fetch operation is
137+
* performed within the given {@link ReadTransaction}. After the underlying
138+
* fetch operation completes, the retrieved node is validated by the
139+
* {@link #checkNode(Node)} method before the returned future is completed.
140+
*
141+
* @param readTransaction the non-null transaction to use for the read operation
142+
* @param storageTransform an affine vector transformation operator that is used to transform the fetched vector
143+
* into the storage space that is currently being used
144+
* @param layer the layer of the tree from which to fetch the node
145+
* @param primaryKey the non-null primary key that identifies the node to fetch
146+
*
147+
* @return a {@link CompletableFuture} that will complete with the fetched {@link AbstractNode}
148+
* once it has been read from storage and validated
149+
*/
150+
@Nonnull
151+
@Override
152+
public CompletableFuture<AbstractNode<N>> fetchNode(@Nonnull final ReadTransaction readTransaction,
153+
@Nonnull final AffineOperator storageTransform,
154+
int layer, @Nonnull Tuple primaryKey) {
155+
return fetchNodeInternal(readTransaction, storageTransform, layer, primaryKey).thenApply(this::checkNode);
156+
}
157+
158+
/**
159+
* Asynchronously fetches a specific node from the data store for a given layer and primary key.
160+
* <p>
161+
* This is an internal, abstract method that concrete subclasses must implement to define
162+
* the storage-specific logic for retrieving a node. The operation is performed within the
163+
* context of the provided {@link ReadTransaction}.
164+
*
165+
* @param readTransaction the transaction to use for the read operation; must not be {@code null}
166+
* @param storageTransform an affine vector transformation operator that is used to transform the fetched vector
167+
* into the storage space that is currently being used
168+
* @param layer the layer index from which to fetch the node
169+
* @param primaryKey the primary key that uniquely identifies the node to be fetched; must not be {@code null}
170+
*
171+
* @return a {@link CompletableFuture} that will be completed with the fetched {@link AbstractNode}.
172+
* The future will complete with {@code null} if no node is found for the given key and layer.
173+
*/
174+
@Nonnull
175+
protected abstract CompletableFuture<AbstractNode<N>> fetchNodeInternal(@Nonnull ReadTransaction readTransaction,
176+
@Nonnull AffineOperator storageTransform,
177+
int layer, @Nonnull Tuple primaryKey);
178+
179+
/**
180+
* Method to perform basic invariant check(s) on a newly-fetched node.
181+
*
182+
* @param node the node to check
183+
* was passed in
184+
*
185+
* @return the node that was passed in
186+
*/
187+
@Nullable
188+
private <T extends Node<N>> T checkNode(@Nullable final T node) {
189+
return node;
190+
}
191+
192+
/**
193+
* Writes a given node and its neighbor modifications to the underlying storage.
194+
* <p>
195+
* This operation is executed within the context of the provided {@link Transaction}.
196+
* It handles persisting the node's data at a specific {@code layer} and applies
197+
* the changes to its neighbors as defined in the {@link NeighborsChangeSet}.
198+
* This method delegates the core writing logic to an internal method and provides
199+
* debug logging upon completion.
200+
*
201+
* @param transaction the non-null {@link Transaction} context for this write operation
202+
* @param quantizer the quantizer to use
203+
* @param node the non-null {@link Node} to be written to storage
204+
* @param layer the layer index where the node is being written
205+
* @param changeSet the non-null {@link NeighborsChangeSet} detailing the modifications
206+
* to the node's neighbors
207+
*/
208+
@Override
209+
public void writeNode(@Nonnull final Transaction transaction, @Nonnull final Quantizer quantizer,
210+
@Nonnull final AbstractNode<N> node, final int layer,
211+
@Nonnull final NeighborsChangeSet<N> changeSet) {
212+
writeNodeInternal(transaction, quantizer, node, layer, changeSet);
213+
if (logger.isTraceEnabled()) {
214+
logger.trace("written node with key={} at layer={}", node.getPrimaryKey(), layer);
215+
}
216+
}
217+
218+
/**
219+
* Writes a single node to the data store as part of a larger transaction.
220+
* <p>
221+
* This is an abstract method that concrete implementations must provide.
222+
* It is responsible for the low-level persistence of the given {@code node} at a
223+
* specific {@code layer}. The implementation should also handle the modifications
224+
* to the node's neighbors, as detailed in the {@code changeSet}.
225+
*
226+
* @param transaction the non-null transaction context for the write operation
227+
* @param quantizer the quantizer to use
228+
* @param node the non-null {@link Node} to write
229+
* @param layer the layer or level of the node in the structure
230+
* @param changeSet the non-null {@link NeighborsChangeSet} detailing additions or
231+
* removals of neighbor links
232+
*/
233+
protected abstract void writeNodeInternal(@Nonnull Transaction transaction, @Nonnull Quantizer quantizer,
234+
@Nonnull AbstractNode<N> node, int layer,
235+
@Nonnull NeighborsChangeSet<N> changeSet);
236+
}

0 commit comments

Comments
 (0)