Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
53 commits
Select commit Hold shift + click to select a range
bfb2758
initial code drop from hnsw-poc
normen662 Sep 16, 2025
0e37815
adding tests
normen662 Sep 16, 2025
eb10a65
adding javadocs
normen662 Sep 17, 2025
00ed788
adding comments
normen662 Sep 17, 2025
174261e
more javadoc and tests
normen662 Sep 17, 2025
bbd8471
adding a lot of java doc
normen662 Sep 19, 2025
09d6288
added tests
normen662 Sep 19, 2025
ae34290
increase timeout for test case
normen662 Sep 19, 2025
8b70524
refactored Vector class to be more aligned with math libraries
normen662 Sep 23, 2025
39db545
removed efSearch from HNSW
normen662 Sep 23, 2025
1a9437c
adding some initial rabitq-related matrix ops
normen662 Sep 30, 2025
8c4eb05
best rescale factor
normen662 Oct 2, 2025
20c59be
quantize ex
normen662 Oct 3, 2025
ada8d46
basic encoding works
normen662 Oct 4, 2025
999cdba
estimator works
normen662 Oct 6, 2025
799f793
encoding + estimation
normen662 Oct 8, 2025
103e507
packing works
normen662 Oct 11, 2025
f74c887
serialization round trip works
normen662 Oct 11, 2025
55fdccc
pre-savepoint
normen662 Oct 13, 2025
1e828a3
rabitq in hnsw; barely compiles
normen662 Oct 13, 2025
e4e150d
rabitq in hnsw works
normen662 Oct 14, 2025
3602673
basic vector encoding, half support
normen662 Oct 14, 2025
6555990
refactoring so that feature branch hnsw and rabitq can use a proper l…
normen662 Oct 15, 2025
2e10275
addressing some comments
normen662 Oct 16, 2025
8079b02
addressing some comments (2)
normen662 Oct 17, 2025
585b95e
addressing some comments (3)
normen662 Oct 17, 2025
2458246
addressing some comments (4)
normen662 Oct 18, 2025
ebe029e
addressing some comments (5)
normen662 Oct 19, 2025
d2d288d
addressing some comments (6)
normen662 Oct 20, 2025
3a41d34
more tests
normen662 Oct 21, 2025
ce5e122
more tests
normen662 Oct 21, 2025
3df735b
code complete
normen662 Oct 22, 2025
09320d7
code complete -- for realz
normen662 Oct 22, 2025
bf8e33b
rebase fallout
normen662 Oct 23, 2025
b262f29
addressing some comments from #3677
normen662 Oct 24, 2025
e452392
changes to StorageAdapter needed for vector samples
normen662 Oct 24, 2025
57425f8
adding affine operator
normen662 Oct 25, 2025
40f09ac
adding storageTransformOperator
normen662 Oct 26, 2025
9baa4fd
HNSW uses rabitq and affine operators
normen662 Oct 26, 2025
3a8b78e
save point
normen662 Oct 27, 2025
9c2ee9b
rabitq works inside of HNSW
normen662 Oct 28, 2025
6bd8e8c
rabitq works inside of HNSW
normen662 Oct 28, 2025
b534e2c
adressing comments and simplifying code
normen662 Oct 30, 2025
e232ed8
adressing comments and simplifying code
normen662 Oct 30, 2025
03bc73a
adressing comments and simplifying code
normen662 Oct 30, 2025
fb7c1d2
adressing comments and simplifying code
normen662 Oct 30, 2025
f601317
inverting the apply-invertedApply
normen662 Oct 31, 2025
1c91c00
Transformed
normen662 Nov 2, 2025
d2e4ef3
addressing more comments
normen662 Nov 3, 2025
e13762d
last round of addressing comments
normen662 Nov 4, 2025
809d55f
updating copyright years
normen662 Nov 5, 2025
b0b03ff
done
normen662 Nov 5, 2025
2dc59a9
adding one more test
normen662 Nov 5, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
/*
* AbstractNode.java
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2015-2025 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.apple.foundationdb.async.hnsw;

import com.apple.foundationdb.tuple.Tuple;
import com.google.common.collect.ImmutableList;

import javax.annotation.Nonnull;
import java.util.List;

/**
 * Skeletal {@link Node} implementation shared by the node kinds of the HNSW graph.
 * <p>
 * An instance carries exactly two pieces of state: the {@link Tuple} primary key that identifies the node and
 * an immutable snapshot of the references to its neighbors. Concrete subclasses supply the conversions to the
 * specific node representations and are free to add further state or behavior.
 *
 * @param <N> the kind of {@link NodeReference} used to refer to this node's neighbors
 */
abstract class AbstractNode<N extends NodeReference> implements Node<N> {
    @Nonnull
    private final Tuple primaryKey;

    @Nonnull
    private final List<N> neighbors;

    /**
     * Creates a node from its primary key and its neighbor references.
     * <p>
     * The neighbor list is captured via {@link ImmutableList#copyOf}, so this node exposes an immutable list
     * regardless of the kind of list the caller passed in.
     *
     * @param primaryKey the key identifying this node; must not be {@code null}
     * @param neighbors the references to the nodes this node is connected to; must not be {@code null}
     */
    protected AbstractNode(@Nonnull final Tuple primaryKey, @Nonnull final List<N> neighbors) {
        // Take the immutable snapshot first; both assignments are independent of each other.
        this.neighbors = ImmutableList.copyOf(neighbors);
        this.primaryKey = primaryKey;
    }

    /**
     * Returns the primary key identifying this node.
     *
     * @return the primary key {@link Tuple} handed to the constructor; never {@code null}
     */
    @Override
    @Nonnull
    public Tuple getPrimaryKey() {
        return primaryKey;
    }

    /**
     * Returns the neighbors of this node.
     * <p>
     * The internal list is handed out directly, without another copy; this is safe because the list is immutable.
     *
     * @return the non-null, possibly empty, immutable list of neighbor references
     */
    @Override
    @Nonnull
    public List<N> getNeighbors() {
        return neighbors;
    }

    /**
     * Views this node as a {@link CompactNode}.
     * <p>
     * A {@code CompactNode} is a space-efficient implementation of {@code Node}; subclasses define how (and
     * whether) the current node can be presented in that form.
     *
     * @return a non-null {@link CompactNode} representing this node
     */
    @Nonnull
    public abstract CompactNode asCompactNode();

    /**
     * Views this node as an {@link InliningNode}.
     *
     * @return this node as an {@link InliningNode}; never {@code null}
     * @throws ClassCastException if this node is not actually an {@link InliningNode}
     */
    @Nonnull
    public abstract InliningNode asInliningNode();
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,236 @@
/*
* AbstractStorageAdapter.java
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2015-2025 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.apple.foundationdb.async.hnsw;

import com.apple.foundationdb.ReadTransaction;
import com.apple.foundationdb.Transaction;
import com.apple.foundationdb.linear.AffineOperator;
import com.apple.foundationdb.linear.Quantizer;
import com.apple.foundationdb.subspace.Subspace;
import com.apple.foundationdb.tuple.Tuple;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.util.concurrent.CompletableFuture;

/**
 * Common base class for {@link StorageAdapter} implementations.
 * <p>
 * This class holds the pieces every adapter needs to manage HNSW graph data inside a {@link Subspace}: the
 * configuration, the node factory, the subspaces, and the read/write listeners. The storage-specific work is
 * left to subclasses, which implement {@code fetchNodeInternal} and {@code writeNodeInternal}; the public
 * {@code fetchNode} and {@code writeNode} methods wrap those hooks.
 *
 * @param <N> the type of {@link NodeReference} used to reference nodes in the graph
 */
abstract class AbstractStorageAdapter<N extends NodeReference> implements StorageAdapter<N> {
    @Nonnull
    private static final Logger logger = LoggerFactory.getLogger(AbstractStorageAdapter.class);

    @Nonnull
    private final Config config;
    @Nonnull
    private final NodeFactory<N> nodeFactory;
    @Nonnull
    private final Subspace subspace;
    @Nonnull
    private final Subspace dataSubspace;
    @Nonnull
    private final OnWriteListener onWriteListener;
    @Nonnull
    private final OnReadListener onReadListener;

    /**
     * Creates a new {@code AbstractStorageAdapter}.
     * <p>
     * Besides storing the given collaborators, the constructor derives the data subspace once, by nesting
     * {@code SUBSPACE_PREFIX_DATA} underneath the supplied main subspace.
     *
     * @param config the HNSW graph configuration
     * @param nodeFactory the factory used to create nodes of type {@code <N>}
     * @param subspace the primary subspace under which all graph-related data is stored
     * @param onWriteListener the listener to be called on write operations
     * @param onReadListener the listener to be called on read operations
     */
    protected AbstractStorageAdapter(@Nonnull final Config config, @Nonnull final NodeFactory<N> nodeFactory,
                                     @Nonnull final Subspace subspace,
                                     @Nonnull final OnWriteListener onWriteListener,
                                     @Nonnull final OnReadListener onReadListener) {
        this.config = config;
        this.nodeFactory = nodeFactory;
        this.onWriteListener = onWriteListener;
        this.onReadListener = onReadListener;
        this.subspace = subspace;
        final Tuple dataPrefix = Tuple.from(SUBSPACE_PREFIX_DATA);
        this.dataSubspace = subspace.subspace(dataPrefix);
    }

    /**
     * Returns the configuration this adapter was created with.
     *
     * @return the non-null {@link Config}
     */
    @Nonnull
    @Override
    public Config getConfig() {
        return config;
    }

    /**
     * Returns the node factory this adapter was created with.
     *
     * @return the non-null {@link NodeFactory}
     */
    @Override
    @Nonnull
    public NodeFactory<N> getNodeFactory() {
        return nodeFactory;
    }

    /**
     * Returns the primary subspace this adapter was created with.
     *
     * @return the non-null main {@link Subspace}
     */
    @Nonnull
    @Override
    public Subspace getSubspace() {
        return subspace;
    }

    /**
     * Returns the subspace holding the data managed by this adapter.
     * <p>
     * The value is computed once in the constructor and cached; it is the main subspace extended by the data
     * prefix.
     *
     * @return the non-null {@link Subspace} for the data
     */
    @Nonnull
    @Override
    public Subspace getDataSubspace() {
        return dataSubspace;
    }

    /**
     * Returns the listener that is notified about write operations.
     *
     * @return the non-null {@link OnWriteListener}
     */
    @Nonnull
    @Override
    public OnWriteListener getOnWriteListener() {
        return onWriteListener;
    }

    /**
     * Returns the listener that is notified about read operations.
     *
     * @return the non-null {@link OnReadListener}
     */
    @Nonnull
    @Override
    public OnReadListener getOnReadListener() {
        return onReadListener;
    }

    /**
     * Asynchronously fetches a node from a specific layer of the HNSW.
     * <p>
     * The node is identified by its {@code layer} and {@code primaryKey}, and the read is carried out using the
     * given {@link ReadTransaction}. The actual retrieval is delegated to
     * {@link #fetchNodeInternal(ReadTransaction, AffineOperator, int, Tuple)}; once that completes, the result
     * is passed through {@link #checkNode(Node)} before the returned future completes.
     *
     * @param readTransaction the non-null transaction to use for the read operation
     * @param storageTransform an affine vector transformation operator that is used to transform the fetched
     *        vector into the storage space that is currently being used
     * @param layer the layer of the graph from which to fetch the node
     * @param primaryKey the non-null primary key that identifies the node to fetch
     *
     * @return a {@link CompletableFuture} that completes with the fetched {@link AbstractNode} after it has been
     *         read from storage and checked
     */
    @Override
    @Nonnull
    public CompletableFuture<AbstractNode<N>> fetchNode(@Nonnull final ReadTransaction readTransaction,
                                                        @Nonnull final AffineOperator storageTransform,
                                                        final int layer, @Nonnull final Tuple primaryKey) {
        final CompletableFuture<AbstractNode<N>> fetched =
                fetchNodeInternal(readTransaction, storageTransform, layer, primaryKey);
        return fetched.thenApply(this::checkNode);
    }

    /**
     * Asynchronously fetches a specific node from the data store for a given layer and primary key.
     * <p>
     * This is the storage-specific hook behind {@link #fetchNode(ReadTransaction, AffineOperator, int, Tuple)}
     * that concrete subclasses must implement. The operation is performed within the context of the provided
     * {@link ReadTransaction}.
     *
     * @param readTransaction the transaction to use for the read operation; must not be {@code null}
     * @param storageTransform an affine vector transformation operator that is used to transform the fetched
     *        vector into the storage space that is currently being used
     * @param layer the layer index from which to fetch the node
     * @param primaryKey the primary key that uniquely identifies the node to be fetched; must not be
     *        {@code null}
     *
     * @return a {@link CompletableFuture} that will be completed with the fetched {@link AbstractNode}.
     *         The future will complete with {@code null} if no node is found for the given key and layer.
     */
    @Nonnull
    protected abstract CompletableFuture<AbstractNode<N>> fetchNodeInternal(@Nonnull ReadTransaction readTransaction,
                                                                            @Nonnull AffineOperator storageTransform,
                                                                            int layer, @Nonnull Tuple primaryKey);

    /**
     * Hook for basic invariant check(s) on a newly-fetched node.
     * <p>
     * At present no checks are performed; the argument is handed back unchanged.
     *
     * @param node the node to check; may be {@code null}
     * @param <T> the concrete node type
     *
     * @return the node that was passed in
     */
    @Nullable
    private <T extends Node<N>> T checkNode(@Nullable final T node) {
        return node;
    }

    /**
     * Writes a given node and its neighbor modifications to the underlying storage.
     * <p>
     * The write is executed using the provided {@link Transaction}. All persistence work, for the node at the
     * given {@code layer} as well as for the neighbor changes described by the {@link NeighborsChangeSet}, is
     * delegated to
     * {@link #writeNodeInternal(Transaction, Quantizer, AbstractNode, int, NeighborsChangeSet)}. Afterwards a
     * message containing the node's primary key and the layer is logged at trace level, if trace logging is
     * enabled.
     *
     * @param transaction the non-null {@link Transaction} context for this write operation
     * @param quantizer the quantizer to use
     * @param node the non-null {@link Node} to be written to storage
     * @param layer the layer index where the node is being written
     * @param changeSet the non-null {@link NeighborsChangeSet} detailing the modifications
     *        to the node's neighbors
     */
    @Override
    public void writeNode(@Nonnull final Transaction transaction, @Nonnull final Quantizer quantizer,
                          @Nonnull final AbstractNode<N> node, final int layer,
                          @Nonnull final NeighborsChangeSet<N> changeSet) {
        writeNodeInternal(transaction, quantizer, node, layer, changeSet);

        // Nothing more to do unless somebody is listening at trace level.
        if (!logger.isTraceEnabled()) {
            return;
        }
        logger.trace("written node with key={} at layer={}", node.getPrimaryKey(), layer);
    }

    /**
     * Writes a single node to the data store as part of a larger transaction.
     * <p>
     * This is the storage-specific hook behind
     * {@link #writeNode(Transaction, Quantizer, AbstractNode, int, NeighborsChangeSet)} that concrete
     * implementations must provide. It is responsible for the low-level persistence of the given {@code node}
     * at a specific {@code layer}. The implementation should also handle the modifications to the node's
     * neighbors, as detailed in the {@code changeSet}.
     *
     * @param transaction the non-null transaction context for the write operation
     * @param quantizer the quantizer to use
     * @param node the non-null {@link Node} to write
     * @param layer the layer or level of the node in the structure
     * @param changeSet the non-null {@link NeighborsChangeSet} detailing additions or
     *        removals of neighbor links
     */
    protected abstract void writeNodeInternal(@Nonnull Transaction transaction, @Nonnull Quantizer quantizer,
                                              @Nonnull AbstractNode<N> node, int layer,
                                              @Nonnull NeighborsChangeSet<N> changeSet);
}
Loading
Loading