Skip to content

[MOD-9576] make CreatePreprocessorsContainerParams templated and move it to header file #670

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion src/VecSim/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ add_library(VectorSimilarity ${VECSIM_LIBTYPE}
index_factories/tiered_factory.cpp
index_factories/svs_factory.cpp
index_factories/index_factory.cpp
index_factories/components/components_factory.cpp
algorithms/hnsw/visited_nodes_handler.cpp
vec_sim.cpp
vec_sim_debug.cpp
Expand Down
23 changes: 0 additions & 23 deletions src/VecSim/index_factories/components/components_factory.cpp

This file was deleted.

47 changes: 45 additions & 2 deletions src/VecSim/index_factories/components/components_factory.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,52 @@
#include "VecSim/index_factories/components/preprocessors_factory.h"
#include "VecSim/spaces/computer/calculator.h"

/**
* @brief Creates parameters for a preprocessors container based on the given metric, dimension,
* normalization flag, and alignment.
*
* @tparam DataType The data type of the vector elements (e.g., float, int).
* @param metric The similarity metric to be used (e.g., Cosine, Inner Product).
* @param dim The dimensionality of the vectors.
* @param is_normalized A flag indicating whether the vectors are already normalized.
* @param alignment The alignment requirement for the data.
* @return A PreprocessorsContainerParams object containing the processed parameters:
* - metric: The adjusted metric based on the input and normalization flag.
* - dim: The dimensionality of the vectors.
* - alignment: The alignment requirement for the data.
* - processed_bytes_count: The size of the processed data blob in bytes.
*
* @details
* If the metric is Cosine and the data type is integral, the processed bytes count may include
* additional space for normalization. If the vectors are already
* normalized (is_normalized == true), the metric is adjusted to Inner Product (IP) to skip
* redundant normalization during preprocessing.
*/
template <typename DataType>
PreprocessorsContainerParams CreatePreprocessorsContainerParams(VecSimMetric metric, size_t dim,
bool is_normalized,
unsigned char alignment);
unsigned char alignment) {
// By default the processed blob size is the same as the original blob size.
size_t processed_bytes_count = dim * sizeof(DataType);

VecSimMetric pp_metric = metric;
if (metric == VecSimMetric_Cosine) {
// if metric is cosine and DataType is integral, the processed_bytes_count includes the
// norm appended to the vector.
if (std::is_integral<DataType>::value) {
processed_bytes_count += sizeof(float);
}
// if is_normalized == true, we will enforce skipping normalizing vector and query blobs by
// setting the metric to IP.
if (is_normalized) {
pp_metric = VecSimMetric_IP;
}
}
return {.metric = pp_metric,
.dim = dim,
.alignment = alignment,
.processed_bytes_count = processed_bytes_count};
}

template <typename DataType, typename DistType>
IndexComponents<DataType, DistType>
Expand All @@ -29,7 +72,7 @@ CreateIndexComponents(std::shared_ptr<VecSimAllocator> allocator, VecSimMetric m
auto indexCalculator = new (allocator) DistanceCalculatorCommon<DistType>(allocator, distFunc);

PreprocessorsContainerParams ppParams =
CreatePreprocessorsContainerParams(metric, dim, is_normalized, alignment);
CreatePreprocessorsContainerParams<DataType>(metric, dim, is_normalized, alignment);
auto preprocessors = CreatePreprocessorsContainer<DataType>(allocator, ppParams);

return {indexCalculator, preprocessors};
Expand Down
5 changes: 3 additions & 2 deletions src/VecSim/index_factories/components/preprocessors_factory.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ struct PreprocessorsContainerParams {
VecSimMetric metric;
size_t dim;
unsigned char alignment;
size_t processed_bytes_count;
};

template <typename DataType>
Expand All @@ -25,8 +26,8 @@ CreatePreprocessorsContainer(std::shared_ptr<VecSimAllocator> allocator,
if (params.metric == VecSimMetric_Cosine) {
auto multiPPContainer =
new (allocator) MultiPreprocessorsContainer<DataType, 1>(allocator, params.alignment);
auto cosine_preprocessor =
new (allocator) CosinePreprocessor<DataType>(allocator, params.dim);
auto cosine_preprocessor = new (allocator)
CosinePreprocessor<DataType>(allocator, params.dim, params.processed_bytes_count);
int next_valid_pp_index = multiPPContainer->addPreprocessor(cosine_preprocessor);
UNUSED(next_valid_pp_index);
assert(next_valid_pp_index != -1 && "Cosine preprocessor was not added correctly");
Expand Down
26 changes: 13 additions & 13 deletions src/VecSim/spaces/computer/preprocessor_container.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,37 +9,37 @@
#include "VecSim/spaces/computer/preprocessor_container.h"

ProcessedBlobs PreprocessorsContainerAbstract::preprocess(const void *original_blob,
size_t processed_bytes_count) const {
return ProcessedBlobs(preprocessForStorage(original_blob, processed_bytes_count),
preprocessQuery(original_blob, processed_bytes_count));
size_t input_blob_size) const {
return ProcessedBlobs(preprocessForStorage(original_blob, input_blob_size),
preprocessQuery(original_blob, input_blob_size));
}

MemoryUtils::unique_blob
PreprocessorsContainerAbstract::preprocessForStorage(const void *original_blob,
size_t processed_bytes_count) const {
size_t input_blob_size) const {
return wrapWithDummyDeleter(const_cast<void *>(original_blob));
}

MemoryUtils::unique_blob PreprocessorsContainerAbstract::preprocessQuery(
const void *original_blob, size_t processed_bytes_count, bool force_copy) const {
return maybeCopyToAlignedMem(original_blob, processed_bytes_count, force_copy);
MemoryUtils::unique_blob PreprocessorsContainerAbstract::preprocessQuery(const void *original_blob,
size_t input_blob_size,
bool force_copy) const {
return maybeCopyToAlignedMem(original_blob, input_blob_size, force_copy);
}

void PreprocessorsContainerAbstract::preprocessQueryInPlace(void *blob,
size_t processed_bytes_count) const {}
size_t input_blob_size) const {}

Check warning on line 30 in src/VecSim/spaces/computer/preprocessor_container.cpp

View check run for this annotation

Codecov / codecov/patch

src/VecSim/spaces/computer/preprocessor_container.cpp#L30

Added line #L30 was not covered by tests

void PreprocessorsContainerAbstract::preprocessStorageInPlace(void *blob,
size_t processed_bytes_count) const {}
size_t input_blob_size) const {}

Check warning on line 33 in src/VecSim/spaces/computer/preprocessor_container.cpp

View check run for this annotation

Codecov / codecov/patch

src/VecSim/spaces/computer/preprocessor_container.cpp#L33

Added line #L33 was not covered by tests

MemoryUtils::unique_blob PreprocessorsContainerAbstract::maybeCopyToAlignedMem(
const void *original_blob, size_t blob_bytes_count, bool force_copy) const {
const void *original_blob, size_t input_blob_size, bool force_copy) const {
bool needs_copy =
force_copy || (this->alignment && ((uintptr_t)original_blob % this->alignment != 0));

if (needs_copy) {
auto aligned_mem = this->allocator->allocate_aligned(blob_bytes_count, this->alignment);
// TODO: handle original_blob_size != processed_bytes_count
memcpy(aligned_mem, original_blob, blob_bytes_count);
auto aligned_mem = this->allocator->allocate_aligned(input_blob_size, this->alignment);
memcpy(aligned_mem, original_blob, input_blob_size);
return this->wrapAllocated(aligned_mem);
}

Expand Down
62 changes: 30 additions & 32 deletions src/VecSim/spaces/computer/preprocessor_container.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,19 +22,19 @@
PreprocessorsContainerAbstract(std::shared_ptr<VecSimAllocator> allocator,
unsigned char alignment)
: VecsimBaseObject(allocator), alignment(alignment) {}
virtual ProcessedBlobs preprocess(const void *original_blob,
size_t processed_bytes_count) const;
// It is assumed that the resulted query blob is aligned.
virtual ProcessedBlobs preprocess(const void *original_blob, size_t input_blob_size) const;

virtual MemoryUtils::unique_blob preprocessForStorage(const void *original_blob,
size_t processed_bytes_count) const;
size_t input_blob_size) const;

// It is assumed that the resulted query blob is aligned.
virtual MemoryUtils::unique_blob preprocessQuery(const void *original_blob,
size_t processed_bytes_count,
size_t input_blob_size,
bool force_copy = false) const;

virtual void preprocessQueryInPlace(void *blob, size_t processed_bytes_count) const;

virtual void preprocessStorageInPlace(void *blob, size_t processed_bytes_count) const;
virtual void preprocessQueryInPlace(void *blob, size_t input_blob_size) const;
virtual void preprocessStorageInPlace(void *blob, size_t input_blob_size) const;

unsigned char getAlignment() const { return alignment; }

Expand All @@ -43,7 +43,7 @@

// Allocate and copy the blob only if the original blob is not aligned.
MemoryUtils::unique_blob maybeCopyToAlignedMem(const void *original_blob,
size_t blob_bytes_count,
size_t input_blob_size,
bool force_copy = false) const;

MemoryUtils::unique_blob wrapAllocated(void *blob) const {
Expand Down Expand Up @@ -82,19 +82,17 @@
*/
int addPreprocessor(PreprocessorInterface *preprocessor);

ProcessedBlobs preprocess(const void *original_blob,
size_t processed_bytes_count) const override;
ProcessedBlobs preprocess(const void *original_blob, size_t input_blob_size) const override;

MemoryUtils::unique_blob preprocessForStorage(const void *original_blob,
size_t processed_bytes_count) const override;
size_t input_blob_size) const override;

MemoryUtils::unique_blob preprocessQuery(const void *original_blob,
size_t processed_bytes_count,
MemoryUtils::unique_blob preprocessQuery(const void *original_blob, size_t input_blob_size,
bool force_copy = false) const override;

void preprocessQueryInPlace(void *blob, size_t processed_bytes_count) const override;
void preprocessQueryInPlace(void *blob, size_t input_blob_size) const override;

void preprocessStorageInPlace(void *blob, size_t processed_bytes_count) const override;
void preprocessStorageInPlace(void *blob, size_t input_blob_size) const override;

#ifdef BUILD_TESTS
std::array<PreprocessorInterface *, n_preprocessors> getPreprocessors() const {
Expand Down Expand Up @@ -159,12 +157,13 @@
}

template <typename DataType, size_t n_preprocessors>
ProcessedBlobs MultiPreprocessorsContainer<DataType, n_preprocessors>::preprocess(
const void *original_blob, size_t processed_bytes_count) const {
ProcessedBlobs
MultiPreprocessorsContainer<DataType, n_preprocessors>::preprocess(const void *original_blob,
size_t input_blob_size) const {
// No preprocessors were added yet.
if (preprocessors[0] == nullptr) {
// query might need to be aligned
auto query_ptr = this->maybeCopyToAlignedMem(original_blob, processed_bytes_count);
auto query_ptr = this->maybeCopyToAlignedMem(original_blob, input_blob_size);
return ProcessedBlobs(
std::move(Base::wrapWithDummyDeleter(const_cast<void *>(original_blob))),
std::move(query_ptr));
Expand All @@ -175,8 +174,7 @@
for (auto pp : preprocessors) {
if (!pp)
break;
pp->preprocess(original_blob, storage_blob, query_blob, processed_bytes_count,
this->alignment);
pp->preprocess(original_blob, storage_blob, query_blob, input_blob_size, this->alignment);
}
// At least one blob was allocated.

Expand All @@ -194,7 +192,7 @@

if (query_blob == nullptr) { // we processed only the storage
// query might need to be aligned
auto query_ptr = this->maybeCopyToAlignedMem(original_blob, processed_bytes_count);
auto query_ptr = this->maybeCopyToAlignedMem(original_blob, input_blob_size);
return ProcessedBlobs(std::move(this->wrapAllocated(storage_blob)), std::move(query_ptr));
}

Expand All @@ -206,13 +204,13 @@
template <typename DataType, size_t n_preprocessors>
MemoryUtils::unique_blob
MultiPreprocessorsContainer<DataType, n_preprocessors>::preprocessForStorage(
const void *original_blob, size_t processed_bytes_count) const {
const void *original_blob, size_t input_blob_size) const {

void *storage_blob = nullptr;
for (auto pp : preprocessors) {
if (!pp)
break;
pp->preprocessForStorage(original_blob, storage_blob, processed_bytes_count);
pp->preprocessForStorage(original_blob, storage_blob, input_blob_size);
}

return storage_blob ? std::move(this->wrapAllocated(storage_blob))
Expand All @@ -221,40 +219,40 @@

template <typename DataType, size_t n_preprocessors>
MemoryUtils::unique_blob MultiPreprocessorsContainer<DataType, n_preprocessors>::preprocessQuery(
const void *original_blob, size_t processed_bytes_count, bool force_copy) const {
const void *original_blob, size_t input_blob_size, bool force_copy) const {

void *query_blob = nullptr;
for (auto pp : preprocessors) {
if (!pp)
break;
// modifies the memory in place
pp->preprocessQuery(original_blob, query_blob, processed_bytes_count, this->alignment);
pp->preprocessQuery(original_blob, query_blob, input_blob_size, this->alignment);
}
return query_blob ? std::move(this->wrapAllocated(query_blob))
: std::move(this->maybeCopyToAlignedMem(original_blob, processed_bytes_count,
force_copy));
return query_blob
? std::move(this->wrapAllocated(query_blob))
: std::move(this->maybeCopyToAlignedMem(original_blob, input_blob_size, force_copy));
}

template <typename DataType, size_t n_preprocessors>
void MultiPreprocessorsContainer<DataType, n_preprocessors>::preprocessQueryInPlace(
void *blob, size_t processed_bytes_count) const {
void *blob, size_t input_blob_size) const {

for (auto pp : preprocessors) {
if (!pp)
break;
// modifies the memory in place
pp->preprocessQueryInPlace(blob, processed_bytes_count, this->alignment);
pp->preprocessQueryInPlace(blob, input_blob_size, this->alignment);
}
}

template <typename DataType, size_t n_preprocessors>
void MultiPreprocessorsContainer<DataType, n_preprocessors>::preprocessStorageInPlace(
void *blob, size_t processed_bytes_count) const {
void *blob, size_t input_blob_size) const {

for (auto pp : preprocessors) {
if (!pp)
break;
// modifies the memory in place
pp->preprocessStorageInPlace(blob, processed_bytes_count);
pp->preprocessStorageInPlace(blob, input_blob_size);

Check warning on line 256 in src/VecSim/spaces/computer/preprocessor_container.h

View check run for this annotation

Codecov / codecov/patch

src/VecSim/spaces/computer/preprocessor_container.h#L256

Added line #L256 was not covered by tests
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

will be covered by svs code

}
}
Loading
Loading