Skip to content

Commit b2f7260

Browse files
committed
Code review s1e1: Extract complex logic to dedicated funcs
1 parent 5b839dc commit b2f7260

File tree

1 file changed

+90
-80
lines changed

1 file changed

+90
-80
lines changed

src/VecSim/algorithms/svs/svs_tiered.h

Lines changed: 90 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -561,6 +561,50 @@ class TieredSVSIndex : public VecSimTieredIndex<DataType, float> {
561561
}
562562

563563
private:
564+
static void applySwapsToLabelsArray(std::vector<size_t> &labels,
565+
const std::vector<swap_record> &swaps) {
566+
// Enumerate journal and reflect swaps in the labels.
567+
// The journal contains tuples of (label, oldId, newId).
568+
// oldId is the index of the label in flat index before the swap.
569+
// newId is the index of the label in flat index after the swap.
570+
for (const auto &p : swaps) {
571+
auto label = std::get<0>(p);
572+
auto oldId = std::get<1>(p);
573+
auto newId = std::get<2>(p);
574+
575+
// If oldId is out of bounds - skip,
576+
// but if newId is in - do no delete on newId.
577+
if (oldId >= labels.size()) {
578+
if (newId < labels.size()) {
579+
labels[newId] = SKIP_LABEL;
580+
}
581+
continue; // Next record.
582+
}
583+
584+
// If label is SKIP_LABEL, it means that the vector was not moved but updated
585+
// in-place. We do not need to remove it from labels, just skip it in
586+
// deleteVector().
587+
if (label == SKIP_LABEL) {
588+
labels[oldId] = SKIP_LABEL;
589+
continue; // Next record.
590+
}
591+
592+
// If recorded label does not match the label in labels,
593+
// it means that another vector was moved to the old position - do not delete it.
594+
if (label != labels[oldId]) {
595+
labels[oldId] = SKIP_LABEL;
596+
continue;
597+
}
598+
599+
// remove the old label from labels at position oldId
600+
labels.erase(labels.begin() + oldId);
601+
if (newId != oldId) { // swap mode
602+
// Update the label at newId position
603+
labels[newId] = label;
604+
}
605+
}
606+
}
607+
564608
void updateSVSIndex() {
565609
std::vector<labelType> labels_to_move;
566610
std::vector<DataType> vectors_to_move;
@@ -596,46 +640,9 @@ class TieredSVSIndex : public VecSimTieredIndex<DataType, float> {
596640
{ // lock frontend index for writing and delete moved vectors
597641
std::lock_guard lock(this->flatIndexGuard);
598642

599-
// Enumerate journal and reflect swaps in the labels_to_move.
600-
// The journal contains tuples of (label, oldId, newId).
601-
// oldId is the index of the label in flat index before the swap.
602-
// newId is the index of the label in flat index after the swap.
603-
for (const auto &p : this->swaps_journal) {
604-
auto label = std::get<0>(p);
605-
auto oldId = std::get<1>(p);
606-
auto newId = std::get<2>(p);
607-
608-
// If oldId is out of bounds - skip,
609-
// but if newId is in - do no delete on newId.
610-
if (oldId >= labels_to_move.size()) {
611-
if (newId < labels_to_move.size()) {
612-
labels_to_move[newId] = SKIP_LABEL;
613-
}
614-
continue; // Next record.
615-
}
616-
617-
// If label is SKIP_LABEL, it means that the vector was not moved but updated
618-
// in-place. We do not need to remove it from labels_to_move, just skip it in
619-
// deleteVector().
620-
if (label == SKIP_LABEL) {
621-
labels_to_move[oldId] = SKIP_LABEL;
622-
continue; // Next record.
623-
}
624-
625-
// If recorded label does not match the label in labels_to_move,
626-
// it means that another vector was moved to the old position - do not delete it.
627-
if (label != labels_to_move[oldId]) {
628-
labels_to_move[oldId] = SKIP_LABEL;
629-
continue;
630-
}
643+
// Apply swaps from journal to labels_to_move to reflect changes made in meanwhile.
644+
applySwapsToLabelsArray(labels_to_move, this->swaps_journal);
631645

632-
// remove the old label from labels_to_move at position oldId
633-
labels_to_move.erase(labels_to_move.begin() + oldId);
634-
if (newId != oldId) { // swap mode
635-
// Update the label at newId position
636-
labels_to_move[newId] = label;
637-
}
638-
}
639646
// delete vectors from the frontend index in reverse order
640647
// it increases the chance of avoiding swaps in the frontend index and performance
641648
// improvement
@@ -763,6 +770,49 @@ class TieredSVSIndex : public VecSimTieredIndex<DataType, float> {
763770
return ret;
764771
}
765772

773+
int deleteAndRecordSwaps_Unsafe(labelType label) {
774+
auto deleting_ids = this->frontendIndex->getElementIds(label);
775+
if (deleting_ids.empty()) {
776+
// If there are no ids to delete, return 0.
777+
return 0;
778+
}
779+
780+
// assert if all elements of deleting_ids are unique
781+
assert(std::set(deleting_ids.begin(), deleting_ids.end()).size() == deleting_ids.size() &&
782+
"deleting_ids should contain unique ids");
783+
784+
// Sort deleting_ids by id descending order
785+
std::sort(deleting_ids.begin(), deleting_ids.end(),
786+
[](const auto &a, const auto &b) { return a > b; });
787+
788+
// Delete vector from the frontend index.
789+
auto updated_ids = this->frontendIndex->deleteVectorAndGetUpdatedIds(label);
790+
// Record swaps in the journal.
791+
for (auto id : deleting_ids) {
792+
auto it = updated_ids.find(id);
793+
if (it != updated_ids.end()) {
794+
assert(id == it->first && "id in updated_ids should match the id in deleting_ids");
795+
auto newId = id;
796+
auto oldId = it->second.first;
797+
auto oldLabel = it->second.second;
798+
this->swaps_journal.emplace_back(oldLabel, oldId, newId);
799+
// Erase in assertion for debug compilation only
800+
// NOTE: There are extra braces to wrap comma C++ operator
801+
assert((updated_ids.erase(it), true));
802+
} else {
803+
// No swap, just delete is marked by oldId == newId == deleted id
804+
this->swaps_journal.emplace_back(label, id, id);
805+
}
806+
}
807+
808+
// After processing all deleting_ids, updated_ids should be empty in debug
809+
// compilation.
810+
assert(updated_ids.empty() &&
811+
"updated_ids should be empty after processing all deleting_ids");
812+
813+
return deleting_ids.size();
814+
}
815+
766816
int deleteVector(labelType label) override {
767817
int ret = 0;
768818
auto svs_index = GetSVSIndex();
@@ -776,47 +826,7 @@ class TieredSVSIndex : public VecSimTieredIndex<DataType, float> {
776826

777827
if (label_exists) {
778828
std::lock_guard lock(this->flatIndexGuard);
779-
780-
auto deleting_ids = this->frontendIndex->getElementIds(label);
781-
// assert if all elements of deleting_ids are unique
782-
assert(std::set<idType>(deleting_ids.begin(), deleting_ids.end()).size() ==
783-
deleting_ids.size() &&
784-
"deleting_ids should contain unique ids");
785-
786-
if (!deleting_ids.empty()) {
787-
// Sort deleting_ids by id descending order
788-
std::sort(deleting_ids.begin(), deleting_ids.end(),
789-
[](const auto &a, const auto &b) { return a > b; });
790-
791-
// Delete vector from the frontend index.
792-
auto updated_ids = this->frontendIndex->deleteVectorAndGetUpdatedIds(label);
793-
// Record swaps in the journal.
794-
for (auto id : deleting_ids) {
795-
auto it = updated_ids.find(id);
796-
if (it != updated_ids.end()) {
797-
assert(id == it->first &&
798-
"id in updated_ids should match the id in deleting_ids");
799-
auto newId = id;
800-
auto oldId = it->second.first;
801-
auto oldLabel = it->second.second;
802-
this->swaps_journal.emplace_back(oldLabel, oldId, newId);
803-
// Erase in assertion for debug compilation only
804-
// NOTE: There are extra braces to wrap comma C++ operator
805-
assert((updated_ids.erase(it), true));
806-
} else {
807-
// No swap, just delete is marked by oldId == newId == deleted id
808-
this->swaps_journal.emplace_back(label, id, id);
809-
}
810-
}
811-
812-
// After processing all deleting_ids, updated_ids should be empty in debug
813-
// compilation.
814-
assert(updated_ids.empty() &&
815-
"updated_ids should be empty after processing all deleting_ids");
816-
817-
ret = deleting_ids.size();
818-
assert(ret >= 1 && "unexpected flat index deleteVector result");
819-
}
829+
ret = this->deleteAndRecordSwaps_Unsafe(label);
820830
}
821831
{
822832
std::lock_guard lock(this->mainIndexGuard);

0 commit comments

Comments
 (0)