@@ -561,6 +561,50 @@ class TieredSVSIndex : public VecSimTieredIndex<DataType, float> {
561
561
}
562
562
563
563
private:
564
+ static void applySwapsToLabelsArray (std::vector<size_t > &labels,
565
+ const std::vector<swap_record> &swaps) {
566
+ // Enumerate journal and reflect swaps in the labels.
567
+ // The journal contains tuples of (label, oldId, newId).
568
+ // oldId is the index of the label in flat index before the swap.
569
+ // newId is the index of the label in flat index after the swap.
570
+ for (const auto &p : swaps) {
571
+ auto label = std::get<0 >(p);
572
+ auto oldId = std::get<1 >(p);
573
+ auto newId = std::get<2 >(p);
574
+
575
+ // If oldId is out of bounds - skip,
576
+ // but if newId is in - do no delete on newId.
577
+ if (oldId >= labels.size ()) {
578
+ if (newId < labels.size ()) {
579
+ labels[newId] = SKIP_LABEL;
580
+ }
581
+ continue ; // Next record.
582
+ }
583
+
584
+ // If label is SKIP_LABEL, it means that the vector was not moved but updated
585
+ // in-place. We do not need to remove it from labels, just skip it in
586
+ // deleteVector().
587
+ if (label == SKIP_LABEL) {
588
+ labels[oldId] = SKIP_LABEL;
589
+ continue ; // Next record.
590
+ }
591
+
592
+ // If recorded label does not match the label in labels,
593
+ // it means that another vector was moved to the old position - do not delete it.
594
+ if (label != labels[oldId]) {
595
+ labels[oldId] = SKIP_LABEL;
596
+ continue ;
597
+ }
598
+
599
+ // remove the old label from labels at position oldId
600
+ labels.erase (labels.begin () + oldId);
601
+ if (newId != oldId) { // swap mode
602
+ // Update the label at newId position
603
+ labels[newId] = label;
604
+ }
605
+ }
606
+ }
607
+
564
608
void updateSVSIndex () {
565
609
std::vector<labelType> labels_to_move;
566
610
std::vector<DataType> vectors_to_move;
@@ -596,46 +640,9 @@ class TieredSVSIndex : public VecSimTieredIndex<DataType, float> {
596
640
{ // lock frontend index for writing and delete moved vectors
597
641
std::lock_guard lock (this ->flatIndexGuard );
598
642
599
- // Enumerate journal and reflect swaps in the labels_to_move.
600
- // The journal contains tuples of (label, oldId, newId).
601
- // oldId is the index of the label in flat index before the swap.
602
- // newId is the index of the label in flat index after the swap.
603
- for (const auto &p : this ->swaps_journal ) {
604
- auto label = std::get<0 >(p);
605
- auto oldId = std::get<1 >(p);
606
- auto newId = std::get<2 >(p);
607
-
608
- // If oldId is out of bounds - skip,
609
- // but if newId is in - do no delete on newId.
610
- if (oldId >= labels_to_move.size ()) {
611
- if (newId < labels_to_move.size ()) {
612
- labels_to_move[newId] = SKIP_LABEL;
613
- }
614
- continue ; // Next record.
615
- }
616
-
617
- // If label is SKIP_LABEL, it means that the vector was not moved but updated
618
- // in-place. We do not need to remove it from labels_to_move, just skip it in
619
- // deleteVector().
620
- if (label == SKIP_LABEL) {
621
- labels_to_move[oldId] = SKIP_LABEL;
622
- continue ; // Next record.
623
- }
624
-
625
- // If recorded label does not match the label in labels_to_move,
626
- // it means that another vector was moved to the old position - do not delete it.
627
- if (label != labels_to_move[oldId]) {
628
- labels_to_move[oldId] = SKIP_LABEL;
629
- continue ;
630
- }
643
+ // Apply swaps from journal to labels_to_move to reflect changes made in meanwhile.
644
+ applySwapsToLabelsArray (labels_to_move, this ->swaps_journal );
631
645
632
- // remove the old label from labels_to_move at position oldId
633
- labels_to_move.erase (labels_to_move.begin () + oldId);
634
- if (newId != oldId) { // swap mode
635
- // Update the label at newId position
636
- labels_to_move[newId] = label;
637
- }
638
- }
639
646
// delete vectors from the frontend index in reverse order
640
647
// it increases the chance of avoiding swaps in the frontend index and performance
641
648
// improvement
@@ -763,6 +770,49 @@ class TieredSVSIndex : public VecSimTieredIndex<DataType, float> {
763
770
return ret;
764
771
}
765
772
773
+ int deleteAndRecordSwaps_Unsafe (labelType label) {
774
+ auto deleting_ids = this ->frontendIndex ->getElementIds (label);
775
+ if (deleting_ids.empty ()) {
776
+ // If there are no ids to delete, return 0.
777
+ return 0 ;
778
+ }
779
+
780
+ // assert if all elements of deleting_ids are unique
781
+ assert (std::set (deleting_ids.begin (), deleting_ids.end ()).size () == deleting_ids.size () &&
782
+ " deleting_ids should contain unique ids" );
783
+
784
+ // Sort deleting_ids by id descending order
785
+ std::sort (deleting_ids.begin (), deleting_ids.end (),
786
+ [](const auto &a, const auto &b) { return a > b; });
787
+
788
+ // Delete vector from the frontend index.
789
+ auto updated_ids = this ->frontendIndex ->deleteVectorAndGetUpdatedIds (label);
790
+ // Record swaps in the journal.
791
+ for (auto id : deleting_ids) {
792
+ auto it = updated_ids.find (id);
793
+ if (it != updated_ids.end ()) {
794
+ assert (id == it->first && " id in updated_ids should match the id in deleting_ids" );
795
+ auto newId = id;
796
+ auto oldId = it->second .first ;
797
+ auto oldLabel = it->second .second ;
798
+ this ->swaps_journal .emplace_back (oldLabel, oldId, newId);
799
+ // Erase in assertion for debug compilation only
800
+ // NOTE: There are extra braces to wrap comma C++ operator
801
+ assert ((updated_ids.erase (it), true ));
802
+ } else {
803
+ // No swap, just delete is marked by oldId == newId == deleted id
804
+ this ->swaps_journal .emplace_back (label, id, id);
805
+ }
806
+ }
807
+
808
+ // After processing all deleting_ids, updated_ids should be empty in debug
809
+ // compilation.
810
+ assert (updated_ids.empty () &&
811
+ " updated_ids should be empty after processing all deleting_ids" );
812
+
813
+ return deleting_ids.size ();
814
+ }
815
+
766
816
int deleteVector (labelType label) override {
767
817
int ret = 0 ;
768
818
auto svs_index = GetSVSIndex ();
@@ -776,47 +826,7 @@ class TieredSVSIndex : public VecSimTieredIndex<DataType, float> {
776
826
777
827
if (label_exists) {
778
828
std::lock_guard lock (this ->flatIndexGuard );
779
-
780
- auto deleting_ids = this ->frontendIndex ->getElementIds (label);
781
- // assert if all elements of deleting_ids are unique
782
- assert (std::set<idType>(deleting_ids.begin (), deleting_ids.end ()).size () ==
783
- deleting_ids.size () &&
784
- " deleting_ids should contain unique ids" );
785
-
786
- if (!deleting_ids.empty ()) {
787
- // Sort deleting_ids by id descending order
788
- std::sort (deleting_ids.begin (), deleting_ids.end (),
789
- [](const auto &a, const auto &b) { return a > b; });
790
-
791
- // Delete vector from the frontend index.
792
- auto updated_ids = this ->frontendIndex ->deleteVectorAndGetUpdatedIds (label);
793
- // Record swaps in the journal.
794
- for (auto id : deleting_ids) {
795
- auto it = updated_ids.find (id);
796
- if (it != updated_ids.end ()) {
797
- assert (id == it->first &&
798
- " id in updated_ids should match the id in deleting_ids" );
799
- auto newId = id;
800
- auto oldId = it->second .first ;
801
- auto oldLabel = it->second .second ;
802
- this ->swaps_journal .emplace_back (oldLabel, oldId, newId);
803
- // Erase in assertion for debug compilation only
804
- // NOTE: There are extra braces to wrap comma C++ operator
805
- assert ((updated_ids.erase (it), true ));
806
- } else {
807
- // No swap, just delete is marked by oldId == newId == deleted id
808
- this ->swaps_journal .emplace_back (label, id, id);
809
- }
810
- }
811
-
812
- // After processing all deleting_ids, updated_ids should be empty in debug
813
- // compilation.
814
- assert (updated_ids.empty () &&
815
- " updated_ids should be empty after processing all deleting_ids" );
816
-
817
- ret = deleting_ids.size ();
818
- assert (ret >= 1 && " unexpected flat index deleteVector result" );
819
- }
829
+ ret = this ->deleteAndRecordSwaps_Unsafe (label);
820
830
}
821
831
{
822
832
std::lock_guard lock (this ->mainIndexGuard );
0 commit comments