forked from nmeisburger/LSH-Tables
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathLSH.cpp
128 lines (107 loc) · 3.7 KB
/
LSH.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
#include "LSH.h"
/**
 * Builds the table set from the compile-time configuration constants
 * NUM_TABLES, RESERVOIR_SIZE and RANGE_POW (defined outside this file).
 *
 * Each of the L tables is an array of `range` = 2^range_pow value-initialised
 * Reservoir objects, reachable as reservoirs[table][row].
 * NOTE(review): the arrays are raw owning allocations; their release is not
 * part of this section of the file — confirm a destructor frees them.
 */
LSH::LSH() {
    // Copy the configuration into the instance.
    L = NUM_TABLES;
    reservoir_size = RESERVOIR_SIZE;
    range_pow = RANGE_POW;

    // Number of rows per table.
    range = 1 << range_pow;

    // One row array per table.
    reservoirs = new Reservoir *[L];
    for (int table = 0; table < L; ++table) {
        reservoirs[table] = new Reservoir[range]();
    }
}
/**
 * Bulk insert: adds every item to one reservoir in each of the L tables.
 *
 * @param num_items number of entries in `items`.
 * @param items     item ids to insert.
 * @param hashes    flat, item-major array of num_items * L row indices; the
 *                  row for item n in table t is hashes[n * L + t].
 *
 * Items are split across OpenMP threads.
 * NOTE(review): different items may map to the same reservoir, so this relies
 * on Reservoir::add tolerating concurrent calls — confirm in Reservoir.
 */
void LSH::insert(unsigned int num_items, unsigned int *items, unsigned int *hashes) {
#pragma omp parallel for default(none) shared(num_items, hashes, items)
    for (size_t item_idx = 0; item_idx < num_items; ++item_idx) {
        // The L hashes belonging to this item are stored contiguously.
        const unsigned int *item_hashes = hashes + item_idx * L;
        for (size_t t = 0; t < L; ++t) {
            reservoirs[t][item_hashes[t]].add(items[item_idx]);
        }
    }
}
/**
 * Single-item insert: adds `item` to one reservoir in each of the L tables.
 *
 * @param item   item id to insert.
 * @param hashes L row indices, one per table; hashes[t] selects the row in
 *               table t.
 */
void LSH::insert(unsigned int item, unsigned int *hashes) {
    for (size_t t = 0; t < L; ++t) {
        auto &bucket = reservoirs[t][hashes[t]];
        bucket.add(item);
    }
}
/**
 * Copies out, for every query, the reservoir it hashes to in each table.
 *
 * @param num_query      number of queries.
 * @param hashes         flat, query-major array of num_query * L row indices;
 *                       the row for query q in table t is hashes[q * L + t].
 * @param results_buffer output; the reservoir for (q, t) is handed the address
 *                       results_buffer + (q * L + t) * reservoir_size.
 *                       Presumably each Reservoir::retrieve call writes
 *                       reservoir_size entries there, so the buffer needs
 *                       num_query * L * reservoir_size elements — confirm in
 *                       Reservoir.
 *
 * Queries are split across OpenMP threads; each (q, t) pair has its own
 * destination address.
 */
void LSH::retrieve(unsigned int num_query, unsigned int *hashes, unsigned int *results_buffer) {
#pragma omp parallel for default(none) shared(num_query, hashes, results_buffer)
    for (size_t q = 0; q < num_query; ++q) {
        for (size_t t = 0; t < L; ++t) {
            const size_t slot = q * L + t;
            unsigned int *out = results_buffer + slot * reservoir_size;
            reservoirs[t][hashes[slot]].retrieve(out);
        }
    }
}
/**
 * For each query, selects the item ids that occur most often across the L
 * reservoirs the query hashes to.
 *
 * @param num_query number of queries.
 * @param top_k     number of ids to report per query.
 * @param hashes    flat, query-major array of num_query * L row indices, in
 *                  the layout retrieve() reads (hashes[q * L + t]).
 * @param selection output of num_query * top_k ids; query q owns
 *                  selection[q * top_k .. q * top_k + top_k - 1]. Ids are
 *                  ordered by descending occurrence count, ties broken by
 *                  ascending id; unused trailing slots are set to EMPTY.
 */
void LSH::top_k(unsigned int num_query, unsigned int top_k, unsigned int *hashes,
                unsigned int *selection) {
    using Count = std::pair<unsigned int, unsigned int>;  // (item id, occurrences)

    const unsigned int empty_id = EMPTY;
    // Sizes are computed in size_t so large configurations cannot wrap.
    const size_t block = static_cast<size_t>(L) * reservoir_size;

    // Scratch space owned by a vector so it is released on every exit path.
    // Pre-filled with EMPTY so a slot retrieve() might leave untouched is
    // ignored instead of being read uninitialised.
    std::vector<unsigned int> extracted(static_cast<size_t>(num_query) * block, empty_id);
    this->retrieve(num_query, hashes, extracted.data());

    std::vector<Count> counts;  // reused across queries to avoid reallocating
    for (size_t query = 0; query < num_query; query++) {
        unsigned int *start = extracted.data() + query * block;
        unsigned int *end = start + block;

        // Sorting groups equal ids into runs; each run length is that id's count.
        std::sort(start, end);

        counts.clear();
        for (unsigned int *run = start; run != end;) {
            unsigned int *next = run;
            while (next != end && *next == *run) {
                ++next;
            }
            if (*run != empty_id) {
                counts.emplace_back(*run, static_cast<unsigned int>(next - run));
            }
            run = next;
        }

        // Only the first `found` entries need to be in order.
        const size_t found = std::min<size_t>(top_k, counts.size());
        std::partial_sort(counts.begin(), counts.begin() + found, counts.end(),
                          [](const Count &a, const Count &b) {
                              if (a.second != b.second) {
                                  return a.second > b.second;
                              }
                              return a.first < b.first;  // deterministic tie-break
                          });

        unsigned int *out = selection + query * top_k;
        size_t k = 0;
        for (; k < found; k++) {
            out[k] = counts[k].first;
        }
        for (; k < top_k; k++) {
            out[k] = empty_id;
        }
    }
}
void LSH::reset() {
for (size_t t = 0; t < L; t++) {
for (size_t r = 0; r < range; r++) {
reservoirs[t][r].reset();
}
}
}
/**
 * Debug dump to stdout: for each table prints a "LSH Table <index>" header,
 * then calls view() on each of its `range` reservoirs, then a blank line.
 */
void LSH::view() {
    for (size_t t = 0; t < L; t++) {
        // %zu is the printf conversion for size_t; %lu only matches on
        // platforms where size_t happens to be unsigned long.
        printf("LSH Table %zu\n", t);
        for (size_t r = 0; r < range; r++) {
            reservoirs[t][r].view();
        }
        printf("\n");
    }
}
/**
 * Inserts items 0 .. num_items-1, each with L row indices drawn from
 * rand() % range, through the bulk insert().
 *
 * @param num_items number of items to generate and insert.
 * @param verbose   when true, prints each item and its L hashes to stdout as
 *                  "Item: <id> -> { h0 h1 ... }".
 */
void LSH::add_random_items(unsigned int num_items, bool verbose) {
    // Vectors own the temporaries, so they are freed even if insert() throws;
    // the hash count is computed in size_t so it cannot wrap.
    std::vector<unsigned int> items(num_items);
    std::vector<unsigned int> hashes(static_cast<size_t>(num_items) * L);

    for (unsigned int i = 0; i < num_items; i++) {
        items[i] = i;
        if (verbose) {
            printf("Item: %u -> { ", i);
        }
        // The L hashes of item i are stored contiguously, as insert() expects.
        unsigned int *row = hashes.data() + static_cast<size_t>(i) * L;
        for (size_t h = 0; h < L; h++) {
            row[h] = rand() % range;
            if (verbose) {
                // Hashes are unsigned int, so the matching conversion is %u.
                printf("%u ", row[h]);
            }
        }
        if (verbose) {
            printf("}\n");
        }
    }

    insert(num_items, items.data(), hashes.data());
}