forked from nmeisburger/LSH-Tables
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
97897df
commit 4535f9f
Showing
7 changed files
with
338 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,128 @@ | ||
#include "LSH.h" | ||
|
||
/**
 * Builds the table set from the compile-time configuration.
 *
 * Reads NUM_TABLES, RESERVOIR_SIZE and RANGE_POW, then allocates L tables,
 * each an array of `range` (= 2^range_pow) default-constructed reservoirs.
 */
LSH::LSH() {
    L = NUM_TABLES;
    reservoir_size = RESERVOIR_SIZE;
    range_pow = RANGE_POW;
    // Shift an unsigned literal: the result is stored in an unsigned member and
    // a signed `1 << 31` would overflow.
    range = 1u << range_pow;

    reservoirs = new Reservoir *[L];
    for (unsigned int i = 0; i < L; i++) {
        reservoirs[i] = new Reservoir[range]();
    }
}

/**
 * Releases every table allocated by the constructor.
 *
 * The class declaration announces this destructor; without a definition the
 * tables could never be freed and destroying an LSH object would not link.
 */
LSH::~LSH() {
    for (unsigned int i = 0; i < L; i++) {
        delete[] reservoirs[i];
    }
    delete[] reservoirs;
}
|
||
void LSH::insert(unsigned int num_items, unsigned int *items, unsigned int *hashes) { | ||
#pragma omp parallel for default(none) shared(num_items, hashes, items) | ||
for (size_t n = 0; n < num_items; n++) { | ||
for (size_t table = 0; table < L; table++) { | ||
reservoirs[table][hashes[n * L + table]].add(items[n]); | ||
} | ||
} | ||
} | ||
|
||
void LSH::insert(unsigned int item, unsigned int *hashes) { | ||
for (size_t table = 0; table < L; table++) { | ||
reservoirs[table][hashes[table]].add(item); | ||
} | ||
} | ||
|
||
void LSH::retrieve(unsigned int num_query, unsigned int *hashes, unsigned int *results_buffer) { | ||
|
||
#pragma omp parallel for default(none) shared(num_query, hashes, results_buffer) | ||
for (size_t query = 0; query < num_query; query++) { | ||
for (size_t table = 0; table < L; table++) { | ||
size_t loc = query * L + table; | ||
reservoirs[table][hashes[loc]].retrieve(results_buffer + loc * reservoir_size); | ||
} | ||
} | ||
} | ||
|
||
void LSH::top_k(unsigned int num_query, unsigned int top_k, unsigned int *hashes, | ||
unsigned int *selection) { | ||
|
||
unsigned int *extracted_reservoirs = new unsigned int[num_query * L * reservoir_size]; | ||
|
||
this->retrieve(num_query, hashes, extracted_reservoirs); | ||
|
||
unsigned int block = L * reservoir_size; | ||
for (size_t query = 0; query < num_query; query++) { | ||
unsigned int *start = extracted_reservoirs + query * block; | ||
std::sort(start, start + block); | ||
std::vector<std::pair<unsigned int, unsigned int>> counts; | ||
unsigned int count = 0; | ||
unsigned int last = *start; | ||
for (size_t i = 0; i < block; i++) { | ||
if (last == start[i]) { | ||
count++; | ||
} else { | ||
if (last != EMPTY) { | ||
counts.push_back(std::make_pair(last, count)); | ||
} | ||
count = 1; | ||
last = start[i]; | ||
} | ||
} | ||
if (last != EMPTY) { | ||
counts.push_back(std::make_pair(last, count)); | ||
} | ||
|
||
std::sort(counts.begin(), counts.end(), | ||
[&counts](std::pair<int, int> a, std::pair<int, int> b) { | ||
return a.second > b.second; | ||
}); | ||
|
||
size_t k; | ||
for (k = 0; k < std::min(top_k, (unsigned int)counts.size()); k++) { | ||
selection[query * top_k + k] = counts[k].first; | ||
} | ||
for (; k < top_k; k++) { | ||
selection[query * top_k + k] = EMPTY; | ||
} | ||
} | ||
|
||
delete[] extracted_reservoirs; | ||
} | ||
|
||
void LSH::reset() { | ||
for (size_t t = 0; t < L; t++) { | ||
for (size_t r = 0; r < range; r++) { | ||
reservoirs[t][r].reset(); | ||
} | ||
} | ||
} | ||
|
||
void LSH::view() { | ||
for (size_t t = 0; t < L; t++) { | ||
printf("LSH Table %lu\n", t); | ||
for (size_t r = 0; r < range; r++) { | ||
reservoirs[t][r].view(); | ||
} | ||
printf("\n"); | ||
} | ||
} | ||
|
||
void LSH::add_random_items(unsigned int num_items, bool verbose) { | ||
|
||
unsigned int *items = new unsigned int[num_items]; | ||
unsigned int *hashes = new unsigned int[num_items * L]; | ||
|
||
for (size_t i = 0; i < num_items; i++) { | ||
items[i] = i; | ||
if (verbose) | ||
printf("Item: %lu -> { ", i); | ||
for (size_t h = 0; h < L; h++) { | ||
hashes[i * L + h] = rand() % range; | ||
if (verbose) | ||
printf("%lu ", hashes[i * L + h]); | ||
} | ||
if (verbose) | ||
printf("}\n"); | ||
} | ||
|
||
insert(num_items, items, hashes); | ||
|
||
delete[] items; | ||
delete[] hashes; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
#pragma once | ||
|
||
#include "Reservoir.h" | ||
#include <iostream> | ||
#include <omp.h> | ||
#include <vector> | ||
|
||
class LSH { | ||
private: | ||
unsigned int L; | ||
unsigned int reservoir_size; | ||
unsigned int range_pow; | ||
unsigned int range; | ||
Reservoir **reservoirs; | ||
|
||
public: | ||
LSH(); | ||
|
||
void insert(unsigned int num_items, unsigned int *items, unsigned int *hashes); | ||
|
||
void insert(unsigned int item, unsigned int *hashes); | ||
|
||
void retrieve(unsigned int num_query, unsigned int *hashes, unsigned int *results_buffer); | ||
|
||
void top_k(unsigned int num_query, unsigned int top_k, unsigned int *hashes, | ||
unsigned int *selection); | ||
|
||
void reset(); | ||
|
||
void view(); | ||
|
||
void add_random_items(unsigned int num_items, bool verbose); | ||
|
||
~LSH(); | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
# Builds the `lsh` executable from every .cpp and .c file in this directory.
# Objects are written to per-language directories created on demand.
TARGET := lsh

CPPOBJDIR = cppobjs

CPPSOURCES := $(wildcard *.cpp)
CPPOBJS := $(CPPSOURCES:%.cpp=$(CPPOBJDIR)/%.o)

COBJDIR = cobjs

CSOURCES := $(wildcard *.c)
COBJS := $(CSOURCES:%.c=$(COBJDIR)/%.o)

OBJS = $(CPPOBJS) $(COBJS)

OPT_FLAGS := -fno-strict-aliasing -O2 -fopenmp

INC := /usr/include/

LIB := -L/usr/lib64/
LIB += -fopenmp

# WARN_FLAGS is not set in this file; pass it on the command line if wanted,
# e.g. `make WARN_FLAGS="-Wall -Wextra"`.
CXXFLAGS := -m64 -DUNIX -std=c++11 $(WARN_FLAGS) $(OPT_FLAGS) -I$(INC)
CFLAGS := -m64 -DUNIX $(WARN_FLAGS) $(OPT_FLAGS) -I$(INC)

LDFLAGS := $(LIB)

.PHONY: clean

# Link every object, C objects included.
$(TARGET): $(OBJS)
	g++ -fPIC -o $(TARGET) $(OBJS) $(LDFLAGS)

# The object directories are order-only prerequisites (after `|`): they must
# exist before compiling, but their timestamps must not trigger rebuilds.
$(CPPOBJS): $(CPPOBJDIR)/%.o: %.cpp | $(CPPOBJDIR)
	@echo "compile $@ $<"
	g++ -fPIC $(CXXFLAGS) -c $< -o $@

$(COBJS): $(COBJDIR)/%.o: %.c | $(COBJDIR)
	@echo "compile $@ $<"
	gcc -fPIC $(CFLAGS) -c $< -o $@

$(CPPOBJDIR):
	@ mkdir -p $(CPPOBJDIR)

$(COBJDIR):
	@ mkdir -p $(COBJDIR)

clean:
	$(RM) $(TARGET) $(OBJS)
	$(RM) -r $(CPPOBJDIR)
	$(RM) -r $(COBJDIR)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
#include "Reservoir.h" | ||
|
||
/**
 * Creates an empty reservoir of RESERVOIR_SIZE slots, every slot set to EMPTY,
 * together with an initialized OpenMP lock.
 */
Reservoir::Reservoir() {
    count = 0;
    size = RESERVOIR_SIZE;

    lock = new omp_lock_t();
    omp_init_lock(lock);

    reservoir = new unsigned int[size];
    unsigned int *const end = reservoir + size;
    for (unsigned int *slot = reservoir; slot != end; ++slot) {
        *slot = EMPTY;
    }
}
|
||
void Reservoir::add(unsigned int item) { | ||
omp_set_lock(lock); | ||
if (count < size) { | ||
reservoir[count] = item; | ||
count++; | ||
} else { | ||
unsigned int loc = rand() % count; | ||
if (loc < size) { | ||
reservoir[loc] = item; | ||
} | ||
count++; | ||
} | ||
omp_unset_lock(lock); | ||
} | ||
|
||
/**
 * Copies all `size` slots (including EMPTY ones) into `buffer` while holding
 * the reservoir lock.
 *
 * @param buffer destination with room for `size` values
 */
void Reservoir::retrieve(unsigned int *buffer) {
    omp_set_lock(lock);
    for (unsigned int slot = 0; slot < size; ++slot) {
        buffer[slot] = reservoir[slot];
    }
    omp_unset_lock(lock);
}
|
||
unsigned int Reservoir::get_size() { return size; } | ||
|
||
unsigned int Reservoir::get_count() { return count; } | ||
|
||
void Reservoir::reset() { | ||
for (size_t i = 0; i < size; i++) { | ||
reservoir[i] = EMPTY; | ||
} | ||
count = 0; | ||
} | ||
|
||
void Reservoir::view() { | ||
printf("Reservoir [%d/%d] ", count, size); | ||
for (size_t i = 0; i < std::min(count, size); i++) { | ||
printf("%u ", reservoir[i]); | ||
} | ||
printf("\n"); | ||
} | ||
|
||
/** Destroys the OpenMP lock and frees the lock object and the slot array. */
Reservoir::~Reservoir() {
    omp_destroy_lock(lock);
    // The lock object itself was allocated with `new` in the constructor;
    // omp_destroy_lock only uninitializes it.
    delete lock;
    delete[] reservoir;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
#pragma once | ||
|
||
#include "config.h" | ||
#include <algorithm> | ||
#include <iostream> | ||
#include <omp.h> | ||
|
||
// Sentinel for a reservoir slot that holds no item. Slots are unsigned int, so
// storing or comparing this int -1 converts it to the largest unsigned value.
#define EMPTY -1 | ||
|
||
class Reservoir { | ||
private: | ||
unsigned int size; | ||
unsigned int count; | ||
omp_lock_t *lock; | ||
unsigned int *reservoir; | ||
|
||
public: | ||
Reservoir(unsigned int size); | ||
|
||
Reservoir(); | ||
|
||
void add(unsigned int item); | ||
|
||
void retrieve(unsigned int *buffer); | ||
|
||
unsigned int get_size(); | ||
|
||
unsigned int get_count(); | ||
|
||
void reset(); | ||
|
||
void view(); | ||
|
||
~Reservoir(); | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
// Number of hash tables; LSH::LSH() copies it into L.
#define NUM_TABLES 4 | ||
// Slots per reservoir; read by both LSH::LSH() and Reservoir::Reservoir().
#define RESERVOIR_SIZE 7 | ||
// log2 of the buckets per table; LSH::LSH() computes range = 1 << RANGE_POW.
#define RANGE_POW 2 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
#include "LSH.h" | ||
#include <iostream> | ||
|
||
/**
 * Smoke test: inserts nine items into the tables, prints the tables, then
 * prints the top-5 selection for two queries.
 */
int main() {
    // Lives until process exit.
    LSH *lsh = new LSH();

    // Nine item ids; three distinct values, each inserted three times.
    unsigned int i[] = {1, 1, 1, 2, 2, 2, 3, 3, 3};

    // One bucket index per (item, table): 9 items x 4 tables.
    unsigned int h[] = {0, 1, 2, 3, 0, 1, 2, 3, 3, 2, 1, 0, 3, 2, 1, 0, 3, 2,
                        1, 0, 1, 3, 2, 2, 1, 2, 3, 3, 3, 3, 2, 1, 1, 1, 2, 3};

    lsh->insert(9, i, h);

    lsh->view();

    // One bucket index per (query, table): 2 queries x 4 tables.
    unsigned int q[] = {0, 1, 2, 3, 1, 2, 0, 1};

    // 2 queries x top 5.
    unsigned int r[10];

    lsh->top_k(2, 5, q, r);

    // The counter must start at 0; it is also named so it no longer shadows
    // the item array `i`.
    for (int idx = 0; idx < 10; idx++) {
        std::cout << r[idx] << std::endl;
    }

    return 0;
}