Skip to content

Commit

Permalink
init
Browse files Browse the repository at this point in the history
  • Loading branch information
nmeisburger committed Apr 8, 2020
1 parent 97897df commit 4535f9f
Show file tree
Hide file tree
Showing 7 changed files with 338 additions and 0 deletions.
128 changes: 128 additions & 0 deletions LSH.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
#include "LSH.h"

/**
 * Builds the LSH index from the compile-time settings in config.h.
 *
 * Allocates L tables; each table is an array of `range` (= 2^range_pow)
 * default-constructed reservoirs, addressed as reservoirs[table][bucket].
 */
LSH::LSH() {
    L = NUM_TABLES;
    reservoir_size = RESERVOIR_SIZE;
    range_pow = RANGE_POW;
    // Shift an unsigned literal so the result is well defined for any
    // range_pow below the bit width of unsigned int.
    range = 1u << range_pow;

    reservoirs = new Reservoir *[L];
    for (unsigned int i = 0; i < L; i++) {
        reservoirs[i] = new Reservoir[range]();
    }
}

/**
 * Releases every table allocated by the constructor.
 *
 * The destructor is declared in LSH.h; without a definition the tables could
 * never be freed and any destruction of an LSH object would fail to link.
 */
LSH::~LSH() {
    for (unsigned int i = 0; i < L; i++) {
        delete[] reservoirs[i];
    }
    delete[] reservoirs;
}

/**
 * Bulk insert: adds each of the `num_items` items to one reservoir per table.
 *
 * @param num_items number of entries in `items`
 * @param items     item ids to insert
 * @param hashes    flattened bucket indices; the bucket of item n in table t
 *                  is read from hashes[n * L + t]
 *
 * Items are processed in an OpenMP parallel loop; each Reservoir::add call
 * takes that reservoir's own lock.
 */
void LSH::insert(unsigned int num_items, unsigned int *items, unsigned int *hashes) {
#pragma omp parallel for default(none) shared(num_items, hashes, items)
    for (size_t idx = 0; idx < num_items; idx++) {
        // Offset of this item's row of L bucket indices.
        const size_t row = idx * L;
        for (size_t t = 0; t < L; t++) {
            const unsigned int bucket = hashes[row + t];
            reservoirs[t][bucket].add(items[idx]);
        }
    }
}

void LSH::insert(unsigned int item, unsigned int *hashes) {
for (size_t table = 0; table < L; table++) {
reservoirs[table][hashes[table]].add(item);
}
}

/**
 * Copies, for every query and every table, the contents of the reservoir the
 * query hashes to.
 *
 * @param num_query      number of queries
 * @param hashes         flattened bucket indices; hashes[q * L + t] is the
 *                       bucket of query q in table t
 * @param results_buffer output; the reservoir for (q, t) is written to the
 *                       reservoir_size slots starting at
 *                       (q * L + t) * reservoir_size
 */
void LSH::retrieve(unsigned int num_query, unsigned int *hashes, unsigned int *results_buffer) {

#pragma omp parallel for default(none) shared(num_query, hashes, results_buffer)
    for (size_t q = 0; q < num_query; q++) {
        const size_t base = q * L;
        for (size_t t = 0; t < L; t++) {
            const size_t slot = base + t;
            unsigned int *dest = results_buffer + slot * reservoir_size;
            reservoirs[t][hashes[slot]].retrieve(dest);
        }
    }
}

/**
 * For each query, returns the ids that occur most often across the L
 * reservoirs the query hashes to.
 *
 * @param num_query number of queries
 * @param top_k     number of result slots per query
 * @param hashes    flattened bucket indices, hashes[q * L + t]
 * @param selection output of num_query * top_k entries; row q holds the ids
 *                  ordered by descending occurrence count, padded with EMPTY
 *                  when fewer than top_k distinct ids were found
 *
 * The relative order of ids with equal counts is whatever std::sort yields.
 */
void LSH::top_k(unsigned int num_query, unsigned int top_k, unsigned int *hashes,
                unsigned int *selection) {

    const unsigned int empty = static_cast<unsigned int>(EMPTY);

    // Widen before multiplying so large batches do not overflow 32 bits.
    const size_t block = static_cast<size_t>(L) * reservoir_size;

    // Vector-owned scratch space: released even if a push_back below throws.
    std::vector<unsigned int> extracted_reservoirs(static_cast<size_t>(num_query) * block);

    this->retrieve(num_query, hashes, extracted_reservoirs.data());

    std::vector<std::pair<unsigned int, unsigned int>> counts;
    for (size_t query = 0; query < num_query; query++) {
        unsigned int *start = extracted_reservoirs.data() + query * block;

        // Sorting groups equal ids together so they can be run-length counted.
        std::sort(start, start + block);

        counts.clear();
        if (block > 0) { // never read *start from an empty block
            unsigned int count = 0;
            unsigned int last = *start;
            for (size_t i = 0; i < block; i++) {
                if (last == start[i]) {
                    count++;
                } else {
                    if (last != empty) {
                        counts.push_back(std::make_pair(last, count));
                    }
                    count = 1;
                    last = start[i];
                }
            }
            // Flush the final run; unused slots (EMPTY) are not candidates.
            if (last != empty) {
                counts.push_back(std::make_pair(last, count));
            }
        }

        // Most frequent first. Compare the unsigned counts directly rather
        // than through a conversion to pair<int, int>.
        std::sort(counts.begin(), counts.end(),
                  [](const std::pair<unsigned int, unsigned int> &a,
                     const std::pair<unsigned int, unsigned int> &b) {
                      return a.second > b.second;
                  });

        unsigned int *out = selection + query * top_k;
        const size_t found = std::min<size_t>(top_k, counts.size());
        size_t k = 0;
        for (; k < found; k++) {
            out[k] = counts[k].first;
        }
        for (; k < top_k; k++) {
            out[k] = empty;
        }
    }
}

void LSH::reset() {
for (size_t t = 0; t < L; t++) {
for (size_t r = 0; r < range; r++) {
reservoirs[t][r].reset();
}
}
}

/**
 * Prints every table to stdout: a "LSH Table <index>" header, then one line
 * per reservoir (via Reservoir::view), then a blank line.
 */
void LSH::view() {
    for (size_t t = 0; t < L; t++) {
        // %zu is the printf conversion for size_t; %lu only matches on
        // platforms where size_t happens to be unsigned long.
        printf("LSH Table %zu\n", t);
        for (size_t r = 0; r < range; r++) {
            reservoirs[t][r].view();
        }
        printf("\n");
    }
}

/**
 * Inserts items 0 .. num_items-1, each with L bucket indices drawn from
 * rand() % range.
 *
 * @param num_items number of items to generate and insert
 * @param verbose   when true, prints each item with its generated buckets
 */
void LSH::add_random_items(unsigned int num_items, bool verbose) {

    // Vectors own the scratch buffers, so no manual delete[] is needed.
    std::vector<unsigned int> items(num_items);
    std::vector<unsigned int> hashes(static_cast<size_t>(num_items) * L);

    for (unsigned int i = 0; i < num_items; i++) {
        items[i] = i;
        if (verbose)
            printf("Item: %u -> { ", i);
        for (unsigned int h = 0; h < L; h++) {
            const unsigned int bucket = rand() % range;
            hashes[static_cast<size_t>(i) * L + h] = bucket;
            // The value is an unsigned int, so it must be printed with %u;
            // %lu would read an argument of the wrong width.
            if (verbose)
                printf("%u ", bucket);
        }
        if (verbose)
            printf("}\n");
    }

    insert(num_items, items.data(), hashes.data());
}
35 changes: 35 additions & 0 deletions LSH.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#pragma once

#include "Reservoir.h"
#include <iostream>
#include <omp.h>
#include <vector>

/**
 * Locality-sensitive-hashing index: L tables of `range` reservoirs each.
 *
 * Callers supply precomputed bucket indices; this class only stores item ids
 * in the selected reservoirs and reads them back.
 */
class LSH {
  private:
    unsigned int L;              // number of hash tables
    unsigned int reservoir_size; // slots per reservoir
    unsigned int range_pow;      // log2 of the number of buckets per table
    unsigned int range;          // buckets per table (1 << range_pow)
    Reservoir **reservoirs;      // reservoirs[table][bucket], owned

  public:
    LSH();

    // The class owns raw heap arrays; a member-wise copy would alias them,
    // so copying is disabled.
    LSH(const LSH &) = delete;
    LSH &operator=(const LSH &) = delete;

    /** Inserts num_items items; hashes[n * L + t] is item n's bucket in table t. */
    void insert(unsigned int num_items, unsigned int *items, unsigned int *hashes);

    /** Inserts one item; hashes[t] is its bucket in table t. */
    void insert(unsigned int item, unsigned int *hashes);

    /** Copies the reservoir hit by each (query, table) pair into results_buffer. */
    void retrieve(unsigned int num_query, unsigned int *hashes, unsigned int *results_buffer);

    /** Writes, per query, the top_k most frequently retrieved ids into selection. */
    void top_k(unsigned int num_query, unsigned int top_k, unsigned int *hashes,
               unsigned int *selection);

    /** Resets every reservoir. */
    void reset();

    /** Prints every table to stdout. */
    void view();

    /** Inserts ids 0..num_items-1 with random bucket indices. */
    void add_random_items(unsigned int num_items, bool verbose);

    ~LSH();
};
49 changes: 49 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Builds the `lsh` executable from every .cpp and .c file in this directory.
# Objects go to cppobjs/ and cobjs/ respectively.
TARGET := lsh

CPPOBJDIR = cppobjs

CPPSOURCES := $(wildcard *.cpp)
CPPOBJS := $(CPPSOURCES:%.cpp=$(CPPOBJDIR)/%.o)

COBJDIR = cobjs

CSOURCES := $(wildcard *.c)
COBJS := $(CSOURCES:%.c=$(COBJDIR)/%.o)

OBJS = $(CPPOBJS) $(COBJS)

# WARN_FLAGS is used by CXXFLAGS/CFLAGS below; give it a default so the
# reference is not silently empty. Override from the command line if needed.
WARN_FLAGS ?= -Wall -Wextra

OPT_FLAGS := -fno-strict-aliasing -O2 -fopenmp

INC := /usr/include/

LIB := -L/usr/lib64/
LIB += -fopenmp

CXXFLAGS := -m64 -DUNIX -std=c++11 $(WARN_FLAGS) $(OPT_FLAGS) -I$(INC)
CFLAGS := -m64 -DUNIX $(WARN_FLAGS) $(OPT_FLAGS) -I$(INC)

LDFLAGS := $(LIB)

.PHONY: clean

# Link every object (C++ and C), not only the C++ ones.
$(TARGET): $(CPPOBJDIR) $(COBJDIR) $(OBJS)
	g++ -fPIC -o $(TARGET) $(OBJS) $(LDFLAGS)

$(CPPOBJS): $(CPPOBJDIR)/%.o: %.cpp
	@echo "compile $@ $<"
	g++ -fPIC $(CXXFLAGS) -c $< -o $@

$(COBJS): $(COBJDIR)/%.o: %.c
	@echo "compile $@ $<"
	gcc -fPIC $(CFLAGS) -c $< -o $@

$(CPPOBJDIR):
	@ mkdir -p $(CPPOBJDIR)

$(COBJDIR):
	@ mkdir -p $(COBJDIR)

# The object list variable is OBJS; the original referenced an undefined OBJ.
clean:
	$(RM) $(TARGET) $(OBJS)
	$(RM) -rf $(CPPOBJDIR)
	$(RM) -rf $(COBJDIR)
60 changes: 60 additions & 0 deletions Reservoir.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#include "Reservoir.h"

/**
 * Creates an empty reservoir with RESERVOIR_SIZE slots.
 *
 * Allocates and initialises the OpenMP lock, allocates the slot array, and
 * marks every slot EMPTY.
 */
Reservoir::Reservoir() : size(RESERVOIR_SIZE), count(0), lock(new omp_lock_t()) {
    omp_init_lock(lock);

    reservoir = new unsigned int[size];

    unsigned int *slot = reservoir;
    unsigned int *const end = reservoir + size;
    while (slot != end) {
        *slot = EMPTY;
        ++slot;
    }
}

/**
 * Offers one item to the reservoir (reservoir sampling).
 *
 * While fewer than `size` items have been seen the item is appended.
 * Afterwards it replaces a random slot with probability size / n, where n is
 * the number of items seen including this one. The whole operation runs
 * under the reservoir's lock.
 *
 * @param item id to add
 */
void Reservoir::add(unsigned int item) {
    omp_set_lock(lock);
    if (count < size) {
        reservoir[count] = item;
    } else {
        // This item is number count + 1 in the stream, so the draw must be
        // over count + 1 outcomes; drawing over `count` would always accept
        // the first overflow item. Widening keeps the modulus non-zero even
        // when size == 0 or count is at its maximum.
        // NOTE(review): rand() yields at most RAND_MAX, so the draw is not
        // uniform once count exceeds that value.
        const unsigned long long seen = static_cast<unsigned long long>(count) + 1;
        const unsigned long long loc = static_cast<unsigned long long>(rand()) % seen;
        if (loc < size) {
            reservoir[loc] = item;
        }
    }
    count++;
    omp_unset_lock(lock);
}

/**
 * Copies all `size` slots (including EMPTY ones) into `buffer` while holding
 * the reservoir's lock.
 *
 * @param buffer destination with room for at least `size` values
 */
void Reservoir::retrieve(unsigned int *buffer) {
    omp_set_lock(lock);
    for (unsigned int slot = 0; slot < size; ++slot) {
        buffer[slot] = reservoir[slot];
    }
    omp_unset_lock(lock);
}

unsigned int Reservoir::get_size() { return size; }

unsigned int Reservoir::get_count() { return count; }

/**
 * Marks every slot EMPTY and sets the seen-item counter back to zero.
 * Does not take the reservoir's lock.
 */
void Reservoir::reset() {
    unsigned int *slot = reservoir;
    unsigned int *const end = reservoir + size;
    while (slot != end) {
        *slot = EMPTY;
        ++slot;
    }
    count = 0;
}

/**
 * Prints "Reservoir [count/size] " followed by the occupied slots and a
 * newline to stdout.
 */
void Reservoir::view() {
    // count and size are unsigned, so they are printed with %u.
    printf("Reservoir [%u/%u] ", count, size);

    // Only the first min(count, size) slots have been written by add().
    const unsigned int filled = std::min(count, size);
    for (unsigned int i = 0; i < filled; i++) {
        printf("%u ", reservoir[i]);
    }
    printf("\n");
}

/**
 * Destroys the OpenMP lock and frees everything the constructor allocated.
 */
Reservoir::~Reservoir() {
    omp_destroy_lock(lock);
    // The lock object itself was allocated with `new` in the constructor;
    // omp_destroy_lock only uninitialises it, so it must also be deleted.
    delete lock;
    delete[] reservoir;
}
35 changes: 35 additions & 0 deletions Reservoir.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#pragma once

#include "config.h"
#include <algorithm>
#include <iostream>
#include <omp.h>

#define EMPTY -1

/**
 * Fixed-capacity sample of item ids guarded by an OpenMP lock.
 *
 * Unused slots hold EMPTY.
 */
class Reservoir {
  private:
    unsigned int size;       // number of slots
    unsigned int count;      // items offered via add() since the last reset
    omp_lock_t *lock;        // owned; guards add() and retrieve()
    unsigned int *reservoir; // owned array of `size` slots

  public:
    // NOTE(review): declared, but no definition is visible in Reservoir.cpp.
    Reservoir(unsigned int size);

    Reservoir();

    // The class owns raw pointers released in the destructor; a member-wise
    // copy would free them twice, so copying is disabled.
    Reservoir(const Reservoir &) = delete;
    Reservoir &operator=(const Reservoir &) = delete;

    /** Offers one item to the reservoir. */
    void add(unsigned int item);

    /** Copies all `size` slots into buffer. */
    void retrieve(unsigned int *buffer);

    unsigned int get_size();

    unsigned int get_count();

    /** Marks every slot EMPTY and zeroes the counter. */
    void reset();

    /** Prints the occupied slots to stdout. */
    void view();

    ~Reservoir();
};
3 changes: 3 additions & 0 deletions config.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Compile-time settings for the LSH index.

// Number of hash tables (LSH::L).
#define NUM_TABLES 4
// Number of slots in each reservoir.
#define RESERVOIR_SIZE 7
// log2 of the number of buckets per table; LSH uses 1 << RANGE_POW buckets.
#define RANGE_POW 2
28 changes: 28 additions & 0 deletions main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#include "LSH.h"
#include <iostream>

/**
 * Small demo: inserts nine items into the index, prints the tables, then
 * prints the top-5 selection for two queries.
 */
int main() {

    // NOTE(review): lsh is never freed; it lives until the process exits.
    LSH *lsh = new LSH();

    // Item ids, one per inserted entry.
    unsigned int i[] = {1, 1, 1, 2, 2, 2, 3, 3, 3};

    // Four bucket indices (one per table) for each of the nine items.
    unsigned int h[] = {0, 1, 2, 3, 0, 1, 2, 3, 3, 2, 1, 0, 3, 2, 1, 0, 3, 2,
                        1, 0, 1, 3, 2, 2, 1, 2, 3, 3, 3, 3, 2, 1, 1, 1, 2, 3};

    lsh->insert(9, i, h);

    lsh->view();

    // Bucket indices for two queries, four tables each.
    unsigned int q[] = {0, 1, 2, 3, 1, 2, 0, 1};

    // 2 queries * top_k of 5 result slots.
    unsigned int r[10];

    lsh->top_k(2, 5, q, r);

    // The loop index must start at 0; it was previously left uninitialised
    // and also shadowed the item array `i`.
    for (int idx = 0; idx < 10; idx++) {
        std::cout << r[idx] << std::endl;
    }

    return 0;
}

0 comments on commit 4535f9f

Please sign in to comment.