Skip to content

Commit 663ad68

Browse files
committed
Added robust bootstrapping algorithm. Also found new papers on using NFFT and/or the FASPER algorithm. CULSP IS NAIVE. CRAP.
1 parent 9776f79 commit 663ad68

13 files changed

+694
-89
lines changed

Makefile

+9-1
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ periodogram_nomain.o : periodogram.cpp
4646
$(CXX) -Dmain=oldmain $(CXXFLAGS) -c -o $@ $^
4747

4848
culsp.o : culsp.cu
49-
$(NVCC) -Xcompiler -fpic $(NVCCFLAGS) -c -o $@ $^ -I$(cuda_inc)
49+
$(NVCC) $(NVCCFLAGS) -c -o $@ $^ -I$(cuda_inc)
5050

5151
culsp_wrap.o : culsp_wrap.cpp
5252
$(CXX) -fPIC $(CXXFLAGS) -c -o $@ $^ -I$(python_inc)
@@ -58,7 +58,15 @@ python : culsp_wrap.o culsp.o periodogram_nomain.o
5858
#mv _culspy.so culspy/
5959
#touch culspy/__init__.py
6060

61+
#testminmax.o : testminmax.cu
62+
# $(NVCC) $(NVCCFLAGS) -c -o $@ $^ -I$(cuda_inc)
63+
64+
#testmax : testminmax.o
65+
# $(CXX) $(CXXFLAGS) -o $@ $^ -lm -lcudart -L$(cuda_lib)
66+
6167
clean : clean-python
6268
rm -f *o $(EXECUTABLE)
69+
#clean-testmax :
70+
# rm -f testmax testminmax.o
6371
clean-python:
6472
rm -r -f *pyc *so CuLSP* dist/ build/ culspy.py

README

+2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
Jan 20, 2016 -- John Hoffman
22

3+
Main python functions are LSP and LSPbootstrap
4+
35
Python wrappings + updated makefile!!
46

57
Modified code from original form to comply with

culsp.cu

+118-2
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,11 @@
2020
#include <stdlib.h>
2121
#include <string.h>
2222
#include <argtable2.h>
23-
#include "culsp.h"
23+
2424
#include "periodogram.h"
25+
#include "culsp.h"
2526
#include "culsp_kernel.cu"
27+
#include "minmax.cu"
2628

2729
// Wrapper macros
2830

@@ -34,6 +36,15 @@
3436
exit(EXIT_FAILURE); \
3537
}}
3638

39+
#define CUDA_ERR_CHECK() { \
40+
err = cudaGetLastError(); \
41+
if(err != cudaSuccess) { \
42+
fprintf(stderr, "Cuda error: kernel launch failed in file '%s' in line %i : %s.\n", \
43+
__FILE__, __LINE__, cudaGetErrorString(err)); \
44+
exit(EXIT_FAILURE); \
45+
}}
46+
47+
3748
// Forward declarations
3849

3950
//void initialize (int, char **, char **, char **, float *, float *, int *);
@@ -108,6 +119,7 @@ main( int argc, char** argv)
108119
free(t);
109120

110121
// Finish
122+
return 0;
111123

112124
}
113125

@@ -193,6 +205,7 @@ eval_LS_periodogram (int N_t, int N_f, float df, float minf,
193205
float *t, float *X, float *P)
194206
{
195207

208+
196209
// Allocate device memory and copy data over
197210

198211
float *d_t;
@@ -217,7 +230,7 @@ eval_LS_periodogram (int N_t, int N_f, float df, float minf,
217230

218231
//printf("Launching kernel...\n");
219232

220-
culsp_kernel<<<grid_dim, block_dim>>>(d_t, d_X, d_P, df, N_t, minf);
233+
culsp_kernel<<<grid_dim, block_dim>>>(d_t, d_X, d_P, df, N_t, N_f, minf);
221234

222235
cudaError_t err = cudaGetLastError();
223236
if(err != cudaSuccess) {
@@ -241,3 +254,106 @@ eval_LS_periodogram (int N_t, int N_f, float df, float minf,
241254
// Finish
242255

243256
}
257+
258+
void
259+
bootstrap_LS_periodogram(int N_t, int N_f, float df, float minf,
260+
float *t, float *X, float *max_heights, int N_bootstrap, int use_gpu_to_get_max){
261+
262+
263+
// Allocate device memory and copy data over
264+
265+
float *d_t, *d_X, *d_P;
266+
float *P, *gmax;
267+
int i, gd, gdm, gdm0;
268+
float val;
269+
270+
curandState *state;
271+
cudaError_t err;
272+
273+
CUDA_CALL(cudaMalloc((void**) &d_t, N_t*sizeof(float)));
274+
CUDA_CALL(cudaMalloc((void**) &d_X, N_t*sizeof(float)));
275+
CUDA_CALL(cudaMalloc((void**) &d_P, N_f*sizeof(float)));
276+
277+
CUDA_CALL(cudaMemcpy(d_t, t, N_t*sizeof(float), cudaMemcpyHostToDevice));
278+
CUDA_CALL(cudaMemcpy(d_X, X, N_t*sizeof(float), cudaMemcpyHostToDevice));
279+
280+
// Get N_bootstraps LSP; then get max_height of each of these.
281+
// This can be made faster by altering the bootstrap code to get
282+
// the max within the function, though this is much more complicated
283+
// should be small speed increase:
284+
// <LC> ~ 0.4 MB; transfer time wasted = 2 * (0.4/1000 GB) / (~15 GB/s PCIe3x16)
285+
// ~ 8E-4 seconds...maaaaaybe significant...
286+
// timing the results on
287+
288+
289+
gd = N_f/BLOCK_SIZE;
290+
if (gd * BLOCK_SIZE < N_f) gd += 1; // ensure we have enough blocks
291+
292+
dim3 grid_dim(gd, 1, 1);
293+
dim3 block_dim(BLOCK_SIZE, 1, 1);
294+
295+
// setup the random generator
296+
CUDA_CALL(cudaMalloc((void **) &state, gd*BLOCK_SIZE * sizeof(curandState)));
297+
setup_curand_kernel<<<grid_dim, block_dim>>>(state, time(NULL));
298+
299+
if (use_gpu_to_get_max){
300+
// allocate memory for the maximums array
301+
CUDA_CALL(cudaMalloc((void **) &gmax, gd * sizeof(float)));
302+
303+
} else {
304+
305+
//printf("USING CPU TO FIND MAX(P_LS)\n");
306+
P = (float *) malloc(N_f * sizeof(float));
307+
}
308+
309+
for(i=0; i<N_bootstrap; i++){
310+
311+
bootstrap_kernel<<<grid_dim, block_dim>>>(d_t, d_X, d_P, df, N_t, N_f, minf, state);
312+
//CUDA_ERR_CHECK();
313+
314+
if (use_gpu_to_get_max){
315+
// calculate the maximum.
316+
max_reduce<<<grid_dim, block_dim, BLOCK_SIZE * sizeof(float)>>>(d_P, gmax, N_f);
317+
318+
// Now reduce until only one block is needed.
319+
gdm = gd;
320+
while (gdm > 1){
321+
322+
gdm0 = gdm;
323+
gdm /= BLOCK_SIZE;
324+
if( gdm * BLOCK_SIZE < gdm0 ) gdm += 1;
325+
326+
dim3 grid_dim_max(gdm, 1, 1);
327+
328+
max_reduce<<<grid_dim_max, block_dim, BLOCK_SIZE*sizeof(float)>>>(gmax, gmax, gdm0);
329+
330+
}
331+
332+
//copy max(P) to the host
333+
CUDA_CALL(cudaMemcpy(&val, gmax, sizeof(float), cudaMemcpyDeviceToHost));
334+
335+
} else {
336+
337+
CUDA_CALL(cudaMemcpy(P, d_P, N_f*sizeof(float), cudaMemcpyDeviceToHost));
338+
//printf("CPUMAX");
339+
val = cpu_maxf(P, N_f);
340+
}
341+
342+
max_heights[i] = val;
343+
}
344+
345+
//CUDA_ERR_CHECK();
346+
347+
CUDA_CALL(cudaThreadSynchronize());
348+
349+
CUDA_CALL(cudaFree(d_P));
350+
CUDA_CALL(cudaFree(d_X));
351+
CUDA_CALL(cudaFree(d_t));
352+
if (use_gpu_to_get_max) {
353+
CUDA_CALL(cudaFree(state));
354+
CUDA_CALL(cudaFree(gmax));
355+
}
356+
357+
// Finish
358+
359+
}

culsp.h

+1
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,5 @@
33
void initialize(int, char **, char **, char **, float *, float *, int *);
44
void initialize_cuda(int);
55
void eval_LS_periodogram(int, int, float, float, float *, float *, float *);
6+
void bootstrap_LS_periodogram(int, int, float, float, float *, float *, float *, int, int);
67
#endif

culsp.i

+36
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,15 @@
55
extern void initialize_cuda(int);
66
extern void set_frequency_params(int, float *, float, float, int *, float *);
77
extern void eval_LS_periodogram(int, int, float,float, float *, float *, float *);
8+
extern void bootstrap_LS_periodogram(int, int, float,float, float *, float *, float *, int, int);
89
%}
910

1011
extern void initialize_cuda(int);
1112

1213
%apply int *OUTPUT { int *OUTPUT1 };
1314
extern void set_frequency_params(int, float *, float, float, int *OUTPUT1, float *OUTPUT);
1415
extern void eval_LS_periodogram(int, int, float,float, float *, float *, float *);
16+
extern void bootstrap_LS_periodogram(int, int, float,float, float *, float *, float *, int, int);
1517

1618
%inline %{
1719

@@ -80,4 +82,38 @@ def LSP(t, x, f_over=1.0, f_high=1.0, minf=0.0, maxf=None, Nf=None ):
8082

8183
return freqs, power
8284

85+
def LSPbootstrap(t, x, f_over=1.0, f_high=1.0, minf=0.0,
86+
maxf=None, Nf=None, Nbootstrap=100,
87+
use_gpu_to_get_max = True ):
88+
Nt = len(t)
89+
ct = _convert_to_c(t)
90+
cx = _convert_to_c(x)
91+
92+
if use_gpu_to_get_max: gpumax = 1
93+
else: gpumax = 0
94+
95+
if maxf is not None and Nf is not None:
96+
Nf = correct_nf(Nf)
97+
df = (maxf - minf)/Nf
98+
else:
99+
ct = _convert_to_c(t)
100+
Nf0, df0 = _culspy.set_frequency_params(Nt, ct, f_over, f_high)
101+
if not Nf is None:
102+
Nf = correct_nf(Nf)
103+
df = (Nf0 * df0)/Nf
104+
else:
105+
Nf = Nf0
106+
df = df0
107+
108+
cmax_heights = _culspy.get_float_array(Nbootstrap)
109+
_culspy.bootstrap_LS_periodogram(Nt, Nf, df, minf, ct, cx,
110+
cmax_heights, Nbootstrap, gpumax)
111+
112+
max_heights = _convert_to_py(cmax_heights, Nbootstrap)
113+
114+
return max_heights
115+
116+
117+
118+
83119
%}

0 commit comments

Comments
 (0)