Skip to content

Commit a3accd8

Browse files
committed
SWIG python wrapper (should install)
1 parent aa24717 commit a3accd8

15 files changed

+4633
-2432
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
*o
22
culsp
3+
*swp

Makefile

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,11 @@ CC=gcc
2626
cuda_lib=/usr/local/cuda/lib64
2727
cuda_inc=/usr/local/cuda/include
2828

29+
python_inc=/usr/include/python2.7
2930
BLOCK_SIZE=256
3031

31-
NVCCFLAGS := --ptxas-options=-v -DBLOCK_SIZE=$(BLOCK_SIZE) -arch $(ARCH)
32-
CXXFLAGS := -DBLOCK_SIZE=$(BLOCK_SIZE)
32+
NVCCFLAGS := -Xcompiler -fpic --ptxas-options=-v -DBLOCK_SIZE=$(BLOCK_SIZE) -arch $(ARCH)
33+
CXXFLAGS := -fPIC -DBLOCK_SIZE=$(BLOCK_SIZE)
3334
LINK := -largtable2 -lm -lcudart -L$(cuda_lib)
3435

3536
all : $(EXECUTABLE)
@@ -41,5 +42,23 @@ $(EXECUTABLE): culsp.o periodogram.o
4142
periodogram.o : periodogram.cpp
4243
$(CXX) $(CXXFLAGS) -c -o $@ $^
4344

45+
# Second object built from periodogram.cpp, compiled with -Dmain=oldmain so
# that the identifier `main` is renamed to `oldmain` in this translation unit.
# This is the object listed as a prerequisite of the `python` target.
periodogram_nomain.o : periodogram.cpp
46+
$(CXX) -Dmain=oldmain $(CXXFLAGS) -c -o $@ $^
47+
4448
culsp.o : culsp.cu
45-
$(NVCC) $(NVCCFLAGS) -c -o $@ $^ -I$(cuda_inc)
49+
$(NVCC) -Xcompiler -fpic $(NVCCFLAGS) -c -o $@ $^ -I$(cuda_inc)
50+
51+
# Compile the wrapper source (presumably generated by SWIG from culsp.i --
# confirm) as position-independent code; the Python headers are taken from
# $(python_inc).
culsp_wrap.o : culsp_wrap.cpp
52+
$(CXX) -fPIC $(CXXFLAGS) -c -o $@ $^ -I$(python_inc)
53+
54+
# `python` and `clean` name actions rather than files. Declaring them phony
# keeps them runnable even when a file or directory of the same name exists.
.PHONY : python clean

# Link the wrapper object, the CUDA object and periodogram_nomain.o into the
# shared library _culspy.so.
python : culsp_wrap.o culsp.o periodogram_nomain.o
	$(CXX) -fPIC $(CXXFLAGS) -shared -o _culspy.so $^ $(LINK) -lpython2.7
#mkdir culspy
#mv culspy.py culspy/
#mv _culspy.so culspy/
#touch culspy/__init__.py

# Remove build products. The globs are anchored on the dot so that only
# object files, shared libraries and byte-compiled Python files are deleted,
# not every file whose name happens to end in "o" or "so".
clean :
	rm -f *.o *.so *.pyc $(EXECUTABLE)
	rm -rf culspy/ build/

culsp.cu

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
#include <stdlib.h>
2121
#include <string.h>
2222
#include <argtable2.h>
23-
23+
#include "culsp.h"
2424
#include "periodogram.h"
2525
#include "culsp_kernel.cu"
2626

@@ -36,9 +36,9 @@
3636

3737
// Forward declarations
3838

39-
void initialize (int, char **, char **, char **, float *, float *, int *);
40-
void initialize_cuda (int, int);
41-
void eval_LS_periodogram (int, int, float, float *, float *, float *);
39+
//void initialize (int, char **, char **, char **, float *, float *, int *);
40+
//void initialize_cuda (int, int);
41+
//void eval_LS_periodogram (int, int, float, float *, float *, float *);
4242

4343
// Main program
4444

@@ -60,6 +60,7 @@ main( int argc, char** argv)
6060

6161
float df;
6262
float *P;
63+
float minf = 0.0;
6364

6465
// Initialize
6566

@@ -79,15 +80,16 @@ main( int argc, char** argv)
7980

8081
// Initialize CUDA
8182

82-
initialize_cuda(device, N_f);
83+
initialize_cuda(device);
8384

8485
// Start the timer
8586

8687
double time_a = get_time();
8788

8889
// Evaluate the Lomb-Scargle periodogram
89-
90-
eval_LS_periodogram(N_t, N_f, df, t, X, P);
90+
// minf is left at 0 here for simplicity; a non-zero minimum frequency
91+
// is only passed in when this code is driven through the Python wrapper.
92+
eval_LS_periodogram(N_t, N_f, df, minf, t, X, P);
9193

9294
// Stop the timer
9395

@@ -166,9 +168,9 @@ initialize (int argc, char **argv, char **filename_in, char **filename_out,
166168
////
167169

168170
void
169-
initialize_cuda (int device, int N_f)
170-
{
171-
171+
initialize_cuda (int device)
172+
{
173+
172174
// Select the device
173175

174176
CUDA_CALL(cudaSetDevice(device));
@@ -187,7 +189,7 @@ initialize_cuda (int device, int N_f)
187189
////
188190

189191
void
190-
eval_LS_periodogram (int N_t, int N_f, float df,
192+
eval_LS_periodogram (int N_t, int N_f, float df, float minf,
191193
float *t, float *X, float *P)
192194
{
193195

@@ -209,13 +211,13 @@ eval_LS_periodogram (int N_t, int N_f, float df,
209211
dim3 grid_dim(N_f/BLOCK_SIZE, 1, 1);
210212
dim3 block_dim(BLOCK_SIZE, 1, 1);
211213

212-
printf("Grid of %d frequency blocks of size %d threads\n", N_f/BLOCK_SIZE, BLOCK_SIZE);
214+
//printf("Grid of %d frequency blocks of size %d threads\n", N_f/BLOCK_SIZE, BLOCK_SIZE);
213215

214216
// Launch the kernel
215217

216-
printf("Launching kernel...\n");
218+
//printf("Launching kernel...\n");
217219

218-
culsp_kernel<<<grid_dim, block_dim>>>(d_t, d_X, d_P, df, N_t);
220+
culsp_kernel<<<grid_dim, block_dim>>>(d_t, d_X, d_P, df, N_t, minf);
219221

220222
cudaError_t err = cudaGetLastError();
221223
if(err != cudaSuccess) {
@@ -226,7 +228,7 @@ eval_LS_periodogram (int N_t, int N_f, float df,
226228

227229
CUDA_CALL(cudaThreadSynchronize());
228230

229-
printf("Completed!\n");
231+
//printf("Completed!\n");
230232

231233
// Copy data from the device
232234

culsp.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
#ifndef CULSP_H
2+
#define CULSP_H
3+
// Start-up routine. The leading arguments are argc, argv, filename_in and
// filename_out; presumably it parses the command line -- confirm against the
// definition in culsp.cu.
void initialize(int, char **, char **, char **, float *, float *, int *);
4+
// Selects the CUDA device with the given index.
void initialize_cuda(int);
5+
// Evaluates the Lomb-Scargle periodogram.
// Arguments, in order: N_t, N_f, df, minf, t, X, P.
void eval_LS_periodogram(int, int, float, float, float *, float *, float *);
6+
#endif

culsp.i

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
// SWIG interface describing the `culspy` Python module.
%module culspy
2+
%include typemaps.i
3+
4+
// Declarations copied into the generated wrapper source so that the wrapped
// functions are declared when the wrapper is compiled.
%{
5+
extern void initialize_cuda(int);
6+
extern void set_frequency_params(int, float *, float, float, int *, float *);
7+
extern void eval_LS_periodogram(int, int, float,float, float *, float *, float *);
8+
%}
9+
10+
// Functions exposed to Python.
extern void initialize_cuda(int);
11+
12+
// Give parameters named `int *OUTPUT1` the same treatment as the stock
// `int *OUTPUT` typemap, so set_frequency_params hands both trailing pointer
// arguments back to Python as return values instead of taking them as inputs.
%apply int *OUTPUT { int *OUTPUT1 };
13+
extern void set_frequency_params(int, float *, float, float, int *OUTPUT1, float *OUTPUT);
14+
extern void eval_LS_periodogram(int, int, float,float, float *, float *, float *);
15+
16+
%inline %{

/* Allocate an uninitialised array of n floats on the C heap.
   Returns NULL when n is negative or the allocation fails.
   The caller owns the memory and releases it with free_float_array(). */
float *get_float_array(int n){
  if (n < 0) return NULL;
  return (float *)malloc((size_t)n * sizeof(float));
}

/* Release an array obtained from get_float_array(); NULL is accepted. */
void free_float_array(float *x){
  free(x);
}

/* Read element i of x. No bounds checking is performed. */
float get_val(float *x,int i){
  return x[i];
}

/* Write val to element i of x. No bounds checking is performed. */
void set_val(float *x, int i, float val){
  x[i] = val;
}

/* Expose the compile-time BLOCK_SIZE so Python can size the frequency grid. */
int get_block_size(){
  return BLOCK_SIZE;
}
%}

%pythoncode %{
BLOCK_SIZE = _culspy.get_block_size()

def _convert_to_c(arr):
    """Copy a Python sequence of numbers into a newly allocated C float array.

    The returned pointer must be released with _culspy.free_float_array().
    Raises MemoryError if the allocation fails.
    """
    n = len(arr)
    carr = _culspy.get_float_array(n)
    # SWIG hands a NULL pointer back to Python as None.
    if carr is None and n > 0:
        raise MemoryError("could not allocate %d floats" % n)
    try:
        for i, val in enumerate(arr):
            _culspy.set_val(carr, i, val)
    except Exception:
        # Do not leak the buffer when an element cannot be converted.
        _culspy.free_float_array(carr)
        raise
    return carr

def _convert_to_py(carr, N):
    """Return the first N elements of the C float array carr as a list."""
    return [ _culspy.get_val(carr, i) for i in range(N) ]

def _insettings(arr, settings):
    """Return, for each element of arr, whether it is contained in settings."""
    return [ v in settings for v in arr ]

def correct_nf(Nf):
    """Round Nf up to the next multiple of BLOCK_SIZE.

    The kernel is launched with N_f/BLOCK_SIZE blocks, so the number of
    frequencies has to be a whole number of blocks. A value that is already
    a multiple of BLOCK_SIZE is returned unchanged.
    """
    remainder = Nf % BLOCK_SIZE
    if remainder != 0:
        return Nf + (BLOCK_SIZE - remainder)
    return Nf


def LSP(t, x, f_over=1.0, f_high=1.0, minf=0.0, maxf=None, Nf=None ):
    """Evaluate the Lomb-Scargle periodogram of the samples (t, x).

    Parameters:
      t, x   -- equal-length sequences of sample times and sample values.
      f_over, f_high -- forwarded unchanged to set_frequency_params when the
                frequency grid is not fully specified by maxf and Nf.
      minf   -- offset added to every frequency.
      maxf   -- upper end of the frequency range; only used together with Nf.
      Nf     -- requested number of frequencies; rounded up to a multiple of
                BLOCK_SIZE.

    Returns (freqs, power), two lists with one entry per evaluated frequency.

    Raises ValueError when t and x differ in length or Nf is not positive,
    and MemoryError when a C buffer cannot be allocated.
    """
    Nt = len(t)
    if len(x) != Nt:
        # The C side reads Nt elements from both arrays.
        raise ValueError("t and x must have the same length")
    if Nf is not None and Nf < 1:
        raise ValueError("Nf must be a positive integer")

    ct = cx = cpower = None
    try:
        ct = _convert_to_c(t)
        cx = _convert_to_c(x)

        if maxf is not None and Nf is not None:
            Nf = correct_nf(Nf)
            # float() avoids integer division under Python 2.
            df = float(maxf - minf) / Nf
        else:
            # NOTE(review): a maxf given without Nf is ignored on this path.
            Nf0, df0 = _culspy.set_frequency_params(Nt, ct, f_over, f_high)
            if Nf is not None:
                Nf = correct_nf(Nf)
                df = float(Nf0 * df0) / Nf
            else:
                Nf = Nf0
                df = df0

        cpower = _culspy.get_float_array(Nf)
        if cpower is None and Nf > 0:
            raise MemoryError("could not allocate %d floats" % Nf)
        _culspy.eval_LS_periodogram(Nt, Nf, df, minf, ct, cx, cpower)

        # The kernel evaluates thread i at (i + 1) * df + minf, so the
        # frequency axis starts one step above minf.
        freqs = [ minf + df * (i + 1) for i in range(Nf) ]
        power = _convert_to_py(cpower, Nf)
    finally:
        # Release every C buffer allocated above, also on error.
        for buf in (ct, cx, cpower):
            if buf is not None:
                _culspy.free_float_array(buf)

    return freqs, power

%}

culsp_kernel.cu

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,15 +24,15 @@
2424

2525
__global__ void
2626
__launch_bounds__(BLOCK_SIZE)
27-
culsp_kernel(float *d_t, float *d_X, float *d_P, float df, int N_t)
27+
culsp_kernel(float *d_t, float *d_X, float *d_P, float df, int N_t, float minf)
2828
{
2929

3030
__shared__ float s_t[BLOCK_SIZE];
3131
__shared__ float s_X[BLOCK_SIZE];
3232

3333
// Calculate the frequency
3434

35-
float f = (blockIdx.x*BLOCK_SIZE+threadIdx.x+1)*df;
35+
float f = (blockIdx.x*BLOCK_SIZE+threadIdx.x+1)*df + minf;
3636

3737
// Calculate the various sums
3838

0 commit comments

Comments
 (0)