Skip to content

Commit 1390711

Browse files
committed
Add cupy example and switch to perf_counter().
1 parent b67a249 commit 1390711

File tree

7 files changed

+138
-36
lines changed

7 files changed

+138
-36
lines changed

README.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,7 @@ documentation.
120120
TODO
121121
====
122122

123+
- numba: https://numba.pydata.org/
123124
- picking a license
124125
- badge for google slides
125126
- change documentation links to main branch
@@ -128,6 +129,5 @@ TODO
128129
- matplotlib, pytest, black in dev install
129130
- manifest file to not include file in package
130131
- GitHub Actions for releasing to PyPI when the version changes
131-
- cupy for GPU
132132
- pytorch compatible
133133
- Cython / C++

examples/cupy_fft.py

+95
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
"""
2+
3+
CuPy Example
4+
5+
Installing:
6+
- check the CUDA version, e.g. from a terminal run: `nvcc --version` or `nvidia-smi`
7+
- install the corresponding version of CuPy, e.g. `pip install cupy-cuda11x`
8+
9+
Installation page: https://docs.cupy.dev/en/stable/install.html
10+
11+
"""
12+
13+
from importlib import util
14+
import os
15+
import numpy as np
16+
import scipy
17+
import time
18+
19+
try:
20+
import cupy as cp
21+
import cupyx
22+
23+
CUPY_AVAILABLE = True
24+
except ImportError:
25+
CUPY_AVAILABLE = False
26+
27+
28+
def get_array_module(x):
    """
    Select the numerical module that matches the input array.

    Parameters
    ----------
    x : :obj:`numpy.ndarray` or :obj:`cupy.ndarray`
        Array

    Returns
    -------
    mod : module
        Result of ``cp.get_array_module(x)`` when CuPy was imported
        successfully, otherwise :mod:`numpy`.
    """
    if not CUPY_AVAILABLE:
        # CuPy could not be imported, so NumPy is the only candidate.
        return np
    return cp.get_array_module(x)
45+
46+
47+
def fft2(x):
    """
    Applies correct fft method based on input.

    Parameters
    ----------
    x : :obj:`numpy.ndarray` or :obj:`cupy.ndarray`
        Array

    Returns
    -------
    out : :obj:`numpy.ndarray` or :obj:`cupy.ndarray`
        Output of ``scipy.fft.fft2(x)`` when `x` is handled by :mod:`numpy`,
        otherwise output of ``cupyx.scipy.fft.fft2(x)``.
    """
    # Modules are singletons, so identity is the right comparison.
    if get_array_module(x) is np:
        # Import the submodule explicitly: a bare ``import scipy`` may not
        # expose ``scipy.fft`` on every SciPy release.
        import scipy.fft

        return scipy.fft.fft2(x)

    # Only reached when CuPy is installed; import the FFT submodule
    # explicitly, as shown in the CuPy documentation.
    import cupyx.scipy.fft

    return cupyx.scipy.fft.fft2(x)
66+
67+
68+
# compare FFT computation
n = 1024
n_trials = 100
x = np.random.rand(n, n)

if CUPY_AVAILABLE:
    x_gpu = cp.asarray(x)
    print(x_gpu.device)
else:
    x_gpu = x
    print("Cupy not available. Using numpy instead.")

# numpy
start = time.perf_counter()
for _ in range(n_trials):
    fft2(x)
time_cpu = time.perf_counter() - start
print(f"CPU processing took {time_cpu} seconds")

# cupy
if CUPY_AVAILABLE:
    # GPU work is queued asynchronously: drain anything still pending
    # (e.g. the host-to-device copy) before starting the clock.
    cp.cuda.Stream.null.synchronize()
start = time.perf_counter()
for _ in range(n_trials):
    fft2(x_gpu)
if CUPY_AVAILABLE:
    # Wait for the queued FFTs to actually finish; otherwise the timer
    # may stop before the GPU has completed the work.
    cp.cuda.Stream.null.synchronize()
time_gpu = time.perf_counter() - start
print(f"GPU processing took {time_gpu} seconds")

# speed-up
print(f"Speed-up: {time_cpu / time_gpu}")

examples/joblib_example.py renamed to examples/joblib_parallel.py

+11-5
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,16 @@
55
66
"""
77

8-
from joblib import Parallel, delayed
8+
99
import numpy as np
1010
import time
1111

12+
try:
    from joblib import Parallel, delayed
except ImportError:
    print("Install joblib to run this example")
    # ``exit()`` is injected by the ``site`` module for interactive use and
    # is not guaranteed to exist (e.g. under ``python -S``); raising
    # SystemExit ends the script with the same (zero) exit status.
    raise SystemExit
17+
1218

1319
def f(seed, n, proc_time):
1420
np.random.seed(seed)
@@ -28,17 +34,17 @@ def f(seed, n, proc_time):
2834
# Compare processing time for serial and parallel processing
2935

3036
# Serial processing
31-
start = time.time()
37+
start = time.perf_counter()
3238
outputs_ser = []
3339
for seed in range(n_exp):
3440
outputs_ser.append(f(seed, n, proc_time))
35-
serial_time = time.time() - start
41+
serial_time = time.perf_counter() - start
3642
print(f"Serial processing took {serial_time} seconds")
3743

3844
# Parallel processing
39-
start = time.time()
45+
start = time.perf_counter()
4046
outputs_par = Parallel(n_jobs=n_cpu)(delayed(f)(seed, n, proc_time) for seed in range(n_exp))
41-
parallel_time = time.time() - start
47+
parallel_time = time.perf_counter() - start
4248
print(f"Parallel processing took {parallel_time} seconds")
4349

4450
# Speed-up

examples/numpy_speedup_examples.py renamed to examples/numpy_speedup.py

+16-16
Original file line numberDiff line numberDiff line change
@@ -20,20 +20,20 @@
2020
b = np.random.randn(n)
2121

2222
# Nonvectorized
23-
start = time.time()
23+
start = time.perf_counter()
2424
for _ in range(n_trials):
2525
c = []
2626
for i in range(len(a)):
2727
c.append(a[i] + b[i])
2828
c = np.array(c)
29-
nonvectorized_time = (time.time() - start) / n_trials
29+
nonvectorized_time = (time.perf_counter() - start) / n_trials
3030
print(f"Nonvectorized time: {nonvectorized_time} seconds")
3131

3232
# Vectorized
33-
start = time.time()
33+
start = time.perf_counter()
3434
for _ in range(n_trials):
3535
c_vec = a + b
36-
vectorized_time = (time.time() - start) / n_trials
36+
vectorized_time = (time.perf_counter() - start) / n_trials
3737
print(f"Vectorized time: {vectorized_time} seconds")
3838

3939
assert np.allclose(c, c_vec)
@@ -47,20 +47,20 @@
4747
b = np.random.randn(n) # add a row vector to each row of a
4848

4949
# Nonvectorized
50-
start = time.time()
50+
start = time.perf_counter()
5151
for _ in range(n_trials):
5252
c = []
5353
for i in range(len(a)):
5454
c.append(a[i] + b)
5555
c = np.array(c)
56-
nonvectorized_time = (time.time() - start) / n_trials
56+
nonvectorized_time = (time.perf_counter() - start) / n_trials
5757
print(f"Nonvectorized time: {nonvectorized_time} seconds")
5858

5959
# Vectorized
60-
start = time.time()
60+
start = time.perf_counter()
6161
for _ in range(n_trials):
6262
c_vec = a + b[np.newaxis, :]
63-
vectorized_time = (time.time() - start) / n_trials
63+
vectorized_time = (time.perf_counter() - start) / n_trials
6464
print(f"Vectorized time: {vectorized_time} seconds")
6565

6666
assert np.allclose(c, c_vec)
@@ -74,20 +74,20 @@
7474
a = np.random.randn(n, n_signals)
7575

7676
# Nonvectorized
77-
start = time.time()
77+
start = time.perf_counter()
7878
for _ in range(n_trials):
7979
c = []
8080
for i in range(n_signals):
8181
c.append(np.fft.fft(a[:, i]))
8282
c = np.array(c)
83-
nonvectorized_time = (time.time() - start) / n_trials
83+
nonvectorized_time = (time.perf_counter() - start) / n_trials
8484
print(f"Nonvectorized time: {nonvectorized_time} seconds")
8585

8686
# Vectorized
87-
start = time.time()
87+
start = time.perf_counter()
8888
for _ in range(n_trials):
8989
c_vec = np.fft.fft(a, axis=0)
90-
vectorized_time = (time.time() - start) / n_trials
90+
vectorized_time = (time.perf_counter() - start) / n_trials
9191
print(f"Vectorized time: {vectorized_time} seconds")
9292

9393
assert np.allclose(c.T, c_vec)
@@ -99,20 +99,20 @@
9999

100100
# float64
101101
a = np.random.randn(512, 512)
102-
start = time.time()
102+
start = time.perf_counter()
103103
for _ in range(n_trials):
104104
# b = np.fft.rfft2(a)
105105
b = rfft2(a)
106-
float64_time = (time.time() - start) / n_trials
106+
float64_time = (time.perf_counter() - start) / n_trials
107107
print(f"float64 time: {float64_time} seconds")
108108

109109
# float32
110110
a32 = a.astype(np.float32)
111-
start = time.time()
111+
start = time.perf_counter()
112112
for _ in range(n_trials):
113113
# b32 = np.fft.rfft2(a32)
114114
b32 = rfft2(a32)
115-
float32_time = (time.time() - start) / n_trials
115+
float32_time = (time.perf_counter() - start) / n_trials
116116
print(f"float32 time: {float32_time} seconds")
117117

118118
assert b32.dtype == np.complex64

notebooks/real_fft.ipynb

+8-8
Original file line numberDiff line numberDiff line change
@@ -120,17 +120,17 @@
120120
"n_trials = 1000\n",
121121
"\n",
122122
"# FFT\n",
123-
"start_time = time.time()\n",
123+
"start_time = time.perf_counter()\n",
124124
"for _ in range(n_trials):\n",
125125
" np.fft.fft(signal)\n",
126-
"proc_time_fft = (time.time() - start_time) / n_trials\n",
126+
"proc_time_fft = (time.perf_counter() - start_time) / n_trials\n",
127127
"print(f\"FFT: {proc_time_fft} s\")\n",
128128
"\n",
129129
"# RFFT\n",
130-
"start_time = time.time()\n",
130+
"start_time = time.perf_counter()\n",
131131
"for _ in range(n_trials):\n",
132132
" np.fft.rfft(signal)\n",
133-
"proc_time_rfft = (time.time() - start_time) / n_trials\n",
133+
"proc_time_rfft = (time.perf_counter() - start_time) / n_trials\n",
134134
"print(f\"RFFT: {proc_time_rfft} s\")"
135135
]
136136
},
@@ -181,16 +181,16 @@
181181
" signal = np.random.randn(fft_len)\n",
182182
"\n",
183183
" # FFT\n",
184-
" start_time = time.time()\n",
184+
" start_time = time.perf_counter()\n",
185185
" for _ in range(n_trials):\n",
186186
" np.fft.fft(signal)\n",
187-
" fft_times.append((time.time() - start_time) / n_trials)\n",
187+
" fft_times.append((time.perf_counter() - start_time) / n_trials)\n",
188188
"\n",
189189
" # RFFT\n",
190-
" start_time = time.time()\n",
190+
" start_time = time.perf_counter()\n",
191191
" for _ in range(n_trials):\n",
192192
" np.fft.rfft(signal)\n",
193-
" rfft_times.append((time.time() - start_time) / n_trials)\n",
193+
" rfft_times.append((time.perf_counter() - start_time) / n_trials)\n",
194194
"\n",
195195
"# plot\n",
196196
"plt.figure(figsize=(10, 5))\n",

profile/fftconvolve.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010

1111
# create random signal
12-
n = 1000
12+
n = 10000
1313
signal = np.random.randn(n)
1414

1515
# create filter
@@ -25,24 +25,24 @@
2525
# rfft
2626
print("rfft")
2727
rfft_convolved_signal = np.zeros_like(signal)
28-
start_time = time.time()
28+
start_time = time.perf_counter()
2929
for _ in tqdm(range(n_trials)):
3030
rfft_out = rfft_convolver(signal)
31-
proc_time_rfft = (time.time() - start_time) / n_trials
31+
proc_time_rfft = (time.perf_counter() - start_time) / n_trials
3232

3333
# fft
3434
print("fft")
3535
fft_convolved_signal = np.zeros_like(signal)
36-
start_time = time.time()
36+
start_time = time.perf_counter()
3737
for _ in tqdm(range(n_trials)):
3838
fft_out = fft_convolver(signal)
39-
proc_time_fft = (time.time() - start_time) / n_trials
39+
proc_time_fft = (time.perf_counter() - start_time) / n_trials
4040

4141
# fft without initializing
print("fft naive (without initializing)")
# Restart the clock: without this, the elapsed time below would also include
# the preceding "fft" loop, because start_time was last set before it.
start_time = time.perf_counter()
for _ in tqdm(range(n_trials)):
    fft_naive_out = np.convolve(signal, filter, mode="full")
45-
proc_time_fft_naive = (time.time() - start_time) / n_trials
45+
proc_time_fft_naive = (time.perf_counter() - start_time) / n_trials
4646

4747
# check results
4848
assert np.allclose(rfft_out, fft_out)

setup.py

+1
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
"scipy",
2424
"matplotlib",
2525
"hydra-core",
26+
"tqdm",
2627
],
2728
include_package_data=True,
2829
)

0 commit comments

Comments
 (0)