Add cupy example and switch to perf_counter().

ebezzam · ebezzam · commit 1390711c51d4 · 2023-04-19T11:09:45.000Z
diff --git a/README.rst b/README.rst
@@ -120,6 +120,7 @@ documentation.
 TODO
 ====
 
+- numba: https://numba.pydata.org/
 - picking a license
 - badge for google slides
 - change documentation links to main branch
@@ -128,6 +129,5 @@ TODO
 - matplotlib, pytest, black in dev install
 - manifest file to not include file in package
 - GitHub actions for releasing to PyPi when changes to version
-- cupy for GPU
 - pytorch compatible
 - Cython / C++
diff --git a/examples/cupy_fft.py b/examples/cupy_fft.py
@@ -0,0 +1,95 @@
+"""
+
+CuPy Example
+
+Installing:
+- check Cuda version, e.g. from Terminal run: `nvcc --version` or `nvidia-smi`
+- install corresponding version of cupy, e.g. `pip install cupy-cuda11x`
+
+Installation page: https://docs.cupy.dev/en/stable/install.html
+
+"""
+
+from importlib import util
+import os
+import numpy as np
+import scipy
+import time
+
+try:
+    import cupy as cp
+    import cupyx
+
+    CUPY_AVAILABLE = True
+except ImportError:
+    CUPY_AVAILABLE = False
+
+
+def get_array_module(x):
+    """
+    Returns correct numerical module based on input.
+
+    Parameters
+    ----------
+    x : :obj:`numpy.ndarray` or :obj:`cupy.ndarray`
+        Array
+    Returns
+    -------
+    mod : :obj:`module`
+        Module to be used to process array (:mod:`numpy` or :mod:`cupy`)
+    """
+    if CUPY_AVAILABLE:
+        return cp.get_array_module(x)
+    else:
+        return np
+
+
+def fft2(x):
+    """
+    Applies correct fft method based on input.
+
+    Parameters
+    ----------
+    x : :obj:`numpy.ndarray` or :obj:`cupy.ndarray`
+        Array
+
+    Returns
+    -------
+    out : :obj:`numpy.ndarray` or :obj:`cupy.ndarray`
+        2D FFT of ``x`` (via :mod:`scipy.fft` for numpy input, :mod:`cupyx.scipy.fft` otherwise)
+    """
+    if get_array_module(x) == np:
+        func = scipy.fft.fft2
+    else:
+        func = cupyx.scipy.fft.fft2
+    return func(x)
+
+
+# compare FFT computation
+n = 1024
+n_trials = 100
+x = np.random.rand(n, n)
+
+if CUPY_AVAILABLE:
+    x_gpu = cp.asarray(x)
+    print(x_gpu.device)
+else:
+    x_gpu = x
+    print("Cupy not available. Using numpy instead.")
+
+# CPU: numpy array input (fft2 dispatches to scipy.fft.fft2)
+start = time.perf_counter()
+for _ in range(n_trials):
+    fft2(x)
+time_cpu = time.perf_counter() - start
+print(f"CPU processing took {time_cpu} seconds")
+
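+# GPU: cupy array input (numpy array again if cupy is unavailable); NOTE(review): no device sync before timer stops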
+start = time.perf_counter()
+for _ in range(n_trials):
+    fft2(x_gpu)
+time_gpu = time.perf_counter() - start
+print(f"GPU processing took {time_gpu} seconds")
+
+# speed-up
+print(f"Speed-up: {time_cpu / time_gpu}")
diff --git a/examples/joblib_parallel.py b/examples/joblib_parallel.py
@@ -5,10 +5,16 @@
 
 """
 
-from joblib import Parallel, delayed
+
 import numpy as np
 import time
 
+try:
+    from joblib import Parallel, delayed
+except ImportError:
+    print("Install joblib to run this example")
+    exit()
+
 
 def f(seed, n, proc_time):
     np.random.seed(seed)
@@ -28,17 +34,17 @@ def f(seed, n, proc_time):
     # Compare processing time for serial and parallel processing
 
     # Serial processing
-    start = time.time()
+    start = time.perf_counter()
     outputs_ser = []
     for seed in range(n_exp):
         outputs_ser.append(f(seed, n, proc_time))
-    serial_time = time.time() - start
+    serial_time = time.perf_counter() - start
     print(f"Serial processing took {serial_time} seconds")
 
     # Parallel processing
-    start = time.time()
+    start = time.perf_counter()
     outputs_par = Parallel(n_jobs=n_cpu)(delayed(f)(seed, n, proc_time) for seed in range(n_exp))
-    parallel_time = time.time() - start
+    parallel_time = time.perf_counter() - start
     print(f"Parallel processing took {parallel_time} seconds")
 
     # Speed-up
diff --git a/examples/numpy_speedup.py b/examples/numpy_speedup.py
@@ -20,20 +20,20 @@
 b = np.random.randn(n)
 
 # Nonvectorized
-start = time.time()
+start = time.perf_counter()
 for _ in range(n_trials):
     c = []
     for i in range(len(a)):
         c.append(a[i] + b[i])
     c = np.array(c)
-nonvectorized_time = (time.time() - start) / n_trials
+nonvectorized_time = (time.perf_counter() - start) / n_trials
 print(f"Nonvectorized time: {nonvectorized_time} seconds")
 
 # Vectorized
-start = time.time()
+start = time.perf_counter()
 for _ in range(n_trials):
     c_vec = a + b
-vectorized_time = (time.time() - start) / n_trials
+vectorized_time = (time.perf_counter() - start) / n_trials
 print(f"Vectorized time: {vectorized_time} seconds")
 
 assert np.allclose(c, c_vec)
@@ -47,20 +47,20 @@
 b = np.random.randn(n)  # add a row vector to each row of a
 
 # Nonvectorized
-start = time.time()
+start = time.perf_counter()
 for _ in range(n_trials):
     c = []
     for i in range(len(a)):
         c.append(a[i] + b)
     c = np.array(c)
-nonvectorized_time = (time.time() - start) / n_trials
+nonvectorized_time = (time.perf_counter() - start) / n_trials
 print(f"Nonvectorized time: {nonvectorized_time} seconds")
 
 # Vectorized
-start = time.time()
+start = time.perf_counter()
 for _ in range(n_trials):
     c_vec = a + b[np.newaxis, :]
-vectorized_time = (time.time() - start) / n_trials
+vectorized_time = (time.perf_counter() - start) / n_trials
 print(f"Vectorized time: {vectorized_time} seconds")
 
 assert np.allclose(c, c_vec)
@@ -74,20 +74,20 @@
 a = np.random.randn(n, n_signals)
 
 # Nonvectorized
-start = time.time()
+start = time.perf_counter()
 for _ in range(n_trials):
     c = []
     for i in range(n_signals):
         c.append(np.fft.fft(a[:, i]))
     c = np.array(c)
-nonvectorized_time = (time.time() - start) / n_trials
+nonvectorized_time = (time.perf_counter() - start) / n_trials
 print(f"Nonvectorized time: {nonvectorized_time} seconds")
 
 # Vectorized
-start = time.time()
+start = time.perf_counter()
 for _ in range(n_trials):
     c_vec = np.fft.fft(a, axis=0)
-vectorized_time = (time.time() - start) / n_trials
+vectorized_time = (time.perf_counter() - start) / n_trials
 print(f"Vectorized time: {vectorized_time} seconds")
 
 assert np.allclose(c.T, c_vec)
@@ -99,20 +99,20 @@
 
 # float64
 a = np.random.randn(512, 512)
-start = time.time()
+start = time.perf_counter()
 for _ in range(n_trials):
     # b = np.fft.rfft2(a)
     b = rfft2(a)
-float64_time = (time.time() - start) / n_trials
+float64_time = (time.perf_counter() - start) / n_trials
 print(f"float64 time: {float64_time} seconds")
 
 # float32
 a32 = a.astype(np.float32)
-start = time.time()
+start = time.perf_counter()
 for _ in range(n_trials):
     # b32 = np.fft.rfft2(a32)
     b32 = rfft2(a32)
-float32_time = (time.time() - start) / n_trials
+float32_time = (time.perf_counter() - start) / n_trials
 print(f"float32 time: {float32_time} seconds")
 
 assert b32.dtype == np.complex64
diff --git a/notebooks/real_fft.ipynb b/notebooks/real_fft.ipynb
@@ -120,17 +120,17 @@
     "n_trials = 1000\n",
     "\n",
     "# FFT\n",
-    "start_time = time.time()\n",
+    "start_time = time.perf_counter()\n",
     "for _ in range(n_trials):\n",
     "    np.fft.fft(signal)\n",
-    "proc_time_fft = (time.time() - start_time) / n_trials\n",
+    "proc_time_fft = (time.perf_counter() - start_time) / n_trials\n",
     "print(f\"FFT: {proc_time_fft} s\")\n",
     "\n",
     "# RFFT\n",
-    "start_time = time.time()\n",
+    "start_time = time.perf_counter()\n",
     "for _ in range(n_trials):\n",
     "     np.fft.rfft(signal)\n",
-    "proc_time_rfft = (time.time() - start_time) / n_trials\n",
+    "proc_time_rfft = (time.perf_counter() - start_time) / n_trials\n",
     "print(f\"RFFT: {proc_time_rfft} s\")"
    ]
   },
@@ -181,16 +181,16 @@
     "    signal = np.random.randn(fft_len)\n",
     "\n",
     "    # FFT\n",
-    "    start_time = time.time()\n",
+    "    start_time = time.perf_counter()\n",
     "    for _ in range(n_trials):\n",
     "        np.fft.fft(signal)\n",
-    "    fft_times.append((time.time() - start_time) / n_trials)\n",
+    "    fft_times.append((time.perf_counter() - start_time) / n_trials)\n",
     "\n",
     "    # RFFT\n",
-    "    start_time = time.time()\n",
+    "    start_time = time.perf_counter()\n",
     "    for _ in range(n_trials):\n",
     "        np.fft.rfft(signal)\n",
-    "    rfft_times.append((time.time() - start_time) / n_trials)\n",
+    "    rfft_times.append((time.perf_counter() - start_time) / n_trials)\n",
     "\n",
     "# plot\n",
     "plt.figure(figsize=(10, 5))\n",
diff --git a/profile/fftconvolve.py b/profile/fftconvolve.py
@@ -9,7 +9,7 @@
 
 
 # create random signal
-n = 1000
+n = 10000
 signal = np.random.randn(n)
 
 # create filter
@@ -25,24 +25,24 @@
 # rfft
 print("rfft")
 rfft_convolved_signal = np.zeros_like(signal)
-start_time = time.time()
+start_time = time.perf_counter()
 for _ in tqdm(range(n_trials)):
     rfft_out = rfft_convolver(signal)
-proc_time_rfft = (time.time() - start_time) / n_trials
+proc_time_rfft = (time.perf_counter() - start_time) / n_trials
 
 # fft
 print("fft")
 fft_convolved_signal = np.zeros_like(signal)
-start_time = time.time()
+start_time = time.perf_counter()
 for _ in tqdm(range(n_trials)):
     fft_out = fft_convolver(signal)
-proc_time_fft = (time.time() - start_time) / n_trials
+proc_time_fft = (time.perf_counter() - start_time) / n_trials
 
 # fft without initializing
 print("fft naive (without initializing)")
 for _ in tqdm(range(n_trials)):
     fft_naive_out = np.convolve(signal, filter, mode="full")
-proc_time_fft_naive = (time.time() - start_time) / n_trials
+proc_time_fft_naive = (time.perf_counter() - start_time) / n_trials
 
 # check results
 assert np.allclose(rfft_out, fft_out)
diff --git a/setup.py b/setup.py
@@ -23,6 +23,7 @@
         "scipy",
         "matplotlib",
         "hydra-core",
+        "tqdm",
     ],
     include_package_data=True,
 )

Original file line number	Diff line number	Diff line change
`@@ -23,6 +23,7 @@`
`23`	`23`	`"scipy",`
`24`	`24`	`"matplotlib",`
`25`	`25`	`"hydra-core",`
	`26`	`+ "tqdm",`
`26`	`27`	`],`
`27`	`28`	`include_package_data=True,`
`28`	`29`	`)`