Created
December 7, 2016 11:46
-
-
Save eickenberg/b0f6b3ad08694841d2c0ede71b3cc948 to your computer and use it in GitHub Desktop.
Script for benchmarking skcuda fft performance (pure calculation) wrt pyfftw
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""testing skcuda fft in 3 dimensions""" | |
import pycuda.autoinit | |
import pycuda.gpuarray as gpuarray | |
import numpy as np | |
#from scipy import fftpack as fft | |
from pyfftw.interfaces import numpy_fft as fft | |
import skcuda.fft as cu_fft | |
import time

# Batch sizes (number of independent 3-D volumes transformed per call).
Bs = (8, 12, 16, 20, 24,)
# Edge lengths of the cubic volumes; each transform runs on an N x N x N grid.
Ns = (32, 64, 96, 128, 256)

# Repetition count for every (N, B) combination, indexed as ns[j, k] for
# Ns[j] and Bs[k].  Only the four smallest problems are repeated 10 times;
# everything else is run once.
ns = np.ones([len(Ns), len(Bs)], dtype=int)
ns[0:2, 0:2] = 10

# Mean seconds per transform, appended in (N-major, B-minor) loop order.
cpu_fft_times = []
cpu_ifft_times = []
gpu_fft_times = []
gpu_ifft_times = []
# cuFFT calls are queued asynchronously: they return to Python before the GPU
# has finished.  The CUDA context must therefore be synchronised before a
# timer is started or stopped, otherwise only the launch overhead is measured.
gpu_sync = pycuda.autoinit.context.synchronize

# Time forward and inverse 3-D FFTs on CPU (pyfftw) and GPU (skcuda) for every
# (N, B) pair, appending the mean seconds per call to the four result lists.
for j, N in enumerate(Ns):
    for k, B in enumerate(Bs):
        n = ns[j, k]  # number of timed repetitions for this problem size

        x = np.empty((B, N, N, N), dtype=np.float32)
        xf = np.empty_like(x, dtype=np.complex64)
        y = np.empty_like(x)
        x[:] = np.random.randn(*x.shape).astype('float32')

        # --- CPU forward transform over the three spatial axes ---
        # NOTE(review): the CPU path runs a full complex transform while the
        # GPU path below runs a real-to-complex one (half-size last axis);
        # consider rfftn/irfftn here for a like-for-like comparison.
        t0 = time.perf_counter()
        for _ in range(n):
            xf[:] = fft.fftn(x, axes=(1, 2, 3))
        t1 = time.perf_counter()
        cpu_fft_times.append((t1 - t0) / n)

        # --- CPU inverse transform ---
        t2 = time.perf_counter()
        for _ in range(n):
            y[:] = np.real(fft.ifftn(xf, axes=(1, 2, 3)))
        t3 = time.perf_counter()
        cpu_ifft_times.append((t3 - t2) / n)

        # --- GPU forward transform (real -> complex, batch of B) ---
        # The host-to-device copy and plan creation are outside the timer.
        x_gpu = gpuarray.to_gpu(x)
        xf_gpu = gpuarray.empty((B, N, N, N // 2 + 1), np.complex64)
        plan_forward = cu_fft.Plan((N, N, N), np.float32, np.complex64, B)
        gpu_sync()
        t4 = time.perf_counter()
        for _ in range(n):
            cu_fft.fft(x_gpu, xf_gpu, plan_forward)
        gpu_sync()  # wait for the queued transforms to actually finish
        t5 = time.perf_counter()
        gpu_fft_times.append((t5 - t4) / n)

        # --- GPU inverse transform (complex -> real, scaled) ---
        y_gpu = gpuarray.empty_like(x_gpu)
        plan_inverse = cu_fft.Plan((N, N, N), np.complex64, np.float32, B)
        gpu_sync()
        t6 = time.perf_counter()
        for _ in range(n):
            cu_fft.ifft(xf_gpu, y_gpu, plan_inverse, scale=True)
        gpu_sync()
        t7 = time.perf_counter()
        gpu_ifft_times.append((t7 - t6) / n)

        # Report sizes, per-call timings and CPU/GPU speedup ratios.
        print((N, B, n, cpu_fft_times[-1], cpu_ifft_times[-1],
               gpu_fft_times[-1], gpu_ifft_times[-1],
               cpu_fft_times[-1] / gpu_fft_times[-1],
               cpu_ifft_times[-1] / gpu_ifft_times[-1]))
        # Sanity check: squared error between the CPU and GPU round trips.
        print(((y - y_gpu.get()) ** 2).sum())
import matplotlib
matplotlib.use("Agg")  # file-only backend; must be selected before pyplot
import matplotlib.pyplot as plt


def _save_timing_figure(xs, styled_series, title, stem):
    """Plot timing curves on log-log axes and save them as PNG, SVG and PDF.

    xs            -- x positions, also used verbatim as tick labels.
    styled_series -- iterable of (values, fmt) pairs; ``values`` has one row
                     per entry of ``xs`` and each column becomes one curve.
    title         -- figure title.
    stem          -- output file name without extension.
    """
    plt.figure()
    for values, fmt in styled_series:
        plt.plot(xs, values, fmt)
    plt.yscale('log')
    plt.xscale('log')
    plt.title(title)
    # Pass a concrete list of labels rather than a one-shot map iterator.
    plt.xticks(xs, [str(v) for v in xs])
    for ext in ("png", "svg", "pdf"):
        plt.savefig("{}.{}".format(stem, ext))
    plt.close()


# Turn the flat per-run lists into (len(Ns), len(Bs)) grids: row = N, col = B.
grid_shape = (len(Ns), len(Bs))
cpu_fft_times = np.array(cpu_fft_times).reshape(grid_shape)
cpu_ifft_times = np.array(cpu_ifft_times).reshape(grid_shape)
gpu_fft_times = np.array(gpu_fft_times).reshape(grid_shape)
gpu_ifft_times = np.array(gpu_ifft_times).reshape(grid_shape)

# Blue = CPU, red = GPU; solid = forward FFT, dash-dot = inverse FFT.
_save_timing_figure(
    Ns,
    [(cpu_fft_times, 'b-'), (cpu_ifft_times, 'b-.'),
     (gpu_fft_times, 'r-'), (gpu_ifft_times, 'r-.')],
    "computation time as a function of N for B={}".format(Bs),
    "f_of_N",
)
# Transposed so that rows follow B and each curve corresponds to one N.
_save_timing_figure(
    Bs,
    [(cpu_fft_times.T, 'b-'), (cpu_ifft_times.T, 'b-.'),
     (gpu_fft_times.T, 'r-'), (gpu_ifft_times.T, 'r-.')],
    "computation time as a function of B for N={}".format(Ns),
    "f_of_B",
)
Speedups are generally around 10^3, can exceed 10^4, and sometimes reach 10^5. Caveat: everything is already in memory (host–device transfers are not included in the timings).
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Blue is CPU, red is GPU; solid lines are the FFT, dash-dotted lines are the IFFT.