Skip to content
Snippets Groups Projects
Commit 21d19cbd authored by Neil Gershenfeld's avatar Neil Gershenfeld
Browse files

wip

parent 4135bd71
No related branches found
No related tags found
No related merge requests found
Pipeline #4932 passed
...@@ -11,51 +11,92 @@ import time ...@@ -11,51 +11,92 @@ import time
#
# problem size
#
block_size = 1 << 10            # threads per block (2**10)
grid_size = 1 << 21             # blocks per grid (2**21)
NPTS = grid_size*block_size     # total points: 2**31, a power of two
#
# kernels and functions
#
@cuda.jit
def init(arr):
    # One thread per element: arr[k] holds the k-th term of a series
    # whose total is pi (accumulated later by a reduction).
    k = cuda.grid(1)
    arr[k] = 0.5/((k+0.25)*(k+0.75))
#
@cuda.reduce
def Numba_reduce(x,y):
    # Binary-add combiner; cuda.reduce expands it into a device-side sum.
    return x+y
#
@cuda.jit
def CUDA_sum(arr,half):
    # One pairwise-fold pass: add the upper half of the active region
    # onto the lower half. Threads at or beyond `half` do nothing.
    tid = cuda.grid(1)
    if tid < half:
        arr[tid] += arr[tid+half]
#
def CUDA_reduce(arr,npts):
    # Tree reduction built from CUDA_sum launches: repeatedly fold the
    # active region in half until arr[0] holds the sum of all entries.
    # Assumes npts is a power of two (here it is 2**31).
    half = npts >> 1
    while half:
        CUDA_sum[grid_size,block_size](arr,half)
        half >>= 1
#
# device array
#
# single-precision working buffer resident on the GPU
arr = cuda.device_array(NPTS, dtype=np.float32)
#
# compile kernels
#
# throwaway launches so JIT compilation happens outside the timed sections
init[grid_size,block_size](arr)
pi = Numba_reduce(arr)
CUDA_reduce(arr,NPTS)
#
# CUDA kernel array calculation
#
# Time the element-wise init kernel alone. Kernel launches are
# asynchronous, so block until the GPU finishes before reading the
# clock — otherwise only the launch overhead is measured.
start_time = time.time()
init[grid_size,block_size](arr)
cuda.synchronize()
end_time = time.time()
mflops = NPTS*4.0/(1.0e6*(end_time-start_time))
print("CUDA kernel array calculation:")
print(" time = %f, estimated MFlops = %f"%(end_time-start_time,mflops))
#
# Numba reduce
#
init[grid_size,block_size](arr)
# the init launch above is asynchronous — drain it so the timed region
# below covers only the reduction
cuda.synchronize()
start_time = time.time()
pi = Numba_reduce(arr)
end_time = time.time()
mflops = NPTS*1.0/(1.0e6*(end_time-start_time))
print("Numba reduce:")
print(" time = %f, estimated MFlops = %f"%(end_time-start_time,mflops))
#
# both with Numba reduce
#
# Time init + reduce together; the reduce delivers its result back to
# the host, so the GPU work is complete when end_time is read.
start_time = time.time()
init[grid_size,block_size](arr)
pi = Numba_reduce(arr)
end_time = time.time()
mflops = NPTS*5.0/(1.0e6*(end_time-start_time))
print("both with Numba reduce:")
print(" NPTS = %d, pi = %f"%(NPTS,pi))
print(" time = %f, estimated MFlops = %f"%(end_time-start_time,mflops))
#
# CUDA kernel reduction
#
init[grid_size,block_size](arr)
# CUDA_reduce only queues asynchronous kernel launches; synchronize on
# both sides of the timed region so it measures the reduction itself
# (and not the pending init launch, nor just launch overhead)
cuda.synchronize()
start_time = time.time()
CUDA_reduce(arr,NPTS)
cuda.synchronize()
end_time = time.time()
mflops = NPTS*1.0/(1.0e6*(end_time-start_time))
print("CUDA kernel reduction:")
print(" time = %f, estimated MFlops = %f"%(end_time-start_time,mflops))
#
# both with CUDA kernel reduction
#
start_time = time.time()
init[grid_size,block_size](arr)
CUDA_reduce(arr,NPTS)
# everything above is an asynchronous launch: wait for the GPU before
# stopping the clock (previously the copy-back synchronized *after*
# end_time, so the measured interval missed most of the GPU work)
cuda.synchronize()
end_time = time.time()
darr = arr.copy_to_host()
mflops = NPTS*5.0/(1.0e6*(end_time-start_time))
print("both with CUDA kernel reduction:")
print(" NPTS = %d, pi = %f"%(NPTS,darr[0]))
print(" time = %f, estimated MFlops = %f"%(end_time-start_time,mflops))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment