Commit 21d19cbd authored by Neil Gershenfeld

wip

parent 4135bd71
Pipeline #4932 passed with stage
in 4 seconds
...@@ -11,51 +11,92 @@ import time ...@@ -11,51 +11,92 @@ import time
# problem size # problem size
# #
block_size = 2**10 block_size = 2**10
grid_size = 2**20 grid_size = 2**21
NPTS = grid_size*block_size NPTS = grid_size*block_size
# #
# CUDA kernels # kernels and functions
# #
@cuda.jit @cuda.jit
def init(arr): def init(arr):
i = 1+cuda.grid(1) i = 1+cuda.grid(1)
arr[i] = 0.5/((i-0.75)*(i-0.25)) arr[i-1] = 0.5/((i-0.75)*(i-0.25))
#
@cuda.reduce @cuda.reduce
def sum_reduce(a,b): def Numba_reduce(a,b):
return a+b return a+b
# #
# compile kernels @cuda.jit
def CUDA_sum(arr,len):
    """One pairwise-add pass: fold arr[len:2*len] into arr[0:len].

    Launched as a 1-D kernel; thread i performs arr[i] += arr[i+len].
    Reads and writes never touch the same element within one pass, so no
    synchronization between threads is needed.

    arr -- 1-D array, updated in place
    len -- number of pairs to add (the name shadows the builtin inside this
           function; it is kept so the signature stays unchanged)
    """
    i = cuda.grid(1)
    # Surplus threads (i >= len) do nothing.  The second test keeps the read
    # of arr[i+len] inside the array if a caller passes len > arr.size/2.
    if i < len and i + len < arr.size:
        arr[i] += arr[i+len]
#
def CUDA_reduce(arr,NPTS):
    """Sum the first NPTS elements of arr in place, leaving the total in arr[0].

    Each pass launches CUDA_sum to add the upper half of the live range onto
    the lower half, then halves the range.  The contents of arr are
    overwritten.  Launches are only queued here; the caller must synchronize
    (or copy back) before reading the result.

    arr  -- 1-D array accepted by the CUDA_sum kernel
    NPTS -- number of leading elements to sum; need not be a power of two
    """
    remaining = NPTS
    while remaining > 1:
        half = remaining >> 1
        # Size the launch to this pass's work instead of always launching the
        # full grid_size blocks, most of which would only fail the bounds test.
        blocks = (half + block_size - 1) // block_size
        CUDA_sum[blocks,block_size](arr,half)
        if remaining & 1:
            # Odd count: arr[remaining-1] had no partner in this pass.  A
            # single thread with len = remaining-1 adds it onto arr[0];
            # launches on the same stream run in order, so this follows the
            # pass above.
            CUDA_sum[1,1](arr,remaining-1)
        remaining = half
#
# device array
# #
arr = cuda.device_array(NPTS,np.float32) arr = cuda.device_array(NPTS,np.float32)
#
# compile kernels
#
init[grid_size,block_size](arr) init[grid_size,block_size](arr)
pi = sum_reduce(arr) pi = Numba_reduce(arr)
CUDA_reduce(arr,NPTS)
# #
# array calc # CUDA kernel array calculation
# #
start_time = time.time() start_time = time.time()
init[grid_size,block_size](arr) init[grid_size,block_size](arr)
end_time = time.time() end_time = time.time()
mflops = NPTS*4.0/(1.0e6*(end_time-start_time)) mflops = NPTS*4.0/(1.0e6*(end_time-start_time))
print("Numba CUDA array calculation:") print("CUDA kernel array calculation:")
print(" time = %f, estimated MFlops = %f"%(end_time-start_time,mflops)) print(" time = %f, estimated MFlops = %f"%(end_time-start_time,mflops))
# #
# reduction # Numba reduce
# #
init[grid_size,block_size](arr)
start_time = time.time() start_time = time.time()
pi = sum_reduce(arr) pi = Numba_reduce(arr)
end_time = time.time() end_time = time.time()
mflops = NPTS*1.0/(1.0e6*(end_time-start_time)) mflops = NPTS*1.0/(1.0e6*(end_time-start_time))
print("Numba CUDA reduction:") print("Numba reduce:")
print(" time = %f, estimated MFlops = %f"%(end_time-start_time,mflops)) print(" time = %f, estimated MFlops = %f"%(end_time-start_time,mflops))
# #
# both # both with Numba reduce
# #
start_time = time.time() start_time = time.time()
init[grid_size,block_size](arr) init[grid_size,block_size](arr)
pi = sum_reduce(arr) pi = Numba_reduce(arr)
end_time = time.time() end_time = time.time()
mflops = NPTS*5.0/(1.0e6*(end_time-start_time)) mflops = NPTS*5.0/(1.0e6*(end_time-start_time))
print("Numba CUDA both:") print("both with Numba reduce:")
print(" NPTS = %d, pi = %f"%(NPTS,pi)) print(" NPTS = %d, pi = %f"%(NPTS,pi))
print(" time = %f, estimated MFlops = %f"%(end_time-start_time,mflops)) print(" time = %f, estimated MFlops = %f"%(end_time-start_time,mflops))
#
# CUDA kernel reduction
#
init[grid_size,block_size](arr)
# Kernel launches return before the GPU has finished, so drain the queue
# first: otherwise the init above would be billed to the reduction.
cuda.synchronize()
start_time = time.time()
CUDA_reduce(arr,NPTS)
# Wait for every queued pass so the interval covers execution, not just
# the cost of enqueuing the launches.
cuda.synchronize()
end_time = time.time()
mflops = NPTS*1.0/(1.0e6*(end_time-start_time))
print("CUDA kernel reduction:")
print(" time = %f, estimated MFlops = %f"%(end_time-start_time,mflops))
#
# both with CUDA kernel reduction
#
# Drain any work still queued from earlier sections before starting the clock.
cuda.synchronize()
start_time = time.time()
init[grid_size,block_size](arr)
CUDA_reduce(arr,NPTS)
# Kernel launches are asynchronous; wait for completion so the measured
# interval is the execution time of init plus all reduction passes.
cuda.synchronize()
end_time = time.time()
# Only the total in arr[0] is needed, so copy back a one-element slice
# rather than the whole NPTS-element array.
darr = arr[:1].copy_to_host()
mflops = NPTS*5.0/(1.0e6*(end_time-start_time))
print("both with CUDA kernel reduction:")
print(" NPTS = %d, pi = %f"%(NPTS,darr[0]))
print(" time = %f, estimated MFlops = %f"%(end_time-start_time,mflops))
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment