Commit 21d19cbd by Neil Gershenfeld

### wip

parent 4135bd71
Pipeline #4932 passed with stage
in 4 seconds
 ... ... @@ -11,51 +11,92 @@ import time # problem size # block_size = 2**10 grid_size = 2**20 grid_size = 2**21 NPTS = grid_size*block_size # # CUDA kernels # kernels and functions # @cuda.jit def init(arr): i = 1+cuda.grid(1) arr[i] = 0.5/((i-0.75)*(i-0.25)) arr[i-1] = 0.5/((i-0.75)*(i-0.25)) # @cuda.reduce def sum_reduce(a,b): def Numba_reduce(a,b): return a+b # # compile kernels @cuda.jit def CUDA_sum(arr,len): i = cuda.grid(1) if (i < len): arr[i] += arr[i+len] # def CUDA_reduce(arr,NPTS): len = NPTS >> 1 while (1): CUDA_sum[grid_size,block_size](arr,len) len = len >> 1 if (len == 0): return # # device array # arr = cuda.device_array(NPTS,np.float32) # # compile kernels # init[grid_size,block_size](arr) pi = sum_reduce(arr) pi = Numba_reduce(arr) CUDA_reduce(arr,NPTS) # # array calc # CUDA kernel array calculation # start_time = time.time() init[grid_size,block_size](arr) end_time = time.time() mflops = NPTS*4.0/(1.0e6*(end_time-start_time)) print("Numba CUDA array calculation:") print("CUDA kernel array calculation:") print(" time = %f, estimated MFlops = %f"%(end_time-start_time,mflops)) # # reduction # Numba reduce # init[grid_size,block_size](arr) start_time = time.time() pi = sum_reduce(arr) pi = Numba_reduce(arr) end_time = time.time() mflops = NPTS*1.0/(1.0e6*(end_time-start_time)) print("Numba CUDA reduction:") print("Numba reduce:") print(" time = %f, estimated MFlops = %f"%(end_time-start_time,mflops)) # # both # both with Numba reduce # start_time = time.time() init[grid_size,block_size](arr) pi = sum_reduce(arr) pi = Numba_reduce(arr) end_time = time.time() mflops = NPTS*5.0/(1.0e6*(end_time-start_time)) print("Numba CUDA both:") print("both with Numba reduce:") print(" NPTS = %d, pi = %f"%(NPTS,pi)) print(" time = %f, estimated MFlops = %f"%(end_time-start_time,mflops)) # # CUDA kernel reduction # init[grid_size,block_size](arr) start_time = time.time() CUDA_reduce(arr,NPTS) end_time = time.time() mflops = NPTS*1.0/(1.0e6*(end_time-start_time)) print("CUDA kernel reduction:") print(" time = %f, estimated MFlops = %f"%(end_time-start_time,mflops)) # # both with CUDA kernel reduction # start_time = time.time() init[grid_size,block_size](arr) CUDA_reduce(arr,NPTS) end_time = time.time() darr = arr.copy_to_host() mflops = NPTS*5.0/(1.0e6*(end_time-start_time)) print("both with CUDA kernel reduction:") print(" NPTS = %d, pi = %f"%(NPTS,darr[0])) print(" time = %f, estimated MFlops = %f"%(end_time-start_time,mflops))
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!