# # numbapig.py # Neil Gershenfeld 3/1/20 # calculation of pi by a Numba CUDA sum # pi = 3.14159265358979323846 # from numba import cuda import numpy as np import time block_size = 1024 grid_size = 1024 nloop = 1000000 npts = grid_size*block_size @cuda.jit def init(arr,nloop): i = cuda.blockIdx.x*cuda.blockDim.x+cuda.threadIdx.x; start = nloop*i+1; end = nloop*(i+1)+1; arr[i] = 0; for j in range(start,end): arr[i] += 0.5/((j-0.75)*(j-0.25)); @cuda.jit def reduce_sum(arr,len): i = cuda.blockIdx.x*cuda.blockDim.x+cuda.threadIdx.x; if (i < len): arr[i] += arr[i+len] def reduce(arr,npts): len = npts >> 1 while (1): reduce_sum[grid_size,block_size](arr,len) len = len >> 1 if (len == 0): return darr = cuda.device_array(npts,np.float64) init[grid_size,block_size](darr,nloop) # compile kernel reduce(darr,npts) # compile kernel start_time = time.time() init[grid_size,block_size](darr,nloop) reduce(darr,npts) cuda.synchronize() end_time = time.time() mflops = npts*nloop*5.0/(1.0e6*(end_time-start_time)) harr = darr.copy_to_host() print("npts = %d, nloop = %d, pi = %f"%(npts,nloop,harr[0])) print("time = %f, estimated MFlops = %f"%(end_time-start_time,mflops))