wip: add cuda.synchronize() after CUDA kernel launches (reduction loop and timed benchmark sections)

73d4808e · Neil Gershenfeld · 7ecacb5c · 73d4808e
Commit 73d4808e authored Feb 29, 2020 by Neil Gershenfeld
--- a/Python/numbapig.py
+++ b/Python/numbapig.py
@@ -36,6 +36,7 @@ def CUDA_reduce(arr,NPTS):
   len = NPTS >> 1
   while (1):
      CUDA_sum[grid_size,block_size](arr,len)
+      cuda.synchronize()
      len = len >> 1
      if (len == 0):
         return
@@ -64,6 +65,7 @@ CUDA_result(arr,result)
 #
 start_time = time.time()
 init[grid_size,block_size](arr)
+cuda.synchronize()
 end_time = time.time()
 mflops = NPTS*4.0/(1.0e6*(end_time-start_time))
 print("CUDA kernel array calculation:")
@@ -83,6 +85,7 @@ print("   time = %f, estimated MFlops = %f"%(end_time-start_time,mflops))
 #
 start_time = time.time()
 init[grid_size,block_size](arr)
+cuda.synchronize()
 pi = Numba_reduce(arr)
 end_time = time.time()
 mflops = NPTS*5.0/(1.0e6*(end_time-start_time))
@@ -104,8 +107,10 @@ print("   time = %f, estimated MFlops = %f"%(end_time-start_time,mflops))
 #
 start_time = time.time()
 init[grid_size,block_size](arr)
+cuda.synchronize()
 CUDA_reduce(arr,NPTS)
 CUDA_result(arr,result)
+cuda.synchronize()
 end_time = time.time()
 pi = result.copy_to_host()
 mflops = NPTS*5.0/(1.0e6*(end_time-start_time))