numbapig.py 3.11 KB
Newer Older
Neil Gershenfeld's avatar
wip  
Neil Gershenfeld committed
1
2
3
#
# numbapig.py
# Neil Gershenfeld 2/9/20
Neil Gershenfeld's avatar
wip    
Neil Gershenfeld committed
4
# calculation of pi by a Numba GPU sum
Neil Gershenfeld's avatar
wip  
Neil Gershenfeld committed
5
6
7
8
9
10
11
12
13
# pi = 3.14159265358979323846 
#
from numba import cuda
import numpy as np
import time
#
# problem size
#
block_size = 2**10
Neil Gershenfeld's avatar
wip    
Neil Gershenfeld committed
14
grid_size = 2**21
Neil Gershenfeld's avatar
wip  
Neil Gershenfeld committed
15
16
NPTS = grid_size*block_size
#
Neil Gershenfeld's avatar
wip    
Neil Gershenfeld committed
17
# kernels and functions
Neil Gershenfeld's avatar
wip  
Neil Gershenfeld committed
18
19
20
21
#
@cuda.jit
def init(arr):
    i = 1+cuda.grid(1)
Neil Gershenfeld's avatar
wip    
Neil Gershenfeld committed
22
    arr[i-1] = 0.5/((i-0.75)*(i-0.25))
Neil Gershenfeld's avatar
wip    
Neil Gershenfeld committed
23
    #arr[i-1] = i # for testing reduction
Neil Gershenfeld's avatar
wip    
Neil Gershenfeld committed
24
#
Neil Gershenfeld's avatar
wip  
Neil Gershenfeld committed
25
@cuda.reduce
Neil Gershenfeld's avatar
wip    
Neil Gershenfeld committed
26
def Numba_reduce(a,b):
Neil Gershenfeld's avatar
wip  
Neil Gershenfeld committed
27
28
    return a+b
#
Neil Gershenfeld's avatar
wip    
Neil Gershenfeld committed
29
30
31
32
33
34
35
36
37
38
39
40
41
42
@cuda.jit
def CUDA_sum(arr,len):
    i = cuda.grid(1)
    if (i < len):
       arr[i] += arr[i+len]
#
def CUDA_reduce(arr,NPTS):
   len = NPTS >> 1
   while (1):
      CUDA_sum[grid_size,block_size](arr,len)
      len = len >> 1
      if (len == 0):
         return
#
Neil Gershenfeld's avatar
wip    
Neil Gershenfeld committed
43
44
45
46
47
48
@cuda.jit
def CUDA_result(arr,result):
    i = cuda.grid(1)
    if (i == 0):
      result[0] = arr[0]
#
Neil Gershenfeld's avatar
wip    
Neil Gershenfeld committed
49
# device array
Neil Gershenfeld's avatar
wip  
Neil Gershenfeld committed
50
51
#
arr = cuda.device_array(NPTS,np.float32)
Neil Gershenfeld's avatar
wip    
Neil Gershenfeld committed
52
53
54
result = cuda.device_array(1,np.float32)
#arr = cuda.device_array(NPTS,np.int64) # for testing reduction
#result = cuda.device_array(1,np.int64) # for testing reduction
Neil Gershenfeld's avatar
wip    
Neil Gershenfeld committed
55
#
Neil Gershenfeld's avatar
wip    
Neil Gershenfeld committed
56
# compile kernels by calling them
Neil Gershenfeld's avatar
wip    
Neil Gershenfeld committed
57
#
Neil Gershenfeld's avatar
wip  
Neil Gershenfeld committed
58
init[grid_size,block_size](arr)
Neil Gershenfeld's avatar
wip    
Neil Gershenfeld committed
59
60
pi = Numba_reduce(arr)
CUDA_reduce(arr,NPTS)
Neil Gershenfeld's avatar
wip    
Neil Gershenfeld committed
61
CUDA_result(arr,result)
Neil Gershenfeld's avatar
wip  
Neil Gershenfeld committed
62
#
Neil Gershenfeld's avatar
wip    
Neil Gershenfeld committed
63
# CUDA kernel array calculation
Neil Gershenfeld's avatar
wip  
Neil Gershenfeld committed
64
65
66
67
68
#
start_time = time.time()
init[grid_size,block_size](arr)
end_time = time.time()
mflops = NPTS*4.0/(1.0e6*(end_time-start_time))
Neil Gershenfeld's avatar
wip    
Neil Gershenfeld committed
69
print("CUDA kernel array calculation:")
Neil Gershenfeld's avatar
wip  
Neil Gershenfeld committed
70
71
print("   time = %f, estimated MFlops = %f"%(end_time-start_time,mflops))
#
Neil Gershenfeld's avatar
wip    
Neil Gershenfeld committed
72
# Numba reduce
Neil Gershenfeld's avatar
wip  
Neil Gershenfeld committed
73
#
Neil Gershenfeld's avatar
wip    
Neil Gershenfeld committed
74
init[grid_size,block_size](arr)
Neil Gershenfeld's avatar
wip  
Neil Gershenfeld committed
75
start_time = time.time()
Neil Gershenfeld's avatar
wip    
Neil Gershenfeld committed
76
pi = Numba_reduce(arr)
Neil Gershenfeld's avatar
wip  
Neil Gershenfeld committed
77
78
end_time = time.time()
mflops = NPTS*1.0/(1.0e6*(end_time-start_time))
Neil Gershenfeld's avatar
wip    
Neil Gershenfeld committed
79
print("Numba reduce:")
Neil Gershenfeld's avatar
wip  
Neil Gershenfeld committed
80
81
print("   time = %f, estimated MFlops = %f"%(end_time-start_time,mflops))
#
Neil Gershenfeld's avatar
wip    
Neil Gershenfeld committed
82
# both with Numba reduce
Neil Gershenfeld's avatar
wip  
Neil Gershenfeld committed
83
84
85
#
start_time = time.time()
init[grid_size,block_size](arr)
Neil Gershenfeld's avatar
wip    
Neil Gershenfeld committed
86
pi = Numba_reduce(arr)
Neil Gershenfeld's avatar
wip  
Neil Gershenfeld committed
87
88
end_time = time.time()
mflops = NPTS*5.0/(1.0e6*(end_time-start_time))
Neil Gershenfeld's avatar
wip    
Neil Gershenfeld committed
89
print("both with Numba reduce:")
Neil Gershenfeld's avatar
wip  
Neil Gershenfeld committed
90
91
print("   NPTS = %d, pi = %f"%(NPTS,pi))
print("   time = %f, estimated MFlops = %f"%(end_time-start_time,mflops))
Neil Gershenfeld's avatar
wip    
Neil Gershenfeld committed
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#
# CUDA kernel reduction
#
init[grid_size,block_size](arr)
start_time = time.time()
CUDA_reduce(arr,NPTS)
end_time = time.time()
mflops = NPTS*1.0/(1.0e6*(end_time-start_time))
print("CUDA kernel reduction:")
print("   time = %f, estimated MFlops = %f"%(end_time-start_time,mflops))
#
# both with CUDA kernel reduction
#
start_time = time.time()
init[grid_size,block_size](arr)
CUDA_reduce(arr,NPTS)
Neil Gershenfeld's avatar
wip    
Neil Gershenfeld committed
108
CUDA_result(arr,result)
Neil Gershenfeld's avatar
wip    
Neil Gershenfeld committed
109
end_time = time.time()
Neil Gershenfeld's avatar
wip    
Neil Gershenfeld committed
110
pi = result.copy_to_host()
Neil Gershenfeld's avatar
wip    
Neil Gershenfeld committed
111
112
mflops = NPTS*5.0/(1.0e6*(end_time-start_time))
print("both with CUDA kernel reduction:")
Neil Gershenfeld's avatar
wip    
Neil Gershenfeld committed
113
print("   NPTS = %d, pi = %f"%(NPTS,pi[0]))
Neil Gershenfeld's avatar
wip    
Neil Gershenfeld committed
114
print("   time = %f, estimated MFlops = %f"%(end_time-start_time,mflops))
Neil Gershenfeld's avatar
wip    
Neil Gershenfeld committed
115
116
117
118
119
120
121
122
123
124
125
126
127
#
# both with CUDA kernel reduction and transfer
#
start_time = time.time()
init[grid_size,block_size](arr)
CUDA_reduce(arr,NPTS)
CUDA_result(arr,result)
pi = result.copy_to_host()
end_time = time.time()
mflops = NPTS*5.0/(1.0e6*(end_time-start_time))
print("both with CUDA kernel reduction and transfer:")
print("   NPTS = %d, pi = %f"%(NPTS,pi[0]))
print("   time = %f, estimated MFlops = %f"%(end_time-start_time,mflops))
Neil Gershenfeld's avatar
wip    
Neil Gershenfeld committed
128