Commit ad3a74d7 authored by Thrasyvoulos Karydis's avatar Thrasyvoulos Karydis

Add CUDA pi implementation from NMM class

parent fd8c4691
Pipeline #2726 passed with stage
in 0 seconds
/*
* pi.cu
* Thras Karydis 2/4/2017
* use CUDA to evaluate pi by summation
*/
#include <stdio.h>
#include <cuda.h>
#include <sys/time.h>
#define NLOOP 1
#define NPTS 1000000000 // Number of points (one billion)
#define NUM_THREAD 256 // Number of threads per block (max is 1024)
#define NUM_BLOCK (NPTS + NUM_THREAD - 1) / NUM_THREAD // Number of blocks per grid
int tid;
double pi = 0;
// Kernel that executes on the GPU
__global__ void compute_pi(float *sum) {
unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x; // Sequential thread index across the blocks
if (idx < NPTS)
sum[idx] = 0.5/((idx-0.75)*(idx-0.25));
}
// Main routine that executes on the host
int main(void) {
unsigned long int start_time,end_time;
struct timeval start,end;
double flops;
int blocksPerGrid = (NPTS + NUM_THREAD - 1) / NUM_THREAD; // Compute number of blocks needed
float *sumHost, *sumDev; // Pointer to host & device arrays
size_t size = blocksPerGrid*NUM_THREAD*sizeof(float); //Array memory size
sumHost = (float *)malloc(size); // Allocate array on host
cudaMalloc((void **) &sumDev, size); // Allocate array on device
// Initialize array in device to 0
cudaMemset(sumDev, 0, size);
gettimeofday(&start, NULL);
// Do calculation on device
compute_pi <<<blocksPerGrid, NUM_THREAD>>> (sumDev); // call CUDA kernel
// Retrieve result from device and store it in host array
cudaMemcpy(sumHost, sumDev, size, cudaMemcpyDeviceToHost);
// Reduce
for(tid=1; tid<NPTS; tid++)
pi += sumHost[tid];
gettimeofday(&end, NULL);
// Print results
start_time = start.tv_sec * 1e6 + start.tv_usec;
end_time = end.tv_sec * 1e6 + end.tv_usec;
flops = NLOOP*(NPTS*(5.0/(end_time-start_time)));
printf("processes = %d: %d blocks and %d threads per block\n", blocksPerGrid*NUM_THREAD, blocksPerGrid, NUM_THREAD);
printf("NPTS = %d, NLOOP = %d, pi = %f\n", NPTS, NLOOP, pi/NLOOP);
printf("time = %f, estimated GFlops = %f\n",(end_time-start_time)/1.0e9,flops);
// Cleanup
free(sumHost);
cudaFree(sumDev);
return 0;
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment