Commit fde4cf7c authored by Neil Gershenfeld
Browse files

wip

parent 64e3c3be
Pipeline #16405 passed with stage
in 1 second
......@@ -7,10 +7,27 @@
#include <iostream>
#include <chrono>
#include <cstdint>
#include <string>
// Launch configuration used for the kernel launches in this file.
uint64_t blocks  = 1024;   // blocks per grid
uint64_t threads = 1024;   // threads per block

// Loop count handed to the init kernel (each thread starts at nloop*i+1).
uint64_t nloop = 1000000;

// Total number of array elements: one per launched thread.
uint64_t npts = blocks * threads;
// Report a pending CUDA runtime error, if any, on stderr.
//
// cudaGetLastError() reads and clears the most recent runtime error, so this
// is meant to be called right after a kernel launch to surface launch
// failures. When an error is pending, "<msg>: <error string>" is printed;
// otherwise nothing is printed. Execution continues in both cases.
//
// msg: label identifying the call site (for example the kernel name).
//
// Standard-library names are qualified with std:: so this compiles whether or
// not the file has a using-directive in scope.
void cudaCheck(std::string msg) {
    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess)
        std::cerr << msg << ": " << cudaGetErrorString(err) << std::endl;
}
// Forward declaration: the kernel is defined further down in the file, but it
// has to be declared before it can be launched from reduce().
__global__ void reduce_sum(double *arr,uint64_t len);

// Sum the elements of arr in place by repeated halving.
//
// Each pass launches reduce_sum with the current half-length len; the kernel
// adds arr[i+len] into arr[i] for every i < len. len starts at npts/2 and is
// halved after each pass until it reaches 0, so the last pass runs with
// len == 1 (or len == 0 if npts < 2, in which case the single launch does
// nothing).
//
// arr: array of at least npts doubles; it is passed straight to the kernel,
//      so it is expected to be device-accessible memory.
//
// NOTE(review): halving truncates, so the full sum only ends up in arr[0]
// when npts is a power of two. It is with the defaults (1024*1024).
// NOTE(review): assumes blocks*threads covers every index below len; true for
// the defaults, confirm if the launch configuration changes.
void reduce(double *arr) {
    uint64_t len = npts >> 1;
    while (1) {
        reduce_sum<<<blocks,threads>>>(arr,len);
        // Catches launch-time errors only; kernel launches are asynchronous.
        cudaCheck("reduce");
        len = len >> 1;
        if (len == 0)
            return;
    }
}
__global__ void init(double *arr,uint64_t nloop) {
uint64_t i = blockIdx.x*blockDim.x+threadIdx.x;
uint64_t start = nloop*i+1;
......@@ -24,22 +41,6 @@ __global__ void reduce_sum(double *arr,uint64_t len) {
if (i < len)
arr[i] += arr[i+len];
}
// In-place halving sum over arr.
//
// Launches reduce_sum once per pass, starting with a half-length of npts/2
// and halving it after every pass; the loop stops once the half-length
// reaches zero. At least one launch is always issued. Each launch is followed
// by cudaCheck("reduce") to report launch errors.
//
// arr: pointer handed to the kernel, which adds arr[i+len] into arr[i].
void reduce(double *arr) {
    uint64_t half = npts >> 1;
    do {
        reduce_sum<<<blocks,threads>>>(arr,half);
        cudaCheck("reduce");
        half >>= 1;
    } while (half != 0);
}
// Print the last CUDA runtime error, if there is one, to stderr.
//
// Calls cudaGetLastError(), which reads and clears the most recent error. If
// the result is not cudaSuccess, writes "<msg>: <error string>" followed by a
// newline; otherwise stays silent. Never aborts.
//
// msg: text prefixed to the error string to identify the caller.
//
// std:: qualification is explicit so no using-directive is required.
void cudaCheck(std::string msg) {
    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess)
        std::cerr << msg << ": " << cudaGetErrorString(err) << std::endl;
}
int main(void) {
double harr[1],*darr;
cudaMalloc(&darr,npts*sizeof(double));
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment