Commit d913aa21 authored by Neil Gershenfeld
Browse files

wip

parent d6068c8f
Pipeline #16409 passed with stage in 1 second
......@@ -35,7 +35,7 @@ void reduce(double *arr) {
uint64_t len = npts >> 1;
while (1) {
reduce_sum<<<blocks,threads>>>(arr,len);
cudaCheck("reduce");
cudaCheck("reduce_sum");
len = len >> 1;
if (len == 0)
return;
......@@ -50,10 +50,12 @@ int main(void) {
cudaCheck("init");
reduce(darr);
cudaDeviceSynchronize();
cudaCheck("cudaDeviceSynchronize");
auto tend = std::chrono::high_resolution_clock::now();
auto dt = std::chrono::duration_cast<std::chrono::microseconds>(tend-tstart).count();
auto mflops = npts*nloop*5.0/dt;
cudaMemcpy(harr,darr,8,cudaMemcpyDeviceToHost);
cudaCheck("cudaMemcpy");
printf("npts = %ld, nloop = %ld, pi = %lf\n",npts,nloop,harr[0]);
printf("time = %f, estimated MFlops = %f\n",1e-6*dt,mflops);
cudaFree(darr);
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment