From 096e5506f05ca4431c0676b18a3abef24517f66d Mon Sep 17 00:00:00 2001 From: Nicholas Wilt Date: Fri, 2 Dec 2016 19:46:08 -0600 Subject: [PATCH] Port reduction sample code to new error handling --- reduction/reduction.cu | 32 ++++++++++++++++---------------- reduction/reductionTemplated.cu | 32 ++++++++++++++++---------------- 2 files changed, 32 insertions(+), 32 deletions(-) diff --git a/reduction/reduction.cu b/reduction/reduction.cu index 247b869..c8d9802 100644 --- a/reduction/reduction.cu +++ b/reduction/reduction.cu @@ -81,13 +81,13 @@ TimedReduction( cudaEvent_t stop = 0; cudaError_t status; - CUDART_CHECK( cudaMalloc( &deviceAnswer, sizeof(int) ) ); - CUDART_CHECK( cudaMalloc( &partialSums, cBlocks*sizeof(int) ) ); - CUDART_CHECK( cudaEventCreate( &start ) ); - CUDART_CHECK( cudaEventCreate( &stop ) ); - CUDART_CHECK( cudaThreadSynchronize() ); + cuda(Malloc( &deviceAnswer, sizeof(int) ) ); + cuda(Malloc( &partialSums, cBlocks*sizeof(int) ) ); + cuda(EventCreate( &start ) ); + cuda(EventCreate( &stop ) ); + cuda(ThreadSynchronize() ); - CUDART_CHECK( cudaEventRecord( start, 0 ) ); + cuda(EventRecord( start, 0 ) ); hostReduction( deviceAnswer, partialSums, @@ -95,8 +95,8 @@ TimedReduction( N, cBlocks, cThreads ); - CUDART_CHECK( cudaEventRecord( stop, 0 ) ); - CUDART_CHECK( cudaMemcpy( + cuda(EventRecord( stop, 0 ) ); + cuda(Memcpy( answer, deviceAnswer, sizeof(int), @@ -159,13 +159,13 @@ usPerInvocation( int cIterations, size_t N, double ret = 0.0f; chTimerTimestamp start, stop; - CUDART_CHECK( cudaMalloc( &smallArray, N*sizeof(int) ) ); - CUDART_CHECK( cudaMalloc( &partialSums, 1*sizeof(int) ) ); + cuda(Malloc( &smallArray, N*sizeof(int) ) ); + cuda(Malloc( &partialSums, 1*sizeof(int) ) ); chTimerGetTime( &start ); for ( int i = 0; i < cIterations; i++ ) { pfnReduction( partialSums, partialSums, smallArray, N, 1, 256 ); } - CUDART_CHECK( cudaThreadSynchronize() ); + cuda(ThreadSynchronize() ); chTimerGetTime( &stop ); ret = chTimerElapsedTime( &start, &stop ); ret = (ret / (double) cIterations) * 1e6; @@ -209,10 +209,10 @@ main( int argc, char *argv[] ) hostData = (int *) malloc( cInts*sizeof(int) ); if ( ! hostData ) goto Error; - CUDART_CHECK( cudaSetDevice( device ) ); - CUDART_CHECK( cudaSetDeviceFlags( cudaDeviceMapHost ) ); - CUDART_CHECK( cudaMalloc( &deviceData, cInts*sizeof(int) ) ); - CUDART_CHECK( cudaGetDeviceProperties( &props, 0 ) ); + cuda(SetDevice( device ) ); + cuda(SetDeviceFlags( cudaDeviceMapHost ) ); + cuda(Malloc( &deviceData, cInts*sizeof(int) ) ); + cuda(GetDeviceProperties( &props, 0 ) ); sum = 0; for ( size_t i = 0; i < cInts; i++ ) { @@ -220,7 +220,7 @@ main( int argc, char *argv[] ) sum += value; hostData[i] = value; } - CUDART_CHECK( cudaMemcpy( deviceData, hostData, cInts*sizeof(int), + cuda(Memcpy( deviceData, hostData, cInts*sizeof(int), cudaMemcpyHostToDevice ) ); { diff --git a/reduction/reductionTemplated.cu b/reduction/reductionTemplated.cu index 4af9a52..8be6a23 100644 --- a/reduction/reductionTemplated.cu +++ b/reduction/reductionTemplated.cu @@ -125,18 +125,18 @@ TimedReduction( cudaEvent_t stop = 0; cudaError_t status; - CUDART_CHECK( cudaMalloc( &deviceAnswer, sizeof(ReductionType) ) ); - CUDART_CHECK( cudaMalloc( &partialSums, cBlocks*sizeof(ReductionType) ) ); - CUDART_CHECK( cudaEventCreate( &start ) ); - CUDART_CHECK( cudaEventCreate( &stop ) ); - CUDART_CHECK( cudaThreadSynchronize() ); + cuda(Malloc( &deviceAnswer, sizeof(ReductionType) ) ); + cuda(Malloc( &partialSums, cBlocks*sizeof(ReductionType) ) ); + cuda(EventCreate( &start ) ); + cuda(EventCreate( &stop ) ); + cuda(ThreadSynchronize() ); - CUDART_CHECK( cudaEventRecord( start, 0 ) ); + cuda(EventRecord( start, 0 ) ); hostReduction( deviceAnswer, partialSums, deviceIn, N, cBlocks, cThreads ); - CUDART_CHECK( cudaEventRecord( stop, 0 ) ); - CUDART_CHECK( cudaMemcpy( answer, deviceAnswer, sizeof(T), cudaMemcpyDeviceToHost ) ); + cuda(EventRecord( stop, 0 ) ); + cuda(Memcpy( answer, deviceAnswer, sizeof(T), cudaMemcpyDeviceToHost ) ); - CUDART_CHECK( cudaEventElapsedTime( &ms, start, stop ) ) + cuda(EventElapsedTime( &ms, start, stop ) ) ret = ms * 1000.0f; // fall through to free resources before returning @@ -203,13 +203,13 @@ usPerInvocation( int cIterations, size_t N, double ret = 0.0f; chTimerTimestamp start, stop; - CUDART_CHECK( cudaMalloc( &smallArray, N*sizeof(T) ) ); - CUDART_CHECK( cudaMalloc( &partialSums, 1*sizeof(ReductionType) ) ); + cuda(Malloc( &smallArray, N*sizeof(T) ) ); + cuda(Malloc( &partialSums, 1*sizeof(ReductionType) ) ); chTimerGetTime( &start ); for ( int i = 0; i < cIterations; i++ ) { pfnReduction( partialSums, partialSums, smallArray, N, 1, 256 ); } - CUDART_CHECK( cudaThreadSynchronize() ); + cuda(ThreadSynchronize() ); chTimerGetTime( &stop ); ret = chTimerElapsedTime( &start, &stop ); ret = (ret / (double) cIterations) * 1e6; @@ -246,10 +246,10 @@ ShmooReport( size_t N, bool bFloat ) hostData = (T *) malloc( N*sizeof(T) ); if ( ! hostData ) goto Error; - CUDART_CHECK( cudaSetDeviceFlags( cudaDeviceMapHost ) ); - CUDART_CHECK( cudaMalloc( &deviceData, N*sizeof(T) ) ); - CUDART_CHECK( cudaGetDeviceProperties( &props, 0 ) ); - CUDART_CHECK( cudaMemcpy( deviceData, hostData, N*sizeof(T), cudaMemcpyHostToDevice ) ); + cuda(SetDeviceFlags( cudaDeviceMapHost ) ); + cuda(Malloc( &deviceData, N*sizeof(T) ) ); + cuda(GetDeviceProperties( &props, 0 ) ); + cuda(Memcpy( deviceData, hostData, N*sizeof(T), cudaMemcpyHostToDevice ) ); forkPrint( g_fileShmoo, "N\tThreads(1)\tus(1)\t" "Threads(2)\tus(2)\t"