Browse Source

Port reduction sample code to new error handling

divergence
Nicholas Wilt 9 years ago
parent
commit
096e5506f0
  1. 32
      reduction/reduction.cu
  2. 32
      reduction/reductionTemplated.cu

32
reduction/reduction.cu

@@ -81,13 +81,13 @@ TimedReduction(
cudaEvent_t stop = 0; cudaEvent_t stop = 0;
cudaError_t status; cudaError_t status;
CUDART_CHECK( cudaMalloc( &deviceAnswer, sizeof(int) ) ); cuda(Malloc( &deviceAnswer, sizeof(int) ) );
CUDART_CHECK( cudaMalloc( &partialSums, cBlocks*sizeof(int) ) ); cuda(Malloc( &partialSums, cBlocks*sizeof(int) ) );
CUDART_CHECK( cudaEventCreate( &start ) ); cuda(EventCreate( &start ) );
CUDART_CHECK( cudaEventCreate( &stop ) ); cuda(EventCreate( &stop ) );
CUDART_CHECK( cudaThreadSynchronize() ); cuda(ThreadSynchronize() );
CUDART_CHECK( cudaEventRecord( start, 0 ) ); cuda(EventRecord( start, 0 ) );
hostReduction( hostReduction(
deviceAnswer, deviceAnswer,
partialSums, partialSums,
@@ -95,8 +95,8 @@ TimedReduction(
N, N,
cBlocks, cBlocks,
cThreads ); cThreads );
CUDART_CHECK( cudaEventRecord( stop, 0 ) ); cuda(EventRecord( stop, 0 ) );
CUDART_CHECK( cudaMemcpy( cuda(Memcpy(
answer, answer,
deviceAnswer, deviceAnswer,
sizeof(int), sizeof(int),
@@ -159,13 +159,13 @@ usPerInvocation( int cIterations, size_t N,
double ret = 0.0f; double ret = 0.0f;
chTimerTimestamp start, stop; chTimerTimestamp start, stop;
CUDART_CHECK( cudaMalloc( &smallArray, N*sizeof(int) ) ); cuda(Malloc( &smallArray, N*sizeof(int) ) );
CUDART_CHECK( cudaMalloc( &partialSums, 1*sizeof(int) ) ); cuda(Malloc( &partialSums, 1*sizeof(int) ) );
chTimerGetTime( &start ); chTimerGetTime( &start );
for ( int i = 0; i < cIterations; i++ ) { for ( int i = 0; i < cIterations; i++ ) {
pfnReduction( partialSums, partialSums, smallArray, N, 1, 256 ); pfnReduction( partialSums, partialSums, smallArray, N, 1, 256 );
} }
CUDART_CHECK( cudaThreadSynchronize() ); cuda(ThreadSynchronize() );
chTimerGetTime( &stop ); chTimerGetTime( &stop );
ret = chTimerElapsedTime( &start, &stop ); ret = chTimerElapsedTime( &start, &stop );
ret = (ret / (double) cIterations) * 1e6; ret = (ret / (double) cIterations) * 1e6;
@@ -209,10 +209,10 @@ main( int argc, char *argv[] )
hostData = (int *) malloc( cInts*sizeof(int) ); hostData = (int *) malloc( cInts*sizeof(int) );
if ( ! hostData ) if ( ! hostData )
goto Error; goto Error;
CUDART_CHECK( cudaSetDevice( device ) ); cuda(SetDevice( device ) );
CUDART_CHECK( cudaSetDeviceFlags( cudaDeviceMapHost ) ); cuda(SetDeviceFlags( cudaDeviceMapHost ) );
CUDART_CHECK( cudaMalloc( &deviceData, cInts*sizeof(int) ) ); cuda(Malloc( &deviceData, cInts*sizeof(int) ) );
CUDART_CHECK( cudaGetDeviceProperties( &props, 0 ) ); cuda(GetDeviceProperties( &props, 0 ) );
sum = 0; sum = 0;
for ( size_t i = 0; i < cInts; i++ ) { for ( size_t i = 0; i < cInts; i++ ) {
@@ -220,7 +220,7 @@ main( int argc, char *argv[] )
sum += value; sum += value;
hostData[i] = value; hostData[i] = value;
} }
CUDART_CHECK( cudaMemcpy( deviceData, hostData, cInts*sizeof(int), cuda(Memcpy( deviceData, hostData, cInts*sizeof(int),
cudaMemcpyHostToDevice ) ); cudaMemcpyHostToDevice ) );
{ {

32
reduction/reductionTemplated.cu

@@ -125,18 +125,18 @@ TimedReduction(
cudaEvent_t stop = 0; cudaEvent_t stop = 0;
cudaError_t status; cudaError_t status;
CUDART_CHECK( cudaMalloc( &deviceAnswer, sizeof(ReductionType) ) ); cuda(Malloc( &deviceAnswer, sizeof(ReductionType) ) );
CUDART_CHECK( cudaMalloc( &partialSums, cBlocks*sizeof(ReductionType) ) ); cuda(Malloc( &partialSums, cBlocks*sizeof(ReductionType) ) );
CUDART_CHECK( cudaEventCreate( &start ) ); cuda(EventCreate( &start ) );
CUDART_CHECK( cudaEventCreate( &stop ) ); cuda(EventCreate( &stop ) );
CUDART_CHECK( cudaThreadSynchronize() ); cuda(ThreadSynchronize() );
CUDART_CHECK( cudaEventRecord( start, 0 ) ); cuda(EventRecord( start, 0 ) );
hostReduction( deviceAnswer, partialSums, deviceIn, N, cBlocks, cThreads ); hostReduction( deviceAnswer, partialSums, deviceIn, N, cBlocks, cThreads );
CUDART_CHECK( cudaEventRecord( stop, 0 ) ); cuda(EventRecord( stop, 0 ) );
CUDART_CHECK( cudaMemcpy( answer, deviceAnswer, sizeof(T), cudaMemcpyDeviceToHost ) ); cuda(Memcpy( answer, deviceAnswer, sizeof(T), cudaMemcpyDeviceToHost ) );
CUDART_CHECK( cudaEventElapsedTime( &ms, start, stop ) ) cuda(EventElapsedTime( &ms, start, stop ) )
ret = ms * 1000.0f; ret = ms * 1000.0f;
// fall through to free resources before returning // fall through to free resources before returning
@@ -203,13 +203,13 @@ usPerInvocation( int cIterations, size_t N,
double ret = 0.0f; double ret = 0.0f;
chTimerTimestamp start, stop; chTimerTimestamp start, stop;
CUDART_CHECK( cudaMalloc( &smallArray, N*sizeof(T) ) ); cuda(Malloc( &smallArray, N*sizeof(T) ) );
CUDART_CHECK( cudaMalloc( &partialSums, 1*sizeof(ReductionType) ) ); cuda(Malloc( &partialSums, 1*sizeof(ReductionType) ) );
chTimerGetTime( &start ); chTimerGetTime( &start );
for ( int i = 0; i < cIterations; i++ ) { for ( int i = 0; i < cIterations; i++ ) {
pfnReduction( partialSums, partialSums, smallArray, N, 1, 256 ); pfnReduction( partialSums, partialSums, smallArray, N, 1, 256 );
} }
CUDART_CHECK( cudaThreadSynchronize() ); cuda(ThreadSynchronize() );
chTimerGetTime( &stop ); chTimerGetTime( &stop );
ret = chTimerElapsedTime( &start, &stop ); ret = chTimerElapsedTime( &start, &stop );
ret = (ret / (double) cIterations) * 1e6; ret = (ret / (double) cIterations) * 1e6;
@@ -246,10 +246,10 @@ ShmooReport( size_t N, bool bFloat )
hostData = (T *) malloc( N*sizeof(T) ); hostData = (T *) malloc( N*sizeof(T) );
if ( ! hostData ) if ( ! hostData )
goto Error; goto Error;
CUDART_CHECK( cudaSetDeviceFlags( cudaDeviceMapHost ) ); cuda(SetDeviceFlags( cudaDeviceMapHost ) );
CUDART_CHECK( cudaMalloc( &deviceData, N*sizeof(T) ) ); cuda(Malloc( &deviceData, N*sizeof(T) ) );
CUDART_CHECK( cudaGetDeviceProperties( &props, 0 ) ); cuda(GetDeviceProperties( &props, 0 ) );
CUDART_CHECK( cudaMemcpy( deviceData, hostData, N*sizeof(T), cudaMemcpyHostToDevice ) ); cuda(Memcpy( deviceData, hostData, N*sizeof(T), cudaMemcpyHostToDevice ) );
forkPrint( g_fileShmoo, "N\tThreads(1)\tus(1)\t" forkPrint( g_fileShmoo, "N\tThreads(1)\tus(1)\t"
"Threads(2)\tus(2)\t" "Threads(2)\tus(2)\t"

Loading…
Cancel
Save