Browse Source

Get Reduction sample to build under HIP

get-last-error-is-asynchronous
Nicholas Wilt 3 years ago
parent
commit
96a8b8240b
  1. 12
      reduction/reduction.cu
  2. 4
      reduction/reduction5Atomics.cuh

12
reduction/reduction.cu

@@ -59,10 +59,12 @@ typedef struct TimingResult_struct {
 double
 chEventBandwidth( cudaEvent_t start, cudaEvent_t stop, double cBytes )
 {
+    cudaError_t status;
     float ms;
-    if ( cudaSuccess != cudaEventElapsedTime( &ms, start, stop ) )
-        return 0.0;
+    cuda(EventElapsedTime( &ms, start, stop ) );
     return cBytes * 1000.0f / ms;
+Error:
+    return 0.0;
 }
 typedef void (*pfnReduction)(int *out, int *intermediateSums, const int *in, size_t N, int cBlocks, int cThreads);
@@ -120,12 +122,13 @@ Shmoo( TimingResult *timingResult,
        bool bPrint, bool bPrintMax,
        void (*pfnReduce)(int *out, int *intermediateSums, const int *in, size_t N, int cBlocks, int cThreads) )
 {
+    cudaError_t status;
     double maxBW = 0.0f;
     int maxThreads;
     int cBlocks = 1800;
     cudaDeviceProp props;
-    cudaGetDeviceProperties( &props, 0 );
+    cuda(GetDeviceProperties( &props, 0 ));
     for ( int cThreads = 128; cThreads <= props.maxThreadsPerBlock; cThreads*=2 ) {
         int sum = 0;
         double bw = TimedReduction( &sum, deviceData, cInts, cBlocks, cThreads, pfnReduce );
@@ -147,6 +150,7 @@ Shmoo( TimingResult *timingResult,
         printf( "Max bandwidth of %.2f G/s attained by %d blocks "
             "of %d threads\n", maxBW, cBlocks, maxThreads );
     }
+Error:;
 }
 double
@@ -254,7 +258,7 @@ main( int argc, char *argv[] )
         sum += hostData[i];
     }
-    printf( "Testing on %d integers\n", cInts );
+    printf( "Testing on %d integers\n", (int) cInts );
     printf( "\t\t" );
     for ( int i = 128; i <= props.maxThreadsPerBlock; i *= 2 ) {
         printf( "%d\t", i );

4
reduction/reduction5Atomics.cuh

@@ -54,6 +54,8 @@ Reduction5( int *answer, int *partial,
             const int *in, size_t N,
             int numBlocks, int numThreads )
 {
-    cudaMemset( answer, 0, sizeof(int) );
+    cudaError_t status;
+    cuda(Memset( answer, 0, sizeof(int) ));
     Reduction5_kernel<<< numBlocks, numThreads>>>( answer, in, N );
+Error:;
 }

Loading…
Cancel
Save