Browse Source

Get Reduction sample to build under HIP

get-last-error-is-asynchronous
Nicholas Wilt 3 years ago
parent
commit
96a8b8240b
  1. 12
      reduction/reduction.cu
  2. 4
      reduction/reduction5Atomics.cuh

12
reduction/reduction.cu

@@ -59,10 +59,12 @@ typedef struct TimingResult_struct {
// Computes a transfer rate from the time elapsed between two CUDA events.
//
// Parameters:
//   start, stop - events bracketing the timed interval; they are passed
//                 straight to cudaEventElapsedTime.
//   cBytes      - amount of data attributed to the interval (presumably a
//                 byte count, going by the name - confirm against callers).
//
// Returns cBytes * 1000 / ms, i.e. a per-second rate given that
// cudaEventElapsedTime reports milliseconds, or 0.0 if the elapsed-time query
// fails.
//
// NOTE(review): ms is not checked against zero before the division.
double
chEventBandwidth( cudaEvent_t start, cudaEvent_t stop, double cBytes )
{
cudaError_t status;
float ms;
// NOTE(review): the elapsed-time query appears twice below - once with an
// explicit cudaSuccess comparison and once through the cuda() macro. This
// looks like the removed and added lines of a diff rendered together;
// confirm which form is intended before relying on this text.
if ( cudaSuccess != cudaEventElapsedTime( &ms, start, stop ) )
return 0.0;
// cuda() is not defined in this view; presumably a project macro that calls
// cudaEventElapsedTime, stores the result in `status`, and jumps to the
// Error label on failure - confirm against its definition.
cuda(EventElapsedTime( &ms, start, stop ) );
return cBytes * 1000.0f / ms;
Error:
// Failure path: report zero bandwidth rather than propagating the error code.
return 0.0;
}
typedef void (*pfnReduction)(int *out, int *intermediateSums, const int *in, size_t N, int cBlocks, int cThreads);
@@ -120,12 +122,13 @@ Shmoo( TimingResult *timingResult,
bool bPrint, bool bPrintMax,
void (*pfnReduce)(int *out, int *intermediateSums, const int *in, size_t N, int cBlocks, int cThreads) )
{
cudaError_t status;
double maxBW = 0.0f;
int maxThreads;
int cBlocks = 1800;
cudaDeviceProp props;
cudaGetDeviceProperties( &props, 0 );
cuda(GetDeviceProperties( &props, 0 ));
for ( int cThreads = 128; cThreads <= props.maxThreadsPerBlock; cThreads*=2 ) {
int sum = 0;
double bw = TimedReduction( &sum, deviceData, cInts, cBlocks, cThreads, pfnReduce );
@@ -147,6 +150,7 @@ Shmoo( TimingResult *timingResult,
printf( "Max bandwidth of %.2f G/s attained by %d blocks "
"of %d threads\n", maxBW, cBlocks, maxThreads );
}
Error:;
}
double
@@ -254,7 +258,7 @@ main( int argc, char *argv[] )
sum += hostData[i];
}
printf( "Testing on %d integers\n", cInts );
printf( "Testing on %d integers\n", (int) cInts );
printf( "\t\t" );
for ( int i = 128; i <= props.maxThreadsPerBlock; i *= 2 ) {
printf( "%d\t", i );

4
reduction/reduction5Atomics.cuh

@@ -54,6 +54,8 @@ Reduction5( int *answer, int *partial,
const int *in, size_t N,
int numBlocks, int numThreads )
{
cudaMemset( answer, 0, sizeof(int) );
cudaError_t status;
cuda(Memset( answer, 0, sizeof(int) ));
Reduction5_kernel<<< numBlocks, numThreads>>>( answer, in, N );
Error:;
}

Loading…
Cancel
Save