Browse Source

Update a few apps to use HIP

hipcc
Nicholas Wilt 4 years ago committed by Nicholas Wilt
parent
commit
eafe3b9439
  1. 47
      chLib/chError.h
  2. 14
      concurrency/breakevenHtoDMemcpy.cu
  3. 13
      concurrency/nullKernelAsync.cu
  4. 18
      concurrency/nullKernelSync.cu
  5. 26
      microbench/globalRead.cu

47
chLib/chError.h

@ -39,7 +39,6 @@ @@ -39,7 +39,6 @@
*
*/
#ifndef __CHERROR_H__
#define __CHERROR_H__
@ -47,15 +46,27 @@ @@ -47,15 +46,27 @@
#include <stdio.h>
#endif
#include <chCUDA.h>
#ifdef __HIPCC__
#include <hip/hip_runtime.h>
/*
 * cuda( fn ) - call the HIP runtime entry point hip<fn> and check the result.
 *
 * The argument is the API name without its prefix, followed by the argument
 * list: cuda(Malloc( &p, n )); expands to a call to hipMalloc( &p, n ).
 *
 * Requirements on the enclosing function:
 *   - a variable named `status` must be in scope to receive the result
 *     (call sites declare it as hipError_t);
 *   - a label named `Error` must exist; control jumps there whenever the
 *     call returns anything other than hipSuccess.
 *
 * The expansion intentionally does NOT end in a semicolon. The call site
 * provides it, so the macro acts as exactly one statement and stays valid
 * as the body of an unbraced if/else (a trailing ';' inside the macro would
 * produce an extra empty statement and orphan the 'else').
 */
#define cuda( fn ) do { \
        status = (hip##fn); \
        if ( hipSuccess != (status) ) { \
            goto Error; \
        } \
    } while (0)
#else
#ifndef NO_CUDA
#include <chCUDA.h>
template<typename T>
inline const char *
chGetErrorString( T status )
{
return cudaGetErrorString(status);
return hipGetErrorString(status);
//return cudaGetErrorString(status);
}
template<>
@ -163,7 +174,7 @@ chGetErrorString( CUresult status ) @@ -163,7 +174,7 @@ chGetErrorString( CUresult status )
#ifdef DEBUG
#define CUDART_CHECK( fn ) do { \
(status) = (fn); \
if ( cudaSuccess != (status) ) { \
if ( hipSuccess != (status) ) { \
fprintf( stderr, "CUDA Runtime Failure (line %d of file %s):\n\t" \
"%s returned 0x%x (%s)\n", \
__LINE__, __FILE__, #fn, status, chGetErrorString(status) ); \
@ -172,9 +183,9 @@ chGetErrorString( CUresult status ) @@ -172,9 +183,9 @@ chGetErrorString( CUresult status )
} while (0);
#define cuda( fn ) do { \
(status) = (cuda##fn); \
if ( cudaSuccess != (status) ) { \
fprintf( stderr, "CUDA Runtime Failure (line %d of file %s):\n\t" \
(status) = (hip##fn); \
if ( hipSuccess != (status) ) { \
fprintf( stderr, "HIP Runtime Failure (line %d of file %s):\n\t" \
"%s returned 0x%x (%s)\n", \
__LINE__, __FILE__, #fn, status, chGetErrorString(status) ); \
goto Error; \
@ -182,7 +193,7 @@ chGetErrorString( CUresult status ) @@ -182,7 +193,7 @@ chGetErrorString( CUresult status )
} while (0);
#define cu( fn ) do { \
(status) = (cu##fn); \
(status) = (hip##fn); \
if ( CUDA_SUCCESS != (status) ) { \
fprintf( stderr, "CUDA Runtime Failure (line %d of file %s):\n\t%s "\
"returned 0x%x (%s)\n", \
@ -203,17 +214,16 @@ chGetErrorString( CUresult status ) @@ -203,17 +214,16 @@ chGetErrorString( CUresult status )
#else
#define CUDART_CHECK( fn ) do { \
status = (fn); \
if ( cudaSuccess != (status) ) { \
if ( hipSuccess != (status) ) { \
goto Error; \
} \
} while (0);
#define cuda( fn ) do { \
status = (cuda##fn); \
if ( cudaSuccess != (status) ) { \
status = (hip##fn); \
if ( hipSuccess != (status) ) { \
goto Error; \
} \
} while (0);
@ -244,14 +254,14 @@ chGetErrorString( T status ) @@ -244,14 +254,14 @@ chGetErrorString( T status )
return "CUDA support is not built in.";
}
/*
 * Stand-in for cudaGetErrorString: ignores the error code it is given and
 * always returns the same fixed message.
 * NOTE(review): presumably compiled only when the CUDA runtime is not
 * available (the message text suggests so) - confirm against the
 * enclosing preprocessor conditionals.
 */
static inline const char* cudaGetErrorString( cudaError_t error )
{
return "CUDA support is not built in.";
}
//static inline const char* cudaGetErrorString( hipError_t error )
//{
// return "CUDA support is not built in.";
//}
#define CUDART_CHECK( fn ) do { \
status = (fn); \
if ( cudaSuccess != (status) ) { \
if ( hipSuccess != (status) ) { \
goto Error; \
} \
} while (0);
@ -266,3 +276,6 @@ static inline const char* cudaGetErrorString( cudaError_t error ) @@ -266,3 +276,6 @@ static inline const char* cudaGetErrorString( cudaError_t error )
#endif
#endif
#endif // __HIPCC__

14
concurrency/breakevenHtoDMemcpy.cu

@ -38,13 +38,17 @@ @@ -38,13 +38,17 @@
#include <stdio.h>
#ifdef __HIPCC__
#endif
#include "chError.h"
#include "chTimer.h"
int
main( int argc, char *argv[] )
{
cudaError_t status;
hipError_t status;
int *deviceInt = 0;
int *hostInt = 0;
const int cIterations = 100000;
@ -58,7 +62,7 @@ main( int argc, char *argv[] ) @@ -58,7 +62,7 @@ main( int argc, char *argv[] )
chTimerTimestamp start, stop;
cuda(Malloc( &deviceInt, numBytes ) );
cuda(HostAlloc( &hostInt, numBytes, 0 ) );
cuda(HostMalloc( (void **) &hostInt, numBytes, 0 ) );
for ( size_t byteCount = byteIncrement;
byteCount <= numBytes;
@ -68,7 +72,7 @@ main( int argc, char *argv[] ) @@ -68,7 +72,7 @@ main( int argc, char *argv[] )
chTimerGetTime( &start );
for ( int i = 0; i < cIterations; i++ ) {
cuda(MemcpyAsync( deviceInt, hostInt, byteCount,
cudaMemcpyHostToDevice, NULL ) );
hipMemcpyHostToDevice, NULL ) );
}
cuda(DeviceSynchronize() );
chTimerGetTime( &stop );
@ -81,8 +85,8 @@ main( int argc, char *argv[] ) @@ -81,8 +85,8 @@ main( int argc, char *argv[] )
}
cudaFree( deviceInt );
cudaFreeHost( hostInt );
hipFree( deviceInt );
hipHostFree( hostInt );
return 0;
Error:
printf( "Error performing allocation\n" );

13
concurrency/nullKernelAsync.cu

@ -38,9 +38,18 @@ @@ -38,9 +38,18 @@
#include <stdio.h>
#include "chError.h"
//#include "chError.h"
#include "chTimer.h"
#include <hip/hip_runtime.h>
/*
 * cuda( fn ) - checked call into the HIP runtime.
 *
 * Pastes the prefix `hip` onto the argument, so cuda(DeviceSynchronize());
 * calls hipDeviceSynchronize(). The return value is written to the
 * caller's `status` variable; on any value other than hipSuccess the macro
 * jumps to the caller's `Error` label. Both `status` and `Error` must
 * therefore exist in every function that uses this macro.
 *
 * No semicolon follows `while (0)`: the one written at the call site
 * completes the statement, which keeps the macro safe to use as the sole
 * statement of an if/else branch.
 */
#define cuda( fn ) do { \
        status = (hip##fn); \
        if ( hipSuccess != (status) ) { \
            goto Error; \
        } \
    } while (0)
__global__
void
NullKernel()
@ -50,7 +59,7 @@ NullKernel() @@ -50,7 +59,7 @@ NullKernel()
double
usPerLaunch( int cIterations )
{
cudaError_t status;
hipError_t status;
double microseconds, ret;
chTimerTimestamp start, stop;

18
concurrency/nullKernelSync.cu

@ -38,7 +38,17 @@ @@ -38,7 +38,17 @@
#include <stdio.h>
#include "chError.h"
//#include "chError.h"
#include <hip/hip_runtime.h>
/*
 * cuda( fn ) - run hip<fn> and bail out on failure.
 *
 * `fn` is the runtime call minus its `hip` prefix, including its argument
 * list. The result lands in `status` (which the calling function must
 * declare); if it differs from hipSuccess, execution continues at the
 * calling function's `Error` label.
 *
 * The macro body is a do/while(0) with no trailing semicolon, so that
 * `cuda(X);` is a single statement. Ending the macro with its own ';'
 * would break `if (c) cuda(X); else ...`.
 */
#define cuda( fn ) do { \
        status = (hip##fn); \
        if ( hipSuccess != (status) ) { \
            goto Error; \
        } \
    } while (0)
#include "chTimer.h"
__global__
@ -50,11 +60,11 @@ NullKernel() @@ -50,11 +60,11 @@ NullKernel()
double
usPerLaunch( int cIterations )
{
cudaError_t status;
hipError_t status;
double microseconds, ret;
chTimerTimestamp start, stop;
cuda(Free(0) );
hipFree(0);//(Free(0) );
chTimerGetTime( &start );
for ( int i = 0; i < cIterations; i++ ) {
@ -67,7 +77,7 @@ usPerLaunch( int cIterations ) @@ -67,7 +77,7 @@ usPerLaunch( int cIterations )
ret = microseconds / (float) cIterations;
Error:
return (status) ? 0.0 : ret;
return ret;//return (status) ? 0.0 : ret;
}
int

26
microbench/globalRead.cu

@ -131,10 +131,10 @@ BandwidthReads( size_t N, int cBlocks, int cThreads ) @@ -131,10 +131,10 @@ BandwidthReads( size_t N, int cBlocks, int cThreads )
double elapsedTime;
float ms;
int cIterations;
cudaError_t status;
hipError_t status;
T sumCPU;
cudaEvent_t evStart = 0;
cudaEvent_t evStop = 0;
hipEvent_t evStart = 0;
hipEvent_t evStop = 0;
cuda(Malloc( &in, N*sizeof(T) ) );
cuda(Malloc( &out, cBlocks*cThreads*sizeof(T) ) );
@ -154,14 +154,14 @@ BandwidthReads( size_t N, int cBlocks, int cThreads ) @@ -154,14 +154,14 @@ BandwidthReads( size_t N, int cBlocks, int cThreads )
hostIn[i] = nextrand;
}
cuda(Memcpy( in, hostIn, N*sizeof(T), cudaMemcpyHostToDevice ) );
cuda(Memcpy( in, hostIn, N*sizeof(T), hipMemcpyHostToDevice ) );
cuda(EventCreate( &evStart ) );
cuda(EventCreate( &evStop ) );
{
// confirm that kernel launch with this configuration writes correct result
GlobalReads<T,n><<<cBlocks,cThreads>>>( out, in+bOffset, N-bOffset, true );
cuda(Memcpy( hostOut, out, cBlocks*cThreads*sizeof(T), cudaMemcpyDeviceToHost ) );
cuda(Memcpy( hostOut, out, cBlocks*cThreads*sizeof(T), hipMemcpyDeviceToHost ) );
cuda(GetLastError() );
T sumGPU = T(0);
for ( size_t i = 0; i < cBlocks*cThreads; i++ ) {
@ -174,11 +174,11 @@ BandwidthReads( size_t N, int cBlocks, int cThreads ) @@ -174,11 +174,11 @@ BandwidthReads( size_t N, int cBlocks, int cThreads )
}
cIterations = 10;
cudaEventRecord( evStart );
cuda(EventRecord( evStart ) );
for ( int i = 0; i < cIterations; i++ ) {
GlobalReads<T,n><<<cBlocks,cThreads>>>( out, in+bOffset, N-bOffset, false );
}
cudaEventRecord( evStop );
cuda(EventRecord( evStop ) );
cuda(DeviceSynchronize() );
// make configurations that cannot launch error-out with 0 bandwidth
cuda(GetLastError() );
@ -193,10 +193,10 @@ BandwidthReads( size_t N, int cBlocks, int cThreads ) @@ -193,10 +193,10 @@ BandwidthReads( size_t N, int cBlocks, int cThreads )
Error:
if ( hostIn ) delete[] hostIn;
if ( hostOut ) delete[] hostOut;
cudaEventDestroy( evStart );
cudaEventDestroy( evStop );
cudaFree( in );
cudaFree( out );
hipEventDestroy( evStart );
hipEventDestroy( evStop );
hipFree( in );
hipFree( out );
return ret;
}
@ -252,10 +252,10 @@ Shmoo( size_t N, size_t threadStart, size_t threadStop, size_t cBlocks ) @@ -252,10 +252,10 @@ Shmoo( size_t N, size_t threadStart, size_t threadStop, size_t cBlocks )
int
main( int argc, char *argv[] )
{
cudaError_t status;
hipError_t status;
int device = 0;
int size = 16;
cudaDeviceProp prop;
hipDeviceProp_t prop;
if ( chCommandLineGet( &device, "device", argc, argv ) ) {
printf( "Using device %d...\n", device );
}

Loading…
Cancel
Save