diff --git a/chLib/chError_hip.h b/chLib/chError_hip.h index 7b47b23..79f5333 100644 --- a/chLib/chError_hip.h +++ b/chLib/chError_hip.h @@ -70,6 +70,11 @@ template hipError_t hipHostAlloc( T **pp, size_t N, unsigned int Fla template hipError_t hipHostGetDevicePointer( T **pp, void *p, unsigned int Flags ) { return hipHostGetDevicePointer( (void **) pp, p, Flags ); } + +template hipError_t hipMallocPitch( T **pp, size_t *pPitch, size_t WidthInBytes, size_t Height ) +{ + return hipMallocPitch( (void **) pp, pPitch, WidthInBytes, Height ); +} #endif // entry points @@ -79,17 +84,22 @@ template hipError_t hipHostGetDevicePointer( T **pp, void *p, unsign #define cudaHostGetDevicePointer hipHostGetDevicePointer #define cudaStreamDestroy hipStreamDestroy #define cudaEventDestroy hipEventDestroy +#define cudaFreeArray hipFreeArray #define cudaGetErrorString hipGetErrorString // data types typedef hipStream_t cudaStream_t; typedef hipDeviceProp_t cudaDeviceProp; +typedef hipArray cudaArray; + // defines #define cudaMemcpyHostToDevice hipMemcpyHostToDevice #define cudaMemcpyDeviceToHost hipMemcpyDeviceToHost #define cudaMemcpyDeviceToDevice hipMemcpyDeviceToDevice +#define cudaFuncCachePreferShared hipFuncCachePreferShared + #define cudaHostAllocMapped 0 #define cudaHostAllocPortable 0 diff --git a/chLib/pgm.cu b/chLib/pgm.cu index f6dbd40..6d6f04d 100644 --- a/chLib/pgm.cu +++ b/chLib/pgm.cu @@ -37,6 +37,7 @@ #include #include #include +#include #include "pgm.h" int @@ -55,6 +56,7 @@ pgmLoad( unsigned char *idata = NULL; unsigned char *ddata = NULL; size_t dPitch; + cudaError_t status; fp = fopen( filename, "rb" ); if ( fp == NULL) { @@ -88,15 +90,14 @@ pgmLoad( if ( (size_t) w != fread( idata+row*padWidth, 1, w, fp ) ) goto Error; } - if ( cudaSuccess != cudaMallocPitch( (void **) &ddata, &dPitch, padWidth, padHeight ) ) - goto Error; + cuda(MallocPitch( (void **) &ddata, &dPitch, padWidth, padHeight ) ); *pWidth = padWidth; *pHeight = padHeight; *pHostPitch = padWidth; *pHostData = idata; *pDeviceData = ddata; *pDevicePitch = (unsigned int) dPitch; - cudaMemcpy2D( ddata, dPitch, idata, padWidth, padWidth, padHeight, cudaMemcpyHostToDevice ); + cuda(Memcpy2D( ddata, dPitch, idata, padWidth, padWidth, padHeight, cudaMemcpyHostToDevice )); fclose(fp); return 0; Error: diff --git a/histogram/histogram.cu b/histogram/histogram.cu index d0a3167..643b3e7 100644 --- a/histogram/histogram.cu +++ b/histogram/histogram.cu @@ -70,7 +70,9 @@ texture texImage; #include "histogramPerThread4x64.cuh" #include "histogramPerThread4x32.cuh" +#ifndef __HIPCC__ #include "histogramNPP.cuh" +#endif using namespace cudahandbook::threading; @@ -266,7 +268,7 @@ main(int argc, char *argv[]) char *outputFilename = NULL; cudaArray *pArrayImage = NULL; - cudaChannelFormatDesc desc = cudaCreateChannelDesc(); + //cudaChannelFormatDesc desc = cudaCreateChannelDesc(); { g_numCPUCores = processorCount(); @@ -370,11 +372,12 @@ main(int argc, char *argv[]) } } - +#ifndef __HIPCC__ cuda(MallocArray( &pArrayImage, &desc, w, h ) ); cuda(MemcpyToArray( pArrayImage, 0, 0, hidata, w*h, cudaMemcpyHostToDevice ) ); cuda(BindTextureToArray( texImage, pArrayImage ) ); +#endif { cudaDeviceProp prop; @@ -451,8 +454,9 @@ main(int argc, char *argv[]) TEST_VECTOR( GPUhistogramPerThread4x32, false, 1, NULL ); TEST_VECTOR( GPUhistogramPerThread4x32_PeriodicMerge, false, 1, NULL ); } - +#ifndef __HIPCC__ TEST_VECTOR( GPUhistogramNPP, false, 1, NULL ); +#endif ret = 0; Error: diff --git a/histogram/histogramPerBlock.cuh b/histogram/histogramPerBlock.cuh index d32baec..13846ae 100644 --- a/histogram/histogramPerBlock.cuh +++ b/histogram/histogramPerBlock.cuh @@ -91,8 +91,8 @@ GPUhistogramPerBlock( cudaError_t status; cudaEvent_t start = 0, stop = 0; - cuda(EventCreate( &start, 0 ) ); - cuda(EventCreate( &stop, 0 ) ); + cuda(EventCreate( &start ) ); + cuda(EventCreate( &stop ) ); cuda(EventRecord( start, 0 ) ); //histogramPerBlock<<>>( pHist, x, y, w, h ); diff --git a/histogram/histogramPerBlockOffset.cuh b/histogram/histogramPerBlockOffset.cuh index d373ce6..6030371 100644 --- a/histogram/histogramPerBlockOffset.cuh +++ b/histogram/histogramPerBlockOffset.cuh @@ -99,8 +99,8 @@ GPUhistogramPerBlock( cudaError_t status; cudaEvent_t start = 0, stop = 0; - cuda(EventCreate( &start, 0 ) ); - cuda(EventCreate( &stop, 0 ) ); + cuda(EventCreate( &start ) ); + cuda(EventCreate( &stop ) ); cuda(EventRecord( start, 0 ) ); //histogramPerBlock<<>>( pHist, x, y, w, h ); diff --git a/histogram/histogramPerBlockReduce.cuh b/histogram/histogramPerBlockReduce.cuh index e006a29..5da535f 100644 --- a/histogram/histogramPerBlockReduce.cuh +++ b/histogram/histogramPerBlockReduce.cuh @@ -97,8 +97,8 @@ GPUhistogramPerBlockReduce( cudaEvent_t start = 0, stop = 0; - cuda(EventCreate( &start, 0 ) ); - cuda(EventCreate( &stop, 0 ) ); + cuda(EventCreate( &start ) ); + cuda(EventCreate( &stop ) ); cuda(EventRecord( start, 0 ) ); { diff --git a/histogram/histogramPerGrid.cuh b/histogram/histogramPerGrid.cuh index 2462a61..a40a88f 100644 --- a/histogram/histogramPerGrid.cuh +++ b/histogram/histogramPerGrid.cuh @@ -69,8 +69,8 @@ GPUhistogramPerGrid( cudaError_t status; cudaEvent_t start = 0, stop = 0; - cuda(EventCreate( &start, 0 ) ); - cuda(EventCreate( &stop, 0 ) ); + cuda(EventCreate( &start ) ); + cuda(EventCreate( &stop ) ); cuda(EventRecord( start, 0 ) ); // histogramPerGrid<<>>( pHist, w, h ); diff --git a/histogram/histogramPerThread4x32.cuh b/histogram/histogramPerThread4x32.cuh index b4f1e71..7ded062 100644 --- a/histogram/histogramPerThread4x32.cuh +++ b/histogram/histogramPerThread4x32.cuh @@ -154,8 +154,8 @@ GPUhistogramPerThread4x32( int numthreads = threads.x*threads.y; int numblocks = bPeriodicMerge ? 256 : INTDIVIDE_CEILING( w*h, numthreads*(255/4) ); - cuda(EventCreate( &start, 0 ) ); - cuda(EventCreate( &stop, 0 ) ); + cuda(EventCreate( &start ) ); + cuda(EventCreate( &stop ) ); cuda(Memset( pHist, 0, 256*sizeof(unsigned int) ) ); diff --git a/histogram/histogramPerThread4x64.cuh b/histogram/histogramPerThread4x64.cuh index d76b08e..aede69e 100644 --- a/histogram/histogramPerThread4x64.cuh +++ b/histogram/histogramPerThread4x64.cuh @@ -93,8 +93,8 @@ GPUhistogramPerThread4x64( int numthreads = threads.x*threads.y; int numblocks = bPeriodicMerge ? 256 : INTDIVIDE_CEILING( w*h, numthreads*(255/4) ); - cuda(EventCreate( &start, 0 ) ); - cuda(EventCreate( &stop, 0 ) ); + cuda(EventCreate( &start ) ); + cuda(EventCreate( &stop ) ); cuda(Memset( pHist, 0, 256*sizeof(unsigned int) ) ); diff --git a/histogram/histogramPerThread64.cuh b/histogram/histogramPerThread64.cuh index 427c080..fd8c45a 100644 --- a/histogram/histogramPerThread64.cuh +++ b/histogram/histogramPerThread64.cuh @@ -108,8 +108,8 @@ GPUhistogramPerThread64( int numthreads = threads.x*threads.y; int numblocks = INTDIVIDE_CEILING( w*h, numthreads*255 ); - cuda(EventCreate( &start, 0 ) ); - cuda(EventCreate( &stop, 0 ) ); + cuda(EventCreate( &start ) ); + cuda(EventCreate( &stop ) ); cuda(Memset( pHist, 0, 256*sizeof(unsigned int) ) ); diff --git a/scan/int/scan2Level.cuh b/scan/int/scan2Level.cuh index 10ad0d0..d079bce 100644 --- a/scan/int/scan2Level.cuh +++ b/scan/int/scan2Level.cuh @@ -168,11 +168,8 @@ scan2Level( T *out, const T *in, size_t N, int b ) cudaError_t status; T *gPartials = 0; - status = cudaGetSymbolAddress( - (void **) &gPartials, - g_globalPartials ); + cuda(GetSymbolAddress( (void **) &gPartials, g_globalPartials )); - if ( cudaSuccess == status ) { // // ceil(N/b) = number of partial sums to compute @@ -218,6 +215,7 @@ scan2Level( T *out, const T *in, size_t N, int b ) N, elementsPerPartial ); } +Error:; } template diff --git a/scan/int/testScan.cu b/scan/int/testScan.cu index b8cb0ee..38e99ee 100644 --- a/scan/int/testScan.cu +++ b/scan/int/testScan.cu @@ -163,7 +163,7 @@ main( int argc, char *argv[] ) { cudaDeviceProp prop; - cudaGetDeviceProperties( &prop, 0 ); + cuda(GetDeviceProperties( &prop, 0 )); maxThreads = prop.maxThreadsPerBlock; }