diff --git a/chLib/chError_hip.h b/chLib/chError_hip.h
index 7b47b23..79f5333 100644
--- a/chLib/chError_hip.h
+++ b/chLib/chError_hip.h
@@ -70,6 +70,11 @@ template<typename T> hipError_t hipHostAlloc( T **pp, size_t N, unsigned int Fla
 template<typename T> hipError_t hipHostGetDevicePointer( T **pp, void *p, unsigned int Flags ) {
     return hipHostGetDevicePointer( (void **) pp, p, Flags );
 }
+
+template<typename T> hipError_t hipMallocPitch( T **pp, size_t *pPitch, size_t WidthInBytes, size_t Height )
+{
+    return hipMallocPitch( (void **) pp, pPitch, WidthInBytes, Height );
+}
 #endif
 
 // entry points
@@ -79,17 +84,22 @@ template<typename T> hipError_t hipHostGetDevicePointer( T **pp, void *p, unsign
 #define cudaHostGetDevicePointer hipHostGetDevicePointer
 #define cudaStreamDestroy hipStreamDestroy
 #define cudaEventDestroy hipEventDestroy
+#define cudaFreeArray hipFreeArray
 #define cudaGetErrorString hipGetErrorString
 
 // data types
 typedef hipStream_t cudaStream_t;
 typedef hipDeviceProp_t cudaDeviceProp;
+typedef hipArray cudaArray;
+
 
 // defines
 #define cudaMemcpyHostToDevice hipMemcpyHostToDevice
 #define cudaMemcpyDeviceToHost hipMemcpyDeviceToHost
 #define cudaMemcpyDeviceToDevice hipMemcpyDeviceToDevice
 
+#define cudaFuncCachePreferShared hipFuncCachePreferShared
+
 #define cudaHostAllocMapped 0
 #define cudaHostAllocPortable 0
 
diff --git a/chLib/pgm.cu b/chLib/pgm.cu
index f6dbd40..6d6f04d 100644
--- a/chLib/pgm.cu
+++ b/chLib/pgm.cu
@@ -37,6 +37,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <assert.h>
+#include <chError.h>
 #include "pgm.h"
 
 int
@@ -55,6 +56,7 @@ pgmLoad(
     unsigned char *idata = NULL;
     unsigned char *ddata = NULL;
     size_t dPitch;
+    cudaError_t status;
 
     fp = fopen( filename, "rb" );
     if ( fp == NULL) {
@@ -88,15 +90,14 @@ pgmLoad(
         if ( (size_t) w != fread( idata+row*padWidth, 1, w, fp ) )
             goto Error;
     }
-    if ( cudaSuccess != cudaMallocPitch( (void **) &ddata, &dPitch, padWidth, padHeight ) )
-        goto Error;
+    cuda(MallocPitch( (void **) &ddata, &dPitch, padWidth, padHeight ) );
     *pWidth = padWidth;
     *pHeight = padHeight;
     *pHostPitch = padWidth;
     *pHostData = idata;
     *pDeviceData = ddata;
     *pDevicePitch = (unsigned int) dPitch;
-    cudaMemcpy2D( ddata, dPitch, idata, padWidth, padWidth, padHeight, cudaMemcpyHostToDevice );
+    cuda(Memcpy2D( ddata, dPitch, idata, padWidth, padWidth, padHeight, cudaMemcpyHostToDevice ));
     fclose(fp);
     return 0;
 Error:
diff --git a/histogram/histogram.cu b/histogram/histogram.cu
index d0a3167..643b3e7 100644
--- a/histogram/histogram.cu
+++ b/histogram/histogram.cu
@@ -70,7 +70,9 @@ texture<unsigned char, 2> texImage;
 #include "histogramPerThread4x64.cuh"
 #include "histogramPerThread4x32.cuh"
 
+#ifndef __HIPCC__
 #include "histogramNPP.cuh"
+#endif
 
 using namespace cudahandbook::threading;
 
@@ -266,7 +268,7 @@ main(int argc, char *argv[])
     char *outputFilename = NULL;
 
     cudaArray *pArrayImage = NULL;
-    cudaChannelFormatDesc desc = cudaCreateChannelDesc<unsigned char>();
+    //cudaChannelFormatDesc desc = cudaCreateChannelDesc<unsigned char>();
 
     {
         g_numCPUCores = processorCount();
@@ -370,11 +372,12 @@ main(int argc, char *argv[])
         }
     }
 
-
+#ifndef __HIPCC__
     cuda(MallocArray( &pArrayImage, &desc, w, h ) );
     cuda(MemcpyToArray( pArrayImage, 0, 0, hidata, w*h, cudaMemcpyHostToDevice ) );
         
     cuda(BindTextureToArray( texImage, pArrayImage ) );
+#endif
 
     {
         cudaDeviceProp prop;
@@ -451,8 +454,9 @@ main(int argc, char *argv[])
         TEST_VECTOR( GPUhistogramPerThread4x32, false, 1, NULL );
         TEST_VECTOR( GPUhistogramPerThread4x32_PeriodicMerge, false, 1, NULL );
     }
-
+#ifndef __HIPCC__
     TEST_VECTOR( GPUhistogramNPP, false, 1, NULL );
+#endif
 
     ret = 0;
 Error:
diff --git a/histogram/histogramPerBlock.cuh b/histogram/histogramPerBlock.cuh
index d32baec..13846ae 100644
--- a/histogram/histogramPerBlock.cuh
+++ b/histogram/histogramPerBlock.cuh
@@ -91,8 +91,8 @@ GPUhistogramPerBlock(
     cudaError_t status;
     cudaEvent_t start = 0, stop = 0;
 
-    cuda(EventCreate( &start, 0 ) );
-    cuda(EventCreate( &stop, 0 ) );
+    cuda(EventCreate( &start ) );
+    cuda(EventCreate( &stop ) );
 
     cuda(EventRecord( start, 0 ) );
     //histogramPerBlock<<<blocks,threads>>>( pHist, x, y, w, h );
diff --git a/histogram/histogramPerBlockOffset.cuh b/histogram/histogramPerBlockOffset.cuh
index d373ce6..6030371 100644
--- a/histogram/histogramPerBlockOffset.cuh
+++ b/histogram/histogramPerBlockOffset.cuh
@@ -99,8 +99,8 @@ GPUhistogramPerBlock(
     cudaError_t status;
     cudaEvent_t start = 0, stop = 0;
 
-    cuda(EventCreate( &start, 0 ) );
-    cuda(EventCreate( &stop, 0 ) );
+    cuda(EventCreate( &start ) );
+    cuda(EventCreate( &stop ) );
 
     cuda(EventRecord( start, 0 ) );
     //histogramPerBlock<<<blocks,threads>>>( pHist, x, y, w, h );
diff --git a/histogram/histogramPerBlockReduce.cuh b/histogram/histogramPerBlockReduce.cuh
index e006a29..5da535f 100644
--- a/histogram/histogramPerBlockReduce.cuh
+++ b/histogram/histogramPerBlockReduce.cuh
@@ -97,8 +97,8 @@ GPUhistogramPerBlockReduce(
     cudaEvent_t start = 0, stop = 0;
     
 
-    cuda(EventCreate( &start, 0 ) );
-    cuda(EventCreate( &stop, 0 ) );
+    cuda(EventCreate( &start ) );
+    cuda(EventCreate( &stop ) );
 
     cuda(EventRecord( start, 0 ) );
     {
diff --git a/histogram/histogramPerGrid.cuh b/histogram/histogramPerGrid.cuh
index 2462a61..a40a88f 100644
--- a/histogram/histogramPerGrid.cuh
+++ b/histogram/histogramPerGrid.cuh
@@ -69,8 +69,8 @@ GPUhistogramPerGrid(
     cudaError_t status;
     cudaEvent_t start = 0, stop = 0;
 
-    cuda(EventCreate( &start, 0 ) );
-    cuda(EventCreate( &stop, 0 ) );
+    cuda(EventCreate( &start ) );
+    cuda(EventCreate( &stop ) );
 
     cuda(EventRecord( start, 0 ) );
 //    histogramPerGrid<<<blocks,threads>>>( pHist, w, h );
diff --git a/histogram/histogramPerThread4x32.cuh b/histogram/histogramPerThread4x32.cuh
index b4f1e71..7ded062 100644
--- a/histogram/histogramPerThread4x32.cuh
+++ b/histogram/histogramPerThread4x32.cuh
@@ -154,8 +154,8 @@ GPUhistogramPerThread4x32(
     int numthreads = threads.x*threads.y;
     int numblocks = bPeriodicMerge ? 256 : INTDIVIDE_CEILING( w*h, numthreads*(255/4) );
 
-    cuda(EventCreate( &start, 0 ) );
-    cuda(EventCreate( &stop, 0 ) );
+    cuda(EventCreate( &start ) );
+    cuda(EventCreate( &stop ) );
 
     cuda(Memset( pHist, 0, 256*sizeof(unsigned int) ) );
 
diff --git a/histogram/histogramPerThread4x64.cuh b/histogram/histogramPerThread4x64.cuh
index d76b08e..aede69e 100644
--- a/histogram/histogramPerThread4x64.cuh
+++ b/histogram/histogramPerThread4x64.cuh
@@ -93,8 +93,8 @@ GPUhistogramPerThread4x64(
     int numthreads = threads.x*threads.y;
     int numblocks = bPeriodicMerge ? 256 : INTDIVIDE_CEILING( w*h, numthreads*(255/4) );
 
-    cuda(EventCreate( &start, 0 ) );
-    cuda(EventCreate( &stop, 0 ) );
+    cuda(EventCreate( &start ) );
+    cuda(EventCreate( &stop ) );
 
     cuda(Memset( pHist, 0, 256*sizeof(unsigned int) ) );
 
diff --git a/histogram/histogramPerThread64.cuh b/histogram/histogramPerThread64.cuh
index 427c080..fd8c45a 100644
--- a/histogram/histogramPerThread64.cuh
+++ b/histogram/histogramPerThread64.cuh
@@ -108,8 +108,8 @@ GPUhistogramPerThread64(
     int numthreads = threads.x*threads.y;
     int numblocks = INTDIVIDE_CEILING( w*h, numthreads*255 );
 
-    cuda(EventCreate( &start, 0 ) );
-    cuda(EventCreate( &stop, 0 ) );
+    cuda(EventCreate( &start ) );
+    cuda(EventCreate( &stop ) );
 
     cuda(Memset( pHist, 0, 256*sizeof(unsigned int) ) );
 
diff --git a/scan/int/scan2Level.cuh b/scan/int/scan2Level.cuh
index 10ad0d0..d079bce 100644
--- a/scan/int/scan2Level.cuh
+++ b/scan/int/scan2Level.cuh
@@ -168,11 +168,8 @@ scan2Level( T *out, const T *in, size_t N, int b )
 
     cudaError_t status;
     T *gPartials = 0;
-    status = cudaGetSymbolAddress( 
-                (void **) &gPartials, 
-                g_globalPartials );
+    cuda(GetSymbolAddress( (void **) &gPartials, g_globalPartials ));
 
-    if ( cudaSuccess ==  status )
     {
         //
         // ceil(N/b) = number of partial sums to compute
@@ -218,6 +215,7 @@ scan2Level( T *out, const T *in, size_t N, int b )
             N, 
             elementsPerPartial );
     }
+Error:;
 }
 
 template<class T>
diff --git a/scan/int/testScan.cu b/scan/int/testScan.cu
index b8cb0ee..38e99ee 100644
--- a/scan/int/testScan.cu
+++ b/scan/int/testScan.cu
@@ -163,7 +163,7 @@ main( int argc, char *argv[] )
 
     {
         cudaDeviceProp prop;
-        cudaGetDeviceProperties( &prop, 0 );
+        cuda(GetDeviceProperties( &prop, 0 ));
         maxThreads = prop.maxThreadsPerBlock;
     }