Browse Source

Separate CUDA and HIP specific wrappers in chError.h

hipcc
Nicholas Wilt 3 years ago
parent
commit
4a9666b6ed
  1. 255
      chLib/chError.h
  2. 233
      chLib/chError_cuda.h
  3. 187
      chLib/chError_hip.h
  4. 8
      nbody2/bodybodyInteraction.cuh
  5. 26
      nbody2/nbody.cu
  6. 2
      nbody2/nbody.h

255
chLib/chError.h

@ -10,7 +10,7 @@ @@ -10,7 +10,7 @@
* * The more-concise formulation of these macros is due to
* Allan MacKinnon.
*
* Copyright (c) 2011-2016, Archaea Software, LLC.
* Copyright (c) 2011-2022, Archaea Software, LLC.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -47,254 +47,13 @@ @@ -47,254 +47,13 @@
#endif
#ifdef __HIPCC__
#include <hip/hip_runtime.h>
#define cuda( fn ) do { \
status = (hip##fn); \
if ( hipSuccess != (status) ) { \
goto Error; \
} \
} while (0);
#define CUDART_CHECK( fn ) do { \
status = (fn); \
if ( hipSuccess != (status) ) { \
goto Error; \
} \
} while (0);
typedef hipEvent_t cudaEvent_t;
typedef hipError_t cudaError_t;
#ifdef __cplusplus
template<typename T> hipError_t hipHostAlloc( T **pp, size_t N, unsigned int Flags ) {
return hipHostMalloc( (void **) pp, N, Flags );
}
template<typename T> hipError_t hipHostGetDevicePointer( T **pp, void *p, unsigned int Flags ) {
return hipHostGetDevicePointer( (void **) pp, p, Flags );
}
#endif
// entry points
#define cudaDeviceMapHost hipDeviceMapHost
#define cudaFree hipFree
#define cudaHostFree hipHostFree
#define cudaHostGetDevicePointer hipHostGetDevicePointer
#define cudaStreamDestroy hipStreamDestroy
#define cudaEventDestroy hipEventDestroy
// data types
typedef hipStream_t cudaStream_t;
// defines
#define cudaMemcpyHostToDevice hipMemcpyHostToDevice
#define cudaMemcpyDeviceToHost hipMemcpyDeviceToHost
#define cudaMemcpyDeviceToDevice hipMemcpyDeviceToDevice
#define cudaHostAllocMapped 0
// error defines
#define cudaSuccess hipSuccess
#define cudaErrorUnknown hipErrorUnknown
#define cudaErrorInvalidValue hipErrorInvalidValue
#define cudaErrorMemoryAllocation hipErrorMemoryAllocation
#include "chError_hip.h"
#else
#ifndef NO_CUDA
#include <chCUDA.h>
template<typename T>
inline const char *
chGetErrorString( T status )
{
#ifdef __HIPCC__
return hipGetErrorString(status);
#else
return cudaGetErrorString(status);
#endif
}
template<>
inline const char *
chGetErrorString( CUresult status )
{
switch ( status ) {
#define ErrorValue(Define) case Define: return #Define;
ErrorValue(CUDA_SUCCESS)
ErrorValue(CUDA_ERROR_INVALID_VALUE)
ErrorValue(CUDA_ERROR_OUT_OF_MEMORY)
ErrorValue(CUDA_ERROR_NOT_INITIALIZED)
ErrorValue(CUDA_ERROR_DEINITIALIZED)
ErrorValue(CUDA_ERROR_PROFILER_DISABLED)
ErrorValue(CUDA_ERROR_PROFILER_NOT_INITIALIZED)
ErrorValue(CUDA_ERROR_PROFILER_ALREADY_STARTED)
ErrorValue(CUDA_ERROR_PROFILER_ALREADY_STOPPED)
ErrorValue(CUDA_ERROR_NO_DEVICE)
ErrorValue(CUDA_ERROR_INVALID_DEVICE)
ErrorValue(CUDA_ERROR_INVALID_IMAGE)
ErrorValue(CUDA_ERROR_INVALID_CONTEXT)
ErrorValue(CUDA_ERROR_CONTEXT_ALREADY_CURRENT)
ErrorValue(CUDA_ERROR_MAP_FAILED)
ErrorValue(CUDA_ERROR_UNMAP_FAILED)
ErrorValue(CUDA_ERROR_ARRAY_IS_MAPPED)
ErrorValue(CUDA_ERROR_ALREADY_MAPPED)
ErrorValue(CUDA_ERROR_NO_BINARY_FOR_GPU)
ErrorValue(CUDA_ERROR_ALREADY_ACQUIRED)
ErrorValue(CUDA_ERROR_NOT_MAPPED)
ErrorValue(CUDA_ERROR_NOT_MAPPED_AS_ARRAY)
ErrorValue(CUDA_ERROR_NOT_MAPPED_AS_POINTER)
ErrorValue(CUDA_ERROR_ECC_UNCORRECTABLE)
ErrorValue(CUDA_ERROR_UNSUPPORTED_LIMIT)
ErrorValue(CUDA_ERROR_CONTEXT_ALREADY_IN_USE)
ErrorValue(CUDA_ERROR_INVALID_SOURCE)
ErrorValue(CUDA_ERROR_FILE_NOT_FOUND)
ErrorValue(CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND)
ErrorValue(CUDA_ERROR_SHARED_OBJECT_INIT_FAILED)
ErrorValue(CUDA_ERROR_OPERATING_SYSTEM)
ErrorValue(CUDA_ERROR_INVALID_HANDLE)
ErrorValue(CUDA_ERROR_NOT_FOUND)
ErrorValue(CUDA_ERROR_NOT_READY)
ErrorValue(CUDA_ERROR_LAUNCH_FAILED)
ErrorValue(CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES)
ErrorValue(CUDA_ERROR_LAUNCH_TIMEOUT)
ErrorValue(CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING)
ErrorValue(CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED)
ErrorValue(CUDA_ERROR_PEER_ACCESS_NOT_ENABLED)
ErrorValue(CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE)
ErrorValue(CUDA_ERROR_CONTEXT_IS_DESTROYED)
#if CUDA_VERSION >= 4010
ErrorValue(CUDA_ERROR_ASSERT)
ErrorValue(CUDA_ERROR_TOO_MANY_PEERS)
ErrorValue(CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED)
ErrorValue(CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED)
#include "chError_cuda.h"
#endif
ErrorValue(CUDA_ERROR_STUB_LIBRARY)
ErrorValue(CUDA_ERROR_PEER_ACCESS_UNSUPPORTED)
ErrorValue(CUDA_ERROR_DEVICE_NOT_LICENSED)
ErrorValue(CUDA_ERROR_INVALID_PTX)
ErrorValue(CUDA_ERROR_INVALID_GRAPHICS_CONTEXT)
ErrorValue(CUDA_ERROR_NVLINK_UNCORRECTABLE)
ErrorValue(CUDA_ERROR_JIT_COMPILER_NOT_FOUND)
ErrorValue(CUDA_ERROR_JIT_COMPILATION_DISABLED)
ErrorValue(CUDA_ERROR_UNSUPPORTED_PTX_VERSION)
ErrorValue(CUDA_ERROR_ILLEGAL_STATE)
ErrorValue(CUDA_ERROR_ILLEGAL_ADDRESS)
ErrorValue(CUDA_ERROR_HARDWARE_STACK_ERROR)
ErrorValue(CUDA_ERROR_ILLEGAL_INSTRUCTION)
ErrorValue(CUDA_ERROR_MISALIGNED_ADDRESS)
ErrorValue(CUDA_ERROR_INVALID_ADDRESS_SPACE)
ErrorValue(CUDA_ERROR_INVALID_PC)
ErrorValue(CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE)
ErrorValue(CUDA_ERROR_NOT_PERMITTED)
ErrorValue(CUDA_ERROR_NOT_SUPPORTED)
ErrorValue(CUDA_ERROR_SYSTEM_NOT_READY)
ErrorValue(CUDA_ERROR_SYSTEM_DRIVER_MISMATCH)
ErrorValue(CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE)
ErrorValue(CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED)
ErrorValue(CUDA_ERROR_STREAM_CAPTURE_INVALIDATED)
ErrorValue(CUDA_ERROR_STREAM_CAPTURE_MERGE)
ErrorValue(CUDA_ERROR_STREAM_CAPTURE_UNMATCHED)
ErrorValue(CUDA_ERROR_STREAM_CAPTURE_UNJOINED)
ErrorValue(CUDA_ERROR_STREAM_CAPTURE_ISOLATION)
ErrorValue(CUDA_ERROR_STREAM_CAPTURE_IMPLICIT)
ErrorValue(CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD)
ErrorValue(CUDA_ERROR_TIMEOUT)
ErrorValue(CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE)
ErrorValue(CUDA_ERROR_CAPTURED_EVENT)
ErrorValue(CUDA_ERROR_UNKNOWN)
}
return "chGetErrorString - unknown error value";
}
//
// To use these macros, a local cudaError_t or CUresult called 'status'
// and a label Error: must be defined. In the debug build, the code will
// emit an error to stderr. In both debug and retail builds, the code will
// goto Error if there is an error.
//
#ifdef DEBUG
#define CUDART_CHECK( fn ) do { \
(status) = (fn); \
if ( hipSuccess != (status) ) { \
fprintf( stderr, "CUDA Runtime Failure (line %d of file %s):\n\t" \
"%s returned 0x%x (%s)\n", \
__LINE__, __FILE__, #fn, status, chGetErrorString(status) ); \
goto Error; \
} \
} while (0);
#define cuda( fn ) do { \
(status) = (hip##fn); \
if ( hipSuccess != (status) ) { \
fprintf( stderr, "HIP Runtime Failure (line %d of file %s):\n\t" \
"%s returned 0x%x (%s)\n", \
__LINE__, __FILE__, #fn, status, chGetErrorString(status) ); \
goto Error; \
} \
} while (0);
#define cu( fn ) do { \
(status) = (hip##fn); \
if ( CUDA_SUCCESS != (status) ) { \
fprintf( stderr, "CUDA Runtime Failure (line %d of file %s):\n\t%s "\
"returned 0x%x (%s)\n", \
__LINE__, __FILE__, #fn, status, chGetErrorString(status) ); \
goto Error; \
} \
} while (0);
#define CUDA_CHECK( fn ) do { \
(status) = (fn); \
if ( CUDA_SUCCESS != (status) ) { \
fprintf( stderr, "CUDA Runtime Failure (line %d of file %s):\n\t%s "\
"returned 0x%x (%s)\n", \
__LINE__, __FILE__, #fn, status, chGetErrorString(status) ); \
goto Error; \
} \
} while (0);
#else
#define CUDART_CHECK( fn ) do { \
status = (fn); \
if ( hipSuccess != (status) ) { \
goto Error; \
} \
} while (0);
#define cuda( fn ) do { \
status = (hip##fn); \
if ( hipSuccess != (status) ) { \
goto Error; \
} \
} while (0);
#define CUDA_CHECK( fn ) do { \
(status) = (fn); \
if ( CUDA_SUCCESS != (status) ) { \
goto Error; \
} \
} while (0);
#define cu( fn ) do { \
(status) = (cu##fn); \
if ( CUDA_SUCCESS != (status) ) { \
goto Error; \
} \
} while (0);
#endif
#else
#if 0
//#else
template<typename T>
inline const char *
@ -324,7 +83,5 @@ chGetErrorString( T status ) @@ -324,7 +83,5 @@ chGetErrorString( T status )
#endif
#endif
#endif // __HIPCC__
#endif // __CHERROR_H__

233
chLib/chError_cuda.h

@ -0,0 +1,233 @@ @@ -0,0 +1,233 @@
/*
*
* chError_cuda.h
*
* Error handling for CUDA:
* cu() and cuda() macros implement goto-based error
* error handling *, and
* chGetErrorString() maps a driver API error to a string.
*
* * The more-concise formulation of these macros is due to
* Allan MacKinnon.
*
* Copyright (c) 2011-2016, Archaea Software, LLC.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
#ifndef __CHERROR_CUDA_H__
#define __CHERROR_CUDA_H__
#ifndef NO_CUDA
#include <chCUDA.h>
template<typename T>
inline const char *
chGetErrorString( T status )
{
return cudaGetErrorString(status);
}
template<>
inline const char *
chGetErrorString( CUresult status )
{
switch ( status ) {
#define ErrorValue(Define) case Define: return #Define;
ErrorValue(CUDA_SUCCESS)
ErrorValue(CUDA_ERROR_INVALID_VALUE)
ErrorValue(CUDA_ERROR_OUT_OF_MEMORY)
ErrorValue(CUDA_ERROR_NOT_INITIALIZED)
ErrorValue(CUDA_ERROR_DEINITIALIZED)
ErrorValue(CUDA_ERROR_PROFILER_DISABLED)
ErrorValue(CUDA_ERROR_PROFILER_NOT_INITIALIZED)
ErrorValue(CUDA_ERROR_PROFILER_ALREADY_STARTED)
ErrorValue(CUDA_ERROR_PROFILER_ALREADY_STOPPED)
ErrorValue(CUDA_ERROR_NO_DEVICE)
ErrorValue(CUDA_ERROR_INVALID_DEVICE)
ErrorValue(CUDA_ERROR_INVALID_IMAGE)
ErrorValue(CUDA_ERROR_INVALID_CONTEXT)
ErrorValue(CUDA_ERROR_CONTEXT_ALREADY_CURRENT)
ErrorValue(CUDA_ERROR_MAP_FAILED)
ErrorValue(CUDA_ERROR_UNMAP_FAILED)
ErrorValue(CUDA_ERROR_ARRAY_IS_MAPPED)
ErrorValue(CUDA_ERROR_ALREADY_MAPPED)
ErrorValue(CUDA_ERROR_NO_BINARY_FOR_GPU)
ErrorValue(CUDA_ERROR_ALREADY_ACQUIRED)
ErrorValue(CUDA_ERROR_NOT_MAPPED)
ErrorValue(CUDA_ERROR_NOT_MAPPED_AS_ARRAY)
ErrorValue(CUDA_ERROR_NOT_MAPPED_AS_POINTER)
ErrorValue(CUDA_ERROR_ECC_UNCORRECTABLE)
ErrorValue(CUDA_ERROR_UNSUPPORTED_LIMIT)
ErrorValue(CUDA_ERROR_CONTEXT_ALREADY_IN_USE)
ErrorValue(CUDA_ERROR_INVALID_SOURCE)
ErrorValue(CUDA_ERROR_FILE_NOT_FOUND)
ErrorValue(CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND)
ErrorValue(CUDA_ERROR_SHARED_OBJECT_INIT_FAILED)
ErrorValue(CUDA_ERROR_OPERATING_SYSTEM)
ErrorValue(CUDA_ERROR_INVALID_HANDLE)
ErrorValue(CUDA_ERROR_NOT_FOUND)
ErrorValue(CUDA_ERROR_NOT_READY)
ErrorValue(CUDA_ERROR_LAUNCH_FAILED)
ErrorValue(CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES)
ErrorValue(CUDA_ERROR_LAUNCH_TIMEOUT)
ErrorValue(CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING)
ErrorValue(CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED)
ErrorValue(CUDA_ERROR_PEER_ACCESS_NOT_ENABLED)
ErrorValue(CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE)
ErrorValue(CUDA_ERROR_CONTEXT_IS_DESTROYED)
#if CUDA_VERSION >= 4010
ErrorValue(CUDA_ERROR_ASSERT)
ErrorValue(CUDA_ERROR_TOO_MANY_PEERS)
ErrorValue(CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED)
ErrorValue(CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED)
#endif
ErrorValue(CUDA_ERROR_STUB_LIBRARY)
ErrorValue(CUDA_ERROR_PEER_ACCESS_UNSUPPORTED)
ErrorValue(CUDA_ERROR_DEVICE_NOT_LICENSED)
ErrorValue(CUDA_ERROR_INVALID_PTX)
ErrorValue(CUDA_ERROR_INVALID_GRAPHICS_CONTEXT)
ErrorValue(CUDA_ERROR_NVLINK_UNCORRECTABLE)
ErrorValue(CUDA_ERROR_JIT_COMPILER_NOT_FOUND)
ErrorValue(CUDA_ERROR_JIT_COMPILATION_DISABLED)
ErrorValue(CUDA_ERROR_UNSUPPORTED_PTX_VERSION)
ErrorValue(CUDA_ERROR_ILLEGAL_STATE)
ErrorValue(CUDA_ERROR_ILLEGAL_ADDRESS)
ErrorValue(CUDA_ERROR_HARDWARE_STACK_ERROR)
ErrorValue(CUDA_ERROR_ILLEGAL_INSTRUCTION)
ErrorValue(CUDA_ERROR_MISALIGNED_ADDRESS)
ErrorValue(CUDA_ERROR_INVALID_ADDRESS_SPACE)
ErrorValue(CUDA_ERROR_INVALID_PC)
ErrorValue(CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE)
ErrorValue(CUDA_ERROR_NOT_PERMITTED)
ErrorValue(CUDA_ERROR_NOT_SUPPORTED)
ErrorValue(CUDA_ERROR_SYSTEM_NOT_READY)
ErrorValue(CUDA_ERROR_SYSTEM_DRIVER_MISMATCH)
ErrorValue(CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE)
ErrorValue(CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED)
ErrorValue(CUDA_ERROR_STREAM_CAPTURE_INVALIDATED)
ErrorValue(CUDA_ERROR_STREAM_CAPTURE_MERGE)
ErrorValue(CUDA_ERROR_STREAM_CAPTURE_UNMATCHED)
ErrorValue(CUDA_ERROR_STREAM_CAPTURE_UNJOINED)
ErrorValue(CUDA_ERROR_STREAM_CAPTURE_ISOLATION)
ErrorValue(CUDA_ERROR_STREAM_CAPTURE_IMPLICIT)
ErrorValue(CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD)
ErrorValue(CUDA_ERROR_TIMEOUT)
ErrorValue(CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE)
ErrorValue(CUDA_ERROR_CAPTURED_EVENT)
ErrorValue(CUDA_ERROR_UNKNOWN)
}
return "chGetErrorString - unknown error value";
}
//
// To use these macros, a local cudaError_t or CUresult called 'status'
// and a label Error: must be defined. In the debug build, the code will
// emit an error to stderr. In both debug and retail builds, the code will
// goto Error if there is an error.
//
#ifdef DEBUG
#define CUDART_CHECK( fn ) do { \
(status) = (fn); \
if ( cudaSuccess != (status) ) { \
fprintf( stderr, "CUDA Runtime Failure (line %d of file %s):\n\t" \
"%s returned 0x%x (%s)\n", \
__LINE__, __FILE__, #fn, status, chGetErrorString(status) ); \
goto Error; \
} \
} while (0);
#define cuda( fn ) do { \
(status) = (cuda##fn); \
if ( cudaSuccess != (status) ) { \
fprintf( stderr, "CUDA Runtime Failure (line %d of file %s):\n\t" \
"%s returned 0x%x (%s)\n", \
__LINE__, __FILE__, #fn, status, chGetErrorString(status) ); \
goto Error; \
} \
} while (0);
#define cu( fn ) do { \
(status) = (cuda##fn); \
if ( CUDA_SUCCESS != (status) ) { \
fprintf( stderr, "CUDA Runtime Failure (line %d of file %s):\n\t%s "\
"returned 0x%x (%s)\n", \
__LINE__, __FILE__, #fn, status, chGetErrorString(status) ); \
goto Error; \
} \
} while (0);
#define CUDA_CHECK( fn ) do { \
(status) = (fn); \
if ( CUDA_SUCCESS != (status) ) { \
fprintf( stderr, "CUDA Runtime Failure (line %d of file %s):\n\t%s "\
"returned 0x%x (%s)\n", \
__LINE__, __FILE__, #fn, status, chGetErrorString(status) ); \
goto Error; \
} \
} while (0);
#else
#define CUDART_CHECK( fn ) do { \
status = (fn); \
if ( cudaSuccess != (status) ) { \
goto Error; \
} \
} while (0);
#define cuda( fn ) do { \
status = (cuda##fn); \
if ( cudaSuccess != (status) ) { \
goto Error; \
} \
} while (0);
#define CUDA_CHECK( fn ) do { \
(status) = (fn); \
if ( CUDA_SUCCESS != (status) ) { \
goto Error; \
} \
} while (0);
#define cu( fn ) do { \
(status) = (cu##fn); \
if ( CUDA_SUCCESS != (status) ) { \
goto Error; \
} \
} while (0);
#endif // NO_CUDA
#endif
#endif // __CHERROR_CUDA_H__

187
chLib/chError_hip.h

@ -0,0 +1,187 @@ @@ -0,0 +1,187 @@
/*
*
* chError_hip.h
*
* Error handling for HIP:
* cu() and cuda() macros implement goto-based error
* error handling *, and
* chGetErrorString() maps a driver API error to a string.
*
* * The more-concise formulation of these macros is due to
* Allan MacKinnon.
*
* Copyright (c) 2011-2022, Archaea Software, LLC.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
#ifndef __CHERROR_HIP_H__
#define __CHERROR_HIP_H__
#include <hip/hip_runtime.h>
#define cuda( fn ) do { \
status = (hip##fn); \
if ( hipSuccess != (status) ) { \
goto Error; \
} \
} while (0);
#define CUDART_CHECK( fn ) do { \
status = (fn); \
if ( hipSuccess != (status) ) { \
goto Error; \
} \
} while (0);
typedef hipEvent_t cudaEvent_t;
typedef hipError_t cudaError_t;
#ifdef __cplusplus
template<typename T> hipError_t hipHostAlloc( T **pp, size_t N, unsigned int Flags ) {
return hipHostMalloc( (void **) pp, N, Flags );
}
template<typename T> hipError_t hipHostGetDevicePointer( T **pp, void *p, unsigned int Flags ) {
return hipHostGetDevicePointer( (void **) pp, p, Flags );
}
#endif
// entry points
#define cudaDeviceMapHost hipDeviceMapHost
#define cudaFree hipFree
#define cudaHostFree hipHostFree
#define cudaHostGetDevicePointer hipHostGetDevicePointer
#define cudaStreamDestroy hipStreamDestroy
#define cudaEventDestroy hipEventDestroy
#define cudaGetErrorString hipGetErrorString
// data types
typedef hipStream_t cudaStream_t;
typedef hipDeviceProp_t cudaDeviceProp;
// defines
#define cudaMemcpyHostToDevice hipMemcpyHostToDevice
#define cudaMemcpyDeviceToHost hipMemcpyDeviceToHost
#define cudaMemcpyDeviceToDevice hipMemcpyDeviceToDevice
#define cudaHostAllocMapped 0
#define cudaHostAllocPortable 0
// error defines
#define cudaSuccess hipSuccess
#define cudaErrorUnknown hipErrorUnknown
#define cudaErrorInvalidValue hipErrorInvalidValue
#define cudaErrorMemoryAllocation hipErrorMemoryAllocation
template<typename T>
inline const char *
chGetErrorString( T status )
{
return hipGetErrorString(status);
}
// To use these macros, a local cudaError_t or CUresult called 'status'
// and a label Error: must be defined. In the debug build, the code will
// emit an error to stderr. In both debug and retail builds, the code will
// goto Error if there is an error.
//
#ifdef DEBUG
#define CUDART_CHECK( fn ) do { \
(status) = (fn); \
if ( hipSuccess != (status) ) { \
fprintf( stderr, "CUDA Runtime Failure (line %d of file %s):\n\t" \
"%s returned 0x%x (%s)\n", \
__LINE__, __FILE__, #fn, status, chGetErrorString(status) ); \
goto Error; \
} \
} while (0);
#define cuda( fn ) do { \
(status) = (hip##fn); \
if ( hipSuccess != (status) ) { \
fprintf( stderr, "HIP Runtime Failure (line %d of file %s):\n\t" \
"%s returned 0x%x (%s)\n", \
__LINE__, __FILE__, #fn, status, chGetErrorString(status) ); \
goto Error; \
} \
} while (0);
#define cu( fn ) do { \
(status) = (hip##fn); \
if ( CUDA_SUCCESS != (status) ) { \
fprintf( stderr, "CUDA Runtime Failure (line %d of file %s):\n\t%s "\
"returned 0x%x (%s)\n", \
__LINE__, __FILE__, #fn, status, chGetErrorString(status) ); \
goto Error; \
} \
} while (0);
#define CUDA_CHECK( fn ) do { \
(status) = (fn); \
if ( CUDA_SUCCESS != (status) ) { \
fprintf( stderr, "CUDA Runtime Failure (line %d of file %s):\n\t%s "\
"returned 0x%x (%s)\n", \
__LINE__, __FILE__, #fn, status, chGetErrorString(status) ); \
goto Error; \
} \
} while (0);
#else
#define CUDART_CHECK( fn ) do { \
status = (fn); \
if ( hipSuccess != (status) ) { \
goto Error; \
} \
} while (0);
#define cuda( fn ) do { \
status = (hip##fn); \
if ( hipSuccess != (status) ) { \
goto Error; \
} \
} while (0);
#define CUDA_CHECK( fn ) do { \
(status) = (fn); \
if ( CUDA_SUCCESS != (status) ) { \
goto Error; \
} \
} while (0);
#define cu( fn ) do { \
(status) = (cu##fn); \
if ( CUDA_SUCCESS != (status) ) { \
goto Error; \
} \
} while (0);
#endif
#endif // __CHERROR_HIP_H__

8
nbody2/bodybodyInteraction.cuh

@ -37,6 +37,14 @@ @@ -37,6 +37,14 @@
#ifndef __CUDAHANDBOOK_BODYBODYINTERACTION_CUH__
#define __CUDAHANDBOOK_BODYBODYINTERACTION_CUH__
#ifdef __HIPCC__
__host__ float
rsqrtf( float f )
{
return 1.0f / sqrtf( f );
}
#endif
template <typename T>
__host__ __device__ void bodyBodyInteraction(
T *fx, T *fy, T *fz,

26
nbody2/nbody.cu

@ -1059,7 +1059,7 @@ main( int argc, char *argv[] ) @@ -1059,7 +1059,7 @@ main( int argc, char *argv[] )
}
#endif
status = cudaGetDeviceCount( &g_numGPUs );
cuda(GetDeviceCount( &g_numGPUs ) );
g_bCUDAPresent = (cudaSuccess == status) && (g_numGPUs > 0);
if ( g_bCUDAPresent ) {
cudaDeviceProp prop;
@ -1236,30 +1236,14 @@ main( int argc, char *argv[] ) @@ -1236,30 +1236,14 @@ main( int argc, char *argv[] )
g_maxAlgorithm = multiGPU_MultiCPUThread;
}
gpuAlgo = new NBodyAlgorithm_FMA<float>;
//gpuAlgo = new NBodyAlgorithm_FMA<float>;
//gpuAlgo = new NBodyAlgorithm_AVX<float>;
//gpuAlgo = new NBodyAlgorithm_SSE<float>;
//gpuAlgo = new NBodyAlgorithm_SOA<float>;
//gpuAlgo = new NBodyAlgorithm_GPU<float>;
gpuAlgo = new NBodyAlgorithm_GPU<float>;
if ( ! gpuAlgo->Initialize( g_N, seed, g_softening ) )
goto Error;
#if 0
cuda(HostAlloc( (void **) &g_hostAOS_PosMass, 4*g_N*sizeof(float), cudaHostAllocPortable|cudaHostAllocMapped ) );
for ( int i = 0; i < 3; i++ ) {
cuda(HostAlloc( (void **) &g_hostSOA_Pos[i], g_N*sizeof(float), cudaHostAllocPortable|cudaHostAllocMapped ) );
cuda(HostAlloc( (void **) &g_hostSOA_Force[i], g_N*sizeof(float), cudaHostAllocPortable|cudaHostAllocMapped ) );
}
cuda(HostAlloc( (void **) &g_hostAOS_Force, 3*g_N*sizeof(float), cudaHostAllocPortable|cudaHostAllocMapped ) );
cuda(HostAlloc( (void **) &g_hostAOS_Force_Golden, 3*g_N*sizeof(float), cudaHostAllocPortable|cudaHostAllocMapped ) );
cuda(HostAlloc( (void **) &g_hostAOS_VelInvMass, 4*g_N*sizeof(float), cudaHostAllocPortable|cudaHostAllocMapped ) );
cuda(HostAlloc( (void **) &g_hostSOA_Mass, g_N*sizeof(float), cudaHostAllocPortable|cudaHostAllocMapped ) );
cuda(HostAlloc( (void **) &g_hostSOA_InvMass, g_N*sizeof(float), cudaHostAllocPortable|cudaHostAllocMapped ) );
cuda(Malloc( &g_dptrAOS_PosMass, 4*g_N*sizeof(float) ) );
cuda(Malloc( (void **) &g_dptrAOS_Force, 3*g_N*sizeof(float) ) );
#endif
if ( g_bGPUCrossCheck ) {
printf( "GPU cross check enabled (%d GPUs), disabling CPU\n", g_numGPUs );
g_bNoCPU = true;
@ -1269,7 +1253,11 @@ main( int argc, char *argv[] ) @@ -1269,7 +1253,11 @@ main( int argc, char *argv[] )
goto Error;
}
for ( int i = 0; i < g_numGPUs; i++ ) {
#ifdef __HIPCC__
cuda(HostMalloc( (void **) (&g_hostAOS_gpuCrossCheckForce[i]), 3*g_N*sizeof(float), cudaHostAllocPortable|cudaHostAllocMapped ) );
#else
cuda(HostAlloc( (void **) (&g_hostAOS_gpuCrossCheckForce[i]), 3*g_N*sizeof(float), cudaHostAllocPortable|cudaHostAllocMapped ) );
#endif
}
}
}

2
nbody2/nbody.h

@ -39,7 +39,7 @@ @@ -39,7 +39,7 @@
//#include "nbody_CPU_SIMD.h"
#include <chThread.h>
#include <thrust/host_vector.h>
//#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
extern bool g_bCUDAPresent;

Loading…
Cancel
Save