Browse Source

Checkpoint working version

hipcc
Nicholas Wilt 3 years ago
parent
commit
647f8b2161
  1. 4
      nbody/nbody.cu
  2. 68
      nbody2/bodybodyInteraction.cuh
  3. 216
      nbody2/nbody.h
  4. 84
      nbody2/nbody_CPU_AOS.cpp
  5. 42
      nbody2/nbody_CPU_AOS.h
  6. 94
      nbody2/nbody_GPU_AOS.cuh

4
nbody/nbody.cu

@ -563,6 +563,10 @@ main( int argc, char *argv[] )
cuda(GetDeviceProperties( &prop, 0 ) ); cuda(GetDeviceProperties( &prop, 0 ) );
g_bSM30Present = prop.major >= 3; g_bSM30Present = prop.major >= 3;
} }
else {
fprintf( stderr, "nbody: no GPUs\n" );
exit(1);
}
g_bNoCPU = chCommandLineGetBool( "nocpu", argc, argv ); g_bNoCPU = chCommandLineGetBool( "nocpu", argc, argv );
if ( g_bNoCPU && ! g_bCUDAPresent ) { if ( g_bNoCPU && ! g_bCUDAPresent ) {
printf( "--nocpu specified, but no CUDA present...exiting\n" ); printf( "--nocpu specified, but no CUDA present...exiting\n" );

68
nbody2/bodybodyInteraction.cuh

@ -0,0 +1,68 @@
/*
*
* bodybodyInteraction.cuh
*
* CUDA header for function to compute body-body interaction.
* Also compatible with scalar (non-SIMD) CPU implementations.
*
* Copyright (c) 2011-2012, Archaea Software, LLC.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
#ifndef __CUDAHANDBOOK_BODYBODYINTERACTION_CUH__
#define __CUDAHANDBOOK_BODYBODYINTERACTION_CUH__
template <typename T>
__host__ __device__ void bodyBodyInteraction(
T *fx, T *fy, T *fz,
T x0, T y0, T z0,
T x1, T y1, T z1, T mass1,
T softeningSquared)
{
T dx = x1 - x0;
T dy = y1 - y0;
T dz = z1 - z0;
T distSqr = dx*dx + dy*dy + dz*dz;
distSqr += softeningSquared;
//
// rsqrtf() maps to SFU instruction - to support
// double, this has to be changed.
//
T invDist = rsqrtf(distSqr);
T invDistCube = invDist * invDist * invDist;
T s = mass1 * invDistCube;
*fx = dx * s;
*fy = dy * s;
*fz = dz * s;
}
#endif

216
nbody2/nbody.h

@ -0,0 +1,216 @@
/*
*
* nbody.h
*
* Header file to declare globals in nbody.cu
*
* Copyright (c) 2011-2012, Archaea Software, LLC.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
#ifndef __CUDAHANDBOOK_NBODY_H__
#define __CUDAHANDBOOK_NBODY_H__
//#include "nbody_CPU_SIMD.h"
#include <chThread.h>
extern bool g_bCUDAPresent;
extern bool g_bGPUCrossCheck;
extern bool g_GPUCrosscheck;
#define NBODY_GOLDENFILE_VERSION 0x100
extern FILE *g_fGPUCrosscheckInput;
extern FILE *g_fGPUCrosscheckOutput;
extern float *g_hostAOS_PosMass;
extern float *g_hostAOS_VelInvMass;
extern float *g_hostAOS_Force;
// for GPU cross-check
const int g_maxGPUs = 32;
extern float *g_hostAOS_gpuCrossCheckForce[g_maxGPUs];
extern float *g_dptrAOS_PosMass;
extern float *g_dptrAOS_Force;
// Buffer to hold the golden version of the forces, used for comparison
// Along with timing results, we report the maximum relative error with
// respect to this array.
extern float *g_hostAOS_Force_Golden;
extern float *g_hostSOA_Pos[3];
extern float *g_hostSOA_Force[3];
extern float *g_hostSOA_Mass;
extern float *g_hostSOA_InvMass;
extern size_t g_N;
extern float g_softening;
extern float g_damping;
extern float g_dt;
template<typename T>
struct Body {
T x_, y_, z_, mass_;
};
template<typename T>
class NBodyAlgorithm {
public:
inline NBodyAlgorithm<T>() { }
virtual ~NBodyAlgorithm<T>() { }
virtual bool Initialize( size_t N );
size_t N() const { return N_; }
virtual Body<T> getBody( size_t i) const = 0;
private:
size_t N_;
};
template<typename T>
class NBodyAlgorithm_SOA : public NBodyAlgorithm<T> {
public:
NBodyAlgorithm_SOA<T>() { x_ = y_ = z_ = mass_ = nullptr; }
virtual ~NBodyAlgorithm_SOA<T>();
virtual bool Initialize( size_t N );
virtual Body<T> getBody( size_t i ) const;
private:
T *x_, *y_, *z_, *mass_;
};
template<typename T>
inline
NBodyAlgorithm_SOA<T>::~NBodyAlgorithm_SOA()
{
free( x_ );
free( y_ );
free( z_ );
free( mass_ );
}
template<typename T>
inline bool
NBodyAlgorithm_SOA<T>::Initialize( size_t N )
{
NBodyAlgorithm<T>::Initialize( N );
x_ = (T *) aligned_alloc( 64, N*sizeof(T) );
y_ = (T *) aligned_alloc( 64, N*sizeof(T) );
z_ = (T *) aligned_alloc( 64, N*sizeof(T) );
mass_ = (T *) aligned_alloc( 64, N*sizeof(T) );
if ( nullptr == x_ || nullptr==y_ || nullptr==z_ || nullptr==mass_ )
goto Error;
return true;
Error:
free( x_ );
free( y_ );
free( z_ );
free( mass_ );
return false;
}
enum nbodyAlgorithm_enum {
CPU_AOS = 0, /* This is the golden implementation */
CPU_AOS_tiled,
CPU_SOA,
#ifdef HAVE_SIMD
CPU_SIMD,
#endif
#ifdef HAVE_SIMD_THREADED
CPU_SIMD_threaded,
#endif
#ifdef HAVE_SIMD_OPENMP
CPU_SIMD_openmp,
#endif
GPU_AOS,
GPU_Shared,
GPU_Const,
multiGPU_SingleCPUThread,
multiGPU_MultiCPUThread,
// SM 3.0 only
GPU_Shuffle,
GPU_AOS_tiled,
GPU_AOS_tiled_const,
// GPU_Atomic
};
static const char *rgszAlgorithmNames[] = {
"CPU_AOS",
"CPU_AOS_tiled",
"CPU_SOA",
#ifdef HAVE_SIMD
"CPU_SIMD",
#endif
#ifdef HAVE_SIMD_THREADED
"CPU_SIMD_threaded",
#endif
#ifdef HAVE_SIMD_OPENMP
"CPU_SIMD_openmp",
#endif
"GPU_AOS",
"GPU_Shared",
"GPU_Const",
"multiGPU_SingleCPUThread",
"multiGPU_MultiCPUThread",
// SM 3.0 only
"GPU_Shuffle",
"GPU_AOS_tiled",
"GPU_AOS_tiled_const",
// "GPU_Atomic"
};
extern const char *rgszAlgorithmNames[];
extern enum nbodyAlgorithm_enum g_Algorithm;
//
// g_maxAlgorithm is used to determine when to rotate g_Algorithm back to CPU_AOS
// If CUDA is present, it is CPU_SIMD_threaded, otherwise GPU_Shuffle
// The CPU and GPU algorithms must be contiguous, and the logic in main() to
// initialize this value must be modified if any new algorithms are added.
//
extern enum nbodyAlgorithm_enum g_maxAlgorithm;
extern bool g_bCrossCheck;
extern bool g_bNoCPU;
extern cudahandbook::threading::workerThread *g_CPUThreadPool;
extern int g_numCPUCores;
extern int g_numGPUs;
extern cudahandbook::threading::workerThread *g_GPUThreadPool;
extern float ComputeGravitation_GPU_Shared ( float *force, float *posMass, float softeningSquared, size_t N );
extern float ComputeGravitation_multiGPU_singlethread( float *force, float *posMass, float softeningSquared, size_t N );
extern float ComputeGravitation_multiGPU_threaded ( float *force, float *posMass, float softeningSquared, size_t N );
#endif

84
nbody2/nbody_CPU_AOS.cpp

@ -0,0 +1,84 @@
/*
*
* nbody_CPU_AOS.h
*
* Scalar CPU implementation of the O(N^2) N-body calculation.
*
* Copyright (c) 2011-2012, Archaea Software, LLC.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
#ifndef NO_CUDA
#define NO_CUDA
#endif
#include <chCUDA.h>
#include <chTimer.h>
#include "bodybodyInteraction.cuh"
float
ComputeGravitation_AOS(
float *force,
float *posMass,
float softeningSquared,
size_t N
)
{
chTimerTimestamp start, end;
chTimerGetTime( &start );
for ( size_t i = 0; i < N; i++ )
{
float acc[3] = {0, 0, 0};
float myX = posMass[i*4+0];
float myY = posMass[i*4+1];
float myZ = posMass[i*4+2];
for ( size_t j = 0; j < N; j++ ) {
float fx, fy, fz;
float bodyX = posMass[j*4+0];
float bodyY = posMass[j*4+1];
float bodyZ = posMass[j*4+2];
float bodyMass = posMass[j*4+3];
bodyBodyInteraction<float>(
&fx, &fy, &fz,
myX, myY, myZ,
bodyX, bodyY, bodyZ, bodyMass,
softeningSquared );
acc[0] += fx;
acc[1] += fy;
acc[2] += fz;
}
force[3*i+0] = acc[0];
force[3*i+1] = acc[1];
force[3*i+2] = acc[2];
}
chTimerGetTime( &end );
return (float) chTimerElapsedTime( &start, &end ) * 1000.0f;
}

42
nbody2/nbody_CPU_AOS.h

@ -0,0 +1,42 @@
/*
*
* nbody_CPU_AOS.h
*
* Scalar CPU implementation of the O(N^2) N-body calculation.
*
* Copyright (c) 2011-2012, Archaea Software, LLC.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
float
ComputeGravitation_AOS(
float *force,
float *posMass,
float softeningSquared,
size_t N
);

94
nbody2/nbody_GPU_AOS.cuh

@ -0,0 +1,94 @@
/*
*
* nbody_GPU_AOS.h
*
* CUDA implementation of the O(N^2) N-body calculation.
*
* Copyright (c) 2011-2012, Archaea Software, LLC.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
template<typename T>
__global__ void
ComputeNBodyGravitation_GPU_AOS(
T *force,
T *posMass,
size_t N,
T softeningSquared )
{
for ( int i = blockIdx.x*blockDim.x + threadIdx.x;
i < N;
i += blockDim.x*gridDim.x )
{
T acc[3] = {0};
float4 me = ((float4 *) posMass)[i];
T myX = me.x;
T myY = me.y;
T myZ = me.z;
for ( int j = 0; j < N; j++ ) {
float4 body = ((float4 *) posMass)[j];
float fx, fy, fz;
bodyBodyInteraction(
&fx, &fy, &fz,
myX, myY, myZ,
body.x, body.y, body.z, body.w,
softeningSquared);
acc[0] += fx;
acc[1] += fy;
acc[2] += fz;
}
force[3*i+0] = acc[0];
force[3*i+1] = acc[1];
force[3*i+2] = acc[2];
}
}
float
ComputeGravitation_GPU_AOS(
float *force,
float *posMass,
float softeningSquared,
size_t N
)
{
cudaError_t status;
cudaEvent_t evStart = 0, evStop = 0;
float ms = 0.0;
cuda(EventCreate( &evStart ) );
cuda(EventCreate( &evStop ) );
cuda(EventRecord( evStart, NULL ) );
ComputeNBodyGravitation_GPU_AOS<float> <<<300,256>>>(
force, posMass, N, softeningSquared );
cuda(EventRecord( evStop, NULL ) );
cuda(DeviceSynchronize() );
cuda(EventElapsedTime( &ms, evStart, evStop ) );
Error:
cudaEventDestroy( evStop );
cudaEventDestroy( evStart );
return ms;
}
Loading…
Cancel
Save