6 changed files with 508 additions and 0 deletions
@ -0,0 +1,68 @@
@@ -0,0 +1,68 @@
|
||||
/* |
||||
* |
||||
* bodybodyInteraction.cuh |
||||
* |
||||
* CUDA header for function to compute body-body interaction. |
||||
* Also compatible with scalar (non-SIMD) CPU implementations. |
||||
* |
||||
* Copyright (c) 2011-2012, Archaea Software, LLC. |
||||
* All rights reserved. |
||||
* |
||||
* Redistribution and use in source and binary forms, with or without |
||||
* modification, are permitted provided that the following conditions |
||||
* are met: |
||||
* |
||||
* 1. Redistributions of source code must retain the above copyright |
||||
* notice, this list of conditions and the following disclaimer. |
||||
* 2. Redistributions in binary form must reproduce the above copyright |
||||
* notice, this list of conditions and the following disclaimer in |
||||
* the documentation and/or other materials provided with the |
||||
* distribution. |
||||
* |
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS |
||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE |
||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, |
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, |
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN |
||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
||||
* POSSIBILITY OF SUCH DAMAGE. |
||||
* |
||||
*/ |
||||
|
||||
#ifndef __CUDAHANDBOOK_BODYBODYINTERACTION_CUH__ |
||||
#define __CUDAHANDBOOK_BODYBODYINTERACTION_CUH__ |
||||
|
||||
template <typename T> |
||||
__host__ __device__ void bodyBodyInteraction( |
||||
T *fx, T *fy, T *fz, |
||||
T x0, T y0, T z0, |
||||
T x1, T y1, T z1, T mass1, |
||||
T softeningSquared) |
||||
{ |
||||
T dx = x1 - x0; |
||||
T dy = y1 - y0; |
||||
T dz = z1 - z0; |
||||
|
||||
T distSqr = dx*dx + dy*dy + dz*dz; |
||||
distSqr += softeningSquared; |
||||
|
||||
// |
||||
// rsqrtf() maps to SFU instruction - to support |
||||
// double, this has to be changed. |
||||
// |
||||
T invDist = rsqrtf(distSqr); |
||||
|
||||
T invDistCube = invDist * invDist * invDist; |
||||
T s = mass1 * invDistCube; |
||||
|
||||
*fx = dx * s; |
||||
*fy = dy * s; |
||||
*fz = dz * s; |
||||
} |
||||
|
||||
#endif |
@ -0,0 +1,216 @@
@@ -0,0 +1,216 @@
|
||||
/*
|
||||
* |
||||
* nbody.h |
||||
* |
||||
* Header file to declare globals in nbody.cu |
||||
* |
||||
* Copyright (c) 2011-2012, Archaea Software, LLC. |
||||
* All rights reserved. |
||||
* |
||||
* Redistribution and use in source and binary forms, with or without |
||||
* modification, are permitted provided that the following conditions |
||||
* are met: |
||||
* |
||||
* 1. Redistributions of source code must retain the above copyright |
||||
* notice, this list of conditions and the following disclaimer. |
||||
* 2. Redistributions in binary form must reproduce the above copyright |
||||
* notice, this list of conditions and the following disclaimer in |
||||
* the documentation and/or other materials provided with the |
||||
* distribution. |
||||
* |
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS |
||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE |
||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, |
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, |
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN |
||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
||||
* POSSIBILITY OF SUCH DAMAGE. |
||||
* |
||||
*/ |
||||
|
||||
#ifndef __CUDAHANDBOOK_NBODY_H__ |
||||
#define __CUDAHANDBOOK_NBODY_H__ |
||||
|
||||
//#include "nbody_CPU_SIMD.h"
|
||||
|
||||
#include <chThread.h> |
||||
|
||||
extern bool g_bCUDAPresent; |
||||
extern bool g_bGPUCrossCheck; |
||||
|
||||
extern bool g_GPUCrosscheck; |
||||
#define NBODY_GOLDENFILE_VERSION 0x100 |
||||
extern FILE *g_fGPUCrosscheckInput; |
||||
extern FILE *g_fGPUCrosscheckOutput; |
||||
|
||||
extern float *g_hostAOS_PosMass; |
||||
extern float *g_hostAOS_VelInvMass; |
||||
extern float *g_hostAOS_Force; |
||||
|
||||
// for GPU cross-check
|
||||
const int g_maxGPUs = 32; |
||||
extern float *g_hostAOS_gpuCrossCheckForce[g_maxGPUs]; |
||||
|
||||
extern float *g_dptrAOS_PosMass; |
||||
extern float *g_dptrAOS_Force; |
||||
|
||||
|
||||
// Buffer to hold the golden version of the forces, used for comparison
|
||||
// Along with timing results, we report the maximum relative error with
|
||||
// respect to this array.
|
||||
extern float *g_hostAOS_Force_Golden; |
||||
|
||||
extern float *g_hostSOA_Pos[3]; |
||||
extern float *g_hostSOA_Force[3]; |
||||
extern float *g_hostSOA_Mass; |
||||
extern float *g_hostSOA_InvMass; |
||||
|
||||
extern size_t g_N; |
||||
|
||||
extern float g_softening; |
||||
extern float g_damping; |
||||
extern float g_dt; |
||||
|
||||
template<typename T> |
||||
struct Body { |
||||
T x_, y_, z_, mass_; |
||||
}; |
||||
|
||||
template<typename T> |
||||
class NBodyAlgorithm { |
||||
public: |
||||
inline NBodyAlgorithm<T>() { } |
||||
virtual ~NBodyAlgorithm<T>() { } |
||||
virtual bool Initialize( size_t N ); |
||||
|
||||
size_t N() const { return N_; } |
||||
virtual Body<T> getBody( size_t i) const = 0; |
||||
|
||||
private: |
||||
size_t N_; |
||||
}; |
||||
|
||||
template<typename T> |
||||
class NBodyAlgorithm_SOA : public NBodyAlgorithm<T> { |
||||
public: |
||||
NBodyAlgorithm_SOA<T>() { x_ = y_ = z_ = mass_ = nullptr; } |
||||
virtual ~NBodyAlgorithm_SOA<T>(); |
||||
|
||||
virtual bool Initialize( size_t N ); |
||||
virtual Body<T> getBody( size_t i ) const; |
||||
private: |
||||
T *x_, *y_, *z_, *mass_; |
||||
}; |
||||
|
||||
template<typename T> |
||||
inline |
||||
NBodyAlgorithm_SOA<T>::~NBodyAlgorithm_SOA() |
||||
{ |
||||
free( x_ ); |
||||
free( y_ ); |
||||
free( z_ ); |
||||
free( mass_ ); |
||||
} |
||||
|
||||
template<typename T> |
||||
inline bool |
||||
NBodyAlgorithm_SOA<T>::Initialize( size_t N ) |
||||
{ |
||||
NBodyAlgorithm<T>::Initialize( N ); |
||||
x_ = (T *) aligned_alloc( 64, N*sizeof(T) ); |
||||
y_ = (T *) aligned_alloc( 64, N*sizeof(T) ); |
||||
z_ = (T *) aligned_alloc( 64, N*sizeof(T) ); |
||||
mass_ = (T *) aligned_alloc( 64, N*sizeof(T) ); |
||||
if ( nullptr == x_ || nullptr==y_ || nullptr==z_ || nullptr==mass_ ) |
||||
goto Error; |
||||
return true; |
||||
Error: |
||||
free( x_ ); |
||||
free( y_ ); |
||||
free( z_ ); |
||||
free( mass_ ); |
||||
return false; |
||||
} |
||||
|
||||
enum nbodyAlgorithm_enum { |
||||
CPU_AOS = 0, /* This is the golden implementation */ |
||||
CPU_AOS_tiled, |
||||
CPU_SOA, |
||||
#ifdef HAVE_SIMD |
||||
CPU_SIMD, |
||||
#endif |
||||
#ifdef HAVE_SIMD_THREADED |
||||
CPU_SIMD_threaded, |
||||
#endif |
||||
#ifdef HAVE_SIMD_OPENMP |
||||
CPU_SIMD_openmp, |
||||
#endif |
||||
GPU_AOS, |
||||
GPU_Shared, |
||||
GPU_Const, |
||||
multiGPU_SingleCPUThread, |
||||
multiGPU_MultiCPUThread, |
||||
// SM 3.0 only
|
||||
GPU_Shuffle, |
||||
GPU_AOS_tiled, |
||||
GPU_AOS_tiled_const, |
||||
// GPU_Atomic
|
||||
}; |
||||
|
||||
|
||||
static const char *rgszAlgorithmNames[] = { |
||||
"CPU_AOS", |
||||
"CPU_AOS_tiled", |
||||
"CPU_SOA", |
||||
#ifdef HAVE_SIMD |
||||
"CPU_SIMD", |
||||
#endif |
||||
#ifdef HAVE_SIMD_THREADED |
||||
"CPU_SIMD_threaded", |
||||
#endif |
||||
#ifdef HAVE_SIMD_OPENMP |
||||
"CPU_SIMD_openmp", |
||||
#endif |
||||
"GPU_AOS", |
||||
"GPU_Shared", |
||||
"GPU_Const", |
||||
"multiGPU_SingleCPUThread", |
||||
"multiGPU_MultiCPUThread", |
||||
// SM 3.0 only
|
||||
"GPU_Shuffle", |
||||
"GPU_AOS_tiled", |
||||
"GPU_AOS_tiled_const", |
||||
// "GPU_Atomic"
|
||||
}; |
||||
|
||||
extern const char *rgszAlgorithmNames[]; |
||||
|
||||
extern enum nbodyAlgorithm_enum g_Algorithm; |
||||
|
||||
//
|
||||
// g_maxAlgorithm is used to determine when to rotate g_Algorithm back to CPU_AOS
|
||||
// If CUDA is present, it is CPU_SIMD_threaded, otherwise GPU_Shuffle
|
||||
// The CPU and GPU algorithms must be contiguous, and the logic in main() to
|
||||
// initialize this value must be modified if any new algorithms are added.
|
||||
//
|
||||
extern enum nbodyAlgorithm_enum g_maxAlgorithm; |
||||
extern bool g_bCrossCheck; |
||||
extern bool g_bNoCPU; |
||||
|
||||
extern cudahandbook::threading::workerThread *g_CPUThreadPool; |
||||
extern int g_numCPUCores; |
||||
|
||||
extern int g_numGPUs; |
||||
extern cudahandbook::threading::workerThread *g_GPUThreadPool; |
||||
|
||||
extern float ComputeGravitation_GPU_Shared ( float *force, float *posMass, float softeningSquared, size_t N ); |
||||
extern float ComputeGravitation_multiGPU_singlethread( float *force, float *posMass, float softeningSquared, size_t N ); |
||||
extern float ComputeGravitation_multiGPU_threaded ( float *force, float *posMass, float softeningSquared, size_t N ); |
||||
|
||||
|
||||
#endif |
@ -0,0 +1,84 @@
@@ -0,0 +1,84 @@
|
||||
/*
|
||||
* |
||||
* nbody_CPU_AOS.h |
||||
* |
||||
* Scalar CPU implementation of the O(N^2) N-body calculation. |
||||
* |
||||
* Copyright (c) 2011-2012, Archaea Software, LLC. |
||||
* All rights reserved. |
||||
* |
||||
* Redistribution and use in source and binary forms, with or without |
||||
* modification, are permitted provided that the following conditions |
||||
* are met: |
||||
* |
||||
* 1. Redistributions of source code must retain the above copyright |
||||
* notice, this list of conditions and the following disclaimer. |
||||
* 2. Redistributions in binary form must reproduce the above copyright |
||||
* notice, this list of conditions and the following disclaimer in |
||||
* the documentation and/or other materials provided with the |
||||
* distribution. |
||||
* |
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS |
||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE |
||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, |
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, |
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN |
||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
||||
* POSSIBILITY OF SUCH DAMAGE. |
||||
* |
||||
*/ |
||||
|
||||
#ifndef NO_CUDA |
||||
#define NO_CUDA |
||||
#endif |
||||
#include <chCUDA.h> |
||||
#include <chTimer.h> |
||||
|
||||
#include "bodybodyInteraction.cuh" |
||||
|
||||
float |
||||
ComputeGravitation_AOS( |
||||
float *force, |
||||
float *posMass, |
||||
float softeningSquared, |
||||
size_t N |
||||
) |
||||
{ |
||||
chTimerTimestamp start, end; |
||||
chTimerGetTime( &start ); |
||||
for ( size_t i = 0; i < N; i++ ) |
||||
{ |
||||
float acc[3] = {0, 0, 0}; |
||||
float myX = posMass[i*4+0]; |
||||
float myY = posMass[i*4+1]; |
||||
float myZ = posMass[i*4+2]; |
||||
|
||||
for ( size_t j = 0; j < N; j++ ) { |
||||
float fx, fy, fz; |
||||
float bodyX = posMass[j*4+0]; |
||||
float bodyY = posMass[j*4+1]; |
||||
float bodyZ = posMass[j*4+2]; |
||||
float bodyMass = posMass[j*4+3]; |
||||
|
||||
bodyBodyInteraction<float>( |
||||
&fx, &fy, &fz, |
||||
myX, myY, myZ, |
||||
bodyX, bodyY, bodyZ, bodyMass, |
||||
softeningSquared ); |
||||
acc[0] += fx; |
||||
acc[1] += fy; |
||||
acc[2] += fz; |
||||
} |
||||
|
||||
force[3*i+0] = acc[0]; |
||||
force[3*i+1] = acc[1]; |
||||
force[3*i+2] = acc[2]; |
||||
} |
||||
chTimerGetTime( &end ); |
||||
return (float) chTimerElapsedTime( &start, &end ) * 1000.0f; |
||||
} |
@ -0,0 +1,42 @@
@@ -0,0 +1,42 @@
|
||||
/*
|
||||
* |
||||
* nbody_CPU_AOS.h |
||||
* |
||||
* Scalar CPU implementation of the O(N^2) N-body calculation. |
||||
* |
||||
* Copyright (c) 2011-2012, Archaea Software, LLC. |
||||
* All rights reserved. |
||||
* |
||||
* Redistribution and use in source and binary forms, with or without |
||||
* modification, are permitted provided that the following conditions |
||||
* are met: |
||||
* |
||||
* 1. Redistributions of source code must retain the above copyright |
||||
* notice, this list of conditions and the following disclaimer. |
||||
* 2. Redistributions in binary form must reproduce the above copyright |
||||
* notice, this list of conditions and the following disclaimer in |
||||
* the documentation and/or other materials provided with the |
||||
* distribution. |
||||
* |
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS |
||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE |
||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, |
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, |
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN |
||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
||||
* POSSIBILITY OF SUCH DAMAGE. |
||||
* |
||||
*/ |
||||
|
||||
float |
||||
ComputeGravitation_AOS( |
||||
float *force, |
||||
float *posMass, |
||||
float softeningSquared, |
||||
size_t N |
||||
); |
@ -0,0 +1,94 @@
@@ -0,0 +1,94 @@
|
||||
/* |
||||
* |
||||
* nbody_GPU_AOS.h |
||||
* |
||||
* CUDA implementation of the O(N^2) N-body calculation. |
||||
* |
||||
* Copyright (c) 2011-2012, Archaea Software, LLC. |
||||
* All rights reserved. |
||||
* |
||||
* Redistribution and use in source and binary forms, with or without |
||||
* modification, are permitted provided that the following conditions |
||||
* are met: |
||||
* |
||||
* 1. Redistributions of source code must retain the above copyright |
||||
* notice, this list of conditions and the following disclaimer. |
||||
* 2. Redistributions in binary form must reproduce the above copyright |
||||
* notice, this list of conditions and the following disclaimer in |
||||
* the documentation and/or other materials provided with the |
||||
* distribution. |
||||
* |
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS |
||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE |
||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, |
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, |
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN |
||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
||||
* POSSIBILITY OF SUCH DAMAGE. |
||||
* |
||||
*/ |
||||
|
||||
template<typename T> |
||||
__global__ void |
||||
ComputeNBodyGravitation_GPU_AOS( |
||||
T *force, |
||||
T *posMass, |
||||
size_t N, |
||||
T softeningSquared ) |
||||
{ |
||||
for ( int i = blockIdx.x*blockDim.x + threadIdx.x; |
||||
i < N; |
||||
i += blockDim.x*gridDim.x ) |
||||
{ |
||||
T acc[3] = {0}; |
||||
float4 me = ((float4 *) posMass)[i]; |
||||
T myX = me.x; |
||||
T myY = me.y; |
||||
T myZ = me.z; |
||||
for ( int j = 0; j < N; j++ ) { |
||||
float4 body = ((float4 *) posMass)[j]; |
||||
float fx, fy, fz; |
||||
bodyBodyInteraction( |
||||
&fx, &fy, &fz, |
||||
myX, myY, myZ, |
||||
body.x, body.y, body.z, body.w, |
||||
softeningSquared); |
||||
acc[0] += fx; |
||||
acc[1] += fy; |
||||
acc[2] += fz; |
||||
} |
||||
force[3*i+0] = acc[0]; |
||||
force[3*i+1] = acc[1]; |
||||
force[3*i+2] = acc[2]; |
||||
} |
||||
} |
||||
|
||||
float |
||||
ComputeGravitation_GPU_AOS( |
||||
float *force, |
||||
float *posMass, |
||||
float softeningSquared, |
||||
size_t N |
||||
) |
||||
{ |
||||
cudaError_t status; |
||||
cudaEvent_t evStart = 0, evStop = 0; |
||||
float ms = 0.0; |
||||
cuda(EventCreate( &evStart ) ); |
||||
cuda(EventCreate( &evStop ) ); |
||||
cuda(EventRecord( evStart, NULL ) ); |
||||
ComputeNBodyGravitation_GPU_AOS<float> <<<300,256>>>( |
||||
force, posMass, N, softeningSquared ); |
||||
cuda(EventRecord( evStop, NULL ) ); |
||||
cuda(DeviceSynchronize() ); |
||||
cuda(EventElapsedTime( &ms, evStart, evStop ) ); |
||||
Error: |
||||
cudaEventDestroy( evStop ); |
||||
cudaEventDestroy( evStart ); |
||||
return ms; |
||||
} |
Loading…
Reference in new issue