6 changed files with 508 additions and 0 deletions
@ -0,0 +1,68 @@ |
|||||||
|
/* |
||||||
|
* |
||||||
|
* bodybodyInteraction.cuh |
||||||
|
* |
||||||
|
* CUDA header for function to compute body-body interaction. |
||||||
|
* Also compatible with scalar (non-SIMD) CPU implementations. |
||||||
|
* |
||||||
|
* Copyright (c) 2011-2012, Archaea Software, LLC. |
||||||
|
* All rights reserved. |
||||||
|
* |
||||||
|
* Redistribution and use in source and binary forms, with or without |
||||||
|
* modification, are permitted provided that the following conditions |
||||||
|
* are met: |
||||||
|
* |
||||||
|
* 1. Redistributions of source code must retain the above copyright |
||||||
|
* notice, this list of conditions and the following disclaimer. |
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright |
||||||
|
* notice, this list of conditions and the following disclaimer in |
||||||
|
* the documentation and/or other materials provided with the |
||||||
|
* distribution. |
||||||
|
* |
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
||||||
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
||||||
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS |
||||||
|
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE |
||||||
|
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, |
||||||
|
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, |
||||||
|
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
||||||
|
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
||||||
|
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN |
||||||
|
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
||||||
|
* POSSIBILITY OF SUCH DAMAGE. |
||||||
|
* |
||||||
|
*/ |
||||||
|
|
||||||
|
#ifndef __CUDAHANDBOOK_BODYBODYINTERACTION_CUH__ |
||||||
|
#define __CUDAHANDBOOK_BODYBODYINTERACTION_CUH__ |
||||||
|
|
||||||
|
//
// Computes the softened gravitational term exerted on body 0 by body 1
// and stores its three components through fx, fy and fz.
//
//   fx, fy, fz        - outputs; overwritten, not accumulated into
//   x0, y0, z0        - position of the body being acted upon
//   x1, y1, z1, mass1 - position and mass of the interacting body
//   softeningSquared  - added to the squared distance before the
//                       reciprocal square root is taken
//
// With d = (x1-x0, y1-y0, z1-z0) the value written is
//     mass1 * d / (|d|^2 + softeningSquared)^(3/2)
// If both bodies share a position and softeningSquared is zero, the
// squared distance is zero and the result is not finite.
//
template <typename T>
__host__ __device__ void bodyBodyInteraction(
    T *fx, T *fy, T *fz,
    T x0, T y0, T z0,
    T x1, T y1, T z1, T mass1,
    T softeningSquared)
{
    // Separation vector pointing from body 0 to body 1
    T dx = x1 - x0;
    T dy = y1 - y0;
    T dz = z1 - z0;

    T distSqr = dx*dx + dy*dy + dz*dz;
    distSqr += softeningSquared;

    //
    // rsqrtf() maps to SFU instruction - to support
    // double, this has to be changed.
    //
    T invDist = rsqrtf(distSqr);

    T invDistCube = invDist * invDist * invDist;
    T s = mass1 * invDistCube;

    *fx = dx * s;
    *fy = dy * s;
    *fz = dz * s;
}
||||||
|
|
||||||
|
#endif |
@ -0,0 +1,216 @@ |
|||||||
|
/*
|
||||||
|
* |
||||||
|
* nbody.h |
||||||
|
* |
||||||
|
* Header file to declare globals in nbody.cu |
||||||
|
* |
||||||
|
* Copyright (c) 2011-2012, Archaea Software, LLC. |
||||||
|
* All rights reserved. |
||||||
|
* |
||||||
|
* Redistribution and use in source and binary forms, with or without |
||||||
|
* modification, are permitted provided that the following conditions |
||||||
|
* are met: |
||||||
|
* |
||||||
|
* 1. Redistributions of source code must retain the above copyright |
||||||
|
* notice, this list of conditions and the following disclaimer. |
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright |
||||||
|
* notice, this list of conditions and the following disclaimer in |
||||||
|
* the documentation and/or other materials provided with the |
||||||
|
* distribution. |
||||||
|
* |
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
||||||
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
||||||
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS |
||||||
|
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE |
||||||
|
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, |
||||||
|
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, |
||||||
|
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
||||||
|
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
||||||
|
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN |
||||||
|
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
||||||
|
* POSSIBILITY OF SUCH DAMAGE. |
||||||
|
* |
||||||
|
*/ |
||||||
|
|
||||||
|
#ifndef __CUDAHANDBOOK_NBODY_H__ |
||||||
|
#define __CUDAHANDBOOK_NBODY_H__ |
||||||
|
|
||||||
|
//#include "nbody_CPU_SIMD.h"
|
||||||
|
|
||||||
|
#include <chThread.h> |
||||||
|
|
||||||
|
//
// Globals shared across the n-body sample. They are only declared
// here; the definitions live in another translation unit.
//
extern bool g_bCUDAPresent;
extern bool g_bGPUCrossCheck;

// NOTE(review): g_GPUCrosscheck differs from g_bGPUCrossCheck above only
// in spelling - confirm that both are really defined and both are needed.
extern bool g_GPUCrosscheck;
#define NBODY_GOLDENFILE_VERSION 0x100
extern FILE *g_fGPUCrosscheckInput;
extern FILE *g_fGPUCrosscheckOutput;

extern float *g_hostAOS_PosMass;
extern float *g_hostAOS_VelInvMass;
extern float *g_hostAOS_Force;

// for GPU cross-check
const int g_maxGPUs = 32;
extern float *g_hostAOS_gpuCrossCheckForce[g_maxGPUs];

extern float *g_dptrAOS_PosMass;
extern float *g_dptrAOS_Force;


// Buffer to hold the golden version of the forces, used for comparison
// Along with timing results, we report the maximum relative error with
// respect to this array.
extern float *g_hostAOS_Force_Golden;

// One array per coordinate axis
extern float *g_hostSOA_Pos[3];
extern float *g_hostSOA_Force[3];
extern float *g_hostSOA_Mass;
extern float *g_hostSOA_InvMass;

extern size_t g_N;

extern float g_softening;
extern float g_damping;
extern float g_dt;
||||||
|
|
||||||
|
//
// Plain aggregate describing one body: three position components and
// a mass, all of element type T.
//
template<typename T>
struct Body {
    T x_, y_, z_, mass_;
};
||||||
|
|
||||||
|
template<typename T> |
||||||
|
class NBodyAlgorithm { |
||||||
|
public: |
||||||
|
inline NBodyAlgorithm<T>() { } |
||||||
|
virtual ~NBodyAlgorithm<T>() { } |
||||||
|
virtual bool Initialize( size_t N ); |
||||||
|
|
||||||
|
size_t N() const { return N_; } |
||||||
|
virtual Body<T> getBody( size_t i) const = 0; |
||||||
|
|
||||||
|
private: |
||||||
|
size_t N_; |
||||||
|
}; |
||||||
|
|
||||||
|
template<typename T> |
||||||
|
class NBodyAlgorithm_SOA : public NBodyAlgorithm<T> { |
||||||
|
public: |
||||||
|
NBodyAlgorithm_SOA<T>() { x_ = y_ = z_ = mass_ = nullptr; } |
||||||
|
virtual ~NBodyAlgorithm_SOA<T>(); |
||||||
|
|
||||||
|
virtual bool Initialize( size_t N ); |
||||||
|
virtual Body<T> getBody( size_t i ) const; |
||||||
|
private: |
||||||
|
T *x_, *y_, *z_, *mass_; |
||||||
|
}; |
||||||
|
|
||||||
|
template<typename T> |
||||||
|
inline |
||||||
|
NBodyAlgorithm_SOA<T>::~NBodyAlgorithm_SOA() |
||||||
|
{ |
||||||
|
free( x_ ); |
||||||
|
free( y_ ); |
||||||
|
free( z_ ); |
||||||
|
free( mass_ ); |
||||||
|
} |
||||||
|
|
||||||
|
template<typename T> |
||||||
|
inline bool |
||||||
|
NBodyAlgorithm_SOA<T>::Initialize( size_t N ) |
||||||
|
{ |
||||||
|
NBodyAlgorithm<T>::Initialize( N ); |
||||||
|
x_ = (T *) aligned_alloc( 64, N*sizeof(T) ); |
||||||
|
y_ = (T *) aligned_alloc( 64, N*sizeof(T) ); |
||||||
|
z_ = (T *) aligned_alloc( 64, N*sizeof(T) ); |
||||||
|
mass_ = (T *) aligned_alloc( 64, N*sizeof(T) ); |
||||||
|
if ( nullptr == x_ || nullptr==y_ || nullptr==z_ || nullptr==mass_ ) |
||||||
|
goto Error; |
||||||
|
return true; |
||||||
|
Error: |
||||||
|
free( x_ ); |
||||||
|
free( y_ ); |
||||||
|
free( z_ ); |
||||||
|
free( mass_ ); |
||||||
|
return false; |
||||||
|
} |
||||||
|
|
||||||
|
//
// Identifiers for the available n-body implementations. The CPU
// variants come first, followed by the GPU variants; the SIMD entries
// exist only when the corresponding HAVE_SIMD* macro is defined, which
// shifts the numeric value of every later entry.
//
// rgszAlgorithmNames lists the same entries in the same order under the
// same #ifdefs (presumably it is indexed by this enum), so the two
// must be edited together.
//
enum nbodyAlgorithm_enum {
    CPU_AOS = 0,    /* This is the golden implementation */
    CPU_AOS_tiled,
    CPU_SOA,
#ifdef HAVE_SIMD
    CPU_SIMD,
#endif
#ifdef HAVE_SIMD_THREADED
    CPU_SIMD_threaded,
#endif
#ifdef HAVE_SIMD_OPENMP
    CPU_SIMD_openmp,
#endif
    GPU_AOS,
    GPU_Shared,
    GPU_Const,
    multiGPU_SingleCPUThread,
    multiGPU_MultiCPUThread,
// SM 3.0 only
    GPU_Shuffle,
    GPU_AOS_tiled,
    GPU_AOS_tiled_const,
//    GPU_Atomic
};
||||||
|
|
||||||
|
|
||||||
|
//
// Printable names of the algorithms, in the same order and under the
// same #ifdefs as the entries of nbodyAlgorithm_enum. Because the array
// is static in a header, every translation unit that includes this
// file gets its own copy.
//
static const char *rgszAlgorithmNames[] = {
    "CPU_AOS",
    "CPU_AOS_tiled",
    "CPU_SOA",
#ifdef HAVE_SIMD
    "CPU_SIMD",
#endif
#ifdef HAVE_SIMD_THREADED
    "CPU_SIMD_threaded",
#endif
#ifdef HAVE_SIMD_OPENMP
    "CPU_SIMD_openmp",
#endif
    "GPU_AOS",
    "GPU_Shared",
    "GPU_Const",
    "multiGPU_SingleCPUThread",
    "multiGPU_MultiCPUThread",
// SM 3.0 only
    "GPU_Shuffle",
    "GPU_AOS_tiled",
    "GPU_AOS_tiled_const",
//    "GPU_Atomic"
};
||||||
|
|
||||||
|
extern const char *rgszAlgorithmNames[]; |
||||||
|
|
||||||
|
extern enum nbodyAlgorithm_enum g_Algorithm; |
||||||
|
|
||||||
|
//
|
||||||
|
// g_maxAlgorithm is used to determine when to rotate g_Algorithm back to CPU_AOS
|
||||||
|
// If CUDA is present, it is CPU_SIMD_threaded, otherwise GPU_Shuffle
|
||||||
|
// The CPU and GPU algorithms must be contiguous, and the logic in main() to
|
||||||
|
// initialize this value must be modified if any new algorithms are added.
|
||||||
|
//
|
||||||
|
extern enum nbodyAlgorithm_enum g_maxAlgorithm; |
||||||
|
extern bool g_bCrossCheck; |
||||||
|
extern bool g_bNoCPU; |
||||||
|
|
||||||
|
extern cudahandbook::threading::workerThread *g_CPUThreadPool; |
||||||
|
extern int g_numCPUCores; |
||||||
|
|
||||||
|
extern int g_numGPUs; |
||||||
|
extern cudahandbook::threading::workerThread *g_GPUThreadPool; |
||||||
|
|
||||||
|
extern float ComputeGravitation_GPU_Shared ( float *force, float *posMass, float softeningSquared, size_t N ); |
||||||
|
extern float ComputeGravitation_multiGPU_singlethread( float *force, float *posMass, float softeningSquared, size_t N ); |
||||||
|
extern float ComputeGravitation_multiGPU_threaded ( float *force, float *posMass, float softeningSquared, size_t N ); |
||||||
|
|
||||||
|
|
||||||
|
#endif |
@ -0,0 +1,84 @@ |
|||||||
|
/*
|
||||||
|
* |
||||||
|
* nbody_CPU_AOS.h |
||||||
|
* |
||||||
|
* Scalar CPU implementation of the O(N^2) N-body calculation. |
||||||
|
* |
||||||
|
* Copyright (c) 2011-2012, Archaea Software, LLC. |
||||||
|
* All rights reserved. |
||||||
|
* |
||||||
|
* Redistribution and use in source and binary forms, with or without |
||||||
|
* modification, are permitted provided that the following conditions |
||||||
|
* are met: |
||||||
|
* |
||||||
|
* 1. Redistributions of source code must retain the above copyright |
||||||
|
* notice, this list of conditions and the following disclaimer. |
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright |
||||||
|
* notice, this list of conditions and the following disclaimer in |
||||||
|
* the documentation and/or other materials provided with the |
||||||
|
* distribution. |
||||||
|
* |
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
||||||
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
||||||
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS |
||||||
|
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE |
||||||
|
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, |
||||||
|
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, |
||||||
|
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
||||||
|
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
||||||
|
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN |
||||||
|
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
||||||
|
* POSSIBILITY OF SUCH DAMAGE. |
||||||
|
* |
||||||
|
*/ |
||||||
|
|
||||||
|
#ifndef NO_CUDA |
||||||
|
#define NO_CUDA |
||||||
|
#endif |
||||||
|
#include <chCUDA.h> |
||||||
|
#include <chTimer.h> |
||||||
|
|
||||||
|
#include "bodybodyInteraction.cuh" |
||||||
|
|
||||||
|
float |
||||||
|
ComputeGravitation_AOS( |
||||||
|
float *force, |
||||||
|
float *posMass, |
||||||
|
float softeningSquared, |
||||||
|
size_t N |
||||||
|
) |
||||||
|
{ |
||||||
|
chTimerTimestamp start, end; |
||||||
|
chTimerGetTime( &start ); |
||||||
|
for ( size_t i = 0; i < N; i++ ) |
||||||
|
{ |
||||||
|
float acc[3] = {0, 0, 0}; |
||||||
|
float myX = posMass[i*4+0]; |
||||||
|
float myY = posMass[i*4+1]; |
||||||
|
float myZ = posMass[i*4+2]; |
||||||
|
|
||||||
|
for ( size_t j = 0; j < N; j++ ) { |
||||||
|
float fx, fy, fz; |
||||||
|
float bodyX = posMass[j*4+0]; |
||||||
|
float bodyY = posMass[j*4+1]; |
||||||
|
float bodyZ = posMass[j*4+2]; |
||||||
|
float bodyMass = posMass[j*4+3]; |
||||||
|
|
||||||
|
bodyBodyInteraction<float>( |
||||||
|
&fx, &fy, &fz, |
||||||
|
myX, myY, myZ, |
||||||
|
bodyX, bodyY, bodyZ, bodyMass, |
||||||
|
softeningSquared ); |
||||||
|
acc[0] += fx; |
||||||
|
acc[1] += fy; |
||||||
|
acc[2] += fz; |
||||||
|
} |
||||||
|
|
||||||
|
force[3*i+0] = acc[0]; |
||||||
|
force[3*i+1] = acc[1]; |
||||||
|
force[3*i+2] = acc[2]; |
||||||
|
} |
||||||
|
chTimerGetTime( &end ); |
||||||
|
return (float) chTimerElapsedTime( &start, &end ) * 1000.0f; |
||||||
|
} |
@ -0,0 +1,42 @@ |
|||||||
|
/*
|
||||||
|
* |
||||||
|
* nbody_CPU_AOS.h |
||||||
|
* |
||||||
|
* Scalar CPU implementation of the O(N^2) N-body calculation. |
||||||
|
* |
||||||
|
* Copyright (c) 2011-2012, Archaea Software, LLC. |
||||||
|
* All rights reserved. |
||||||
|
* |
||||||
|
* Redistribution and use in source and binary forms, with or without |
||||||
|
* modification, are permitted provided that the following conditions |
||||||
|
* are met: |
||||||
|
* |
||||||
|
* 1. Redistributions of source code must retain the above copyright |
||||||
|
* notice, this list of conditions and the following disclaimer. |
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright |
||||||
|
* notice, this list of conditions and the following disclaimer in |
||||||
|
* the documentation and/or other materials provided with the |
||||||
|
* distribution. |
||||||
|
* |
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
||||||
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
||||||
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS |
||||||
|
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE |
||||||
|
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, |
||||||
|
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, |
||||||
|
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
||||||
|
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
||||||
|
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN |
||||||
|
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
||||||
|
* POSSIBILITY OF SUCH DAMAGE. |
||||||
|
* |
||||||
|
*/ |
||||||
|
|
||||||
|
//
// Declaration of the scalar CPU all-pairs gravitation routine.
//
//   force            - output array of forces
//   posMass          - input array of positions and masses
//   softeningSquared - squared softening term
//   N                - number of bodies
//
float
ComputeGravitation_AOS(
    float *force,
    float *posMass,
    float softeningSquared,
    size_t N
);
@ -0,0 +1,94 @@ |
|||||||
|
/* |
||||||
|
* |
||||||
|
* nbody_GPU_AOS.h |
||||||
|
* |
||||||
|
* CUDA implementation of the O(N^2) N-body calculation. |
||||||
|
* |
||||||
|
* Copyright (c) 2011-2012, Archaea Software, LLC. |
||||||
|
* All rights reserved. |
||||||
|
* |
||||||
|
* Redistribution and use in source and binary forms, with or without |
||||||
|
* modification, are permitted provided that the following conditions |
||||||
|
* are met: |
||||||
|
* |
||||||
|
* 1. Redistributions of source code must retain the above copyright |
||||||
|
* notice, this list of conditions and the following disclaimer. |
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright |
||||||
|
* notice, this list of conditions and the following disclaimer in |
||||||
|
* the documentation and/or other materials provided with the |
||||||
|
* distribution. |
||||||
|
* |
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
||||||
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
||||||
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS |
||||||
|
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE |
||||||
|
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, |
||||||
|
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, |
||||||
|
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
||||||
|
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
||||||
|
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN |
||||||
|
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
||||||
|
* POSSIBILITY OF SUCH DAMAGE. |
||||||
|
* |
||||||
|
*/ |
||||||
|
|
||||||
|
//
// All-pairs gravitation kernel.
//
//   force            - output, 3 elements per body
//   posMass          - input, read as an array of float4 (x, y, z, mass)
//   N                - number of bodies
//   softeningSquared - forwarded to bodyBodyInteraction()
//
// Each thread walks the bodies in a grid-stride loop, so any 1D launch
// configuration covers all N bodies. For each body it owns, a thread
// reads all N bodies from posMass and sums their contributions.
//
// posMass is reinterpreted as float4, so the data must be laid out as
// 4 floats per body regardless of T, and the pointer must meet float4
// alignment.
//
template<typename T>
__global__ void
ComputeNBodyGravitation_GPU_AOS(
    T *force,
    T *posMass,
    size_t N,
    T softeningSquared )
{
    // size_t indices: N is size_t, and the 32-bit products
    // blockIdx.x*blockDim.x and 3*i could wrap for large N.
    const size_t stride = (size_t) blockDim.x * gridDim.x;
    for ( size_t i = (size_t) blockIdx.x * blockDim.x + threadIdx.x;
                 i < N;
                 i += stride )
    {
        T acc[3] = {0};
        float4 me = ((float4 *) posMass)[i];
        T myX = me.x;
        T myY = me.y;
        T myZ = me.z;
        for ( size_t j = 0; j < N; j++ ) {
            float4 body = ((float4 *) posMass)[j];
            // Outputs have type T and the template argument is explicit,
            // so the call no longer depends on deducing T from a mix of
            // float and T arguments.
            T fx, fy, fz;
            bodyBodyInteraction<T>(
                &fx, &fy, &fz,
                myX, myY, myZ,
                body.x, body.y, body.z, body.w,
                softeningSquared );
            acc[0] += fx;
            acc[1] += fy;
            acc[2] += fz;
        }
        force[3*i+0] = acc[0];
        force[3*i+1] = acc[1];
        force[3*i+2] = acc[2];
    }
}
||||||
|
|
||||||
|
//
// Launches the AOS gravitation kernel and times it with CUDA events.
//
//   force, posMass   - passed straight to the kernel, so both must be
//                      addressable from the device
//   softeningSquared - squared softening term
//   N                - number of bodies
//
// Returns the time between the two recorded events in milliseconds,
// or 0 if any CUDA call fails (ms is only written by the final
// cudaEventElapsedTime call).
//
// cuda() is a project macro; it presumably stores the call's result in
// 'status' and jumps to the Error label on failure - confirm against
// its definition.
//
float
ComputeGravitation_GPU_AOS(
    float *force,
    float *posMass,
    float softeningSquared,
    size_t N
)
{
    cudaError_t status;
    cudaEvent_t evStart = 0, evStop = 0;
    float ms = 0.0f;
    cuda(EventCreate( &evStart ) );
    cuda(EventCreate( &evStop ) );
    cuda(EventRecord( evStart, NULL ) );
    ComputeNBodyGravitation_GPU_AOS<float> <<<300,256>>>(
        force, posMass, N, softeningSquared );
    // A kernel launch returns no status; an invalid launch is only
    // visible through cudaGetLastError().
    cuda(GetLastError() );
    cuda(EventRecord( evStop, NULL ) );
    cuda(DeviceSynchronize() );
    cuda(EventElapsedTime( &ms, evStart, evStop ) );
Error:
    // Only destroy events that were actually created
    if ( evStop )
        cudaEventDestroy( evStop );
    if ( evStart )
        cudaEventDestroy( evStart );
    return ms;
}
Loading…
Reference in new issue