diff --git a/nbody/nbody.cu b/nbody/nbody.cu index 56bc70b..92ff9bf 100644 --- a/nbody/nbody.cu +++ b/nbody/nbody.cu @@ -563,6 +563,10 @@ main( int argc, char *argv[] ) cuda(GetDeviceProperties( &prop, 0 ) ); g_bSM30Present = prop.major >= 3; } + else { + fprintf( stderr, "nbody: no GPUs\n" ); + exit(1); + } g_bNoCPU = chCommandLineGetBool( "nocpu", argc, argv ); if ( g_bNoCPU && ! g_bCUDAPresent ) { printf( "--nocpu specified, but no CUDA present...exiting\n" ); diff --git a/nbody2/bodybodyInteraction.cuh b/nbody2/bodybodyInteraction.cuh new file mode 100644 index 0000000..1de9572 --- /dev/null +++ b/nbody2/bodybodyInteraction.cuh @@ -0,0 +1,68 @@ +/* + * + * bodybodyInteraction.cuh + * + * CUDA header for function to compute body-body interaction. + * Also compatible with scalar (non-SIMD) CPU implementations. + * + * Copyright (c) 2011-2012, Archaea Software, LLC. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef __CUDAHANDBOOK_BODYBODYINTERACTION_CUH__ +#define __CUDAHANDBOOK_BODYBODYINTERACTION_CUH__ + +template +__host__ __device__ void bodyBodyInteraction( + T *fx, T *fy, T *fz, + T x0, T y0, T z0, + T x1, T y1, T z1, T mass1, + T softeningSquared) +{ + T dx = x1 - x0; + T dy = y1 - y0; + T dz = z1 - z0; + + T distSqr = dx*dx + dy*dy + dz*dz; + distSqr += softeningSquared; + + // + // rsqrtf() maps to SFU instruction - to support + // double, this has to be changed. + // + T invDist = rsqrtf(distSqr); + + T invDistCube = invDist * invDist * invDist; + T s = mass1 * invDistCube; + + *fx = dx * s; + *fy = dy * s; + *fz = dz * s; +} + +#endif diff --git a/nbody2/nbody.h b/nbody2/nbody.h new file mode 100644 index 0000000..0efc665 --- /dev/null +++ b/nbody2/nbody.h @@ -0,0 +1,216 @@ +/* + * + * nbody.h + * + * Header file to declare globals in nbody.cu + * + * Copyright (c) 2011-2012, Archaea Software, LLC. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef __CUDAHANDBOOK_NBODY_H__ +#define __CUDAHANDBOOK_NBODY_H__ + +//#include "nbody_CPU_SIMD.h" + +#include + +extern bool g_bCUDAPresent; +extern bool g_bGPUCrossCheck; + +extern bool g_GPUCrosscheck; +#define NBODY_GOLDENFILE_VERSION 0x100 +extern FILE *g_fGPUCrosscheckInput; +extern FILE *g_fGPUCrosscheckOutput; + +extern float *g_hostAOS_PosMass; +extern float *g_hostAOS_VelInvMass; +extern float *g_hostAOS_Force; + +// for GPU cross-check +const int g_maxGPUs = 32; +extern float *g_hostAOS_gpuCrossCheckForce[g_maxGPUs]; + +extern float *g_dptrAOS_PosMass; +extern float *g_dptrAOS_Force; + + +// Buffer to hold the golden version of the forces, used for comparison +// Along with timing results, we report the maximum relative error with +// respect to this array. +extern float *g_hostAOS_Force_Golden; + +extern float *g_hostSOA_Pos[3]; +extern float *g_hostSOA_Force[3]; +extern float *g_hostSOA_Mass; +extern float *g_hostSOA_InvMass; + +extern size_t g_N; + +extern float g_softening; +extern float g_damping; +extern float g_dt; + +template +struct Body { + T x_, y_, z_, mass_; +}; + +template +class NBodyAlgorithm { +public: + inline NBodyAlgorithm() { } + virtual ~NBodyAlgorithm() { } + virtual bool Initialize( size_t N ); + + size_t N() const { return N_; } + virtual Body getBody( size_t i) const = 0; + +private: + size_t N_; +}; + +template +class NBodyAlgorithm_SOA : public NBodyAlgorithm { +public: + NBodyAlgorithm_SOA() { x_ = y_ = z_ = mass_ = nullptr; } + virtual ~NBodyAlgorithm_SOA(); + + virtual bool Initialize( size_t N ); + virtual Body getBody( size_t i ) const; +private: + T *x_, *y_, *z_, *mass_; +}; + +template +inline +NBodyAlgorithm_SOA::~NBodyAlgorithm_SOA() +{ + free( x_ ); + free( y_ ); + free( z_ ); + free( mass_ ); +} + +template +inline bool +NBodyAlgorithm_SOA::Initialize( size_t N ) +{ + NBodyAlgorithm::Initialize( N ); + x_ = (T *) aligned_alloc( 64, N*sizeof(T) ); + y_ = (T *) aligned_alloc( 64, N*sizeof(T) ); + z_ = (T *) aligned_alloc( 64, N*sizeof(T) ); + mass_ = (T *) aligned_alloc( 64, N*sizeof(T) ); + if ( nullptr == x_ || nullptr==y_ || nullptr==z_ || nullptr==mass_ ) + goto Error; + return true; +Error: + free( x_ ); + free( y_ ); + free( z_ ); + free( mass_ ); + return false; +} + +enum nbodyAlgorithm_enum { + CPU_AOS = 0, /* This is the golden implementation */ + CPU_AOS_tiled, + CPU_SOA, +#ifdef HAVE_SIMD + CPU_SIMD, +#endif +#ifdef HAVE_SIMD_THREADED + CPU_SIMD_threaded, +#endif +#ifdef HAVE_SIMD_OPENMP + CPU_SIMD_openmp, +#endif + GPU_AOS, + GPU_Shared, + GPU_Const, + multiGPU_SingleCPUThread, + multiGPU_MultiCPUThread, +// SM 3.0 only + GPU_Shuffle, + GPU_AOS_tiled, + GPU_AOS_tiled_const, +// GPU_Atomic +}; + + +static const char *rgszAlgorithmNames[] = { + "CPU_AOS", + "CPU_AOS_tiled", + "CPU_SOA", +#ifdef HAVE_SIMD + "CPU_SIMD", +#endif +#ifdef HAVE_SIMD_THREADED + "CPU_SIMD_threaded", +#endif +#ifdef HAVE_SIMD_OPENMP + "CPU_SIMD_openmp", +#endif + "GPU_AOS", + "GPU_Shared", + "GPU_Const", + "multiGPU_SingleCPUThread", + "multiGPU_MultiCPUThread", +// SM 3.0 only + "GPU_Shuffle", + "GPU_AOS_tiled", + "GPU_AOS_tiled_const", +// "GPU_Atomic" +}; + +extern const char *rgszAlgorithmNames[]; + +extern enum nbodyAlgorithm_enum g_Algorithm; + +// +// g_maxAlgorithm is used to determine when to rotate g_Algorithm back to CPU_AOS +// If CUDA is present, it is CPU_SIMD_threaded, otherwise GPU_Shuffle +// The CPU and GPU algorithms must be contiguous, and the logic in main() to +// initialize this value must be modified if any new algorithms are added. +// +extern enum nbodyAlgorithm_enum g_maxAlgorithm; +extern bool g_bCrossCheck; +extern bool g_bNoCPU; + +extern cudahandbook::threading::workerThread *g_CPUThreadPool; +extern int g_numCPUCores; + +extern int g_numGPUs; +extern cudahandbook::threading::workerThread *g_GPUThreadPool; + +extern float ComputeGravitation_GPU_Shared ( float *force, float *posMass, float softeningSquared, size_t N ); +extern float ComputeGravitation_multiGPU_singlethread( float *force, float *posMass, float softeningSquared, size_t N ); +extern float ComputeGravitation_multiGPU_threaded ( float *force, float *posMass, float softeningSquared, size_t N ); + + +#endif diff --git a/nbody2/nbody_CPU_AOS.cpp b/nbody2/nbody_CPU_AOS.cpp new file mode 100644 index 0000000..7e12f1c --- /dev/null +++ b/nbody2/nbody_CPU_AOS.cpp @@ -0,0 +1,84 @@ +/* + * + * nbody_CPU_AOS.h + * + * Scalar CPU implementation of the O(N^2) N-body calculation. + * + * Copyright (c) 2011-2012, Archaea Software, LLC. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef NO_CUDA +#define NO_CUDA +#endif +#include +#include + +#include "bodybodyInteraction.cuh" + +float +ComputeGravitation_AOS( + float *force, + float *posMass, + float softeningSquared, + size_t N +) +{ + chTimerTimestamp start, end; + chTimerGetTime( &start ); + for ( size_t i = 0; i < N; i++ ) + { + float acc[3] = {0, 0, 0}; + float myX = posMass[i*4+0]; + float myY = posMass[i*4+1]; + float myZ = posMass[i*4+2]; + + for ( size_t j = 0; j < N; j++ ) { + float fx, fy, fz; + float bodyX = posMass[j*4+0]; + float bodyY = posMass[j*4+1]; + float bodyZ = posMass[j*4+2]; + float bodyMass = posMass[j*4+3]; + + bodyBodyInteraction( + &fx, &fy, &fz, + myX, myY, myZ, + bodyX, bodyY, bodyZ, bodyMass, + softeningSquared ); + acc[0] += fx; + acc[1] += fy; + acc[2] += fz; + } + + force[3*i+0] = acc[0]; + force[3*i+1] = acc[1]; + force[3*i+2] = acc[2]; + } + chTimerGetTime( &end ); + return (float) chTimerElapsedTime( &start, &end ) * 1000.0f; +} diff --git a/nbody2/nbody_CPU_AOS.h b/nbody2/nbody_CPU_AOS.h new file mode 100644 index 0000000..bdf1257 --- /dev/null +++ b/nbody2/nbody_CPU_AOS.h @@ -0,0 +1,42 @@ +/* + * + * nbody_CPU_AOS.h + * + * Scalar CPU implementation of the O(N^2) N-body calculation. + * + * Copyright (c) 2011-2012, Archaea Software, LLC. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +float +ComputeGravitation_AOS( + float *force, + float *posMass, + float softeningSquared, + size_t N +); diff --git a/nbody2/nbody_GPU_AOS.cuh b/nbody2/nbody_GPU_AOS.cuh new file mode 100644 index 0000000..b52e417 --- /dev/null +++ b/nbody2/nbody_GPU_AOS.cuh @@ -0,0 +1,94 @@ +/* + * + * nbody_GPU_AOS.h + * + * CUDA implementation of the O(N^2) N-body calculation. + * + * Copyright (c) 2011-2012, Archaea Software, LLC. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +template +__global__ void +ComputeNBodyGravitation_GPU_AOS( + T *force, + T *posMass, + size_t N, + T softeningSquared ) +{ + for ( int i = blockIdx.x*blockDim.x + threadIdx.x; + i < N; + i += blockDim.x*gridDim.x ) + { + T acc[3] = {0}; + float4 me = ((float4 *) posMass)[i]; + T myX = me.x; + T myY = me.y; + T myZ = me.z; + for ( int j = 0; j < N; j++ ) { + float4 body = ((float4 *) posMass)[j]; + float fx, fy, fz; + bodyBodyInteraction( + &fx, &fy, &fz, + myX, myY, myZ, + body.x, body.y, body.z, body.w, + softeningSquared); + acc[0] += fx; + acc[1] += fy; + acc[2] += fz; + } + force[3*i+0] = acc[0]; + force[3*i+1] = acc[1]; + force[3*i+2] = acc[2]; + } +} + +float +ComputeGravitation_GPU_AOS( + float *force, + float *posMass, + float softeningSquared, + size_t N +) +{ + cudaError_t status; + cudaEvent_t evStart = 0, evStop = 0; + float ms = 0.0; + cuda(EventCreate( &evStart ) ); + cuda(EventCreate( &evStop ) ); + cuda(EventRecord( evStart, NULL ) ); + ComputeNBodyGravitation_GPU_AOS <<<300,256>>>( + force, posMass, N, softeningSquared ); + cuda(EventRecord( evStop, NULL ) ); + cuda(DeviceSynchronize() ); + cuda(EventElapsedTime( &ms, evStart, evStop ) ); +Error: + cudaEventDestroy( evStop ); + cudaEventDestroy( evStart ); + return ms; +}