diff --git a/concurrency/cudaGetLastErrorIsAsynchronous.cu b/concurrency/cudaGetLastErrorIsAsynchronous.cu new file mode 100644 index 0000000..88f9a1b --- /dev/null +++ b/concurrency/cudaGetLastErrorIsAsynchronous.cu @@ -0,0 +1,111 @@ +/* + * + * nullKernelAsync.cu + * + * Microbenchmark for throughput of asynchronous kernel launch. + * + * Build with: nvcc -I ../chLib nullKernelAsync.cu + * Requires: No minimum SM requirement. + * + * Copyright (c) 2011-2014, Archaea Software, LLC. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include +#include + +#include "chError.h" +#include "chTimer.h" + +#ifdef __HIPCC__ +#include + +#define cuda( fn ) do { \ + status = (hip##fn); \ + if ( hipSuccess != (status) ) { \ + goto Error; \ + } \ + } while (0); +#endif + +__global__ +void +NullKernel( volatile int *p, bool write, int a=0, int b=1, int c=2, int d=3, int e=4, int f=5, int g=6 ) +{ + if ( write && 0==threadIdx.x && 0==blockIdx.x ) { + *p = a+b+c+d+e+f+g; + } +} + +double +usPerLaunch( int cIterations ) +{ + cudaError_t status; + double microseconds, ret; + chTimerTimestamp start, stop; + cudaEvent_t ev=0; + + cuda(EventCreate( &ev ) ) + + cuda(Free(0)); + + chTimerGetTime( &start ); + for ( int i = 0; i < cIterations; i++ ) { + NullKernel<<<1,1>>>( NULL, false ); + } + NullKernel<<<1,1>>>( NULL, true ); + cuda(EventRecord( ev )); + status = cudaEventQuery( ev ); + std::cout << "cudaEventQuery returned " << status << std::endl; + status = cudaGetLastError(); + std::cout << "cudaGetLastError returned " << status << " (before cudaDeviceSynchronize())" << std::endl; + + // this returns error due to deliberate dereference of NULL on last kernel invocation + (void) cudaDeviceSynchronize(); + status = cudaGetLastError(); + std::cout << "cudaGetLastError returned " << status << " (after cudaDeviceSynchronize())" << std::endl; + cuda(EventDestroy(ev)); + chTimerGetTime( &stop ); + + microseconds = 1e6*chTimerElapsedTime( &start, &stop ); + ret = microseconds / (float) cIterations; + +Error: + return (status) ? 0.0 : ret; +} + +int +main( int argc, char *argv[] ) +{ + const int cIterations = 100000; + printf( "Measuring asynchronous launch time... " ); fflush( stdout ); + + printf( "%.2f us\n", usPerLaunch(cIterations) ); + + return 0; +} diff --git a/concurrency/nullKernelAsync.cu b/concurrency/nullKernelAsync.cu index 0d3ca09..7d00e75 100644 --- a/concurrency/nullKernelAsync.cu +++ b/concurrency/nullKernelAsync.cu @@ -37,14 +37,36 @@ */ #include +#include #include "chError.h" #include "chTimer.h" +<<<<<<< Updated upstream __global__ void NullKernel() { +======= +#ifdef __HIPCC__ +#include + +#define cuda( fn ) do { \ + status = (hip##fn); \ + if ( hipSuccess != (status) ) { \ + goto Error; \ + } \ + } while (0); +#endif + +__global__ +void +NullKernel( volatile int *p, bool write, int a=0, int b=1, int c=2, int d=3, int e=4, int f=5, int g=6 ) +{ + if ( write && 0==threadIdx.x && 0==blockIdx.x ) { + *p = a+b+c+d+e+f+g; + } +>>>>>>> Stashed changes } double @@ -53,6 +75,9 @@ usPerLaunch( int cIterations ) cudaError_t status; double microseconds, ret; chTimerTimestamp start, stop; + cudaEvent_t ev=0; + + cuda(EventCreate( &ev ) ) cuda(Free(0)); @@ -60,7 +85,15 @@ usPerLaunch( int cIterations ) for ( int i = 0; i < cIterations; i++ ) { NullKernel<<<1,1>>>(); } + NullKernel<<<1,1>>>( NULL, true ); + cuda(EventRecord( ev )); + status = cudaEventQuery( ev ); + std::cout << "cudaEventQuery returned " << status << std::endl; + status = cudaGetLastError(); + std::cout << "cudaGetLastError returned " << status << std::endl; + cuda(DeviceSynchronize()); + cuda(EventDestroy(ev)); chTimerGetTime( &stop ); microseconds = 1e6*chTimerElapsedTime( &start, &stop );