Source code that accompanies The CUDA Handbook.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 

150 lines
3.4 KiB

/*
*
* Copyright (C) 2011 by Archaea Software, LLC.
* All rights reserved.
*
*/
#include <cuda.h>
#include <stdio.h>
#include <memory.h>
#include <chError.h>
// Element count used by main() to size its buffers and passed to PrintTex()
// as the number of values to read back and print.
#define NUM_FLOATS 256
// Driver API handles shared between main() and PrintTex(). main() fills
// them in, in this order, before PrintTex() is called.
// Device handle obtained for ordinal 0.
CUdevice g_device;
// Context created on g_device with CU_CTX_MAP_HOST.
CUcontext g_ctx;
// Module loaded from the PTX file named in main().
CUmodule g_module;
// The "TexReadout" entry point looked up in g_module; launched by PrintTex().
CUfunction g_function;
// The "tex1" texture reference looked up in g_module.
CUtexref g_texref;
/*
 * Host-side reference for promoting a signed 8-bit value to float.
 * PrintTex() compares each value read back from the texture against the
 * result of this function.
 *
 * The bit pattern 0x80 (-128) maps to -1.0f; every other value maps to
 * value/127, so the result lies in [-1.0f, 1.0f].
 *
 * c - 8-bit value; its bit pattern is always interpreted as signed.
 */
float
TexPromoteToFloat( char c )
{
    // Plain char is unsigned on some ABIs. Reinterpret explicitly as
    // signed so bit patterns 0x81..0xFF yield negative results everywhere
    // instead of values greater than 1.0f.
    const signed char sc = (signed char) c;

    if ( sc == -128 ) {
        return -1.0f;
    }
    return (float) sc / 127.0f;
}
/*
 * Host-side reference for promoting a signed 16-bit value to float.
 *
 * The most negative value (0x8000, i.e. -32768) maps to -1.0f; every
 * other value maps to value/32767, so the result lies in [-1.0f, 1.0f].
 */
float
TexPromoteToFloat( short s )
{
    const short mostNegative = (short) 0x8000;

    return ( s != mostNegative ) ? (float) s / 32767.0f : -1.0f;
}
/*
 * Host-side reference for promoting an unsigned 8-bit value to float:
 * value/255, so 0 maps to 0.0f and 255 maps to 1.0f.
 */
float
TexPromoteToFloat( unsigned char uc )
{
    const float numerator = static_cast<float>( uc );

    return numerator / 255.0f;
}
/*
 * Host-side reference for promoting an unsigned 16-bit value to float:
 * value/65535, so 0 maps to 0.0f and 65535 maps to 1.0f.
 */
float
TexPromoteToFloat( unsigned short us )
{
    const float numerator = static_cast<float>( us );

    return numerator / 65535.0f;
}
/*
 * Launches g_function to fill `host` with N floats, prints them on one
 * line, and checks each against the host-side reference
 * TexPromoteToFloat( (char) i ).
 *
 * host - output buffer of at least N floats. It is passed to
 *        cuMemHostGetDevicePointer, so it must be memory for which that
 *        call succeeds (main() passes a CU_MEMHOSTALLOC_DEVICEMAP
 *        allocation).
 * N    - number of floats to read back, print and verify.
 *
 * The cu(...) wrapper comes from chError.h, which is not visible here;
 * presumably it stores the result in `status` and jumps to Error on
 * failure - confirm against that header. On such a failure nothing is
 * printed.
 *
 * Mismatches are reported on stderr after the values are printed. The
 * earlier code executed `_asm int 3`, which only builds with 32-bit MSVC.
 */
void
PrintTex( float *host, size_t N )
{
    CUdeviceptr device;
    CUresult status;
    // Declared ahead of the first cu(...) so that a jump to Error never
    // bypasses an initialization.
    size_t mismatches = 0;
    size_t firstMismatch = 0;

    // Clear the buffer so stale contents cannot pass for kernel output.
    memset( host, 0, N*sizeof(float) );
    cu(MemHostGetDevicePointer( &device, host, 0 ));
    {
        // Kernel parameter block: the output pointer followed by the count.
        int offset = 0;
        cu(ParamSetv(g_function, offset, &device, sizeof(CUdeviceptr))); offset += sizeof(CUdeviceptr);
        cu(ParamSetv(g_function, offset, &N, sizeof(size_t))); offset += sizeof(size_t);
        cu(ParamSetSize(g_function, offset ));
    }
    // 384 blocks of 2 threads each.
    cu(FuncSetBlockShape(g_function, 2, 1, 1));
    cu(LaunchGrid(g_function, 384, 1));
    // Wait for the kernel before reading its output through `host`.
    cu(CtxSynchronize());

    for ( size_t i = 0; i < N; i++ ) {
        // The reference treats the low 8 bits of the index as a signed
        // 8-bit value. The comparison is exact, as it was originally; for
        // unsigned 8-bit data the original author noted that a tolerance
        // check against i/255 with epsilon 1e-5 works instead.
        const float expected = TexPromoteToFloat( (char) i );

        printf( "%.2f ", host[i] );
        if ( host[i] != expected ) {
            if ( 0 == mismatches ) {
                firstMismatch = i;
            }
            mismatches++;
        }
    }
    printf( "\n" );

    if ( mismatches != 0 ) {
        fprintf( stderr,
                 "PrintTex: %lu of %lu values differ from the host reference "
                 "(first at index %lu: got %g, expected %g)\n",
                 (unsigned long) mismatches,
                 (unsigned long) N,
                 (unsigned long) firstMismatch,
                 host[firstMismatch],
                 TexPromoteToFloat( (char) firstMismatch ) );
    }
Error:;
}
/*
 * Reads the whole of `filename` into memory, NUL-terminates it and hands
 * the image to cuModuleLoadDataEx.
 *
 * mod      - receives the loaded module on success.
 * filename - path of the file to load; opened in binary mode.
 *
 * Returns:
 *   CUDA_ERROR_NOT_FOUND      if the file cannot be opened,
 *   CUDA_ERROR_OUT_OF_MEMORY  if the read buffer cannot be allocated,
 *   CUDA_ERROR_UNKNOWN        if seeking, sizing or reading the file fails
 *                             (this includes an empty file),
 *   otherwise whatever cuModuleLoadDataEx returns.
 *
 * The file handle and the temporary buffer are released on every path
 * once the file has been opened.
 */
static CUresult
LoadPTXModule( CUmodule *mod, const char *filename )
{
    FILE *f = fopen( filename, "rb" );
    long size;
    void *p = 0;
    // Default for every file I/O failure below.
    CUresult status = CUDA_ERROR_UNKNOWN;

    if ( ! f ) {
        return CUDA_ERROR_NOT_FOUND;
    }

    // Determine the file size by seeking to the end.
    if ( 0 != fseek( f, 0, SEEK_END ) ) {
        goto Error;
    }
    size = ftell( f );
    // ftell reports failure as -1; using that as a size would corrupt
    // the allocation and the terminator write below.
    if ( size < 0 ) {
        goto Error;
    }
    if ( 0 != fseek( f, 0, SEEK_SET ) ) {
        goto Error;
    }

    // One extra byte for the terminating NUL.
    p = malloc( (size_t) size + 1 );
    if ( ! p ) {
        status = CUDA_ERROR_OUT_OF_MEMORY;
        goto Error;
    }
    if ( 1 != fread( p, size, 1, f ) ) {
        goto Error;
    }
    ((char *) p)[size] = '\0';
    status = cuModuleLoadDataEx( mod, p, 0, NULL, NULL );
Error:
    fclose( f );
    free( p );  // free(NULL) is a no-op
    return status;
}
int
main( int argc, char *argv[] )
{
CUdeviceptr p = 0;
unsigned char *finHost;
CUdeviceptr finDevice;
float *foutHost;
CUdeviceptr foutDevice;
CUresult status;
cu(Init(0));
cu(DeviceGet(&g_device, 0));
cu(CtxCreate(&g_ctx, CU_CTX_MAP_HOST, g_device));
CUDA_CHECK(LoadPTXModule(&g_module, "tex1dfetch_int2float_kernel.ptx"));
cu(ModuleGetFunction(&g_function, g_module, "TexReadout"));
cu(ModuleGetTexRef(&g_texref, g_module, "tex1"));
cu(MemAlloc( &p, NUM_FLOATS*sizeof(float)) );
cu(MemHostAlloc( (void **) &finHost, NUM_FLOATS*sizeof(float), CU_MEMHOSTALLOC_DEVICEMAP));
cu(MemHostGetDevicePointer( &finDevice, finHost, 0 ));
cu(TexRefSetFormat(g_texref, CU_AD_FORMAT_SIGNED_INT8, 1));
//cu(TexRefSetFlags(g_texref, CU_TRSF_READ_AS_INTEGER));
cu(TexRefSetAddress(NULL, g_texref, finDevice, NUM_FLOATS*sizeof(float)));
cu(MemHostAlloc( (void **) &foutHost, NUM_FLOATS*sizeof(float), CU_MEMHOSTALLOC_DEVICEMAP));
cu(MemHostGetDevicePointer( &foutDevice, foutHost, 0 ));
for ( int i = 0; i < NUM_FLOATS; i++ ) {
finHost[i] = (unsigned char) i;//(float) i;
}
PrintTex( foutHost, NUM_FLOATS );
Error:
cuMemFree( p );
}