/*
 * Copyright 1993-2014 NVIDIA Corporation. All rights reserved.
 *
 * NVIDIA Corporation and its licensors retain all intellectual property and
 * proprietary rights in and to this software and related documentation.
 * Any use, reproduction, disclosure, or distribution of this software
 * and related documentation without an express license agreement from
 * NVIDIA Corporation is strictly prohibited.
 *
 * Please refer to the applicable NVIDIA end user license agreement (EULA)
 * associated with this source code for terms and conditions that govern
 * your use of this NVIDIA software.
 *
 */
|
|
|
// With these flags defined, this source file will dynamically
// load the corresponding functions. Disabled by default.
// #define CUDA_INIT_D3D9
// #define CUDA_INIT_D3D10
// #define CUDA_INIT_D3D11
// #define CUDA_INIT_OPENGL
|
|
|
#include "cuda_drvapi_dynlink.h" |
|
|
|
#include <stdio.h> |
|
|
|
tcuInit *_cuInit; |
|
tcuDriverGetVersion *cuDriverGetVersion; |
|
tcuDeviceGet *cuDeviceGet; |
|
tcuDeviceGetCount *cuDeviceGetCount; |
|
tcuDeviceGetName *cuDeviceGetName; |
|
tcuDeviceComputeCapability *cuDeviceComputeCapability; |
|
tcuDeviceTotalMem *cuDeviceTotalMem; |
|
tcuDeviceGetProperties *cuDeviceGetProperties; |
|
tcuDeviceGetAttribute *cuDeviceGetAttribute; |
|
tcuGetErrorString *cuGetErrorString; |
|
tcuCtxCreate *cuCtxCreate; |
|
tcuCtxDestroy *cuCtxDestroy; |
|
tcuCtxAttach *cuCtxAttach; |
|
tcuCtxDetach *cuCtxDetach; |
|
tcuCtxPushCurrent *cuCtxPushCurrent; |
|
tcuCtxPopCurrent *cuCtxPopCurrent; |
|
tcuCtxGetCurrent *cuCtxGetCurrent; |
|
tcuCtxSetCurrent *cuCtxSetCurrent; |
|
tcuCtxGetDevice *cuCtxGetDevice; |
|
tcuCtxSynchronize *cuCtxSynchronize; |
|
tcuModuleLoad *cuModuleLoad; |
|
tcuModuleLoadData *cuModuleLoadData; |
|
tcuModuleLoadDataEx *cuModuleLoadDataEx; |
|
tcuModuleLoadFatBinary *cuModuleLoadFatBinary; |
|
tcuModuleUnload *cuModuleUnload; |
|
tcuModuleGetFunction *cuModuleGetFunction; |
|
tcuModuleGetGlobal *cuModuleGetGlobal; |
|
tcuModuleGetTexRef *cuModuleGetTexRef; |
|
tcuModuleGetSurfRef *cuModuleGetSurfRef; |
|
tcuMemGetInfo *cuMemGetInfo; |
|
tcuMemAlloc *cuMemAlloc; |
|
tcuMemAllocPitch *cuMemAllocPitch; |
|
tcuMemFree *cuMemFree; |
|
tcuMemGetAddressRange *cuMemGetAddressRange; |
|
tcuMemAllocHost *cuMemAllocHost; |
|
tcuMemFreeHost *cuMemFreeHost; |
|
tcuMemHostAlloc *cuMemHostAlloc; |
|
tcuMemHostGetFlags *cuMemHostGetFlags; |
|
|
|
tcuMemHostGetDevicePointer *cuMemHostGetDevicePointer; |
|
tcuDeviceGetByPCIBusId *cuDeviceGetByPCIBusId; |
|
tcuDeviceGetPCIBusId *cuDeviceGetPCIBusId; |
|
tcuIpcGetEventHandle *cuIpcGetEventHandle; |
|
tcuIpcOpenEventHandle *cuIpcOpenEventHandle; |
|
tcuIpcGetMemHandle *cuIpcGetMemHandle; |
|
tcuIpcOpenMemHandle *cuIpcOpenMemHandle; |
|
tcuIpcCloseMemHandle *cuIpcCloseMemHandle; |
|
|
|
tcuMemHostRegister *cuMemHostRegister; |
|
tcuMemHostUnregister *cuMemHostUnregister; |
|
tcuMemcpyHtoD *cuMemcpyHtoD; |
|
tcuMemcpyDtoH *cuMemcpyDtoH; |
|
tcuMemcpyDtoD *cuMemcpyDtoD; |
|
tcuMemcpyDtoA *cuMemcpyDtoA; |
|
tcuMemcpyAtoD *cuMemcpyAtoD; |
|
tcuMemcpyHtoA *cuMemcpyHtoA; |
|
tcuMemcpyAtoH *cuMemcpyAtoH; |
|
tcuMemcpyAtoA *cuMemcpyAtoA; |
|
tcuMemcpy2D *cuMemcpy2D; |
|
tcuMemcpy2DUnaligned *cuMemcpy2DUnaligned; |
|
tcuMemcpy3D *cuMemcpy3D; |
|
tcuMemcpyHtoDAsync *cuMemcpyHtoDAsync; |
|
tcuMemcpyDtoHAsync *cuMemcpyDtoHAsync; |
|
tcuMemcpyDtoDAsync *cuMemcpyDtoDAsync; |
|
tcuMemcpyHtoAAsync *cuMemcpyHtoAAsync; |
|
tcuMemcpyAtoHAsync *cuMemcpyAtoHAsync; |
|
tcuMemcpy2DAsync *cuMemcpy2DAsync; |
|
tcuMemcpy3DAsync *cuMemcpy3DAsync; |
|
tcuMemcpy *cuMemcpy; |
|
tcuMemcpyPeer *cuMemcpyPeer; |
|
tcuMemsetD8 *cuMemsetD8; |
|
tcuMemsetD16 *cuMemsetD16; |
|
tcuMemsetD32 *cuMemsetD32; |
|
tcuMemsetD2D8 *cuMemsetD2D8; |
|
tcuMemsetD2D16 *cuMemsetD2D16; |
|
tcuMemsetD2D32 *cuMemsetD2D32; |
|
tcuFuncSetBlockShape *cuFuncSetBlockShape; |
|
tcuFuncSetSharedSize *cuFuncSetSharedSize; |
|
tcuFuncGetAttribute *cuFuncGetAttribute; |
|
tcuFuncSetCacheConfig *cuFuncSetCacheConfig; |
|
tcuFuncSetSharedMemConfig *cuFuncSetSharedMemConfig; |
|
tcuLaunchKernel *cuLaunchKernel; |
|
tcuArrayCreate *cuArrayCreate; |
|
tcuArrayGetDescriptor *cuArrayGetDescriptor; |
|
tcuArrayDestroy *cuArrayDestroy; |
|
tcuArray3DCreate *cuArray3DCreate; |
|
tcuArray3DGetDescriptor *cuArray3DGetDescriptor; |
|
tcuTexRefCreate *cuTexRefCreate; |
|
tcuTexRefDestroy *cuTexRefDestroy; |
|
tcuTexRefSetArray *cuTexRefSetArray; |
|
tcuTexRefSetAddress *cuTexRefSetAddress; |
|
tcuTexRefSetAddress2D *cuTexRefSetAddress2D; |
|
tcuTexRefSetFormat *cuTexRefSetFormat; |
|
tcuTexRefSetAddressMode *cuTexRefSetAddressMode; |
|
tcuTexRefSetFilterMode *cuTexRefSetFilterMode; |
|
tcuTexRefSetFlags *cuTexRefSetFlags; |
|
tcuTexRefGetAddress *cuTexRefGetAddress; |
|
tcuTexRefGetArray *cuTexRefGetArray; |
|
tcuTexRefGetAddressMode *cuTexRefGetAddressMode; |
|
tcuTexRefGetFilterMode *cuTexRefGetFilterMode; |
|
tcuTexRefGetFormat *cuTexRefGetFormat; |
|
tcuTexRefGetFlags *cuTexRefGetFlags; |
|
tcuSurfRefSetArray *cuSurfRefSetArray; |
|
tcuSurfRefGetArray *cuSurfRefGetArray; |
|
tcuParamSetSize *cuParamSetSize; |
|
tcuParamSeti *cuParamSeti; |
|
tcuParamSetf *cuParamSetf; |
|
tcuParamSetv *cuParamSetv; |
|
tcuParamSetTexRef *cuParamSetTexRef; |
|
tcuLaunch *cuLaunch; |
|
tcuLaunchGrid *cuLaunchGrid; |
|
tcuLaunchGridAsync *cuLaunchGridAsync; |
|
tcuEventCreate *cuEventCreate; |
|
tcuEventRecord *cuEventRecord; |
|
tcuEventQuery *cuEventQuery; |
|
tcuEventSynchronize *cuEventSynchronize; |
|
tcuEventDestroy *cuEventDestroy; |
|
tcuEventElapsedTime *cuEventElapsedTime; |
|
tcuStreamCreate *cuStreamCreate; |
|
tcuStreamWaitEvent *cuStreamWaitEvent; |
|
tcuStreamAddCallback *cuStreamAddCallback; |
|
tcuStreamQuery *cuStreamQuery; |
|
tcuStreamSynchronize *cuStreamSynchronize; |
|
tcuStreamDestroy *cuStreamDestroy; |
|
tcuGraphicsUnregisterResource *cuGraphicsUnregisterResource; |
|
tcuGraphicsSubResourceGetMappedArray *cuGraphicsSubResourceGetMappedArray; |
|
tcuGraphicsResourceGetMappedPointer *cuGraphicsResourceGetMappedPointer; |
|
tcuGraphicsResourceSetMapFlags *cuGraphicsResourceSetMapFlags; |
|
tcuGraphicsMapResources *cuGraphicsMapResources; |
|
tcuGraphicsUnmapResources *cuGraphicsUnmapResources; |
|
tcuGetExportTable *cuGetExportTable; |
|
tcuCtxSetLimit *cuCtxSetLimit; |
|
tcuCtxGetLimit *cuCtxGetLimit; |
|
tcuCtxGetCacheConfig *cuCtxGetCacheConfig; |
|
tcuCtxSetCacheConfig *cuCtxSetCacheConfig; |
|
tcuCtxGetSharedMemConfig *cuCtxGetSharedMemConfig; |
|
tcuCtxSetSharedMemConfig *cuCtxSetSharedMemConfig; |
|
tcuCtxGetApiVersion *cuCtxGetApiVersion; |
|
|
|
tcuMipmappedArrayCreate *cuMipmappedArrayCreate; |
|
tcuMipmappedArrayGetLevel *cuMipmappedArrayGetLevel; |
|
tcuMipmappedArrayDestroy *cuMipmappedArrayDestroy; |
|
|
|
tcuProfilerStop *cuProfilerStop; |
|
|
|
/*
 * Optional graphics-interop entry points.  Each group is compiled in only
 * when the corresponding CUDA_INIT_* flag is defined before this point.
 */
#ifdef CUDA_INIT_D3D9
// D3D9/CUDA interop (CUDA 1.x compatible API). These functions
// are deprecated; please use the ones below
tcuD3D9Begin *cuD3D9Begin;
// NOTE(review): this pointer is named cuD3DEnd, not cuD3D9End like its
// siblings. The name is left untouched because it is an externally visible
// symbol and the matching declaration is not visible here - confirm against
// the header before renaming.
tcuD3D9End *cuD3DEnd;
tcuD3D9RegisterVertexBuffer *cuD3D9RegisterVertexBuffer;
tcuD3D9MapVertexBuffer *cuD3D9MapVertexBuffer;
tcuD3D9UnmapVertexBuffer *cuD3D9UnmapVertexBuffer;
tcuD3D9UnregisterVertexBuffer *cuD3D9UnregisterVertexBuffer;

// D3D9/CUDA interop (CUDA 2.x compatible)
tcuD3D9GetDirect3DDevice *cuD3D9GetDirect3DDevice;
tcuD3D9RegisterResource *cuD3D9RegisterResource;
tcuD3D9UnregisterResource *cuD3D9UnregisterResource;
tcuD3D9MapResources *cuD3D9MapResources;
tcuD3D9UnmapResources *cuD3D9UnmapResources;
tcuD3D9ResourceSetMapFlags *cuD3D9ResourceSetMapFlags;
tcuD3D9ResourceGetSurfaceDimensions *cuD3D9ResourceGetSurfaceDimensions;
tcuD3D9ResourceGetMappedArray *cuD3D9ResourceGetMappedArray;
tcuD3D9ResourceGetMappedPointer *cuD3D9ResourceGetMappedPointer;
tcuD3D9ResourceGetMappedSize *cuD3D9ResourceGetMappedSize;
tcuD3D9ResourceGetMappedPitch *cuD3D9ResourceGetMappedPitch;

// D3D9/CUDA interop (CUDA 2.0+)
tcuD3D9GetDevice *cuD3D9GetDevice;
tcuD3D9CtxCreate *cuD3D9CtxCreate;
tcuGraphicsD3D9RegisterResource *cuGraphicsD3D9RegisterResource;
#endif

#ifdef CUDA_INIT_D3D10
// D3D10/CUDA interop (CUDA 3.0+)
tcuD3D10GetDevice *cuD3D10GetDevice;
tcuD3D10CtxCreate *cuD3D10CtxCreate;
tcuGraphicsD3D10RegisterResource *cuGraphicsD3D10RegisterResource;
#endif

#ifdef CUDA_INIT_D3D11
// D3D11/CUDA interop (CUDA 3.0+)
tcuD3D11GetDevice *cuD3D11GetDevice;
tcuD3D11CtxCreate *cuD3D11CtxCreate;
tcuGraphicsD3D11RegisterResource *cuGraphicsD3D11RegisterResource;
#endif

// GL/CUDA interop
#ifdef CUDA_INIT_OPENGL
tcuGLCtxCreate *cuGLCtxCreate;
tcuGraphicsGLRegisterBuffer *cuGraphicsGLRegisterBuffer;
tcuGraphicsGLRegisterImage *cuGraphicsGLRegisterImage;
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
// WGL device query only exists on Windows builds
tcuWGLGetDevice *cuWGLGetDevice;
#endif
#endif
|
|
|
/*
 * Turns the token X into a string literal.
 *
 * Deliberately single-level: the argument is the operand of '#', so it is
 * NOT macro-expanded first. The symbol-lookup macros rely on this to look up
 * a pasted name such as foo_v2 verbatim.
 */
#define STRINGIFY(X) #X
|
|
|
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) |
|
#include <Windows.h> |
|
|
|
#ifdef UNICODE |
|
static LPCWSTR __CudaLibName = L"nvcuda.dll"; |
|
#else |
|
static LPCSTR __CudaLibName = "nvcuda.dll"; |
|
#endif |
|
|
|
typedef HMODULE CUDADRIVER; |
|
|
|
static CUresult LOAD_LIBRARY(CUDADRIVER *pInstance) |
|
{ |
|
*pInstance = LoadLibrary(__CudaLibName); |
|
|
|
if (*pInstance == NULL) { |
|
printf("LoadLibrary \"%s\" failed!\n", __CudaLibName); |
|
return CUDA_ERROR_UNKNOWN; |
|
} |
|
|
|
return CUDA_SUCCESS; |
|
} |
|
|
|
#define GET_PROC_EX(name, alias, required) \ |
|
alias = (t##name *)GetProcAddress(CudaDrvLib, #name); \ |
|
if (alias == NULL && required) { \ |
|
printf("Failed to find required function \"%s\" in %s\n", #name, __CudaLibName); \ |
|
return CUDA_ERROR_UNKNOWN; \ |
|
} |
|
|
|
#define GET_PROC_EX_V2(name, alias, required) \ |
|
alias = (t##name *)GetProcAddress(CudaDrvLib, STRINGIFY(name##_v2)); \ |
|
if (alias == NULL && required) { \ |
|
printf("Failed to find required function \"%s\" in %s\n", STRINGIFY(name##_v2), __CudaLibName); \ |
|
return CUDA_ERROR_UNKNOWN; \ |
|
} |
|
|
|
#define GET_PROC_EX_V3(name, alias, required) \ |
|
alias = (t##name *)GetProcAddress(CudaDrvLib, STRINGIFY(name##_v3)); \ |
|
if (alias == NULL && required) { \ |
|
printf("Failed to find required function \"%s\" in %s\n", STRINGIFY(name##_v3), __CudaLibName); \ |
|
return CUDA_ERROR_UNKNOWN; \ |
|
} |
|
|
|
#elif defined(__unix__) || defined(__QNX__) || defined(__APPLE__) || defined(__MACOSX) |
|
|
|
#include <dlfcn.h> |
|
|
|
#if defined(__APPLE__) || defined(__MACOSX) |
|
static char __CudaLibName[] = "/usr/local/cuda/lib/libcuda.dylib"; |
|
#elif defined(__ANDROID__) |
|
#if defined(__aarch64__) |
|
static char __CudaLibName[] = "/system/vendor/lib64/libcuda.so"; |
|
#elif defined(__arm__) |
|
static char __CudaLibName[] = "/system/vendor/lib/libcuda.so"; |
|
#endif |
|
#else |
|
static char __CudaLibName[] = "libcuda.so.1"; |
|
#endif |
|
|
|
typedef void *CUDADRIVER; |
|
|
|
static CUresult LOAD_LIBRARY(CUDADRIVER *pInstance) |
|
{ |
|
*pInstance = dlopen(__CudaLibName, RTLD_NOW); |
|
|
|
if (*pInstance == NULL) { |
|
printf("dlopen \"%s\" failed!\n", __CudaLibName); |
|
return CUDA_ERROR_UNKNOWN; |
|
} |
|
|
|
return CUDA_SUCCESS; |
|
} |
|
|
|
#define GET_PROC_EX(name, alias, required) \ |
|
alias = (t##name *)dlsym(CudaDrvLib, #name); \ |
|
if (alias == NULL && required) { \ |
|
printf("Failed to find required function \"%s\" in %s\n", #name, __CudaLibName); \ |
|
return CUDA_ERROR_UNKNOWN; \ |
|
} |
|
|
|
#define GET_PROC_EX_V2(name, alias, required) \ |
|
alias = (t##name *)dlsym(CudaDrvLib, STRINGIFY(name##_v2)); \ |
|
if (alias == NULL && required) { \ |
|
printf("Failed to find required function \"%s\" in %s\n", STRINGIFY(name##_v2), __CudaLibName); \ |
|
return CUDA_ERROR_UNKNOWN; \ |
|
} |
|
|
|
#define GET_PROC_EX_V3(name, alias, required) \ |
|
alias = (t##name *)dlsym(CudaDrvLib, STRINGIFY(name##_v3)); \ |
|
if (alias == NULL && required) { \ |
|
printf("Failed to find required function \"%s\" in %s\n", STRINGIFY(name##_v3), __CudaLibName); \ |
|
return CUDA_ERROR_UNKNOWN; \ |
|
} |
|
|
|
#else |
|
#error unsupported platform |
|
#endif |
|
|
|
/*
 * Evaluates `call` exactly once and, if it did not yield CUDA_SUCCESS,
 * RETURNS that CUresult from the enclosing function.
 *
 * The temporary carries a macro-specific name so that it cannot shadow an
 * identifier (for example a caller variable called `result`) that appears
 * inside `call`.
 */
#define CHECKED_CALL(call)                               \
    do {                                                 \
        CUresult checkedCallStatus_ = (call);            \
        if (checkedCallStatus_ != CUDA_SUCCESS) {        \
            return checkedCallStatus_;                   \
        }                                                \
    } while (0)
|
|
|
/*
 * Convenience wrappers around GET_PROC_EX*: the driver symbol is stored in
 * the global pointer of the same name.
 */
// Lookup failure makes the enclosing function return an error.
#define GET_PROC_REQUIRED(name) GET_PROC_EX(name, name, 1)
// Lookup failure is tolerated; the pointer is simply left NULL.
#define GET_PROC_OPTIONAL(name) GET_PROC_EX(name, name, 0)
// Default flavour: required.
#define GET_PROC(name) GET_PROC_REQUIRED(name)
// Required lookups of the <name>_v2 / <name>_v3 exports.
#define GET_PROC_V2(name) GET_PROC_EX_V2(name, name, 1)
#define GET_PROC_V3(name) GET_PROC_EX_V3(name, name, 1)
|
|
|
/*
 * Loads the CUDA driver library, initializes the driver and fills in the
 * global entry-point pointers defined in this file.
 *
 * Flags:       passed through unchanged to the driver's own cuInit.
 * cudaVersion: API version requested by the caller; together with
 *              __CUDA_API_VERSION it selects between the plain, _v2 and _v3
 *              exports. Other groups are gated on the version reported by
 *              the driver itself (driverVer).
 *
 * Returns CUDA_SUCCESS when every required symbol was resolved. Otherwise
 * returns the failing driver call's CUresult, or CUDA_ERROR_UNKNOWN when the
 * library or a required symbol is missing (a diagnostic is printed).
 *
 * Note: the library handle is never closed, including on the failure paths;
 * the GET_PROC* / CHECKED_CALL macros return directly from this function.
 */
CUresult CUDAAPI cuInit(unsigned int Flags, int cudaVersion)
{
    CUDADRIVER CudaDrvLib;
    int driverVer = 1000;

    CHECKED_CALL(LOAD_LIBRARY(&CudaDrvLib));

    // cuInit is required; alias it to _cuInit
    GET_PROC_EX(cuInit, _cuInit, 1);
    CHECKED_CALL(_cuInit(Flags));

    // available since 2.2. if not present, version 1.0 is assumed
    GET_PROC_OPTIONAL(cuDriverGetVersion);

    if (cuDriverGetVersion) {
        CHECKED_CALL(cuDriverGetVersion(&driverVer));
    }

    // fetch all function pointers
    GET_PROC(cuDeviceGet);
    GET_PROC(cuDeviceGetCount);
    GET_PROC(cuDeviceGetName);
    GET_PROC(cuDeviceComputeCapability);
    GET_PROC(cuDeviceGetProperties);
    GET_PROC(cuDeviceGetAttribute);
    GET_PROC(cuGetErrorString);
    GET_PROC(cuCtxDestroy);
    GET_PROC(cuCtxAttach);
    GET_PROC(cuCtxDetach);
    GET_PROC(cuCtxPushCurrent);
    GET_PROC(cuCtxPopCurrent);
    GET_PROC(cuCtxGetDevice);
    GET_PROC(cuCtxSynchronize);
    GET_PROC(cuCtxSetLimit);
    GET_PROC(cuCtxGetCacheConfig);
    GET_PROC(cuCtxSetCacheConfig);
    GET_PROC(cuCtxGetApiVersion);
    GET_PROC(cuModuleLoad);
    GET_PROC(cuModuleLoadData);
    GET_PROC(cuModuleUnload);
    GET_PROC(cuModuleGetFunction);
    GET_PROC(cuModuleGetTexRef);
    GET_PROC(cuMemFreeHost);
    GET_PROC(cuMemHostAlloc);
    GET_PROC(cuFuncSetBlockShape);
    GET_PROC(cuFuncSetSharedSize);
    GET_PROC(cuFuncGetAttribute);
    GET_PROC(cuArrayDestroy);
    GET_PROC(cuTexRefCreate);
    GET_PROC(cuTexRefDestroy);
    GET_PROC(cuTexRefSetArray);
    GET_PROC(cuTexRefSetFormat);
    GET_PROC(cuTexRefSetAddressMode);
    GET_PROC(cuTexRefSetFilterMode);
    GET_PROC(cuTexRefSetFlags);
    GET_PROC(cuTexRefGetArray);
    GET_PROC(cuTexRefGetAddressMode);
    GET_PROC(cuTexRefGetFilterMode);
    GET_PROC(cuTexRefGetFormat);
    GET_PROC(cuTexRefGetFlags);
    GET_PROC(cuParamSetSize);
    GET_PROC(cuParamSeti);
    GET_PROC(cuParamSetf);
    GET_PROC(cuParamSetv);
    GET_PROC(cuParamSetTexRef);
    GET_PROC(cuLaunch);
    GET_PROC(cuLaunchGrid);
    GET_PROC(cuLaunchGridAsync);
    GET_PROC(cuEventCreate);
    GET_PROC(cuEventRecord);
    GET_PROC(cuEventQuery);
    GET_PROC(cuEventSynchronize);
    GET_PROC(cuEventDestroy);
    GET_PROC(cuEventElapsedTime);
    GET_PROC(cuStreamCreate);
    GET_PROC(cuStreamWaitEvent);
    GET_PROC(cuStreamAddCallback);
    GET_PROC(cuStreamQuery);
    GET_PROC(cuStreamSynchronize);
    GET_PROC(cuStreamDestroy);

    // These are CUDA 5.0 new functions
    if (driverVer >= 5000) {
        GET_PROC(cuMipmappedArrayCreate);
        GET_PROC(cuMipmappedArrayDestroy);
        GET_PROC(cuMipmappedArrayGetLevel);
    }

    // These are CUDA 4.2 new functions
    if (driverVer >= 4020) {
        GET_PROC(cuFuncSetSharedMemConfig);
        GET_PROC(cuCtxGetSharedMemConfig);
        GET_PROC(cuCtxSetSharedMemConfig);
    }

    // These are CUDA 4.1 new functions
    if (cudaVersion >= 4010 && __CUDA_API_VERSION >= 4010) {
        GET_PROC(cuDeviceGetByPCIBusId);
        GET_PROC(cuDeviceGetPCIBusId);
        GET_PROC(cuIpcGetEventHandle);
        GET_PROC(cuIpcOpenEventHandle);
        GET_PROC(cuIpcGetMemHandle);
        GET_PROC(cuIpcOpenMemHandle);
        GET_PROC(cuIpcCloseMemHandle);
    }

    // These could be _v2 interfaces; when selected they overwrite the plain
    // entry points fetched above
    if (cudaVersion >= 4000 && __CUDA_API_VERSION >= 4000) {
        GET_PROC_V2(cuCtxDestroy);
        GET_PROC_V2(cuCtxPopCurrent);
        GET_PROC_V2(cuCtxPushCurrent);
        GET_PROC_V2(cuStreamDestroy);
        GET_PROC_V2(cuEventDestroy);
    }

    if (cudaVersion >= 3020 && __CUDA_API_VERSION >= 3020) {
        GET_PROC_V2(cuDeviceTotalMem);
        GET_PROC_V2(cuCtxCreate);
        GET_PROC_V2(cuModuleGetGlobal);
        GET_PROC_V2(cuMemGetInfo);
        GET_PROC_V2(cuMemAlloc);
        GET_PROC_V2(cuMemAllocPitch);
        GET_PROC_V2(cuMemFree);
        GET_PROC_V2(cuMemGetAddressRange);
        GET_PROC_V2(cuMemAllocHost);
        GET_PROC_V2(cuMemHostGetDevicePointer);
        GET_PROC_V2(cuMemcpyHtoD);
        GET_PROC_V2(cuMemcpyDtoH);
        GET_PROC_V2(cuMemcpyDtoD);
        GET_PROC_V2(cuMemcpyDtoA);
        GET_PROC_V2(cuMemcpyAtoD);
        GET_PROC_V2(cuMemcpyHtoA);
        GET_PROC_V2(cuMemcpyAtoH);
        GET_PROC_V2(cuMemcpyAtoA);
        GET_PROC_V2(cuMemcpy2D);
        GET_PROC_V2(cuMemcpy2DUnaligned);
        GET_PROC_V2(cuMemcpy3D);
        GET_PROC_V2(cuMemcpyHtoDAsync);
        GET_PROC_V2(cuMemcpyDtoHAsync);
        GET_PROC_V2(cuMemcpyHtoAAsync);
        GET_PROC_V2(cuMemcpyAtoHAsync);
        GET_PROC_V2(cuMemcpy2DAsync);
        GET_PROC_V2(cuMemcpy3DAsync);
        GET_PROC_V2(cuMemsetD8);
        GET_PROC_V2(cuMemsetD16);
        GET_PROC_V2(cuMemsetD32);
        GET_PROC_V2(cuMemsetD2D8);
        GET_PROC_V2(cuMemsetD2D16);
        GET_PROC_V2(cuMemsetD2D32);
        GET_PROC_V2(cuArrayCreate);
        GET_PROC_V2(cuArrayGetDescriptor);
        GET_PROC_V2(cuArray3DCreate);
        GET_PROC_V2(cuArray3DGetDescriptor);
        GET_PROC_V2(cuTexRefSetAddress);
        GET_PROC_V2(cuTexRefGetAddress);

        // cuTexRefSetAddress2D has a _v3 export for 4.1 and later requests
        if (cudaVersion >= 4010 && __CUDA_API_VERSION >= 4010) {
            GET_PROC_V3(cuTexRefSetAddress2D);
        }
        else {
            GET_PROC_V2(cuTexRefSetAddress2D);
        }
    }
    else {
        // versions earlier than 3020
        GET_PROC(cuDeviceTotalMem);
        GET_PROC(cuCtxCreate);
        GET_PROC(cuModuleGetGlobal);
        GET_PROC(cuMemGetInfo);
        GET_PROC(cuMemAlloc);
        GET_PROC(cuMemAllocPitch);
        GET_PROC(cuMemFree);
        GET_PROC(cuMemGetAddressRange);
        GET_PROC(cuMemAllocHost);
        GET_PROC(cuMemHostGetDevicePointer);
        GET_PROC(cuMemcpyHtoD);
        GET_PROC(cuMemcpyDtoH);
        GET_PROC(cuMemcpyDtoD);
        GET_PROC(cuMemcpyDtoA);
        GET_PROC(cuMemcpyAtoD);
        GET_PROC(cuMemcpyHtoA);
        GET_PROC(cuMemcpyAtoH);
        GET_PROC(cuMemcpyAtoA);
        GET_PROC(cuMemcpy2D);
        GET_PROC(cuMemcpy2DUnaligned);
        GET_PROC(cuMemcpy3D);
        GET_PROC(cuMemcpyHtoDAsync);
        GET_PROC(cuMemcpyDtoHAsync);
        GET_PROC(cuMemcpyHtoAAsync);
        GET_PROC(cuMemcpyAtoHAsync);
        GET_PROC(cuMemcpy2DAsync);
        GET_PROC(cuMemcpy3DAsync);
        GET_PROC(cuMemsetD8);
        GET_PROC(cuMemsetD16);
        GET_PROC(cuMemsetD32);
        GET_PROC(cuMemsetD2D8);
        GET_PROC(cuMemsetD2D16);
        GET_PROC(cuMemsetD2D32);
        GET_PROC(cuArrayCreate);
        GET_PROC(cuArrayGetDescriptor);
        GET_PROC(cuArray3DCreate);
        GET_PROC(cuArray3DGetDescriptor);
        GET_PROC(cuTexRefSetAddress);
        GET_PROC(cuTexRefSetAddress2D);
        GET_PROC(cuTexRefGetAddress);
    }

    // The following functions are specific to CUDA versions
    if (driverVer >= 4000) {
        GET_PROC(cuCtxSetCurrent);
        GET_PROC(cuCtxGetCurrent);
        GET_PROC(cuMemHostRegister);
        GET_PROC(cuMemHostUnregister);
        GET_PROC(cuMemcpy);
        GET_PROC(cuMemcpyPeer);
        GET_PROC(cuLaunchKernel);
        GET_PROC(cuProfilerStop);
    }

    if (driverVer >= 3010) {
        GET_PROC(cuModuleGetSurfRef);
        GET_PROC(cuSurfRefSetArray);
        GET_PROC(cuSurfRefGetArray);
        // cuCtxSetLimit was already resolved unconditionally above
        GET_PROC(cuCtxGetLimit);
    }

    if (driverVer >= 3000) {
        GET_PROC(cuMemcpyDtoDAsync);
        GET_PROC(cuFuncSetCacheConfig);
#ifdef CUDA_INIT_D3D11
        GET_PROC(cuD3D11GetDevice);
        GET_PROC(cuD3D11CtxCreate);
        GET_PROC(cuGraphicsD3D11RegisterResource);
#endif
        GET_PROC(cuGraphicsUnregisterResource);
        GET_PROC(cuGraphicsSubResourceGetMappedArray);

        if (cudaVersion >= 3020 && __CUDA_API_VERSION >= 3020) {
            GET_PROC_V2(cuGraphicsResourceGetMappedPointer);
        }
        else {
            GET_PROC(cuGraphicsResourceGetMappedPointer);
        }

        GET_PROC(cuGraphicsResourceSetMapFlags);
        GET_PROC(cuGraphicsMapResources);
        GET_PROC(cuGraphicsUnmapResources);
        GET_PROC(cuGetExportTable);
    }

    if (driverVer >= 2030) {
        GET_PROC(cuMemHostGetFlags);
#ifdef CUDA_INIT_D3D10
        GET_PROC(cuD3D10GetDevice);
        GET_PROC(cuD3D10CtxCreate);
        GET_PROC(cuGraphicsD3D10RegisterResource);
#endif
        // The GL register functions are resolved in the driverVer >= 2010
        // group below, which is always entered when this one is.
    }

    if (driverVer >= 2010) {
        GET_PROC(cuModuleLoadDataEx);
        GET_PROC(cuModuleLoadFatBinary);
#ifdef CUDA_INIT_OPENGL
        GET_PROC(cuGLCtxCreate);
        GET_PROC(cuGraphicsGLRegisterBuffer);
        GET_PROC(cuGraphicsGLRegisterImage);
        // Same platform test as the one guarding the cuWGLGetDevice
        // definition, so the pointer is loaded wherever it exists.
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
        GET_PROC(cuWGLGetDevice);
#endif
#endif
#ifdef CUDA_INIT_D3D9
        GET_PROC(cuD3D9GetDevice);
        GET_PROC(cuD3D9CtxCreate);
        GET_PROC(cuGraphicsD3D9RegisterResource);
#endif
    }

    return CUDA_SUCCESS;
}
|
|
|