You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
748 lines
24 KiB
748 lines
24 KiB
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. |
|
* |
|
* Redistribution and use in source and binary forms, with or without |
|
* modification, are permitted provided that the following conditions |
|
* are met: |
|
* * Redistributions of source code must retain the above copyright |
|
* notice, this list of conditions and the following disclaimer. |
|
* * Redistributions in binary form must reproduce the above copyright |
|
* notice, this list of conditions and the following disclaimer in the |
|
* documentation and/or other materials provided with the distribution. |
|
* * Neither the name of NVIDIA CORPORATION nor the names of its |
|
* contributors may be used to endorse or promote products derived |
|
* from this software without specific prior written permission. |
|
* |
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY |
|
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR |
|
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
|
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
|
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
|
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
|
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|
*/ |
|
|
|
// USE_TEXSUBIMAGE2D uses glTexSubImage2D() to update the final result |
|
// commenting it will make the sample use the other way : |
|
// map a texture in CUDA and blit the result into it |
|
#define USE_TEXSUBIMAGE2D |
|
|
|
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) |
|
#define WINDOWS_LEAN_AND_MEAN |
|
#define NOMINMAX |
|
#include <windows.h> |
|
#pragma warning(disable : 4996) |
|
#endif |
|
|
|
// OpenGL Graphics includes |
|
#include <helper_gl.h> |
|
#if defined(__APPLE__) || defined(MACOSX) |
|
#pragma clang diagnostic ignored "-Wdeprecated-declarations" |
|
#include <GLUT/glut.h> |
|
// Sorry for Apple : unsigned int sampler is not available to you, yet... |
|
// Let's switch to the use of PBO and glTexSubImage |
|
#define USE_TEXSUBIMAGE2D |
|
#else |
|
#include <GL/freeglut.h> |
|
#endif |
|
|
|
// CUDA includes |
|
#include <cuda_gl_interop.h> |
|
#include <cuda_runtime.h> |
|
|
|
// CUDA utilities and system includes |
|
#include <helper_cuda.h> |
|
#include <helper_functions.h> |
|
#include <rendercheck_gl.h> |
|
|
|
// Shared Library Test Functions |
|
#define MAX_EPSILON 10 |
|
#define REFRESH_DELAY 10 // ms |
|
|
|
const char *sSDKname = "simpleCUDA2GL"; |
|
|
|
unsigned int g_TotalErrors = 0; |
|
|
|
// CheckFBO/BackBuffer class objects |
|
CheckRender *g_CheckRender = NULL; |
|
|
|
//////////////////////////////////////////////////////////////////////////////// |
|
// constants / global variables |
|
unsigned int window_width = 512; |
|
unsigned int window_height = 512; |
|
unsigned int image_width = 512; |
|
unsigned int image_height = 512; |
|
int iGLUTWindowHandle = 0; // handle to the GLUT window |
|
|
|
// pbo and fbo variables |
|
#ifdef USE_TEXSUBIMAGE2D |
|
GLuint pbo_dest; |
|
struct cudaGraphicsResource *cuda_pbo_dest_resource; |
|
#else |
|
unsigned int *cuda_dest_resource; |
|
GLuint shDrawTex; // draws a texture |
|
struct cudaGraphicsResource *cuda_tex_result_resource; |
|
#endif |
|
|
|
GLuint fbo_source; |
|
struct cudaGraphicsResource *cuda_tex_screen_resource; |
|
|
|
unsigned int size_tex_data; |
|
unsigned int num_texels; |
|
unsigned int num_values; |
|
|
|
// (offscreen) render target fbo variables |
|
GLuint tex_screen; // where we render the image |
|
GLuint tex_cudaResult; // where we will copy the CUDA result |
|
|
|
char *ref_file = NULL; |
|
bool enable_cuda = true; |
|
|
|
int *pArgc = NULL; |
|
char **pArgv = NULL; |
|
|
|
// Timer |
|
static int fpsCount = 0; |
|
static int fpsLimit = 1; |
|
StopWatchInterface *timer = NULL; |
|
|
|
#ifndef USE_TEXTURE_RGBA8UI |
|
#pragma message("Note: Using Texture fmt GL_RGBA16F_ARB") |
|
#else |
|
// NOTE: the current issue with regular RGBA8 internal format of textures |
|
// is that HW stores them as BGRA8. Therefore CUDA will see BGRA where users |
|
// expected RGBA8. To prevent this issue, the driver team decided to prevent |
|
// this to happen |
|
// instead, use RGBA8UI which required the additional work of scaling the |
|
// fragment shader |
|
// output from 0-1 to 0-255. This is why we have some GLSL code, in this case |
|
#pragma message("Note: Using Texture RGBA8UI + GLSL for rendering") |
|
#endif |
|
GLuint shDraw; |
|
|
|
//////////////////////////////////////////////////////////////////////////////// |
|
extern "C" void launch_cudaProcess(dim3 grid, dim3 block, int sbytes, unsigned int *g_odata, int imgw); |
|
|
|
// Forward declarations |
|
void runStdProgram(int argc, char **argv); |
|
void FreeResource(); |
|
void Cleanup(int iExitCode); |
|
|
|
// GL functionality |
|
bool initGL(int *argc, char **argv); |
|
|
|
#ifdef USE_TEXSUBIMAGE2D |
|
void createPBO(GLuint *pbo, struct cudaGraphicsResource **pbo_resource); |
|
void deletePBO(GLuint *pbo); |
|
#endif |
|
|
|
void createTextureDst(GLuint *tex_cudaResult, unsigned int size_x, unsigned int size_y); |
|
void deleteTexture(GLuint *tex); |
|
|
|
// rendering callbacks |
|
void display(); |
|
void idle(); |
|
void keyboard(unsigned char key, int x, int y); |
|
void reshape(int w, int h); |
|
void mainMenu(int i); |
|
|
|
#ifdef USE_TEXSUBIMAGE2D |
|
//////////////////////////////////////////////////////////////////////////////// |
|
//! Create PBO |
|
//////////////////////////////////////////////////////////////////////////////// |
|
void createPBO(GLuint *pbo, struct cudaGraphicsResource **pbo_resource) |
|
{ |
|
// set up vertex data parameter |
|
num_texels = image_width * image_height; |
|
num_values = num_texels * 4; |
|
size_tex_data = sizeof(GLubyte) * num_values; |
|
void *data = malloc(size_tex_data); |
|
|
|
// create buffer object |
|
glGenBuffers(1, pbo); |
|
glBindBuffer(GL_ARRAY_BUFFER, *pbo); |
|
glBufferData(GL_ARRAY_BUFFER, size_tex_data, data, GL_DYNAMIC_DRAW); |
|
free(data); |
|
|
|
glBindBuffer(GL_ARRAY_BUFFER, 0); |
|
|
|
// register this buffer object with CUDA |
|
checkCudaErrors(cudaGraphicsGLRegisterBuffer(pbo_resource, *pbo, cudaGraphicsMapFlagsNone)); |
|
|
|
SDK_CHECK_ERROR_GL(); |
|
} |
|
|
|
void deletePBO(GLuint *pbo) |
|
{ |
|
glDeleteBuffers(1, pbo); |
|
SDK_CHECK_ERROR_GL(); |
|
*pbo = 0; |
|
} |
|
#endif |
|
|
|
const GLenum fbo_targets[] = {GL_COLOR_ATTACHMENT0_EXT, |
|
GL_COLOR_ATTACHMENT1_EXT, |
|
GL_COLOR_ATTACHMENT2_EXT, |
|
GL_COLOR_ATTACHMENT3_EXT}; |
|
|
|
#ifndef USE_TEXSUBIMAGE2D |
|
static const char *glsl_drawtex_vertshader_src = "void main(void)\n" |
|
"{\n" |
|
" gl_Position = gl_Vertex;\n" |
|
" gl_TexCoord[0].xy = gl_MultiTexCoord0.xy;\n" |
|
"}\n"; |
|
|
|
static const char *glsl_drawtex_fragshader_src = "#version 130\n" |
|
"uniform usampler2D texImage;\n" |
|
"void main()\n" |
|
"{\n" |
|
" vec4 c = texture(texImage, gl_TexCoord[0].xy);\n" |
|
" gl_FragColor = c / 255.0;\n" |
|
"}\n"; |
|
#endif |
|
|
|
static const char *glsl_draw_fragshader_src = |
|
// WARNING: seems like the gl_FragColor doesn't want to output >1 colors... |
|
// you need version 1.3 so you can define a uvec4 output... |
|
// but MacOSX complains about not supporting 1.3 !! |
|
// for now, the mode where we use RGBA8UI may not work properly for Apple : only |
|
// RGBA16F works (default) |
|
#if defined(__APPLE__) || defined(MACOSX) |
|
"void main()\n" |
|
"{" |
|
" gl_FragColor = vec4(gl_Color * 255.0);\n" |
|
"}\n"; |
|
#else |
|
"#version 130\n" |
|
"out uvec4 FragColor;\n" |
|
"void main()\n" |
|
"{" |
|
" FragColor = uvec4(gl_Color.xyz * 255.0, 255.0);\n" |
|
"}\n"; |
|
#endif |
|
|
|
// copy image and process using CUDA |
|
void generateCUDAImage() |
|
{ |
|
// run the Cuda kernel |
|
unsigned int *out_data; |
|
|
|
#ifdef USE_TEXSUBIMAGE2D |
|
checkCudaErrors(cudaGraphicsMapResources(1, &cuda_pbo_dest_resource, 0)); |
|
size_t num_bytes; |
|
checkCudaErrors(cudaGraphicsResourceGetMappedPointer((void **)&out_data, &num_bytes, cuda_pbo_dest_resource)); |
|
// printf("CUDA mapped pointer of pbo_out: May access %ld bytes, expected %d\n", |
|
// num_bytes, size_tex_data); |
|
#else |
|
out_data = cuda_dest_resource; |
|
#endif |
|
// calculate grid size |
|
dim3 block(16, 16, 1); |
|
// dim3 block(16, 16, 1); |
|
dim3 grid(image_width / block.x, image_height / block.y, 1); |
|
// execute CUDA kernel |
|
launch_cudaProcess(grid, block, 0, out_data, image_width); |
|
|
|
// CUDA generated data in cuda memory or in a mapped PBO made of BGRA 8 bits |
|
// 2 solutions, here : |
|
// - use glTexSubImage2D(), there is the potential to loose performance in |
|
// possible hidden conversion |
|
// - map the texture and blit the result thanks to CUDA API |
|
#ifdef USE_TEXSUBIMAGE2D |
|
checkCudaErrors(cudaGraphicsUnmapResources(1, &cuda_pbo_dest_resource, 0)); |
|
glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, pbo_dest); |
|
|
|
glBindTexture(GL_TEXTURE_2D, tex_cudaResult); |
|
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, image_width, image_height, GL_RGBA, GL_UNSIGNED_BYTE, NULL); |
|
SDK_CHECK_ERROR_GL(); |
|
glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB, 0); |
|
glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0); |
|
#else |
|
// We want to copy cuda_dest_resource data to the texture |
|
// map buffer objects to get CUDA device pointers |
|
cudaArray *texture_ptr; |
|
checkCudaErrors(cudaGraphicsMapResources(1, &cuda_tex_result_resource, 0)); |
|
checkCudaErrors(cudaGraphicsSubResourceGetMappedArray(&texture_ptr, cuda_tex_result_resource, 0, 0)); |
|
|
|
int num_texels = image_width * image_height; |
|
int num_values = num_texels * 4; |
|
int size_tex_data = sizeof(GLubyte) * num_values; |
|
checkCudaErrors(cudaMemcpyToArray(texture_ptr, 0, 0, cuda_dest_resource, size_tex_data, cudaMemcpyDeviceToDevice)); |
|
|
|
checkCudaErrors(cudaGraphicsUnmapResources(1, &cuda_tex_result_resource, 0)); |
|
#endif |
|
} |
|
|
|
// display image to the screen as textured quad |
|
void displayImage(GLuint texture) |
|
{ |
|
glBindTexture(GL_TEXTURE_2D, texture); |
|
glEnable(GL_TEXTURE_2D); |
|
glDisable(GL_DEPTH_TEST); |
|
glDisable(GL_LIGHTING); |
|
glTexEnvf(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE); |
|
|
|
glMatrixMode(GL_PROJECTION); |
|
glPushMatrix(); |
|
glLoadIdentity(); |
|
glOrtho(-1.0, 1.0, -1.0, 1.0, -1.0, 1.0); |
|
|
|
glMatrixMode(GL_MODELVIEW); |
|
glLoadIdentity(); |
|
|
|
glViewport(0, 0, window_width, window_height); |
|
|
|
// if the texture is a 8 bits UI, scale the fetch with a GLSL shader |
|
#ifndef USE_TEXSUBIMAGE2D |
|
glUseProgram(shDrawTex); |
|
GLint id = glGetUniformLocation(shDrawTex, "texImage"); |
|
glUniform1i(id, 0); // texture unit 0 to "texImage" |
|
SDK_CHECK_ERROR_GL(); |
|
#endif |
|
|
|
glBegin(GL_QUADS); |
|
glTexCoord2f(0.0, 0.0); |
|
glVertex3f(-1.0, -1.0, 0.5); |
|
glTexCoord2f(1.0, 0.0); |
|
glVertex3f(1.0, -1.0, 0.5); |
|
glTexCoord2f(1.0, 1.0); |
|
glVertex3f(1.0, 1.0, 0.5); |
|
glTexCoord2f(0.0, 1.0); |
|
glVertex3f(-1.0, 1.0, 0.5); |
|
glEnd(); |
|
|
|
glMatrixMode(GL_PROJECTION); |
|
glPopMatrix(); |
|
|
|
glDisable(GL_TEXTURE_2D); |
|
|
|
#ifndef USE_TEXSUBIMAGE2D |
|
glUseProgram(0); |
|
#endif |
|
SDK_CHECK_ERROR_GL(); |
|
} |
|
|
|
//////////////////////////////////////////////////////////////////////////////// |
|
//! Display callback |
|
//////////////////////////////////////////////////////////////////////////////// |
|
void display() |
|
{ |
|
sdkStartTimer(&timer); |
|
|
|
if (enable_cuda) { |
|
generateCUDAImage(); |
|
displayImage(tex_cudaResult); |
|
} |
|
|
|
// NOTE: I needed to add this call so the timing is consistent. |
|
// Need to investigate why |
|
cudaDeviceSynchronize(); |
|
sdkStopTimer(&timer); |
|
|
|
// flip backbuffer |
|
glutSwapBuffers(); |
|
|
|
// If specified, Check rendering against reference, |
|
if (ref_file && g_CheckRender && g_CheckRender->IsQAReadback()) { |
|
static int pass = 0; |
|
|
|
if (pass > 0) { |
|
g_CheckRender->readback(window_width, window_height); |
|
char currentOutputPPM[256]; |
|
sprintf(currentOutputPPM, "kilt.ppm"); |
|
g_CheckRender->savePPM(currentOutputPPM, true, NULL); |
|
|
|
if (!g_CheckRender->PPMvsPPM(currentOutputPPM, sdkFindFilePath(ref_file, pArgv[0]), MAX_EPSILON, 0.30f)) { |
|
g_TotalErrors++; |
|
} |
|
|
|
Cleanup((g_TotalErrors == 0) ? EXIT_SUCCESS : EXIT_FAILURE); |
|
} |
|
|
|
pass++; |
|
} |
|
|
|
// Update fps counter, fps/title display and log |
|
if (++fpsCount == fpsLimit) { |
|
char cTitle[256]; |
|
float fps = 1000.0f / sdkGetAverageTimerValue(&timer); |
|
sprintf(cTitle, "CUDA GL Post Processing (%d x %d): %.1f fps", window_width, window_height, fps); |
|
glutSetWindowTitle(cTitle); |
|
// printf("%s\n", cTitle); |
|
fpsCount = 0; |
|
fpsLimit = (int)((fps > 1.0f) ? fps : 1.0f); |
|
sdkResetTimer(&timer); |
|
} |
|
} |
|
|
|
void timerEvent(int value) |
|
{ |
|
glutPostRedisplay(); |
|
glutTimerFunc(REFRESH_DELAY, timerEvent, 0); |
|
} |
|
|
|
//////////////////////////////////////////////////////////////////////////////// |
|
//! Keyboard events handler |
|
//////////////////////////////////////////////////////////////////////////////// |
|
void keyboard(unsigned char key, int /*x*/, int /*y*/) |
|
{ |
|
switch (key) { |
|
case (27): |
|
Cleanup(EXIT_SUCCESS); |
|
break; |
|
|
|
case ' ': |
|
enable_cuda ^= 1; |
|
#ifdef USE_TEXTURE_RGBA8UI |
|
|
|
if (enable_cuda) { |
|
glClearColorIuiEXT(128, 128, 128, 255); |
|
} |
|
else { |
|
glClearColor(0.5, 0.5, 0.5, 1.0); |
|
} |
|
|
|
#endif |
|
break; |
|
} |
|
} |
|
|
|
void reshape(int w, int h) |
|
{ |
|
window_width = w; |
|
window_height = h; |
|
} |
|
|
|
void mainMenu(int i) { keyboard((unsigned char)i, 0, 0); } |
|
|
|
//////////////////////////////////////////////////////////////////////////////// |
|
//! |
|
//////////////////////////////////////////////////////////////////////////////// |
|
void createTextureDst(GLuint *tex_cudaResult, unsigned int size_x, unsigned int size_y) |
|
{ |
|
// create a texture |
|
glGenTextures(1, tex_cudaResult); |
|
glBindTexture(GL_TEXTURE_2D, *tex_cudaResult); |
|
|
|
// set basic parameters |
|
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); |
|
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); |
|
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); |
|
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); |
|
|
|
#ifdef USE_TEXSUBIMAGE2D |
|
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, size_x, size_y, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); |
|
SDK_CHECK_ERROR_GL(); |
|
#else |
|
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8UI_EXT, size_x, size_y, 0, GL_RGBA_INTEGER_EXT, GL_UNSIGNED_BYTE, NULL); |
|
SDK_CHECK_ERROR_GL(); |
|
// register this texture with CUDA |
|
checkCudaErrors(cudaGraphicsGLRegisterImage( |
|
&cuda_tex_result_resource, *tex_cudaResult, GL_TEXTURE_2D, cudaGraphicsMapFlagsWriteDiscard)); |
|
#endif |
|
} |
|
|
|
//////////////////////////////////////////////////////////////////////////////// |
|
//! |
|
//////////////////////////////////////////////////////////////////////////////// |
|
void deleteTexture(GLuint *tex) |
|
{ |
|
glDeleteTextures(1, tex); |
|
SDK_CHECK_ERROR_GL(); |
|
|
|
*tex = 0; |
|
} |
|
|
|
//////////////////////////////////////////////////////////////////////////////// |
|
// Program main |
|
//////////////////////////////////////////////////////////////////////////////// |
|
int main(int argc, char **argv) |
|
{ |
|
#if defined(__linux__) |
|
char *Xstatus = getenv("DISPLAY"); |
|
if (Xstatus == NULL) { |
|
printf("Waiving execution as X server is not running\n"); |
|
exit(EXIT_WAIVED); |
|
} |
|
setenv("DISPLAY", ":0", 0); |
|
#endif |
|
|
|
printf("%s Starting...\n\n", argv[0]); |
|
|
|
if (checkCmdLineFlag(argc, (const char **)argv, "file")) { |
|
getCmdLineArgumentString(argc, (const char **)argv, "file", &ref_file); |
|
} |
|
|
|
pArgc = &argc; |
|
pArgv = argv; |
|
|
|
// use command-line specified CUDA device, otherwise use device with highest |
|
// Gflops/s |
|
if (checkCmdLineFlag(argc, (const char **)argv, "device")) { |
|
printf("[%s]\n", argv[0]); |
|
printf(" Does not explicitly support -device=n\n"); |
|
printf(" This sample requires OpenGL. Only -file=<reference> are " |
|
"supported\n"); |
|
printf("exiting...\n"); |
|
exit(EXIT_WAIVED); |
|
} |
|
|
|
if (ref_file) { |
|
printf("(Test with OpenGL verification)\n"); |
|
runStdProgram(argc, argv); |
|
} |
|
else { |
|
printf("(Interactive OpenGL Demo)\n"); |
|
runStdProgram(argc, argv); |
|
} |
|
|
|
exit(EXIT_SUCCESS); |
|
} |
|
|
|
//////////////////////////////////////////////////////////////////////////////// |
|
//! |
|
//////////////////////////////////////////////////////////////////////////////// |
|
void FreeResource() |
|
{ |
|
sdkDeleteTimer(&timer); |
|
|
|
// unregister this buffer object with CUDA |
|
// checkCudaErrors(cudaGraphicsUnregisterResource(cuda_tex_screen_resource)); |
|
#ifdef USE_TEXSUBIMAGE2D |
|
checkCudaErrors(cudaGraphicsUnregisterResource(cuda_pbo_dest_resource)); |
|
deletePBO(&pbo_dest); |
|
#else |
|
cudaFree(cuda_dest_resource); |
|
#endif |
|
deleteTexture(&tex_screen); |
|
deleteTexture(&tex_cudaResult); |
|
|
|
if (iGLUTWindowHandle) { |
|
glutDestroyWindow(iGLUTWindowHandle); |
|
} |
|
|
|
// finalize logs and leave |
|
printf("simpleCUDA2GL Exiting...\n"); |
|
} |
|
|
|
void Cleanup(int iExitCode) |
|
{ |
|
FreeResource(); |
|
printf("PPM Images are %s\n", (iExitCode == EXIT_SUCCESS) ? "Matching" : "Not Matching"); |
|
exit(iExitCode); |
|
} |
|
|
|
//////////////////////////////////////////////////////////////////////////////// |
|
//! |
|
//////////////////////////////////////////////////////////////////////////////// |
|
GLuint compileGLSLprogram(const char *vertex_shader_src, const char *fragment_shader_src) |
|
{ |
|
GLuint v, f, p = 0; |
|
|
|
p = glCreateProgram(); |
|
|
|
if (vertex_shader_src) { |
|
v = glCreateShader(GL_VERTEX_SHADER); |
|
glShaderSource(v, 1, &vertex_shader_src, NULL); |
|
glCompileShader(v); |
|
|
|
// check if shader compiled |
|
GLint compiled = 0; |
|
glGetShaderiv(v, GL_COMPILE_STATUS, &compiled); |
|
|
|
if (!compiled) { |
|
// #ifdef NV_REPORT_COMPILE_ERRORS |
|
char temp[256] = ""; |
|
glGetShaderInfoLog(v, 256, NULL, temp); |
|
printf("Vtx Compile failed:\n%s\n", temp); |
|
// #endif |
|
glDeleteShader(v); |
|
return 0; |
|
} |
|
else { |
|
glAttachShader(p, v); |
|
} |
|
} |
|
|
|
if (fragment_shader_src) { |
|
f = glCreateShader(GL_FRAGMENT_SHADER); |
|
glShaderSource(f, 1, &fragment_shader_src, NULL); |
|
glCompileShader(f); |
|
|
|
// check if shader compiled |
|
GLint compiled = 0; |
|
glGetShaderiv(f, GL_COMPILE_STATUS, &compiled); |
|
|
|
if (!compiled) { |
|
// #ifdef NV_REPORT_COMPILE_ERRORS |
|
char temp[256] = ""; |
|
glGetShaderInfoLog(f, 256, NULL, temp); |
|
printf("frag Compile failed:\n%s\n", temp); |
|
// #endif |
|
glDeleteShader(f); |
|
return 0; |
|
} |
|
else { |
|
glAttachShader(p, f); |
|
} |
|
} |
|
|
|
glLinkProgram(p); |
|
|
|
int infologLength = 0; |
|
int charsWritten = 0; |
|
|
|
glGetProgramiv(p, GL_INFO_LOG_LENGTH, (GLint *)&infologLength); |
|
|
|
if (infologLength > 0) { |
|
char *infoLog = (char *)malloc(infologLength); |
|
glGetProgramInfoLog(p, infologLength, (GLsizei *)&charsWritten, infoLog); |
|
printf("Shader compilation error: %s\n", infoLog); |
|
free(infoLog); |
|
} |
|
|
|
return p; |
|
} |
|
|
|
//////////////////////////////////////////////////////////////////////////////// |
|
//! Allocate the "render target" of CUDA |
|
//////////////////////////////////////////////////////////////////////////////// |
|
#ifndef USE_TEXSUBIMAGE2D |
|
void initCUDABuffers() |
|
{ |
|
// set up vertex data parameter |
|
num_texels = image_width * image_height; |
|
num_values = num_texels * 4; |
|
size_tex_data = sizeof(GLubyte) * num_values; |
|
checkCudaErrors(cudaMalloc((void **)&cuda_dest_resource, size_tex_data)); |
|
// checkCudaErrors(cudaHostAlloc((void**)&cuda_dest_resource, size_tex_data, |
|
// )); |
|
} |
|
#endif |
|
|
|
//////////////////////////////////////////////////////////////////////////////// |
|
//! |
|
//////////////////////////////////////////////////////////////////////////////// |
|
void initGLBuffers() |
|
{ |
|
// create pbo |
|
#ifdef USE_TEXSUBIMAGE2D |
|
createPBO(&pbo_dest, &cuda_pbo_dest_resource); |
|
#endif |
|
// create texture that will receive the result of CUDA |
|
createTextureDst(&tex_cudaResult, image_width, image_height); |
|
// load shader programs |
|
shDraw = compileGLSLprogram(NULL, glsl_draw_fragshader_src); |
|
|
|
#ifndef USE_TEXSUBIMAGE2D |
|
shDrawTex = compileGLSLprogram(glsl_drawtex_vertshader_src, glsl_drawtex_fragshader_src); |
|
#endif |
|
SDK_CHECK_ERROR_GL(); |
|
} |
|
|
|
//////////////////////////////////////////////////////////////////////////////// |
|
//! Run standard demo loop with or without GL verification |
|
//////////////////////////////////////////////////////////////////////////////// |
|
void runStdProgram(int argc, char **argv) |
|
{ |
|
// First initialize OpenGL context, so we can properly set the GL for CUDA. |
|
// This is necessary in order to achieve optimal performance with OpenGL/CUDA |
|
// interop. |
|
if (false == initGL(&argc, argv)) { |
|
return; |
|
} |
|
|
|
// Now initialize CUDA context (GL context has been created already) |
|
findCudaDevice(argc, (const char **)argv); |
|
|
|
sdkCreateTimer(&timer); |
|
sdkResetTimer(&timer); |
|
|
|
// register callbacks |
|
glutDisplayFunc(display); |
|
glutKeyboardFunc(keyboard); |
|
glutReshapeFunc(reshape); |
|
glutTimerFunc(REFRESH_DELAY, timerEvent, 0); |
|
|
|
// create menu |
|
glutCreateMenu(mainMenu); |
|
glutAddMenuEntry("Quit (esc)", '\033'); |
|
glutAttachMenu(GLUT_RIGHT_BUTTON); |
|
|
|
initGLBuffers(); |
|
#ifndef USE_TEXSUBIMAGE2D |
|
initCUDABuffers(); |
|
#endif |
|
|
|
// Creating the Auto-Validation Code |
|
if (ref_file) { |
|
g_CheckRender = new CheckBackBuffer(window_width, window_height, 4); |
|
g_CheckRender->setPixelFormat(GL_RGBA); |
|
g_CheckRender->setExecPath(argv[0]); |
|
g_CheckRender->EnableQAReadback(true); |
|
} |
|
|
|
printf("\n" |
|
"\tControls\n" |
|
"\t(right click mouse button for Menu)\n" |
|
"\t[esc] - Quit\n\n"); |
|
|
|
// start rendering mainloop |
|
glutMainLoop(); |
|
|
|
// Normally unused return path |
|
Cleanup(EXIT_SUCCESS); |
|
} |
|
|
|
//////////////////////////////////////////////////////////////////////////////// |
|
//! Initialize GL |
|
//////////////////////////////////////////////////////////////////////////////// |
|
bool initGL(int *argc, char **argv) |
|
{ |
|
// Create GL context |
|
glutInit(argc, argv); |
|
glutInitDisplayMode(GLUT_RGBA | GLUT_ALPHA | GLUT_DOUBLE | GLUT_DEPTH); |
|
glutInitWindowSize(window_width, window_height); |
|
iGLUTWindowHandle = glutCreateWindow("CUDA OpenGL post-processing"); |
|
|
|
// initialize necessary OpenGL extensions |
|
if (!isGLVersionSupported(2, 0) |
|
|| !areGLExtensionsSupported("GL_ARB_pixel_buffer_object " |
|
"GL_EXT_framebuffer_object")) { |
|
printf("ERROR: Support for necessary OpenGL extensions missing."); |
|
fflush(stderr); |
|
return false; |
|
} |
|
|
|
// default initialization |
|
#ifndef USE_TEXTURE_RGBA8UI |
|
glClearColor(0.5, 0.5, 0.5, 1.0); |
|
#else |
|
glClearColorIuiEXT(128, 128, 128, 255); |
|
#endif |
|
glDisable(GL_DEPTH_TEST); |
|
|
|
// viewport |
|
glViewport(0, 0, window_width, window_height); |
|
|
|
// projection |
|
glMatrixMode(GL_PROJECTION); |
|
glLoadIdentity(); |
|
gluPerspective(60.0, (GLfloat)window_width / (GLfloat)window_height, 0.1f, 10.0f); |
|
|
|
glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); |
|
|
|
glEnable(GL_LIGHT0); |
|
float red[] = {1.0f, 0.1f, 0.1f, 1.0f}; |
|
float white[] = {1.0f, 1.0f, 1.0f, 1.0f}; |
|
glMaterialfv(GL_FRONT_AND_BACK, GL_DIFFUSE, red); |
|
glMaterialfv(GL_FRONT_AND_BACK, GL_SPECULAR, white); |
|
glMaterialf(GL_FRONT_AND_BACK, GL_SHININESS, 60.0f); |
|
|
|
SDK_CHECK_ERROR_GL(); |
|
|
|
return true; |
|
}
|
|
|