You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
526 lines
23 KiB
526 lines
23 KiB
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. |
|
* |
|
* Redistribution and use in source and binary forms, with or without |
|
* modification, are permitted provided that the following conditions |
|
* are met: |
|
* * Redistributions of source code must retain the above copyright |
|
* notice, this list of conditions and the following disclaimer. |
|
* * Redistributions in binary form must reproduce the above copyright |
|
* notice, this list of conditions and the following disclaimer in the |
|
* documentation and/or other materials provided with the distribution. |
|
* * Neither the name of NVIDIA CORPORATION nor the names of its |
|
* contributors may be used to endorse or promote products derived |
|
* from this software without specific prior written permission. |
|
* |
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY |
|
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR |
|
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
|
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
|
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
|
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
|
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|
*/ |
|
|
|
#include <algorithm> |
|
#include <chrono> |
|
#include <iomanip> |
|
#include <iostream> |
|
|
|
#include "SineWaveSimulation.h" |
|
#include "VulkanBaseApp.h" |
|
#include "helper_cuda.h" |
|
#include "linmath.h" |
|
|
|
typedef float vec2[2]; |
|
std::string execution_path; |
|
|
|
#ifndef NDEBUG |
|
#define ENABLE_VALIDATION (false) |
|
#else |
|
#define ENABLE_VALIDATION (true) |
|
#endif |
|
|
|
class VulkanCudaSineWave : public VulkanBaseApp |
|
{ |
|
typedef struct UniformBufferObject_st |
|
{ |
|
mat4x4 modelViewProj; |
|
} UniformBufferObject; |
|
|
|
VkBuffer m_heightBuffer, m_xyBuffer, m_indexBuffer; |
|
VkDeviceMemory m_heightMemory, m_xyMemory, m_indexMemory; |
|
UniformBufferObject m_ubo; |
|
VkSemaphore m_vkWaitSemaphore, m_vkSignalSemaphore; |
|
SineWaveSimulation m_sim; |
|
cudaStream_t m_stream; |
|
cudaExternalSemaphore_t m_cudaWaitSemaphore, m_cudaSignalSemaphore, m_cudaTimelineSemaphore; |
|
cudaExternalMemory_t m_cudaVertMem; |
|
float *m_cudaHeightMap; |
|
using chrono_tp = std::chrono::time_point<std::chrono::high_resolution_clock>; |
|
chrono_tp m_lastTime; |
|
size_t m_lastFrame; |
|
|
|
public: |
|
VulkanCudaSineWave(size_t width, size_t height) |
|
: VulkanBaseApp("vulkanCudaSineWave", ENABLE_VALIDATION) |
|
, m_heightBuffer(VK_NULL_HANDLE) |
|
, m_xyBuffer(VK_NULL_HANDLE) |
|
, m_indexBuffer(VK_NULL_HANDLE) |
|
, m_heightMemory(VK_NULL_HANDLE) |
|
, m_xyMemory(VK_NULL_HANDLE) |
|
, m_indexMemory(VK_NULL_HANDLE) |
|
, m_ubo() |
|
, m_sim(width, height) |
|
, m_stream(0) |
|
, m_vkWaitSemaphore(VK_NULL_HANDLE) |
|
, m_vkSignalSemaphore(VK_NULL_HANDLE) |
|
, m_cudaWaitSemaphore() |
|
, m_cudaSignalSemaphore() |
|
, m_cudaTimelineSemaphore() |
|
, m_cudaVertMem() |
|
, m_cudaHeightMap(nullptr) |
|
, m_lastFrame(0) |
|
{ |
|
// Our index buffer can only index 32-bits of the vertex buffer |
|
if ((width * height) > (1ULL << 32ULL)) { |
|
throw std::runtime_error("Requested height and width is too large for this sample!"); |
|
} |
|
// Add our compiled vulkan shader files |
|
char *vertex_shader_path = sdkFindFilePath("vert.spv", execution_path.c_str()); |
|
char *fragment_shader_path = sdkFindFilePath("frag.spv", execution_path.c_str()); |
|
m_shaderFiles.push_back(std::make_pair(VK_SHADER_STAGE_VERTEX_BIT, vertex_shader_path)); |
|
m_shaderFiles.push_back(std::make_pair(VK_SHADER_STAGE_FRAGMENT_BIT, fragment_shader_path)); |
|
} |
|
~VulkanCudaSineWave() |
|
{ |
|
// Make sure there's no pending work before we start tearing down |
|
checkCudaErrors(cudaStreamSynchronize(m_stream)); |
|
|
|
#ifdef _VK_TIMELINE_SEMAPHORE |
|
if (m_vkTimelineSemaphore != VK_NULL_HANDLE) { |
|
checkCudaErrors(cudaDestroyExternalSemaphore(m_cudaTimelineSemaphore)); |
|
vkDestroySemaphore(m_device, m_vkTimelineSemaphore, nullptr); |
|
} |
|
#endif /* _VK_TIMELINE_SEMAPHORE */ |
|
|
|
if (m_vkSignalSemaphore != VK_NULL_HANDLE) { |
|
checkCudaErrors(cudaDestroyExternalSemaphore(m_cudaSignalSemaphore)); |
|
vkDestroySemaphore(m_device, m_vkSignalSemaphore, nullptr); |
|
} |
|
if (m_vkWaitSemaphore != VK_NULL_HANDLE) { |
|
checkCudaErrors(cudaDestroyExternalSemaphore(m_cudaWaitSemaphore)); |
|
vkDestroySemaphore(m_device, m_vkWaitSemaphore, nullptr); |
|
} |
|
|
|
if (m_xyBuffer != VK_NULL_HANDLE) { |
|
vkDestroyBuffer(m_device, m_xyBuffer, nullptr); |
|
} |
|
if (m_xyMemory != VK_NULL_HANDLE) { |
|
vkFreeMemory(m_device, m_xyMemory, nullptr); |
|
} |
|
|
|
if (m_heightBuffer != VK_NULL_HANDLE) { |
|
vkDestroyBuffer(m_device, m_heightBuffer, nullptr); |
|
} |
|
if (m_heightMemory != VK_NULL_HANDLE) { |
|
vkFreeMemory(m_device, m_heightMemory, nullptr); |
|
} |
|
if (m_cudaHeightMap) { |
|
checkCudaErrors(cudaDestroyExternalMemory(m_cudaVertMem)); |
|
} |
|
|
|
if (m_indexBuffer != VK_NULL_HANDLE) { |
|
vkDestroyBuffer(m_device, m_indexBuffer, nullptr); |
|
} |
|
if (m_indexMemory != VK_NULL_HANDLE) { |
|
vkFreeMemory(m_device, m_indexMemory, nullptr); |
|
} |
|
} |
|
|
|
void fillRenderingCommandBuffer(VkCommandBuffer &commandBuffer) |
|
{ |
|
VkBuffer vertexBuffers[] = {m_heightBuffer, m_xyBuffer}; |
|
VkDeviceSize offsets[] = {0, 0}; |
|
vkCmdBindVertexBuffers( |
|
commandBuffer, 0, sizeof(vertexBuffers) / sizeof(vertexBuffers[0]), vertexBuffers, offsets); |
|
vkCmdBindIndexBuffer(commandBuffer, m_indexBuffer, 0, VK_INDEX_TYPE_UINT32); |
|
vkCmdDrawIndexed(commandBuffer, (uint32_t)((m_sim.getWidth() - 1) * (m_sim.getHeight() - 1) * 6), 1, 0, 0, 0); |
|
} |
|
|
|
void getVertexDescriptions(std::vector<VkVertexInputBindingDescription> &bindingDesc, |
|
std::vector<VkVertexInputAttributeDescription> &attribDesc) |
|
{ |
|
bindingDesc.resize(2); |
|
attribDesc.resize(2); |
|
|
|
bindingDesc[0].binding = 0; |
|
bindingDesc[0].stride = sizeof(float); |
|
bindingDesc[0].inputRate = VK_VERTEX_INPUT_RATE_VERTEX; |
|
|
|
bindingDesc[1].binding = 1; |
|
bindingDesc[1].stride = sizeof(vec2); |
|
bindingDesc[1].inputRate = VK_VERTEX_INPUT_RATE_VERTEX; |
|
|
|
attribDesc[0].binding = 0; |
|
attribDesc[0].location = 0; |
|
attribDesc[0].format = VK_FORMAT_R32_SFLOAT; |
|
attribDesc[0].offset = 0; |
|
|
|
attribDesc[1].binding = 1; |
|
attribDesc[1].location = 1; |
|
attribDesc[1].format = VK_FORMAT_R32G32_SFLOAT; |
|
attribDesc[1].offset = 0; |
|
} |
|
|
|
void getAssemblyStateInfo(VkPipelineInputAssemblyStateCreateInfo &info) |
|
{ |
|
info.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; |
|
info.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; |
|
info.primitiveRestartEnable = VK_FALSE; |
|
} |
|
|
|
void getWaitFrameSemaphores(std::vector<VkSemaphore> &wait, std::vector<VkPipelineStageFlags> &waitStages) const |
|
{ |
|
if (m_currentFrame != 0) { |
|
// Have vulkan wait until cuda is done with the vertex buffer before |
|
// rendering, We don't do this on the first frame, as the wait semaphore |
|
// hasn't been initialized yet |
|
wait.push_back(m_vkWaitSemaphore); |
|
// We want to wait until all the pipeline commands are complete before |
|
// letting cuda work |
|
waitStages.push_back(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT); |
|
} |
|
} |
|
|
|
void getSignalFrameSemaphores(std::vector<VkSemaphore> &signal) const |
|
{ |
|
// Add this semaphore for vulkan to signal once the vertex buffer is ready |
|
// for cuda to modify |
|
signal.push_back(m_vkSignalSemaphore); |
|
} |
|
|
|
void initVulkanApp() |
|
{ |
|
int cuda_device = -1; |
|
|
|
// Select cuda device where vulkan is running. |
|
cuda_device = m_sim.initCuda(m_vkDeviceUUID, VK_UUID_SIZE); |
|
if (cuda_device == -1) { |
|
printf("Error: No CUDA-Vulkan interop capable device found\n"); |
|
exit(EXIT_FAILURE); |
|
} |
|
|
|
m_sim.initCudaLaunchConfig(cuda_device); |
|
|
|
// Create the cuda stream we'll be using |
|
checkCudaErrors(cudaStreamCreateWithFlags(&m_stream, cudaStreamNonBlocking)); |
|
|
|
const size_t nVerts = m_sim.getWidth() * m_sim.getHeight(); |
|
const size_t nInds = (m_sim.getWidth() - 1) * (m_sim.getHeight() - 1) * 6; |
|
|
|
// Create the height map cuda will write to |
|
createExternalBuffer(nVerts * sizeof(float), |
|
VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, |
|
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, |
|
getDefaultMemHandleType(), |
|
m_heightBuffer, |
|
m_heightMemory); |
|
|
|
// Create the vertex buffer that will hold the xy coordinates for the grid |
|
createBuffer(nVerts * sizeof(vec2), |
|
VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, |
|
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, |
|
m_xyBuffer, |
|
m_xyMemory); |
|
|
|
// Create the index buffer that references from both buffers above |
|
createBuffer(nInds * sizeof(uint32_t), |
|
VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT, |
|
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, |
|
m_indexBuffer, |
|
m_indexMemory); |
|
|
|
// Import the height map into cuda and retrieve a device pointer to use |
|
importCudaExternalMemory((void **)&m_cudaHeightMap, |
|
m_cudaVertMem, |
|
m_heightMemory, |
|
nVerts * sizeof(*m_cudaHeightMap), |
|
getDefaultMemHandleType()); |
|
// Set the height map to use in the simulation |
|
m_sim.initSimulation(m_cudaHeightMap); |
|
|
|
{ |
|
// Set up the initial values for the vertex buffers with Vulkan |
|
void *stagingBase; |
|
VkBuffer stagingBuffer; |
|
VkDeviceMemory stagingMemory; |
|
VkDeviceSize stagingSz = std::max(nVerts * sizeof(vec2), nInds * sizeof(uint32_t)); |
|
createBuffer(stagingSz, |
|
VK_BUFFER_USAGE_TRANSFER_SRC_BIT, |
|
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, |
|
stagingBuffer, |
|
stagingMemory); |
|
|
|
vkMapMemory(m_device, stagingMemory, 0, stagingSz, 0, &stagingBase); |
|
|
|
memset(stagingBase, 0, nVerts * sizeof(float)); |
|
copyBuffer(m_heightBuffer, stagingBuffer, nVerts * sizeof(float)); |
|
|
|
for (size_t y = 0; y < m_sim.getHeight(); y++) { |
|
for (size_t x = 0; x < m_sim.getWidth(); x++) { |
|
vec2 *stagedVert = (vec2 *)stagingBase; |
|
stagedVert[y * m_sim.getWidth() + x][0] = (2.0f * x) / (m_sim.getWidth() - 1) - 1; |
|
stagedVert[y * m_sim.getWidth() + x][1] = (2.0f * y) / (m_sim.getHeight() - 1) - 1; |
|
} |
|
} |
|
copyBuffer(m_xyBuffer, stagingBuffer, nVerts * sizeof(vec2)); |
|
|
|
{ |
|
uint32_t *indices = (uint32_t *)stagingBase; |
|
for (size_t y = 0; y < m_sim.getHeight() - 1; y++) { |
|
for (size_t x = 0; x < m_sim.getWidth() - 1; x++) { |
|
indices[0] = (uint32_t)((y + 0) * m_sim.getWidth() + (x + 0)); |
|
indices[1] = (uint32_t)((y + 1) * m_sim.getWidth() + (x + 0)); |
|
indices[2] = (uint32_t)((y + 0) * m_sim.getWidth() + (x + 1)); |
|
indices[3] = (uint32_t)((y + 1) * m_sim.getWidth() + (x + 0)); |
|
indices[4] = (uint32_t)((y + 1) * m_sim.getWidth() + (x + 1)); |
|
indices[5] = (uint32_t)((y + 0) * m_sim.getWidth() + (x + 1)); |
|
indices += 6; |
|
} |
|
} |
|
} |
|
copyBuffer(m_indexBuffer, stagingBuffer, nInds * sizeof(uint32_t)); |
|
|
|
vkUnmapMemory(m_device, stagingMemory); |
|
vkDestroyBuffer(m_device, stagingBuffer, nullptr); |
|
vkFreeMemory(m_device, stagingMemory, nullptr); |
|
} |
|
|
|
#ifdef _VK_TIMELINE_SEMAPHORE |
|
// Create the timeline semaphore to sync cuda and vulkan access to vertex |
|
// buffer |
|
createExternalSemaphore(m_vkTimelineSemaphore, getDefaultSemaphoreHandleType()); |
|
// Import the timeline semaphore cuda will use to sync cuda and vulkan |
|
// access to vertex buffer |
|
importCudaExternalSemaphore(m_cudaTimelineSemaphore, m_vkTimelineSemaphore, getDefaultSemaphoreHandleType()); |
|
#else |
|
// Create the semaphore vulkan will signal when it's done with the vertex |
|
// buffer |
|
createExternalSemaphore(m_vkSignalSemaphore, getDefaultSemaphoreHandleType()); |
|
// Create the semaphore vulkan will wait for before using the vertex buffer |
|
createExternalSemaphore(m_vkWaitSemaphore, getDefaultSemaphoreHandleType()); |
|
// Import the semaphore cuda will use -- vulkan's signal will be cuda's wait |
|
importCudaExternalSemaphore(m_cudaWaitSemaphore, m_vkSignalSemaphore, getDefaultSemaphoreHandleType()); |
|
// Import the semaphore cuda will use -- cuda's signal will be vulkan's wait |
|
importCudaExternalSemaphore(m_cudaSignalSemaphore, m_vkWaitSemaphore, getDefaultSemaphoreHandleType()); |
|
#endif /* _VK_TIMELINE_SEMAPHORE */ |
|
} |
|
|
|
void importCudaExternalMemory(void **cudaPtr, |
|
cudaExternalMemory_t &cudaMem, |
|
VkDeviceMemory &vkMem, |
|
VkDeviceSize size, |
|
VkExternalMemoryHandleTypeFlagBits handleType) |
|
{ |
|
cudaExternalMemoryHandleDesc externalMemoryHandleDesc = {}; |
|
|
|
if (handleType & VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT) { |
|
externalMemoryHandleDesc.type = cudaExternalMemoryHandleTypeOpaqueWin32; |
|
} |
|
else if (handleType & VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT) { |
|
externalMemoryHandleDesc.type = cudaExternalMemoryHandleTypeOpaqueWin32Kmt; |
|
} |
|
else if (handleType & VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT) { |
|
externalMemoryHandleDesc.type = cudaExternalMemoryHandleTypeOpaqueFd; |
|
} |
|
else { |
|
throw std::runtime_error("Unknown handle type requested!"); |
|
} |
|
|
|
externalMemoryHandleDesc.size = size; |
|
|
|
#ifdef _WIN64 |
|
externalMemoryHandleDesc.handle.win32.handle = (HANDLE)getMemHandle(vkMem, handleType); |
|
#else |
|
externalMemoryHandleDesc.handle.fd = (int)(uintptr_t)getMemHandle(vkMem, handleType); |
|
#endif |
|
|
|
checkCudaErrors(cudaImportExternalMemory(&cudaMem, &externalMemoryHandleDesc)); |
|
|
|
cudaExternalMemoryBufferDesc externalMemBufferDesc = {}; |
|
externalMemBufferDesc.offset = 0; |
|
externalMemBufferDesc.size = size; |
|
externalMemBufferDesc.flags = 0; |
|
|
|
checkCudaErrors(cudaExternalMemoryGetMappedBuffer(cudaPtr, cudaMem, &externalMemBufferDesc)); |
|
} |
|
|
|
void importCudaExternalSemaphore(cudaExternalSemaphore_t &cudaSem, |
|
VkSemaphore &vkSem, |
|
VkExternalSemaphoreHandleTypeFlagBits handleType) |
|
{ |
|
cudaExternalSemaphoreHandleDesc externalSemaphoreHandleDesc = {}; |
|
|
|
#ifdef _VK_TIMELINE_SEMAPHORE |
|
if (handleType & VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT) { |
|
externalSemaphoreHandleDesc.type = cudaExternalSemaphoreHandleTypeTimelineSemaphoreWin32; |
|
} |
|
else if (handleType & VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT) { |
|
externalSemaphoreHandleDesc.type = cudaExternalSemaphoreHandleTypeTimelineSemaphoreWin32; |
|
} |
|
else if (handleType & VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT) { |
|
externalSemaphoreHandleDesc.type = cudaExternalSemaphoreHandleTypeTimelineSemaphoreFd; |
|
} |
|
#else |
|
if (handleType & VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT) { |
|
externalSemaphoreHandleDesc.type = cudaExternalSemaphoreHandleTypeOpaqueWin32; |
|
} |
|
else if (handleType & VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT) { |
|
externalSemaphoreHandleDesc.type = cudaExternalSemaphoreHandleTypeOpaqueWin32Kmt; |
|
} |
|
else if (handleType & VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT) { |
|
externalSemaphoreHandleDesc.type = cudaExternalSemaphoreHandleTypeOpaqueFd; |
|
} |
|
#endif /* _VK_TIMELINE_SEMAPHORE */ |
|
else { |
|
throw std::runtime_error("Unknown handle type requested!"); |
|
} |
|
|
|
#ifdef _WIN64 |
|
externalSemaphoreHandleDesc.handle.win32.handle = (HANDLE)getSemaphoreHandle(vkSem, handleType); |
|
#else |
|
externalSemaphoreHandleDesc.handle.fd = (int)(uintptr_t)getSemaphoreHandle(vkSem, handleType); |
|
#endif |
|
|
|
externalSemaphoreHandleDesc.flags = 0; |
|
|
|
checkCudaErrors(cudaImportExternalSemaphore(&cudaSem, &externalSemaphoreHandleDesc)); |
|
} |
|
|
|
VkDeviceSize getUniformSize() const { return sizeof(UniformBufferObject); } |
|
|
|
void updateUniformBuffer(uint32_t imageIndex) |
|
{ |
|
{ |
|
mat4x4 view, proj; |
|
vec3 eye = {1.75f, 1.75f, 1.25f}; |
|
vec3 center = {0.0f, 0.0f, -0.25f}; |
|
vec3 up = {0.0f, 0.0f, 1.0f}; |
|
|
|
mat4x4_perspective(proj, |
|
(float)degreesToRadians(45.0f), |
|
m_swapChainExtent.width / (float)m_swapChainExtent.height, |
|
0.1f, |
|
10.0f); |
|
proj[1][1] *= -1.0f; // Flip y axis |
|
|
|
mat4x4_look_at(view, eye, center, up); |
|
mat4x4_mul(m_ubo.modelViewProj, proj, view); |
|
} |
|
|
|
void *data; |
|
vkMapMemory(m_device, m_uniformMemory[imageIndex], 0, getUniformSize(), 0, &data); |
|
memcpy(data, &m_ubo, sizeof(m_ubo)); |
|
vkUnmapMemory(m_device, m_uniformMemory[imageIndex]); |
|
} |
|
|
|
std::vector<const char *> getRequiredExtensions() const |
|
{ |
|
std::vector<const char *> extensions; |
|
extensions.push_back(VK_KHR_EXTERNAL_MEMORY_CAPABILITIES_EXTENSION_NAME); |
|
extensions.push_back(VK_KHR_EXTERNAL_SEMAPHORE_CAPABILITIES_EXTENSION_NAME); |
|
return extensions; |
|
} |
|
|
|
std::vector<const char *> getRequiredDeviceExtensions() const |
|
{ |
|
std::vector<const char *> extensions; |
|
extensions.push_back(VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME); |
|
extensions.push_back(VK_KHR_EXTERNAL_SEMAPHORE_EXTENSION_NAME); |
|
extensions.push_back(VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME); |
|
#ifdef _WIN64 |
|
extensions.push_back(VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME); |
|
extensions.push_back(VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME); |
|
#else |
|
extensions.push_back(VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME); |
|
extensions.push_back(VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME); |
|
#endif /* _WIN64 */ |
|
return extensions; |
|
} |
|
|
|
void drawFrame() |
|
{ |
|
static chrono_tp startTime = std::chrono::high_resolution_clock::now(); |
|
|
|
chrono_tp currentTime = std::chrono::high_resolution_clock::now(); |
|
float time = std::chrono::duration<float, std::chrono::seconds::period>(currentTime - startTime).count(); |
|
|
|
if (m_currentFrame == 0) { |
|
m_lastTime = startTime; |
|
} |
|
|
|
float frame_time = std::chrono::duration<float, std::chrono::seconds::period>(currentTime - m_lastTime).count(); |
|
|
|
// Have vulkan draw the current frame... |
|
VulkanBaseApp::drawFrame(); |
|
|
|
#ifdef _VK_TIMELINE_SEMAPHORE |
|
static uint64_t waitValue = 1; |
|
static uint64_t signalValue = 2; |
|
|
|
cudaExternalSemaphoreWaitParams waitParams = {}; |
|
waitParams.flags = 0; |
|
waitParams.params.fence.value = waitValue; |
|
|
|
cudaExternalSemaphoreSignalParams signalParams = {}; |
|
signalParams.flags = 0; |
|
signalParams.params.fence.value = signalValue; |
|
// Wait for vulkan to complete it's work |
|
checkCudaErrors(cudaWaitExternalSemaphoresAsync(&m_cudaTimelineSemaphore, &waitParams, 1, m_stream)); |
|
// Now step the simulation |
|
m_sim.stepSimulation(time, m_stream); |
|
// Signal vulkan to continue with the updated buffers |
|
checkCudaErrors(cudaSignalExternalSemaphoresAsync(&m_cudaTimelineSemaphore, &signalParams, 1, m_stream)); |
|
|
|
waitValue += 2; |
|
signalValue += 2; |
|
#else |
|
cudaExternalSemaphoreWaitParams waitParams = {}; |
|
waitParams.flags = 0; |
|
waitParams.params.fence.value = 0; |
|
|
|
cudaExternalSemaphoreSignalParams signalParams = {}; |
|
signalParams.flags = 0; |
|
signalParams.params.fence.value = 0; |
|
|
|
// Wait for vulkan to complete it's work |
|
checkCudaErrors(cudaWaitExternalSemaphoresAsync(&m_cudaWaitSemaphore, &waitParams, 1, m_stream)); |
|
// Now step the simulation |
|
m_sim.stepSimulation(time, m_stream); |
|
// Signal vulkan to continue with the updated buffers |
|
checkCudaErrors(cudaSignalExternalSemaphoresAsync(&m_cudaSignalSemaphore, &signalParams, 1, m_stream)); |
|
#endif /* _VK_TIMELINE_SEMAPHORE */ |
|
|
|
// Output a naive measurement of the frames per second every five seconds |
|
if (frame_time > 5) { |
|
std::cout << "Average FPS (over " << std::fixed << std::setprecision(2) << frame_time |
|
<< " seconds): " << std::fixed << std::setprecision(2) |
|
<< ((m_currentFrame - m_lastFrame) / frame_time) << std::endl; |
|
m_lastFrame = m_currentFrame; |
|
m_lastTime = currentTime; |
|
} |
|
} |
|
}; |
|
|
|
int main(int argc, char **argv) |
|
{ |
|
execution_path = argv[0]; |
|
VulkanCudaSineWave app((1ULL << 8ULL), (1ULL << 8ULL)); |
|
app.init(); |
|
app.mainLoop(); |
|
return 0; |
|
}
|
|
|