From 3ee825c2ef8cfb052d551352e2a650b3fb4f26fd Mon Sep 17 00:00:00 2001 From: Philip Rebohle Date: Mon, 28 Oct 2024 13:43:24 +0100 Subject: [PATCH] [dxvk] Initialize additional command buffers on demand The vast majority of submissions will only use one or two command buffers rather than all five, and only the main command buffer will almost always actually be used. This also saves us a bunch of CPU-side tracking. --- src/dxvk/dxvk_cmdlist.cpp | 70 ++++++++++++++++++++++-------------- src/dxvk/dxvk_cmdlist.h | 74 +++++++++++++++++++++++---------------- 2 files changed, 87 insertions(+), 57 deletions(-) diff --git a/src/dxvk/dxvk_cmdlist.cpp b/src/dxvk/dxvk_cmdlist.cpp index 0b6ff1fe7..c1edfef0c 100644 --- a/src/dxvk/dxvk_cmdlist.cpp +++ b/src/dxvk/dxvk_cmdlist.cpp @@ -185,6 +185,11 @@ namespace dxvk { DxvkTimelineSemaphoreValues& timelines) { VkResult status = VK_SUCCESS; + static const std::array SdmaCmdBuffers = + { DxvkCmdBuffer::SdmaBarriers, DxvkCmdBuffer::SdmaBuffer }; + static const std::array InitCmdBuffers = + { DxvkCmdBuffer::InitBarriers, DxvkCmdBuffer::InitBuffer }; + const auto& graphics = m_device->queues().graphics; const auto& transfer = m_device->queues().transfer; const auto& sparse = m_device->queues().sparse; @@ -225,11 +230,10 @@ namespace dxvk { } // Execute transfer command buffer, if any - if (cmd.usedFlags.test(DxvkCmdBuffer::SdmaBarriers)) - m_commandSubmission.executeCommandBuffer(cmd.cmdBuffers[uint32_t(DxvkCmdBuffer::SdmaBarriers)]); - - if (cmd.usedFlags.test(DxvkCmdBuffer::SdmaBuffer)) - m_commandSubmission.executeCommandBuffer(cmd.cmdBuffers[uint32_t(DxvkCmdBuffer::SdmaBuffer)]); + for (auto cmdBuffer : SdmaCmdBuffers) { + if (cmd.cmdBuffers[uint32_t(cmdBuffer)]) + m_commandSubmission.executeCommandBuffer(cmd.cmdBuffers[uint32_t(cmdBuffer)]); + } // If we had either a transfer command or a semaphore wait, submit to the // transfer queue so that all subsequent commands get stalled as necessary. @@ -251,14 +255,14 @@ namespace dxvk { 0, VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT); } - // Submit graphics commands - if (cmd.usedFlags.test(DxvkCmdBuffer::InitBarriers)) - m_commandSubmission.executeCommandBuffer(cmd.cmdBuffers[uint32_t(DxvkCmdBuffer::InitBarriers)]); + // Submit initialization commands, if any + for (auto cmdBuffer : InitCmdBuffers) { + if (cmd.cmdBuffers[uint32_t(cmdBuffer)]) + m_commandSubmission.executeCommandBuffer(cmd.cmdBuffers[uint32_t(cmdBuffer)]); + } - if (cmd.usedFlags.test(DxvkCmdBuffer::InitBuffer)) - m_commandSubmission.executeCommandBuffer(cmd.cmdBuffers[uint32_t(DxvkCmdBuffer::InitBuffer)]); - - if (cmd.usedFlags.test(DxvkCmdBuffer::ExecBuffer)) + // Only submit the main command buffer if it has actually been used + if (cmd.execCommands) m_commandSubmission.executeCommandBuffer(cmd.cmdBuffers[uint32_t(DxvkCmdBuffer::ExecBuffer)]); if (isLast) { @@ -309,22 +313,23 @@ namespace dxvk { void DxvkCommandList::init() { + // Make sure the main command buffer is initialized since we can + // reasonably expect that to always get used. Saves some checks + // during command recording. m_cmd = DxvkCommandSubmissionInfo(); - - // Grab a fresh set of command buffers from the pools - for (uint32_t i = 0; i < m_cmd.cmdBuffers.size(); i++) - m_cmd.cmdBuffers[i] = allocateCommandBuffer(DxvkCmdBuffer(i)); + m_cmd.cmdBuffers[uint32_t(DxvkCmdBuffer::ExecBuffer)] = allocateCommandBuffer(DxvkCmdBuffer::ExecBuffer); } void DxvkCommandList::finalize() { - if (m_cmdSubmissions.empty() || m_cmd.usedFlags != 0) - m_cmdSubmissions.push_back(m_cmd); + m_cmdSubmissions.push_back(m_cmd); // For consistency, end all command buffers here, // regardless of whether they have been used. - for (uint32_t i = 0; i < m_cmd.cmdBuffers.size(); i++) - endCommandBuffer(m_cmd.cmdBuffers[i]); + for (uint32_t i = 0; i < m_cmd.cmdBuffers.size(); i++) { + if (m_cmd.cmdBuffers[i]) + endCommandBuffer(m_cmd.cmdBuffers[i]); + } // Reset all command buffer handles m_cmd = DxvkCommandSubmissionInfo(); @@ -336,19 +341,32 @@ namespace dxvk { void DxvkCommandList::next() { - if (m_cmd.usedFlags != 0 || m_cmd.sparseBind) - m_cmdSubmissions.push_back(m_cmd); + bool push = m_cmd.sparseBind || m_cmd.execCommands; - // Only replace used command buffer to save resources for (uint32_t i = 0; i < m_cmd.cmdBuffers.size(); i++) { - if (m_cmd.usedFlags.test(DxvkCmdBuffer(i))) { + DxvkCmdBuffer cmdBuffer = DxvkCmdBuffer(i); + + if (cmdBuffer == DxvkCmdBuffer::ExecBuffer && !m_cmd.execCommands) + continue; + + if (m_cmd.cmdBuffers[i]) { endCommandBuffer(m_cmd.cmdBuffers[i]); - m_cmd.cmdBuffers[i] = allocateCommandBuffer(DxvkCmdBuffer(i)); + + m_cmd.cmdBuffers[i] = cmdBuffer == DxvkCmdBuffer::ExecBuffer + ? allocateCommandBuffer(cmdBuffer) + : VK_NULL_HANDLE; + + push = true; } } + if (!push) + return; + + m_cmdSubmissions.push_back(m_cmd); + + m_cmd.execCommands = VK_FALSE; m_cmd.syncSdma = VK_FALSE; - m_cmd.usedFlags = 0; m_cmd.sparseBind = VK_FALSE; } diff --git a/src/dxvk/dxvk_cmdlist.h b/src/dxvk/dxvk_cmdlist.h index b4975bb2d..e7ccb2b24 100644 --- a/src/dxvk/dxvk_cmdlist.h +++ b/src/dxvk/dxvk_cmdlist.h @@ -54,8 +54,6 @@ namespace dxvk { Count }; - using DxvkCmdBufferFlags = Flags; - /** * \brief Queue command submission * @@ -140,9 +138,10 @@ namespace dxvk { * mask of command buffers that were actually used. */ struct DxvkCommandSubmissionInfo { - DxvkCmdBufferFlags usedFlags = 0; - VkBool32 syncSdma = VK_FALSE; - VkBool32 sparseBind = VK_FALSE; + bool execCommands = false; + bool syncSdma = false; + bool sparseBind = false; + bool reserved = false; uint32_t sparseCmd = 0; std::array cmdBuffers = { }; @@ -405,7 +404,7 @@ namespace dxvk { VkQueryPool queryPool, uint32_t query, VkQueryControlFlags flags) { - m_cmd.usedFlags.set(DxvkCmdBuffer::ExecBuffer); + m_cmd.execCommands = true; m_vkd->vkCmdBeginQuery(getCmdBuffer(), queryPool, query, flags); } @@ -416,7 +415,7 @@ namespace dxvk { uint32_t query, VkQueryControlFlags flags, uint32_t index) { - m_cmd.usedFlags.set(DxvkCmdBuffer::ExecBuffer); + m_cmd.execCommands = true; m_vkd->vkCmdBeginQueryIndexedEXT(getCmdBuffer(), queryPool, query, flags, index); @@ -425,7 +424,7 @@ namespace dxvk { void cmdBeginRendering( const VkRenderingInfo* pRenderingInfo) { - m_cmd.usedFlags.set(DxvkCmdBuffer::ExecBuffer); + m_cmd.execCommands = true; m_vkd->vkCmdBeginRendering(getCmdBuffer(), pRenderingInfo); } @@ -521,7 +520,7 @@ namespace dxvk { } void cmdLaunchCuKernel(VkCuLaunchInfoNVX launchInfo) { - m_cmd.usedFlags.set(DxvkCmdBuffer::ExecBuffer); + m_cmd.execCommands = true; m_vkd->vkCmdCuLaunchKernelNVX(getCmdBuffer(), &launchInfo); } @@ -529,7 +528,7 @@ namespace dxvk { void cmdBlitImage( const VkBlitImageInfo2* pBlitInfo) { - m_cmd.usedFlags.set(DxvkCmdBuffer::ExecBuffer); + m_cmd.execCommands = true; m_vkd->vkCmdBlitImage2(getCmdBuffer(), pBlitInfo); } @@ -552,7 +551,7 @@ namespace dxvk { const VkClearColorValue* pColor, uint32_t rangeCount, const VkImageSubresourceRange* pRanges) { - m_cmd.usedFlags.set(cmdBuffer); + m_cmd.execCommands |= cmdBuffer == DxvkCmdBuffer::ExecBuffer; m_vkd->vkCmdClearColorImage(getCmdBuffer(cmdBuffer), image, imageLayout, pColor, @@ -567,7 +566,7 @@ namespace dxvk { const VkClearDepthStencilValue* pDepthStencil, uint32_t rangeCount, const VkImageSubresourceRange* pRanges) { - m_cmd.usedFlags.set(cmdBuffer); + m_cmd.execCommands |= cmdBuffer == DxvkCmdBuffer::ExecBuffer; m_vkd->vkCmdClearDepthStencilImage(getCmdBuffer(cmdBuffer), image, imageLayout, pDepthStencil, @@ -578,7 +577,7 @@ namespace dxvk { void cmdCopyBuffer( DxvkCmdBuffer cmdBuffer, const VkCopyBufferInfo2* copyInfo) { - m_cmd.usedFlags.set(cmdBuffer); + m_cmd.execCommands |= cmdBuffer == DxvkCmdBuffer::ExecBuffer; m_vkd->vkCmdCopyBuffer2(getCmdBuffer(cmdBuffer), copyInfo); } @@ -587,7 +586,7 @@ namespace dxvk { void cmdCopyBufferToImage( DxvkCmdBuffer cmdBuffer, const VkCopyBufferToImageInfo2* copyInfo) { - m_cmd.usedFlags.set(cmdBuffer); + m_cmd.execCommands |= cmdBuffer == DxvkCmdBuffer::ExecBuffer; m_vkd->vkCmdCopyBufferToImage2(getCmdBuffer(cmdBuffer), copyInfo); } @@ -596,7 +595,7 @@ namespace dxvk { void cmdCopyImage( DxvkCmdBuffer cmdBuffer, const VkCopyImageInfo2* copyInfo) { - m_cmd.usedFlags.set(cmdBuffer); + m_cmd.execCommands |= cmdBuffer == DxvkCmdBuffer::ExecBuffer; m_vkd->vkCmdCopyImage2(getCmdBuffer(cmdBuffer), copyInfo); } @@ -605,7 +604,7 @@ namespace dxvk { void cmdCopyImageToBuffer( DxvkCmdBuffer cmdBuffer, const VkCopyImageToBufferInfo2* copyInfo) { - m_cmd.usedFlags.set(cmdBuffer); + m_cmd.execCommands |= cmdBuffer == DxvkCmdBuffer::ExecBuffer; m_vkd->vkCmdCopyImageToBuffer2(getCmdBuffer(cmdBuffer), copyInfo); } @@ -620,7 +619,7 @@ namespace dxvk { VkDeviceSize dstOffset, VkDeviceSize stride, VkQueryResultFlags flags) { - m_cmd.usedFlags.set(cmdBuffer); + m_cmd.execCommands |= cmdBuffer == DxvkCmdBuffer::ExecBuffer; m_vkd->vkCmdCopyQueryPoolResults(getCmdBuffer(cmdBuffer), queryPool, firstQuery, queryCount, @@ -633,7 +632,7 @@ namespace dxvk { uint32_t x, uint32_t y, uint32_t z) { - m_cmd.usedFlags.set(cmdBuffer); + m_cmd.execCommands |= cmdBuffer == DxvkCmdBuffer::ExecBuffer; m_vkd->vkCmdDispatch(getCmdBuffer(cmdBuffer), x, y, z); } @@ -643,7 +642,7 @@ namespace dxvk { DxvkCmdBuffer cmdBuffer, VkBuffer buffer, VkDeviceSize offset) { - m_cmd.usedFlags.set(cmdBuffer); + m_cmd.execCommands |= cmdBuffer == DxvkCmdBuffer::ExecBuffer; m_vkd->vkCmdDispatchIndirect(getCmdBuffer(cmdBuffer), buffer, offset); } @@ -767,7 +766,7 @@ namespace dxvk { VkDeviceSize dstOffset, VkDeviceSize size, uint32_t data) { - m_cmd.usedFlags.set(cmdBuffer); + m_cmd.execCommands |= cmdBuffer == DxvkCmdBuffer::ExecBuffer; m_vkd->vkCmdFillBuffer(getCmdBuffer(cmdBuffer), dstBuffer, dstOffset, size, data); @@ -777,7 +776,7 @@ namespace dxvk { void cmdPipelineBarrier( DxvkCmdBuffer cmdBuffer, const VkDependencyInfo* dependencyInfo) { - m_cmd.usedFlags.set(cmdBuffer); + m_cmd.execCommands |= cmdBuffer == DxvkCmdBuffer::ExecBuffer; m_vkd->vkCmdPipelineBarrier2(getCmdBuffer(cmdBuffer), dependencyInfo); } @@ -800,7 +799,7 @@ namespace dxvk { VkQueryPool queryPool, uint32_t firstQuery, uint32_t queryCount) { - m_cmd.usedFlags.set(cmdBuffer); + m_cmd.execCommands |= cmdBuffer == DxvkCmdBuffer::ExecBuffer; m_vkd->vkCmdResetQueryPool(getCmdBuffer(cmdBuffer), queryPool, firstQuery, queryCount); @@ -809,7 +808,7 @@ namespace dxvk { void cmdResolveImage( const VkResolveImageInfo2* resolveInfo) { - m_cmd.usedFlags.set(DxvkCmdBuffer::ExecBuffer); + m_cmd.execCommands = true; m_vkd->vkCmdResolveImage2(getCmdBuffer(), resolveInfo); } @@ -821,7 +820,7 @@ namespace dxvk { VkDeviceSize dstOffset, VkDeviceSize dataSize, const void* pData) { - m_cmd.usedFlags.set(cmdBuffer); + m_cmd.execCommands |= cmdBuffer == DxvkCmdBuffer::ExecBuffer; m_vkd->vkCmdUpdateBuffer(getCmdBuffer(cmdBuffer), dstBuffer, dstOffset, dataSize, pData); @@ -903,7 +902,7 @@ namespace dxvk { void cmdSetEvent( VkEvent event, const VkDependencyInfo* dependencyInfo) { - m_cmd.usedFlags.set(DxvkCmdBuffer::ExecBuffer); + m_cmd.execCommands = true; m_vkd->vkCmdSetEvent2(getCmdBuffer(), event, dependencyInfo); } @@ -1001,7 +1000,7 @@ namespace dxvk { VkPipelineStageFlagBits2 pipelineStage, VkQueryPool queryPool, uint32_t query) { - m_cmd.usedFlags.set(cmdBuffer); + m_cmd.execCommands |= cmdBuffer == DxvkCmdBuffer::ExecBuffer; m_vkd->vkCmdWriteTimestamp2(getCmdBuffer(cmdBuffer), pipelineStage, queryPool, query); @@ -1010,14 +1009,14 @@ namespace dxvk { void cmdBeginDebugUtilsLabel( VkDebugUtilsLabelEXT* pLabelInfo) { - m_cmd.usedFlags.set(DxvkCmdBuffer::ExecBuffer); + m_cmd.execCommands = true; m_vki->vkCmdBeginDebugUtilsLabelEXT(getCmdBuffer(), pLabelInfo); } void cmdEndDebugUtilsLabel() { - m_cmd.usedFlags.set(DxvkCmdBuffer::ExecBuffer); + m_cmd.execCommands = true; m_vki->vkCmdEndDebugUtilsLabelEXT(getCmdBuffer()); } @@ -1025,7 +1024,7 @@ namespace dxvk { void cmdInsertDebugUtilsLabel( VkDebugUtilsLabelEXT* pLabelInfo) { - m_cmd.usedFlags.set(DxvkCmdBuffer::ExecBuffer); + m_cmd.execCommands = true; m_vki->vkCmdInsertDebugUtilsLabelEXT(getCmdBuffer(), pLabelInfo); } @@ -1104,8 +1103,21 @@ namespace dxvk { std::vector m_pipelines; - force_inline VkCommandBuffer getCmdBuffer(DxvkCmdBuffer cmdBuffer = DxvkCmdBuffer::ExecBuffer) const { - return m_cmd.cmdBuffers[uint32_t(cmdBuffer)]; + force_inline VkCommandBuffer getCmdBuffer() const { + // Allocation logic will always provide an execution buffer + return m_cmd.cmdBuffers[uint32_t(DxvkCmdBuffer::ExecBuffer)]; + } + + force_inline VkCommandBuffer getCmdBuffer(DxvkCmdBuffer cmdBuffer) { + VkCommandBuffer buffer = m_cmd.cmdBuffers[uint32_t(cmdBuffer)]; + + if (likely(cmdBuffer == DxvkCmdBuffer::ExecBuffer || buffer)) + return buffer; + + // Allocate a new command buffer if necessary + buffer = allocateCommandBuffer(cmdBuffer); + m_cmd.cmdBuffers[uint32_t(cmdBuffer)] = buffer; + return buffer; } DxvkSparseBindSubmission& getSparseBindSubmission() {