1
0
mirror of https://github.com/doitsujin/dxvk.git synced 2025-02-02 11:52:10 +01:00

[dxvk] Initialize additional command buffers on demand

The vast majority of submissions use only one or two command buffers
rather than all five, and the main command buffer is the only one that is
almost always used. Initializing the others on demand also saves us a bunch of CPU-side tracking.
This commit is contained in:
Philip Rebohle 2024-10-28 13:43:24 +01:00
parent 3aa5693b97
commit 3ee825c2ef
2 changed files with 87 additions and 57 deletions

View File

@ -185,6 +185,11 @@ namespace dxvk {
DxvkTimelineSemaphoreValues& timelines) { DxvkTimelineSemaphoreValues& timelines) {
VkResult status = VK_SUCCESS; VkResult status = VK_SUCCESS;
static const std::array<DxvkCmdBuffer, 2> SdmaCmdBuffers =
{ DxvkCmdBuffer::SdmaBarriers, DxvkCmdBuffer::SdmaBuffer };
static const std::array<DxvkCmdBuffer, 2> InitCmdBuffers =
{ DxvkCmdBuffer::InitBarriers, DxvkCmdBuffer::InitBuffer };
const auto& graphics = m_device->queues().graphics; const auto& graphics = m_device->queues().graphics;
const auto& transfer = m_device->queues().transfer; const auto& transfer = m_device->queues().transfer;
const auto& sparse = m_device->queues().sparse; const auto& sparse = m_device->queues().sparse;
@ -225,11 +230,10 @@ namespace dxvk {
} }
// Execute transfer command buffer, if any // Execute transfer command buffer, if any
if (cmd.usedFlags.test(DxvkCmdBuffer::SdmaBarriers)) for (auto cmdBuffer : SdmaCmdBuffers) {
m_commandSubmission.executeCommandBuffer(cmd.cmdBuffers[uint32_t(DxvkCmdBuffer::SdmaBarriers)]); if (cmd.cmdBuffers[uint32_t(cmdBuffer)])
m_commandSubmission.executeCommandBuffer(cmd.cmdBuffers[uint32_t(cmdBuffer)]);
if (cmd.usedFlags.test(DxvkCmdBuffer::SdmaBuffer)) }
m_commandSubmission.executeCommandBuffer(cmd.cmdBuffers[uint32_t(DxvkCmdBuffer::SdmaBuffer)]);
// If we had either a transfer command or a semaphore wait, submit to the // If we had either a transfer command or a semaphore wait, submit to the
// transfer queue so that all subsequent commands get stalled as necessary. // transfer queue so that all subsequent commands get stalled as necessary.
@ -251,14 +255,14 @@ namespace dxvk {
0, VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT); 0, VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT);
} }
// Submit graphics commands // Submit initialization commands, if any
if (cmd.usedFlags.test(DxvkCmdBuffer::InitBarriers)) for (auto cmdBuffer : InitCmdBuffers) {
m_commandSubmission.executeCommandBuffer(cmd.cmdBuffers[uint32_t(DxvkCmdBuffer::InitBarriers)]); if (cmd.cmdBuffers[uint32_t(cmdBuffer)])
m_commandSubmission.executeCommandBuffer(cmd.cmdBuffers[uint32_t(cmdBuffer)]);
}
if (cmd.usedFlags.test(DxvkCmdBuffer::InitBuffer)) // Only submit the main command buffer if it has actually been used
m_commandSubmission.executeCommandBuffer(cmd.cmdBuffers[uint32_t(DxvkCmdBuffer::InitBuffer)]); if (cmd.execCommands)
if (cmd.usedFlags.test(DxvkCmdBuffer::ExecBuffer))
m_commandSubmission.executeCommandBuffer(cmd.cmdBuffers[uint32_t(DxvkCmdBuffer::ExecBuffer)]); m_commandSubmission.executeCommandBuffer(cmd.cmdBuffers[uint32_t(DxvkCmdBuffer::ExecBuffer)]);
if (isLast) { if (isLast) {
@ -309,22 +313,23 @@ namespace dxvk {
void DxvkCommandList::init() { void DxvkCommandList::init() {
// Make sure the main command buffer is initialized since we can
// reasonably expect that to always get used. Saves some checks
// during command recording.
m_cmd = DxvkCommandSubmissionInfo(); m_cmd = DxvkCommandSubmissionInfo();
m_cmd.cmdBuffers[uint32_t(DxvkCmdBuffer::ExecBuffer)] = allocateCommandBuffer(DxvkCmdBuffer::ExecBuffer);
// Grab a fresh set of command buffers from the pools
for (uint32_t i = 0; i < m_cmd.cmdBuffers.size(); i++)
m_cmd.cmdBuffers[i] = allocateCommandBuffer(DxvkCmdBuffer(i));
} }
void DxvkCommandList::finalize() { void DxvkCommandList::finalize() {
if (m_cmdSubmissions.empty() || m_cmd.usedFlags != 0) m_cmdSubmissions.push_back(m_cmd);
m_cmdSubmissions.push_back(m_cmd);
// For consistency, end all command buffers here, // For consistency, end all command buffers here,
// regardless of whether they have been used. // regardless of whether they have been used.
for (uint32_t i = 0; i < m_cmd.cmdBuffers.size(); i++) for (uint32_t i = 0; i < m_cmd.cmdBuffers.size(); i++) {
endCommandBuffer(m_cmd.cmdBuffers[i]); if (m_cmd.cmdBuffers[i])
endCommandBuffer(m_cmd.cmdBuffers[i]);
}
// Reset all command buffer handles // Reset all command buffer handles
m_cmd = DxvkCommandSubmissionInfo(); m_cmd = DxvkCommandSubmissionInfo();
@ -336,19 +341,32 @@ namespace dxvk {
void DxvkCommandList::next() { void DxvkCommandList::next() {
if (m_cmd.usedFlags != 0 || m_cmd.sparseBind) bool push = m_cmd.sparseBind || m_cmd.execCommands;
m_cmdSubmissions.push_back(m_cmd);
// Only replace used command buffer to save resources
for (uint32_t i = 0; i < m_cmd.cmdBuffers.size(); i++) { for (uint32_t i = 0; i < m_cmd.cmdBuffers.size(); i++) {
if (m_cmd.usedFlags.test(DxvkCmdBuffer(i))) { DxvkCmdBuffer cmdBuffer = DxvkCmdBuffer(i);
if (cmdBuffer == DxvkCmdBuffer::ExecBuffer && !m_cmd.execCommands)
continue;
if (m_cmd.cmdBuffers[i]) {
endCommandBuffer(m_cmd.cmdBuffers[i]); endCommandBuffer(m_cmd.cmdBuffers[i]);
m_cmd.cmdBuffers[i] = allocateCommandBuffer(DxvkCmdBuffer(i));
m_cmd.cmdBuffers[i] = cmdBuffer == DxvkCmdBuffer::ExecBuffer
? allocateCommandBuffer(cmdBuffer)
: VK_NULL_HANDLE;
push = true;
} }
} }
if (!push)
return;
m_cmdSubmissions.push_back(m_cmd);
m_cmd.execCommands = VK_FALSE;
m_cmd.syncSdma = VK_FALSE; m_cmd.syncSdma = VK_FALSE;
m_cmd.usedFlags = 0;
m_cmd.sparseBind = VK_FALSE; m_cmd.sparseBind = VK_FALSE;
} }

View File

@ -54,8 +54,6 @@ namespace dxvk {
Count Count
}; };
using DxvkCmdBufferFlags = Flags<DxvkCmdBuffer>;
/** /**
* \brief Queue command submission * \brief Queue command submission
* *
@ -140,9 +138,10 @@ namespace dxvk {
* mask of command buffers that were actually used. * mask of command buffers that were actually used.
*/ */
struct DxvkCommandSubmissionInfo { struct DxvkCommandSubmissionInfo {
DxvkCmdBufferFlags usedFlags = 0; bool execCommands = false;
VkBool32 syncSdma = VK_FALSE; bool syncSdma = false;
VkBool32 sparseBind = VK_FALSE; bool sparseBind = false;
bool reserved = false;
uint32_t sparseCmd = 0; uint32_t sparseCmd = 0;
std::array<VkCommandBuffer, uint32_t(DxvkCmdBuffer::Count)> cmdBuffers = { }; std::array<VkCommandBuffer, uint32_t(DxvkCmdBuffer::Count)> cmdBuffers = { };
@ -405,7 +404,7 @@ namespace dxvk {
VkQueryPool queryPool, VkQueryPool queryPool,
uint32_t query, uint32_t query,
VkQueryControlFlags flags) { VkQueryControlFlags flags) {
m_cmd.usedFlags.set(DxvkCmdBuffer::ExecBuffer); m_cmd.execCommands = true;
m_vkd->vkCmdBeginQuery(getCmdBuffer(), queryPool, query, flags); m_vkd->vkCmdBeginQuery(getCmdBuffer(), queryPool, query, flags);
} }
@ -416,7 +415,7 @@ namespace dxvk {
uint32_t query, uint32_t query,
VkQueryControlFlags flags, VkQueryControlFlags flags,
uint32_t index) { uint32_t index) {
m_cmd.usedFlags.set(DxvkCmdBuffer::ExecBuffer); m_cmd.execCommands = true;
m_vkd->vkCmdBeginQueryIndexedEXT(getCmdBuffer(), m_vkd->vkCmdBeginQueryIndexedEXT(getCmdBuffer(),
queryPool, query, flags, index); queryPool, query, flags, index);
@ -425,7 +424,7 @@ namespace dxvk {
void cmdBeginRendering( void cmdBeginRendering(
const VkRenderingInfo* pRenderingInfo) { const VkRenderingInfo* pRenderingInfo) {
m_cmd.usedFlags.set(DxvkCmdBuffer::ExecBuffer); m_cmd.execCommands = true;
m_vkd->vkCmdBeginRendering(getCmdBuffer(), pRenderingInfo); m_vkd->vkCmdBeginRendering(getCmdBuffer(), pRenderingInfo);
} }
@ -521,7 +520,7 @@ namespace dxvk {
} }
void cmdLaunchCuKernel(VkCuLaunchInfoNVX launchInfo) { void cmdLaunchCuKernel(VkCuLaunchInfoNVX launchInfo) {
m_cmd.usedFlags.set(DxvkCmdBuffer::ExecBuffer); m_cmd.execCommands = true;
m_vkd->vkCmdCuLaunchKernelNVX(getCmdBuffer(), &launchInfo); m_vkd->vkCmdCuLaunchKernelNVX(getCmdBuffer(), &launchInfo);
} }
@ -529,7 +528,7 @@ namespace dxvk {
void cmdBlitImage( void cmdBlitImage(
const VkBlitImageInfo2* pBlitInfo) { const VkBlitImageInfo2* pBlitInfo) {
m_cmd.usedFlags.set(DxvkCmdBuffer::ExecBuffer); m_cmd.execCommands = true;
m_vkd->vkCmdBlitImage2(getCmdBuffer(), pBlitInfo); m_vkd->vkCmdBlitImage2(getCmdBuffer(), pBlitInfo);
} }
@ -552,7 +551,7 @@ namespace dxvk {
const VkClearColorValue* pColor, const VkClearColorValue* pColor,
uint32_t rangeCount, uint32_t rangeCount,
const VkImageSubresourceRange* pRanges) { const VkImageSubresourceRange* pRanges) {
m_cmd.usedFlags.set(cmdBuffer); m_cmd.execCommands |= cmdBuffer == DxvkCmdBuffer::ExecBuffer;
m_vkd->vkCmdClearColorImage(getCmdBuffer(cmdBuffer), m_vkd->vkCmdClearColorImage(getCmdBuffer(cmdBuffer),
image, imageLayout, pColor, image, imageLayout, pColor,
@ -567,7 +566,7 @@ namespace dxvk {
const VkClearDepthStencilValue* pDepthStencil, const VkClearDepthStencilValue* pDepthStencil,
uint32_t rangeCount, uint32_t rangeCount,
const VkImageSubresourceRange* pRanges) { const VkImageSubresourceRange* pRanges) {
m_cmd.usedFlags.set(cmdBuffer); m_cmd.execCommands |= cmdBuffer == DxvkCmdBuffer::ExecBuffer;
m_vkd->vkCmdClearDepthStencilImage(getCmdBuffer(cmdBuffer), m_vkd->vkCmdClearDepthStencilImage(getCmdBuffer(cmdBuffer),
image, imageLayout, pDepthStencil, image, imageLayout, pDepthStencil,
@ -578,7 +577,7 @@ namespace dxvk {
void cmdCopyBuffer( void cmdCopyBuffer(
DxvkCmdBuffer cmdBuffer, DxvkCmdBuffer cmdBuffer,
const VkCopyBufferInfo2* copyInfo) { const VkCopyBufferInfo2* copyInfo) {
m_cmd.usedFlags.set(cmdBuffer); m_cmd.execCommands |= cmdBuffer == DxvkCmdBuffer::ExecBuffer;
m_vkd->vkCmdCopyBuffer2(getCmdBuffer(cmdBuffer), copyInfo); m_vkd->vkCmdCopyBuffer2(getCmdBuffer(cmdBuffer), copyInfo);
} }
@ -587,7 +586,7 @@ namespace dxvk {
void cmdCopyBufferToImage( void cmdCopyBufferToImage(
DxvkCmdBuffer cmdBuffer, DxvkCmdBuffer cmdBuffer,
const VkCopyBufferToImageInfo2* copyInfo) { const VkCopyBufferToImageInfo2* copyInfo) {
m_cmd.usedFlags.set(cmdBuffer); m_cmd.execCommands |= cmdBuffer == DxvkCmdBuffer::ExecBuffer;
m_vkd->vkCmdCopyBufferToImage2(getCmdBuffer(cmdBuffer), copyInfo); m_vkd->vkCmdCopyBufferToImage2(getCmdBuffer(cmdBuffer), copyInfo);
} }
@ -596,7 +595,7 @@ namespace dxvk {
void cmdCopyImage( void cmdCopyImage(
DxvkCmdBuffer cmdBuffer, DxvkCmdBuffer cmdBuffer,
const VkCopyImageInfo2* copyInfo) { const VkCopyImageInfo2* copyInfo) {
m_cmd.usedFlags.set(cmdBuffer); m_cmd.execCommands |= cmdBuffer == DxvkCmdBuffer::ExecBuffer;
m_vkd->vkCmdCopyImage2(getCmdBuffer(cmdBuffer), copyInfo); m_vkd->vkCmdCopyImage2(getCmdBuffer(cmdBuffer), copyInfo);
} }
@ -605,7 +604,7 @@ namespace dxvk {
void cmdCopyImageToBuffer( void cmdCopyImageToBuffer(
DxvkCmdBuffer cmdBuffer, DxvkCmdBuffer cmdBuffer,
const VkCopyImageToBufferInfo2* copyInfo) { const VkCopyImageToBufferInfo2* copyInfo) {
m_cmd.usedFlags.set(cmdBuffer); m_cmd.execCommands |= cmdBuffer == DxvkCmdBuffer::ExecBuffer;
m_vkd->vkCmdCopyImageToBuffer2(getCmdBuffer(cmdBuffer), copyInfo); m_vkd->vkCmdCopyImageToBuffer2(getCmdBuffer(cmdBuffer), copyInfo);
} }
@ -620,7 +619,7 @@ namespace dxvk {
VkDeviceSize dstOffset, VkDeviceSize dstOffset,
VkDeviceSize stride, VkDeviceSize stride,
VkQueryResultFlags flags) { VkQueryResultFlags flags) {
m_cmd.usedFlags.set(cmdBuffer); m_cmd.execCommands |= cmdBuffer == DxvkCmdBuffer::ExecBuffer;
m_vkd->vkCmdCopyQueryPoolResults(getCmdBuffer(cmdBuffer), m_vkd->vkCmdCopyQueryPoolResults(getCmdBuffer(cmdBuffer),
queryPool, firstQuery, queryCount, queryPool, firstQuery, queryCount,
@ -633,7 +632,7 @@ namespace dxvk {
uint32_t x, uint32_t x,
uint32_t y, uint32_t y,
uint32_t z) { uint32_t z) {
m_cmd.usedFlags.set(cmdBuffer); m_cmd.execCommands |= cmdBuffer == DxvkCmdBuffer::ExecBuffer;
m_vkd->vkCmdDispatch(getCmdBuffer(cmdBuffer), x, y, z); m_vkd->vkCmdDispatch(getCmdBuffer(cmdBuffer), x, y, z);
} }
@ -643,7 +642,7 @@ namespace dxvk {
DxvkCmdBuffer cmdBuffer, DxvkCmdBuffer cmdBuffer,
VkBuffer buffer, VkBuffer buffer,
VkDeviceSize offset) { VkDeviceSize offset) {
m_cmd.usedFlags.set(cmdBuffer); m_cmd.execCommands |= cmdBuffer == DxvkCmdBuffer::ExecBuffer;
m_vkd->vkCmdDispatchIndirect(getCmdBuffer(cmdBuffer), buffer, offset); m_vkd->vkCmdDispatchIndirect(getCmdBuffer(cmdBuffer), buffer, offset);
} }
@ -767,7 +766,7 @@ namespace dxvk {
VkDeviceSize dstOffset, VkDeviceSize dstOffset,
VkDeviceSize size, VkDeviceSize size,
uint32_t data) { uint32_t data) {
m_cmd.usedFlags.set(cmdBuffer); m_cmd.execCommands |= cmdBuffer == DxvkCmdBuffer::ExecBuffer;
m_vkd->vkCmdFillBuffer(getCmdBuffer(cmdBuffer), m_vkd->vkCmdFillBuffer(getCmdBuffer(cmdBuffer),
dstBuffer, dstOffset, size, data); dstBuffer, dstOffset, size, data);
@ -777,7 +776,7 @@ namespace dxvk {
void cmdPipelineBarrier( void cmdPipelineBarrier(
DxvkCmdBuffer cmdBuffer, DxvkCmdBuffer cmdBuffer,
const VkDependencyInfo* dependencyInfo) { const VkDependencyInfo* dependencyInfo) {
m_cmd.usedFlags.set(cmdBuffer); m_cmd.execCommands |= cmdBuffer == DxvkCmdBuffer::ExecBuffer;
m_vkd->vkCmdPipelineBarrier2(getCmdBuffer(cmdBuffer), dependencyInfo); m_vkd->vkCmdPipelineBarrier2(getCmdBuffer(cmdBuffer), dependencyInfo);
} }
@ -800,7 +799,7 @@ namespace dxvk {
VkQueryPool queryPool, VkQueryPool queryPool,
uint32_t firstQuery, uint32_t firstQuery,
uint32_t queryCount) { uint32_t queryCount) {
m_cmd.usedFlags.set(cmdBuffer); m_cmd.execCommands |= cmdBuffer == DxvkCmdBuffer::ExecBuffer;
m_vkd->vkCmdResetQueryPool(getCmdBuffer(cmdBuffer), m_vkd->vkCmdResetQueryPool(getCmdBuffer(cmdBuffer),
queryPool, firstQuery, queryCount); queryPool, firstQuery, queryCount);
@ -809,7 +808,7 @@ namespace dxvk {
void cmdResolveImage( void cmdResolveImage(
const VkResolveImageInfo2* resolveInfo) { const VkResolveImageInfo2* resolveInfo) {
m_cmd.usedFlags.set(DxvkCmdBuffer::ExecBuffer); m_cmd.execCommands = true;
m_vkd->vkCmdResolveImage2(getCmdBuffer(), resolveInfo); m_vkd->vkCmdResolveImage2(getCmdBuffer(), resolveInfo);
} }
@ -821,7 +820,7 @@ namespace dxvk {
VkDeviceSize dstOffset, VkDeviceSize dstOffset,
VkDeviceSize dataSize, VkDeviceSize dataSize,
const void* pData) { const void* pData) {
m_cmd.usedFlags.set(cmdBuffer); m_cmd.execCommands |= cmdBuffer == DxvkCmdBuffer::ExecBuffer;
m_vkd->vkCmdUpdateBuffer(getCmdBuffer(cmdBuffer), m_vkd->vkCmdUpdateBuffer(getCmdBuffer(cmdBuffer),
dstBuffer, dstOffset, dataSize, pData); dstBuffer, dstOffset, dataSize, pData);
@ -903,7 +902,7 @@ namespace dxvk {
void cmdSetEvent( void cmdSetEvent(
VkEvent event, VkEvent event,
const VkDependencyInfo* dependencyInfo) { const VkDependencyInfo* dependencyInfo) {
m_cmd.usedFlags.set(DxvkCmdBuffer::ExecBuffer); m_cmd.execCommands = true;
m_vkd->vkCmdSetEvent2(getCmdBuffer(), event, dependencyInfo); m_vkd->vkCmdSetEvent2(getCmdBuffer(), event, dependencyInfo);
} }
@ -1001,7 +1000,7 @@ namespace dxvk {
VkPipelineStageFlagBits2 pipelineStage, VkPipelineStageFlagBits2 pipelineStage,
VkQueryPool queryPool, VkQueryPool queryPool,
uint32_t query) { uint32_t query) {
m_cmd.usedFlags.set(cmdBuffer); m_cmd.execCommands |= cmdBuffer == DxvkCmdBuffer::ExecBuffer;
m_vkd->vkCmdWriteTimestamp2(getCmdBuffer(cmdBuffer), m_vkd->vkCmdWriteTimestamp2(getCmdBuffer(cmdBuffer),
pipelineStage, queryPool, query); pipelineStage, queryPool, query);
@ -1010,14 +1009,14 @@ namespace dxvk {
void cmdBeginDebugUtilsLabel( void cmdBeginDebugUtilsLabel(
VkDebugUtilsLabelEXT* pLabelInfo) { VkDebugUtilsLabelEXT* pLabelInfo) {
m_cmd.usedFlags.set(DxvkCmdBuffer::ExecBuffer); m_cmd.execCommands = true;
m_vki->vkCmdBeginDebugUtilsLabelEXT(getCmdBuffer(), pLabelInfo); m_vki->vkCmdBeginDebugUtilsLabelEXT(getCmdBuffer(), pLabelInfo);
} }
void cmdEndDebugUtilsLabel() { void cmdEndDebugUtilsLabel() {
m_cmd.usedFlags.set(DxvkCmdBuffer::ExecBuffer); m_cmd.execCommands = true;
m_vki->vkCmdEndDebugUtilsLabelEXT(getCmdBuffer()); m_vki->vkCmdEndDebugUtilsLabelEXT(getCmdBuffer());
} }
@ -1025,7 +1024,7 @@ namespace dxvk {
void cmdInsertDebugUtilsLabel( void cmdInsertDebugUtilsLabel(
VkDebugUtilsLabelEXT* pLabelInfo) { VkDebugUtilsLabelEXT* pLabelInfo) {
m_cmd.usedFlags.set(DxvkCmdBuffer::ExecBuffer); m_cmd.execCommands = true;
m_vki->vkCmdInsertDebugUtilsLabelEXT(getCmdBuffer(), pLabelInfo); m_vki->vkCmdInsertDebugUtilsLabelEXT(getCmdBuffer(), pLabelInfo);
} }
@ -1104,8 +1103,21 @@ namespace dxvk {
std::vector<DxvkGraphicsPipeline*> m_pipelines; std::vector<DxvkGraphicsPipeline*> m_pipelines;
force_inline VkCommandBuffer getCmdBuffer(DxvkCmdBuffer cmdBuffer = DxvkCmdBuffer::ExecBuffer) const { force_inline VkCommandBuffer getCmdBuffer() const {
return m_cmd.cmdBuffers[uint32_t(cmdBuffer)]; // Allocation logic will always provide an execution buffer
return m_cmd.cmdBuffers[uint32_t(DxvkCmdBuffer::ExecBuffer)];
}
force_inline VkCommandBuffer getCmdBuffer(DxvkCmdBuffer cmdBuffer) {
VkCommandBuffer buffer = m_cmd.cmdBuffers[uint32_t(cmdBuffer)];
if (likely(cmdBuffer == DxvkCmdBuffer::ExecBuffer || buffer))
return buffer;
// Allocate a new command buffer if necessary
buffer = allocateCommandBuffer(cmdBuffer);
m_cmd.cmdBuffers[uint32_t(cmdBuffer)] = buffer;
return buffer;
} }
DxvkSparseBindSubmission& getSparseBindSubmission() { DxvkSparseBindSubmission& getSparseBindSubmission() {