1
0
mirror of https://github.com/doitsujin/dxvk.git synced 2025-01-20 08:52:22 +01:00

[dxvk] Initialize additional command buffers on demand

The vast majority of submissions only use one or two command buffers
rather than all five, and the main command buffer is the only one that
is almost always actually used. This also saves us a bunch of CPU-side tracking.
This commit is contained in:
Philip Rebohle 2024-10-28 13:43:24 +01:00 committed by Philip Rebohle
parent 34b82a2b5b
commit 7cd8a14673
2 changed files with 87 additions and 57 deletions

View File

@ -185,6 +185,11 @@ namespace dxvk {
DxvkTimelineSemaphoreValues& timelines) {
VkResult status = VK_SUCCESS;
static const std::array<DxvkCmdBuffer, 2> SdmaCmdBuffers =
{ DxvkCmdBuffer::SdmaBarriers, DxvkCmdBuffer::SdmaBuffer };
static const std::array<DxvkCmdBuffer, 2> InitCmdBuffers =
{ DxvkCmdBuffer::InitBarriers, DxvkCmdBuffer::InitBuffer };
const auto& graphics = m_device->queues().graphics;
const auto& transfer = m_device->queues().transfer;
const auto& sparse = m_device->queues().sparse;
@ -225,11 +230,10 @@ namespace dxvk {
}
// Execute transfer command buffer, if any
if (cmd.usedFlags.test(DxvkCmdBuffer::SdmaBarriers))
m_commandSubmission.executeCommandBuffer(cmd.cmdBuffers[uint32_t(DxvkCmdBuffer::SdmaBarriers)]);
if (cmd.usedFlags.test(DxvkCmdBuffer::SdmaBuffer))
m_commandSubmission.executeCommandBuffer(cmd.cmdBuffers[uint32_t(DxvkCmdBuffer::SdmaBuffer)]);
for (auto cmdBuffer : SdmaCmdBuffers) {
if (cmd.cmdBuffers[uint32_t(cmdBuffer)])
m_commandSubmission.executeCommandBuffer(cmd.cmdBuffers[uint32_t(cmdBuffer)]);
}
// If we had either a transfer command or a semaphore wait, submit to the
// transfer queue so that all subsequent commands get stalled as necessary.
@ -251,14 +255,14 @@ namespace dxvk {
0, VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT);
}
// Submit graphics commands
if (cmd.usedFlags.test(DxvkCmdBuffer::InitBarriers))
m_commandSubmission.executeCommandBuffer(cmd.cmdBuffers[uint32_t(DxvkCmdBuffer::InitBarriers)]);
// Submit initialization commands, if any
for (auto cmdBuffer : InitCmdBuffers) {
if (cmd.cmdBuffers[uint32_t(cmdBuffer)])
m_commandSubmission.executeCommandBuffer(cmd.cmdBuffers[uint32_t(cmdBuffer)]);
}
if (cmd.usedFlags.test(DxvkCmdBuffer::InitBuffer))
m_commandSubmission.executeCommandBuffer(cmd.cmdBuffers[uint32_t(DxvkCmdBuffer::InitBuffer)]);
if (cmd.usedFlags.test(DxvkCmdBuffer::ExecBuffer))
// Only submit the main command buffer if it has actually been used
if (cmd.execCommands)
m_commandSubmission.executeCommandBuffer(cmd.cmdBuffers[uint32_t(DxvkCmdBuffer::ExecBuffer)]);
if (isLast) {
@ -309,22 +313,23 @@ namespace dxvk {
void DxvkCommandList::init() {
// Make sure the main command buffer is initialized since we can
// reasonably expect that to always get used. Saves some checks
// during command recording.
m_cmd = DxvkCommandSubmissionInfo();
// Grab a fresh set of command buffers from the pools
for (uint32_t i = 0; i < m_cmd.cmdBuffers.size(); i++)
m_cmd.cmdBuffers[i] = allocateCommandBuffer(DxvkCmdBuffer(i));
m_cmd.cmdBuffers[uint32_t(DxvkCmdBuffer::ExecBuffer)] = allocateCommandBuffer(DxvkCmdBuffer::ExecBuffer);
}
void DxvkCommandList::finalize() {
if (m_cmdSubmissions.empty() || m_cmd.usedFlags != 0)
m_cmdSubmissions.push_back(m_cmd);
m_cmdSubmissions.push_back(m_cmd);
// For consistency, end all command buffers here,
// regardless of whether they have been used.
for (uint32_t i = 0; i < m_cmd.cmdBuffers.size(); i++)
endCommandBuffer(m_cmd.cmdBuffers[i]);
for (uint32_t i = 0; i < m_cmd.cmdBuffers.size(); i++) {
if (m_cmd.cmdBuffers[i])
endCommandBuffer(m_cmd.cmdBuffers[i]);
}
// Reset all command buffer handles
m_cmd = DxvkCommandSubmissionInfo();
@ -336,19 +341,32 @@ namespace dxvk {
void DxvkCommandList::next() {
if (m_cmd.usedFlags != 0 || m_cmd.sparseBind)
m_cmdSubmissions.push_back(m_cmd);
bool push = m_cmd.sparseBind || m_cmd.execCommands;
// Only replace used command buffer to save resources
for (uint32_t i = 0; i < m_cmd.cmdBuffers.size(); i++) {
if (m_cmd.usedFlags.test(DxvkCmdBuffer(i))) {
DxvkCmdBuffer cmdBuffer = DxvkCmdBuffer(i);
if (cmdBuffer == DxvkCmdBuffer::ExecBuffer && !m_cmd.execCommands)
continue;
if (m_cmd.cmdBuffers[i]) {
endCommandBuffer(m_cmd.cmdBuffers[i]);
m_cmd.cmdBuffers[i] = allocateCommandBuffer(DxvkCmdBuffer(i));
m_cmd.cmdBuffers[i] = cmdBuffer == DxvkCmdBuffer::ExecBuffer
? allocateCommandBuffer(cmdBuffer)
: VK_NULL_HANDLE;
push = true;
}
}
if (!push)
return;
m_cmdSubmissions.push_back(m_cmd);
m_cmd.execCommands = VK_FALSE;
m_cmd.syncSdma = VK_FALSE;
m_cmd.usedFlags = 0;
m_cmd.sparseBind = VK_FALSE;
}

View File

@ -54,8 +54,6 @@ namespace dxvk {
Count
};
using DxvkCmdBufferFlags = Flags<DxvkCmdBuffer>;
/**
* \brief Queue command submission
*
@ -140,9 +138,10 @@ namespace dxvk {
* mask of command buffers that were actually used.
*/
struct DxvkCommandSubmissionInfo {
DxvkCmdBufferFlags usedFlags = 0;
VkBool32 syncSdma = VK_FALSE;
VkBool32 sparseBind = VK_FALSE;
bool execCommands = false;
bool syncSdma = false;
bool sparseBind = false;
bool reserved = false;
uint32_t sparseCmd = 0;
std::array<VkCommandBuffer, uint32_t(DxvkCmdBuffer::Count)> cmdBuffers = { };
@ -405,7 +404,7 @@ namespace dxvk {
VkQueryPool queryPool,
uint32_t query,
VkQueryControlFlags flags) {
m_cmd.usedFlags.set(DxvkCmdBuffer::ExecBuffer);
m_cmd.execCommands = true;
m_vkd->vkCmdBeginQuery(getCmdBuffer(), queryPool, query, flags);
}
@ -416,7 +415,7 @@ namespace dxvk {
uint32_t query,
VkQueryControlFlags flags,
uint32_t index) {
m_cmd.usedFlags.set(DxvkCmdBuffer::ExecBuffer);
m_cmd.execCommands = true;
m_vkd->vkCmdBeginQueryIndexedEXT(getCmdBuffer(),
queryPool, query, flags, index);
@ -425,7 +424,7 @@ namespace dxvk {
void cmdBeginRendering(
const VkRenderingInfo* pRenderingInfo) {
m_cmd.usedFlags.set(DxvkCmdBuffer::ExecBuffer);
m_cmd.execCommands = true;
m_vkd->vkCmdBeginRendering(getCmdBuffer(), pRenderingInfo);
}
@ -521,7 +520,7 @@ namespace dxvk {
}
void cmdLaunchCuKernel(VkCuLaunchInfoNVX launchInfo) {
m_cmd.usedFlags.set(DxvkCmdBuffer::ExecBuffer);
m_cmd.execCommands = true;
m_vkd->vkCmdCuLaunchKernelNVX(getCmdBuffer(), &launchInfo);
}
@ -529,7 +528,7 @@ namespace dxvk {
void cmdBlitImage(
const VkBlitImageInfo2* pBlitInfo) {
m_cmd.usedFlags.set(DxvkCmdBuffer::ExecBuffer);
m_cmd.execCommands = true;
m_vkd->vkCmdBlitImage2(getCmdBuffer(), pBlitInfo);
}
@ -552,7 +551,7 @@ namespace dxvk {
const VkClearColorValue* pColor,
uint32_t rangeCount,
const VkImageSubresourceRange* pRanges) {
m_cmd.usedFlags.set(cmdBuffer);
m_cmd.execCommands |= cmdBuffer == DxvkCmdBuffer::ExecBuffer;
m_vkd->vkCmdClearColorImage(getCmdBuffer(cmdBuffer),
image, imageLayout, pColor,
@ -567,7 +566,7 @@ namespace dxvk {
const VkClearDepthStencilValue* pDepthStencil,
uint32_t rangeCount,
const VkImageSubresourceRange* pRanges) {
m_cmd.usedFlags.set(cmdBuffer);
m_cmd.execCommands |= cmdBuffer == DxvkCmdBuffer::ExecBuffer;
m_vkd->vkCmdClearDepthStencilImage(getCmdBuffer(cmdBuffer),
image, imageLayout, pDepthStencil,
@ -578,7 +577,7 @@ namespace dxvk {
void cmdCopyBuffer(
DxvkCmdBuffer cmdBuffer,
const VkCopyBufferInfo2* copyInfo) {
m_cmd.usedFlags.set(cmdBuffer);
m_cmd.execCommands |= cmdBuffer == DxvkCmdBuffer::ExecBuffer;
m_vkd->vkCmdCopyBuffer2(getCmdBuffer(cmdBuffer), copyInfo);
}
@ -587,7 +586,7 @@ namespace dxvk {
void cmdCopyBufferToImage(
DxvkCmdBuffer cmdBuffer,
const VkCopyBufferToImageInfo2* copyInfo) {
m_cmd.usedFlags.set(cmdBuffer);
m_cmd.execCommands |= cmdBuffer == DxvkCmdBuffer::ExecBuffer;
m_vkd->vkCmdCopyBufferToImage2(getCmdBuffer(cmdBuffer), copyInfo);
}
@ -596,7 +595,7 @@ namespace dxvk {
void cmdCopyImage(
DxvkCmdBuffer cmdBuffer,
const VkCopyImageInfo2* copyInfo) {
m_cmd.usedFlags.set(cmdBuffer);
m_cmd.execCommands |= cmdBuffer == DxvkCmdBuffer::ExecBuffer;
m_vkd->vkCmdCopyImage2(getCmdBuffer(cmdBuffer), copyInfo);
}
@ -605,7 +604,7 @@ namespace dxvk {
void cmdCopyImageToBuffer(
DxvkCmdBuffer cmdBuffer,
const VkCopyImageToBufferInfo2* copyInfo) {
m_cmd.usedFlags.set(cmdBuffer);
m_cmd.execCommands |= cmdBuffer == DxvkCmdBuffer::ExecBuffer;
m_vkd->vkCmdCopyImageToBuffer2(getCmdBuffer(cmdBuffer), copyInfo);
}
@ -620,7 +619,7 @@ namespace dxvk {
VkDeviceSize dstOffset,
VkDeviceSize stride,
VkQueryResultFlags flags) {
m_cmd.usedFlags.set(cmdBuffer);
m_cmd.execCommands |= cmdBuffer == DxvkCmdBuffer::ExecBuffer;
m_vkd->vkCmdCopyQueryPoolResults(getCmdBuffer(cmdBuffer),
queryPool, firstQuery, queryCount,
@ -633,7 +632,7 @@ namespace dxvk {
uint32_t x,
uint32_t y,
uint32_t z) {
m_cmd.usedFlags.set(cmdBuffer);
m_cmd.execCommands |= cmdBuffer == DxvkCmdBuffer::ExecBuffer;
m_vkd->vkCmdDispatch(getCmdBuffer(cmdBuffer), x, y, z);
}
@ -643,7 +642,7 @@ namespace dxvk {
DxvkCmdBuffer cmdBuffer,
VkBuffer buffer,
VkDeviceSize offset) {
m_cmd.usedFlags.set(cmdBuffer);
m_cmd.execCommands |= cmdBuffer == DxvkCmdBuffer::ExecBuffer;
m_vkd->vkCmdDispatchIndirect(getCmdBuffer(cmdBuffer), buffer, offset);
}
@ -767,7 +766,7 @@ namespace dxvk {
VkDeviceSize dstOffset,
VkDeviceSize size,
uint32_t data) {
m_cmd.usedFlags.set(cmdBuffer);
m_cmd.execCommands |= cmdBuffer == DxvkCmdBuffer::ExecBuffer;
m_vkd->vkCmdFillBuffer(getCmdBuffer(cmdBuffer),
dstBuffer, dstOffset, size, data);
@ -777,7 +776,7 @@ namespace dxvk {
void cmdPipelineBarrier(
DxvkCmdBuffer cmdBuffer,
const VkDependencyInfo* dependencyInfo) {
m_cmd.usedFlags.set(cmdBuffer);
m_cmd.execCommands |= cmdBuffer == DxvkCmdBuffer::ExecBuffer;
m_vkd->vkCmdPipelineBarrier2(getCmdBuffer(cmdBuffer), dependencyInfo);
}
@ -800,7 +799,7 @@ namespace dxvk {
VkQueryPool queryPool,
uint32_t firstQuery,
uint32_t queryCount) {
m_cmd.usedFlags.set(cmdBuffer);
m_cmd.execCommands |= cmdBuffer == DxvkCmdBuffer::ExecBuffer;
m_vkd->vkCmdResetQueryPool(getCmdBuffer(cmdBuffer),
queryPool, firstQuery, queryCount);
@ -809,7 +808,7 @@ namespace dxvk {
void cmdResolveImage(
const VkResolveImageInfo2* resolveInfo) {
m_cmd.usedFlags.set(DxvkCmdBuffer::ExecBuffer);
m_cmd.execCommands = true;
m_vkd->vkCmdResolveImage2(getCmdBuffer(), resolveInfo);
}
@ -821,7 +820,7 @@ namespace dxvk {
VkDeviceSize dstOffset,
VkDeviceSize dataSize,
const void* pData) {
m_cmd.usedFlags.set(cmdBuffer);
m_cmd.execCommands |= cmdBuffer == DxvkCmdBuffer::ExecBuffer;
m_vkd->vkCmdUpdateBuffer(getCmdBuffer(cmdBuffer),
dstBuffer, dstOffset, dataSize, pData);
@ -903,7 +902,7 @@ namespace dxvk {
void cmdSetEvent(
VkEvent event,
const VkDependencyInfo* dependencyInfo) {
m_cmd.usedFlags.set(DxvkCmdBuffer::ExecBuffer);
m_cmd.execCommands = true;
m_vkd->vkCmdSetEvent2(getCmdBuffer(), event, dependencyInfo);
}
@ -1001,7 +1000,7 @@ namespace dxvk {
VkPipelineStageFlagBits2 pipelineStage,
VkQueryPool queryPool,
uint32_t query) {
m_cmd.usedFlags.set(cmdBuffer);
m_cmd.execCommands |= cmdBuffer == DxvkCmdBuffer::ExecBuffer;
m_vkd->vkCmdWriteTimestamp2(getCmdBuffer(cmdBuffer),
pipelineStage, queryPool, query);
@ -1010,14 +1009,14 @@ namespace dxvk {
void cmdBeginDebugUtilsLabel(
VkDebugUtilsLabelEXT* pLabelInfo) {
m_cmd.usedFlags.set(DxvkCmdBuffer::ExecBuffer);
m_cmd.execCommands = true;
m_vki->vkCmdBeginDebugUtilsLabelEXT(getCmdBuffer(), pLabelInfo);
}
void cmdEndDebugUtilsLabel() {
m_cmd.usedFlags.set(DxvkCmdBuffer::ExecBuffer);
m_cmd.execCommands = true;
m_vki->vkCmdEndDebugUtilsLabelEXT(getCmdBuffer());
}
@ -1025,7 +1024,7 @@ namespace dxvk {
void cmdInsertDebugUtilsLabel(
VkDebugUtilsLabelEXT* pLabelInfo) {
m_cmd.usedFlags.set(DxvkCmdBuffer::ExecBuffer);
m_cmd.execCommands = true;
m_vki->vkCmdInsertDebugUtilsLabelEXT(getCmdBuffer(), pLabelInfo);
}
@ -1104,8 +1103,21 @@ namespace dxvk {
std::vector<DxvkGraphicsPipeline*> m_pipelines;
force_inline VkCommandBuffer getCmdBuffer(DxvkCmdBuffer cmdBuffer = DxvkCmdBuffer::ExecBuffer) const {
return m_cmd.cmdBuffers[uint32_t(cmdBuffer)];
force_inline VkCommandBuffer getCmdBuffer() const {
// Allocation logic will always provide an execution buffer
return m_cmd.cmdBuffers[uint32_t(DxvkCmdBuffer::ExecBuffer)];
}
/**
 * \brief Retrieves the command buffer for a given slot
 *
 * Looks up the handle stored for the given slot in the current
 * submission info. If the slot is not the execution buffer and
 * holds no handle yet, a command buffer is obtained through
 * \c allocateCommandBuffer and stored in the slot before being
 * returned. The execution buffer slot is never allocated here
 * and is returned as-is.
 * \param [in] cmdBuffer Command buffer slot to look up
 * \returns Command buffer handle for that slot
 */
force_inline VkCommandBuffer getCmdBuffer(DxvkCmdBuffer cmdBuffer) {
  const uint32_t slot = uint32_t(cmdBuffer);

  // Fast path: the slot already holds a handle, or it is the
  // execution buffer, which this function does not allocate.
  if (likely(m_cmd.cmdBuffers[slot] || cmdBuffer == DxvkCmdBuffer::ExecBuffer))
    return m_cmd.cmdBuffers[slot];

  // First use of this slot: allocate and remember the handle
  VkCommandBuffer allocated = allocateCommandBuffer(cmdBuffer);
  m_cmd.cmdBuffers[slot] = allocated;
  return allocated;
}
DxvkSparseBindSubmission& getSparseBindSubmission() {