From 0c5773dbb9ed3358a708c930a9d20a31d4ec2404 Mon Sep 17 00:00:00 2001 From: Philip Rebohle Date: Mon, 4 Jul 2022 14:59:03 +0200 Subject: [PATCH] [dxvk] Emit smarter post-renderpass barriers Allows folding the old post-renderpass barrier into the same pipeline barrier call as layout transitions from prepareImage or from starting another render pass, thus reducing the overall number of pipeline barriers. Also no longer uses VK_PIPELINE_STAGE_ALL_COMMANDS_BIT. --- src/dxvk/dxvk_barrier.h | 13 +++ src/dxvk/dxvk_context.cpp | 169 +++++++++++++++++++++++++------------ src/dxvk/dxvk_context.h | 6 +- src/dxvk/dxvk_renderpass.h | 15 ---- 4 files changed, 130 insertions(+), 73 deletions(-) diff --git a/src/dxvk/dxvk_barrier.h b/src/dxvk/dxvk_barrier.h index f7a7713d9..18308d1bf 100644 --- a/src/dxvk/dxvk_barrier.h +++ b/src/dxvk/dxvk_barrier.h @@ -333,6 +333,14 @@ namespace dxvk { m_list.clear(); } + /** + * \brief Checks whether set is empty + * \returns \c true if there are no entries + */ + bool empty() const { + return m_used == 0; + } + private: struct ListEntry { @@ -538,6 +546,11 @@ namespace dxvk { void reset(); + bool hasResourceBarriers() const { + return !m_bufSlices.empty() + || !m_imgSlices.empty(); + } + static DxvkAccessFlags getAccessTypes(VkAccessFlags flags); private: diff --git a/src/dxvk/dxvk_context.cpp b/src/dxvk/dxvk_context.cpp index d09daeaf2..e68583fc7 100644 --- a/src/dxvk/dxvk_context.cpp +++ b/src/dxvk/dxvk_context.cpp @@ -16,7 +16,6 @@ namespace dxvk { m_initBarriers(DxvkCmdBuffer::InitBuffer), m_execAcquires(DxvkCmdBuffer::ExecBuffer), m_execBarriers(DxvkCmdBuffer::ExecBuffer), - m_gfxBarriers (DxvkCmdBuffer::ExecBuffer), m_queryManager(m_common->queryPool()), m_staging (device, StagingBufferSize) { // Init framebuffer info with default render pass in case @@ -37,6 +36,33 @@ namespace dxvk { } m_descriptorManager = new DxvkDescriptorManager(device.ptr(), type); + + // Default destination barriers for graphics pipelines + m_globalRoGraphicsBarrier.stages = m_device->getShaderPipelineStages() + | VK_PIPELINE_STAGE_TRANSFER_BIT + | VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT + | VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT + | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + m_globalRoGraphicsBarrier.access = 0; + + if (m_device->features().extTransformFeedback.transformFeedback) + m_globalRoGraphicsBarrier.stages |= VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT; + + m_globalRwGraphicsBarrier = m_globalRoGraphicsBarrier; + m_globalRwGraphicsBarrier.stages |= VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT + | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; + + m_globalRwGraphicsBarrier.access |= VK_ACCESS_INDIRECT_COMMAND_READ_BIT + | VK_ACCESS_INDEX_READ_BIT + | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT + | VK_ACCESS_UNIFORM_READ_BIT + | VK_ACCESS_SHADER_READ_BIT + | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT + | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT + | VK_ACCESS_TRANSFER_READ_BIT; + + if (m_device->features().extTransformFeedback.transformFeedback) + m_globalRwGraphicsBarrier.access |= VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT; } @@ -4020,15 +4046,15 @@ namespace dxvk { if (suspend) m_flags.set(DxvkContextFlag::GpRenderPassSuspended); else - this->transitionRenderTargetLayouts(m_gfxBarriers, false); + this->transitionRenderTargetLayouts(m_execBarriers, false); - m_gfxBarriers.recordCommands(m_cmd); + m_execBarriers.recordCommands(m_cmd); } else if (!suspend) { // We may end a previously suspended render pass if (m_flags.test(DxvkContextFlag::GpRenderPassSuspended)) { m_flags.clr(DxvkContextFlag::GpRenderPassSuspended); - this->transitionRenderTargetLayouts(m_gfxBarriers, false); - m_gfxBarriers.recordCommands(m_cmd); + this->transitionRenderTargetLayouts(m_execBarriers, false); + m_execBarriers.recordCommands(m_cmd); } // Execute deferred clears if necessary @@ -4123,42 +4149,59 @@ namespace dxvk { const auto& depthAttachment = framebufferInfo.getDepthTarget(); if (depthAttachment.view != nullptr) { - m_execBarriers.accessImage( - depthAttachment.view->image(), - depthAttachment.view->imageSubresources(), - depthAttachment.layout, - VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | - VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, - VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | - VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, - ops.depthOps.storeLayout, - depthAttachment.view->imageInfo().stages, - depthAttachment.view->imageInfo().access); + if (depthAttachment.layout != ops.depthOps.storeLayout) { + m_execBarriers.accessImage( + depthAttachment.view->image(), + depthAttachment.view->imageSubresources(), + depthAttachment.layout, + VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, + ops.depthOps.storeLayout, + depthAttachment.view->imageInfo().stages, + depthAttachment.view->imageInfo().access); + } else { + VkAccessFlags srcAccess = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT; + + if (depthAttachment.layout != VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL) + srcAccess |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + + m_execBarriers.accessMemory( + VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, + srcAccess, + depthAttachment.view->imageInfo().stages, + depthAttachment.view->imageInfo().access); + } } for (uint32_t i = 0; i < MaxNumRenderTargets; i++) { const auto& colorAttachment = framebufferInfo.getColorTarget(i); if (colorAttachment.view != nullptr) { - m_execBarriers.accessImage( - colorAttachment.view->image(), - colorAttachment.view->imageSubresources(), - colorAttachment.layout, - VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, - VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | - VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, - ops.colorOps[i].storeLayout, - colorAttachment.view->imageInfo().stages, - colorAttachment.view->imageInfo().access); + if (colorAttachment.layout != ops.colorOps[i].storeLayout) { + m_execBarriers.accessImage( + colorAttachment.view->image(), + colorAttachment.view->imageSubresources(), + colorAttachment.layout, + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, + ops.colorOps[i].storeLayout, + colorAttachment.view->imageInfo().stages, + colorAttachment.view->imageInfo().access); + } else { + m_execBarriers.accessMemory( + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, + colorAttachment.view->imageInfo().stages, + colorAttachment.view->imageInfo().access); + } } } - m_execBarriers.accessMemory( - ops.barrier.srcStages, - ops.barrier.srcAccess, - ops.barrier.dstStages, - ops.barrier.dstAccess); - // Do not flush barriers here. This is intended since // we pre-record them when binding the framebuffer. } @@ -4252,26 +4295,21 @@ namespace dxvk { void DxvkContext::renderPassUnbindFramebuffer() { m_cmd->cmdEndRendering(); - // TODO Try to get rid of this for performance reasons. - // This only exists to emulate render pass barriers. - m_execBarriers.recordCommands(m_cmd); + // If there are pending layout transitions, execute them immediately + // since the backend expects images to be in the store layout after + // a render pass instance. This is expected to be rare. + if (m_execBarriers.hasResourceBarriers()) + m_execBarriers.recordCommands(m_cmd); } void DxvkContext::resetRenderPassOps( const DxvkRenderTargets& renderTargets, DxvkRenderPassOps& renderPassOps) { - VkAccessFlags access = 0; - if (renderTargets.depth.view != nullptr) { renderPassOps.depthOps = DxvkDepthAttachmentOps { VK_ATTACHMENT_LOAD_OP_LOAD, VK_ATTACHMENT_LOAD_OP_LOAD, renderTargets.depth.layout, renderTargets.depth.layout }; - - access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT; - - if (renderTargets.depth.layout != VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL) - access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; } else { renderPassOps.depthOps = DxvkDepthAttachmentOps { }; } @@ -4282,18 +4320,10 @@ namespace dxvk { VK_ATTACHMENT_LOAD_OP_LOAD, renderTargets.color[i].layout, renderTargets.color[i].layout }; - - access |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT - | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; } else { renderPassOps.colorOps[i] = DxvkColorAttachmentOps { }; } } - - renderPassOps.barrier.srcStages = VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT; - renderPassOps.barrier.srcAccess = access; - renderPassOps.barrier.dstStages = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; - renderPassOps.barrier.dstAccess = access; } @@ -4447,7 +4477,7 @@ namespace dxvk { } - bool DxvkContext::updateGraphicsPipelineState() { + bool DxvkContext::updateGraphicsPipelineState(DxvkGlobalPipelineBarrier srcBarrier) { // Set up vertex buffer strides for active bindings for (uint32_t i = 0; i < m_state.gp.state.il.bindingCount(); i++) { const uint32_t binding = m_state.gp.state.ilBindings[i].binding(); @@ -4487,6 +4517,21 @@ namespace dxvk { VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + // Emit barrier based on pipeline properties, in order to avoid + // accidental write-after-read hazards after the render pass. + DxvkGlobalPipelineBarrier pipelineBarrier = m_state.gp.pipeline->getGlobalBarrier(m_state.gp.state); + srcBarrier.stages |= pipelineBarrier.stages; + srcBarrier.access |= pipelineBarrier.access; + + DxvkAccessFlags access = DxvkBarrierSet::getAccessTypes(srcBarrier.access); + DxvkGlobalPipelineBarrier dstBarrier = access.test(DxvkAccess::Write) + ? m_globalRwGraphicsBarrier + : m_globalRoGraphicsBarrier; + + m_execBarriers.accessMemory( + srcBarrier.stages, srcBarrier.access, + dstBarrier.stages, dstBarrier.access); + m_flags.clr(DxvkContextFlag::GpDirtyPipelineState); return true; } @@ -5148,7 +5193,19 @@ namespace dxvk { this->updateGraphicsShaderResources(); if (m_flags.test(DxvkContextFlag::GpDirtyPipelineState)) { - if (unlikely(!this->updateGraphicsPipelineState())) + DxvkGlobalPipelineBarrier barrier = { }; + + if (Indexed) { + barrier.stages |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; + barrier.access |= VK_ACCESS_INDEX_READ_BIT; + } + + if (Indirect) { + barrier.stages |= VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT; + barrier.access |= VK_ACCESS_INDIRECT_COMMAND_READ_BIT; + } + + if (unlikely(!this->updateGraphicsPipelineState(barrier))) return false; } @@ -5462,14 +5519,14 @@ namespace dxvk { VkPipelineStageFlags stages, VkAccessFlags access) { if constexpr (DoEmit) { - m_gfxBarriers.accessBuffer( + m_execBarriers.accessBuffer( slice.getSliceHandle(), stages, access, slice.bufferInfo().stages, slice.bufferInfo().access); return DxvkAccessFlags(); } else { - return m_gfxBarriers.getBufferAccess(slice.getSliceHandle()); + return m_execBarriers.getBufferAccess(slice.getSliceHandle()); } } @@ -5480,7 +5537,7 @@ namespace dxvk { VkPipelineStageFlags stages, VkAccessFlags access) { if constexpr (DoEmit) { - m_gfxBarriers.accessImage( + m_execBarriers.accessImage( imageView->image(), imageView->imageSubresources(), imageView->imageInfo().layout, @@ -5490,7 +5547,7 @@ namespace dxvk { imageView->imageInfo().access); return DxvkAccessFlags(); } else { - return m_gfxBarriers.getImageAccess( + return m_execBarriers.getImageAccess( imageView->image(), imageView->imageSubresources()); } diff --git a/src/dxvk/dxvk_context.h b/src/dxvk/dxvk_context.h index a6a9c3291..738ecb494 100644 --- a/src/dxvk/dxvk_context.h +++ b/src/dxvk/dxvk_context.h @@ -1137,12 +1137,14 @@ namespace dxvk { DxvkBarrierSet m_initBarriers; DxvkBarrierSet m_execAcquires; DxvkBarrierSet m_execBarriers; - DxvkBarrierSet m_gfxBarriers; DxvkBarrierControlFlags m_barrierControl; DxvkGpuQueryManager m_queryManager; DxvkStagingBuffer m_staging; + DxvkGlobalPipelineBarrier m_globalRoGraphicsBarrier; + DxvkGlobalPipelineBarrier m_globalRwGraphicsBarrier; + DxvkRenderTargetLayouts m_rtLayouts = { }; DxvkBindingSet m_vbTracked; @@ -1314,7 +1316,7 @@ namespace dxvk { void unbindGraphicsPipeline(); bool updateGraphicsPipeline(); - bool updateGraphicsPipelineState(); + bool updateGraphicsPipelineState(DxvkGlobalPipelineBarrier srcBarrier); void invalidateState(); diff --git a/src/dxvk/dxvk_renderpass.h b/src/dxvk/dxvk_renderpass.h index f98b34964..5688946cb 100644 --- a/src/dxvk/dxvk_renderpass.h +++ b/src/dxvk/dxvk_renderpass.h @@ -41,20 +41,6 @@ namespace dxvk { }; - /** - * \brief Render pass barrier - * - * External subpass dependency that is to be - * executed after a render pass has completed. - */ - struct DxvkRenderPassBarrier { - VkPipelineStageFlags srcStages = 0; - VkAccessFlags srcAccess = 0; - VkPipelineStageFlags dstStages = 0; - VkAccessFlags dstAccess = 0; - }; - - /** * \brief Render pass transitions * @@ -63,7 +49,6 @@ namespace dxvk { * from a group of render passes with the same format. */ struct DxvkRenderPassOps { - DxvkRenderPassBarrier barrier; DxvkDepthAttachmentOps depthOps; DxvkColorAttachmentOps colorOps[MaxNumRenderTargets]; };