From ec4fc06bb3b3f00b7c53ac2eca89a640a59cb9b5 Mon Sep 17 00:00:00 2001 From: Philip Rebohle Date: Sun, 16 Feb 2025 18:13:09 +0100 Subject: [PATCH] [dxvk,d3d11] Improve explicit UAV overlap behaviour If the app explicitly enables UAV overlap, don't synchronize back-to-back read-modify-write operations to the same UAV either. --- src/d3d11/d3d11_context_ext.cpp | 6 ++-- src/d3d11/d3d11_device.h | 6 ++-- src/dxvk/dxvk_context.cpp | 27 +++++++++++++++++- src/dxvk/dxvk_context.h | 49 ++++++++++++++++++++------------- src/dxvk/dxvk_context_state.h | 9 ++++-- 5 files changed, 70 insertions(+), 27 deletions(-) diff --git a/src/d3d11/d3d11_context_ext.cpp b/src/d3d11/d3d11_context_ext.cpp index aac32b887..051610167 100644 --- a/src/d3d11/d3d11_context_ext.cpp +++ b/src/d3d11/d3d11_context_ext.cpp @@ -146,8 +146,10 @@ namespace dxvk { D3D11Device* parent = static_cast<D3D11Device*>(m_ctx->GetParentInterface()); DxvkBarrierControlFlags flags = parent->GetOptionsBarrierControlFlags(); - if (ControlFlags & D3D11_VK_BARRIER_CONTROL_IGNORE_WRITE_AFTER_WRITE) - flags.set(DxvkBarrierControl::IgnoreComputeWriteAfterWrite, DxvkBarrierControl::IgnoreGraphicsWriteAfterWrite); + if (ControlFlags & D3D11_VK_BARRIER_CONTROL_IGNORE_WRITE_AFTER_WRITE) { + flags.set(DxvkBarrierControl::ComputeAllowReadWriteOverlap, + DxvkBarrierControl::GraphicsAllowReadWriteOverlap); + } m_ctx->EmitCs([cFlags = flags] (DxvkContext* ctx) { ctx->setBarrierControl(cFlags); diff --git a/src/d3d11/d3d11_device.h b/src/d3d11/d3d11_device.h index 27d1900a4..041f758a3 100644 --- a/src/d3d11/d3d11_device.h +++ b/src/d3d11/d3d11_device.h @@ -472,13 +472,13 @@ namespace dxvk { const Rc& Adapter); DxvkBarrierControlFlags GetOptionsBarrierControlFlags() { - DxvkBarrierControlFlags barrierControl; + DxvkBarrierControlFlags barrierControl = 0u; if (m_d3d11Options.relaxedBarriers) - barrierControl.set(DxvkBarrierControl::IgnoreComputeWriteAfterWrite); + barrierControl.set(DxvkBarrierControl::ComputeAllowWriteOnlyOverlap); if
(m_d3d11Options.relaxedBarriers || m_d3d11Options.relaxedGraphicsBarriers) - barrierControl.set(DxvkBarrierControl::IgnoreGraphicsWriteAfterWrite); + barrierControl.set(DxvkBarrierControl::GraphicsAllowReadWriteOverlap); return barrierControl; } diff --git a/src/dxvk/dxvk_context.cpp b/src/dxvk/dxvk_context.cpp index 95aa05d4e..8a22d764c 100644 --- a/src/dxvk/dxvk_context.cpp +++ b/src/dxvk/dxvk_context.cpp @@ -2623,6 +2623,20 @@ namespace dxvk { void DxvkContext::setBarrierControl(DxvkBarrierControlFlags control) { + // If any currently relevant control flags change, play it safe and force + // a barrier the next time we encounter a write-after-write hazard, even + // if the same set of flags is restored by that time. Only check graphics + // flags inside a render pass to avoid performance regressions when an + // application uses this feature but we already have an app profile. + // Barriers get flushed when beginning or ending a render pass anyway. + DxvkBarrierControlFlags mask = m_flags.test(DxvkContextFlag::GpRenderPassBound) + ? 
DxvkBarrierControlFlags(DxvkBarrierControl::GraphicsAllowReadWriteOverlap) + : DxvkBarrierControlFlags(DxvkBarrierControl::ComputeAllowReadWriteOverlap, + DxvkBarrierControl::ComputeAllowWriteOnlyOverlap); + + if (!((m_barrierControl ^ control) & mask).isClear()) + m_flags.set(DxvkContextFlag::ForceWriteAfterWriteSync); + m_barrierControl = control; } @@ -3740,12 +3754,14 @@ namespace dxvk { vk::makeSubresourceRange(imageSubresource), imageLayout, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, VK_ACCESS_2_SHADER_READ_BIT); + m_flags.set(DxvkContextFlag::ForceWriteAfterWriteSync); + if (unlikely(m_features.test(DxvkContextFeature::DebugUtils))) m_cmd->cmdEndDebugUtilsLabel(DxvkCmdBuffer::ExecBuffer); m_cmd->track(buffer, DxvkAccess::Write); m_cmd->track(image, DxvkAccess::Read); - } +} void DxvkContext::clearImageViewFb( @@ -3951,6 +3967,9 @@ namespace dxvk { VK_IMAGE_LAYOUT_GENERAL, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, VK_ACCESS_2_SHADER_WRITE_BIT); + if (cmdBuffer == DxvkCmdBuffer::ExecBuffer) + m_flags.set(DxvkContextFlag::ForceWriteAfterWriteSync); + if (unlikely(m_features.test(DxvkContextFeature::DebugUtils))) m_cmd->cmdEndDebugUtilsLabel(cmdBuffer); @@ -5528,6 +5547,10 @@ namespace dxvk { ctrOffsets[i] = physSlice.offset; if (physSlice.handle) { + // Just in case someone is mad enough to write to a + // transform feedback buffer from a shader as well + m_flags.set(DxvkContextFlag::ForceWriteAfterWriteSync); + accessBuffer(DxvkCmdBuffer::ExecBuffer, m_state.xfb.activeCounters[i], VK_PIPELINE_STAGE_2_TRANSFORM_FEEDBACK_BIT_EXT, VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT | @@ -7923,6 +7946,8 @@ namespace dxvk { void DxvkContext::flushBarriers() { m_execBarriers.flush(m_cmd); m_barrierTracker.clear(); + + m_flags.clr(DxvkContextFlag::ForceWriteAfterWriteSync); } diff --git a/src/dxvk/dxvk_context.h b/src/dxvk/dxvk_context.h index a3aba78d2..bca3eac77 100644 --- a/src/dxvk/dxvk_context.h +++ b/src/dxvk/dxvk_context.h @@ -1785,23 +1785,33 @@ namespace dxvk { 
VkAccessFlags access); template<VkPipelineBindPoint BindPoint> - bool canIgnoreWawHazards() { - constexpr auto controlFlag = BindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS - ? DxvkBarrierControl::IgnoreGraphicsWriteAfterWrite - : DxvkBarrierControl::IgnoreComputeWriteAfterWrite; + DxvkAccessFlags getAllowedStorageHazards() { + if (m_barrierControl.isClear() || m_flags.test(DxvkContextFlag::ForceWriteAfterWriteSync)) + return DxvkAccessFlags(); - if (!m_barrierControl.test(controlFlag)) - return false; - - if (BindPoint == VK_PIPELINE_BIND_POINT_COMPUTE) { + if constexpr (BindPoint == VK_PIPELINE_BIND_POINT_COMPUTE) { + // If there are any pending accesses that are not directly related + // to shader dispatches, always insert a barrier if there is a hazard. VkPipelineStageFlags2 stageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT; - return !m_execBarriers.hasPendingStages(~stageMask); + + if (!m_execBarriers.hasPendingStages(~stageMask)) { + if (m_barrierControl.test(DxvkBarrierControl::ComputeAllowReadWriteOverlap)) + return DxvkAccessFlags(DxvkAccess::Write, DxvkAccess::Read); + else if (m_barrierControl.test(DxvkBarrierControl::ComputeAllowWriteOnlyOverlap)) + return DxvkAccessFlags(DxvkAccess::Write); + } + } else { + // For graphics, the only type of unrelated access we have to worry about + // is transform feedback writes, in which case inserting a barrier is fine. + if (m_barrierControl.test(DxvkBarrierControl::GraphicsAllowReadWriteOverlap)) + return DxvkAccessFlags(DxvkAccess::Write, DxvkAccess::Read); } - return true; + return DxvkAccessFlags(); } + void emitMemoryBarrier( VkPipelineStageFlags srcStages, VkAccessFlags srcAccess, @@ -2039,18 +2049,17 @@ namespace dxvk { if (hasPendingWrite) { // If there is a write-after-write hazard and synchronization // for those is not explicitly disabled, insert a barrier.
- if (!canIgnoreWawHazards<BindPoint>()) + DxvkAccessFlags allowedHazards = getAllowedStorageHazards<BindPoint>(); + + if (!allowedHazards.test(DxvkAccess::Write)) return true; - // If write-after-write checking is disabled and we're on graphics, - // be aggressive about avoiding barriers and ignore any reads if we - // do find a write-after-write hazard. This essentially assumes that - // back-to-back read-modify-write operations are safe, but will still - // consider read-only or transform feedback operations as unsafe. - if (BindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS) - return !(access & VK_ACCESS_SHADER_WRITE_BIT); + // Skip barrier if overlapping read-modify-write ops are allowed. + // This includes shader atomics, but also non-atomic load-stores. + if (allowedHazards.test(DxvkAccess::Read)) + return false; - // On compute, if we are reading the resource, add a barrier. + // Otherwise, check if there is a read-after-write hazard. if (access & vk::AccessReadMask) return true; } @@ -2059,6 +2068,8 @@ namespace dxvk { return pred(DxvkAccess::Read); } + void invalidateWriteAfterWriteTracking(); + void beginRenderPassDebugRegion(); void beginInternalDebugRegion( diff --git a/src/dxvk/dxvk_context_state.h b/src/dxvk/dxvk_context_state.h index ba39e14fa..f91357e1e 100644 --- a/src/dxvk/dxvk_context_state.h +++ b/src/dxvk/dxvk_context_state.h @@ -57,6 +57,8 @@ namespace dxvk { DirtyDrawBuffer, ///< Indirect argument buffer is dirty DirtyPushConstants, ///< Push constant data has changed + ForceWriteAfterWriteSync, ///< Ignores barrier control flags for write-after-write hazards + Count }; @@ -86,8 +88,11 @@ namespace dxvk { * synchronize implicitly. */ enum class DxvkBarrierControl : uint32_t { - IgnoreComputeWriteAfterWrite = 0, - IgnoreGraphicsWriteAfterWrite = 1, + // Ignores write-after-write hazard + ComputeAllowWriteOnlyOverlap = 0, + ComputeAllowReadWriteOverlap = 1, + + GraphicsAllowReadWriteOverlap = 2, }; using DxvkBarrierControlFlags = Flags<DxvkBarrierControl>;