From 9389456d20943714cf9d67a6bfbc7e30585b3b2b Mon Sep 17 00:00:00 2001 From: Philip Rebohle Date: Thu, 20 Feb 2025 15:09:35 +0100 Subject: [PATCH] [d3d11] Lazy-bind compute shader UAVs And factor UAV counter updates out of binding. --- src/d3d11/d3d11_context.cpp | 89 +++++++++++++++++++++++++-------- src/d3d11/d3d11_context.h | 16 +++++- src/d3d11/d3d11_context_imm.cpp | 19 +++++++ src/d3d11/d3d11_view_uav.h | 4 ++ 4 files changed, 105 insertions(+), 23 deletions(-) diff --git a/src/d3d11/d3d11_context.cpp b/src/d3d11/d3d11_context.cpp index f722211d9..8a3243954 100644 --- a/src/d3d11/d3d11_context.cpp +++ b/src/d3d11/d3d11_context.cpp @@ -2169,7 +2169,8 @@ namespace dxvk { m_state.uav.views[uavId] = nullptr; m_state.uav.mask.clr(uavId); - BindUnorderedAccessView(DxbcProgramType::ComputeShader, uavId, nullptr, ~0u); + if (!DirtyComputeUnorderedAccessView(uavId, true)) + BindUnorderedAccessView(DxbcProgramType::ComputeShader, uavId, nullptr); } } @@ -2184,11 +2185,16 @@ namespace dxvk { auto uav = static_cast(ppUnorderedAccessViews[i]); auto ctr = pUAVInitialCounts ? pUAVInitialCounts[i] : ~0u; - if (m_state.uav.views[StartSlot + i] != uav || ctr != ~0u) { + if (ctr != ~0u && uav && uav->HasCounter()) + UpdateUnorderedAccessViewCounter(uav, ctr); + + if (m_state.uav.views[StartSlot + i] != uav) { m_state.uav.views[StartSlot + i] = uav; m_state.uav.mask.set(StartSlot + i, uav != nullptr); - BindUnorderedAccessView(DxbcProgramType::ComputeShader, StartSlot + i, uav, ctr); + if (!DirtyComputeUnorderedAccessView(StartSlot + i, !uav)) + BindUnorderedAccessView(DxbcProgramType::ComputeShader, StartSlot + i, uav); + ResolveCsSrvHazards(uav); } } @@ -3241,6 +3247,28 @@ namespace dxvk { } + template + void D3D11CommonContext::ApplyDirtyUnorderedAccessViews( + DxbcProgramType Stage, + const DxbcBindingMask& BoundMask, + DxbcBindingMask& DirtyMask) { + uint64_t bindMask = BoundMask.uavMask & DirtyMask.uavMask; + + if (!bindMask) + return; + + const auto& views = Stage == DxbcProgramType::ComputeShader + ? m_state.uav.views + : m_state.om.uavs; + + // Need to clear dirty bits before binding + DirtyMask.uavMask -= bindMask; + + for (uint32_t slot : bit::BitMask(bindMask)) + BindUnorderedAccessView(Stage, slot, views[slot].ptr()); + } + + template void D3D11CommonContext::ApplyDirtyGraphicsBindings() { auto dirtyMask = m_state.lazy.shadersDirty & m_state.lazy.shadersUsed; @@ -3271,6 +3299,7 @@ namespace dxvk { ApplyDirtySamplers(stage, boundMask, dirtyMask); ApplyDirtyConstantBuffers(stage, boundMask, dirtyMask); ApplyDirtyShaderResources(stage, boundMask, dirtyMask); + ApplyDirtyUnorderedAccessViews(stage, boundMask, dirtyMask); m_state.lazy.shadersDirty.clr(stage); } @@ -3921,8 +3950,7 @@ namespace dxvk { void D3D11CommonContext::BindUnorderedAccessView( DxbcProgramType ShaderStage, UINT Slot, - D3D11UnorderedAccessView* pUav, - UINT Counter) { + D3D11UnorderedAccessView* pUav) { uint32_t uavSlotId = computeUavBinding(ShaderStage, Slot); uint32_t ctrSlotId = computeUavCounterBinding(ShaderStage, Slot); @@ -3937,19 +3965,8 @@ namespace dxvk { cCtrSlotId = ctrSlotId, cStages = stages, cBufferView = pUav->GetBufferView(), - cCounterView = pUav->GetCounterView(), - cCounterValue = Counter + cCounterView = pUav->GetCounterView() ] (DxvkContext* ctx) mutable { - if (cCounterView != nullptr && cCounterValue != ~0u) { - DxvkBufferSlice counterSlice(cCounterView); - - ctx->updateBuffer( - counterSlice.buffer(), - counterSlice.offset(), - sizeof(uint32_t), - &cCounterValue); - } - ctx->bindResourceBufferView(cStages, cUavSlotId, Forwarder::move(cBufferView)); ctx->bindResourceBufferView(cStages, cCtrSlotId, @@ -4432,6 +4449,19 @@ namespace dxvk { } + template + bool D3D11CommonContext::DirtyComputeUnorderedAccessView( + uint32_t Slot, + bool IsNull) { + constexpr DxbcProgramType ShaderStage = DxbcProgramType::ComputeShader; + + return DirtyBindingGeneric(ShaderStage, + m_state.lazy.bindingsUsed[ShaderStage].uavMask, + m_state.lazy.bindingsDirty[ShaderStage].uavMask, + uint64_t(1u) << Slot, IsNull); + } + + template void D3D11CommonContext::DiscardBuffer( ID3D11Resource* pResource) { @@ -4854,7 +4884,7 @@ namespace dxvk { if (CheckViewOverlap(pView, m_state.om.uavs[i].ptr())) { m_state.om.uavs[i] = nullptr; - BindUnorderedAccessView(DxbcProgramType::PixelShader, i, nullptr, ~0u); + BindUnorderedAccessView(DxbcProgramType::PixelShader, i, nullptr); } } } @@ -4962,7 +4992,7 @@ namespace dxvk { : m_state.om.maxUav; for (uint32_t i = 0; i < maxCount; i++) - BindUnorderedAccessView(Stage, i, views[i].ptr(), ~0u); + BindUnorderedAccessView(Stage, i, views[i].ptr()); } @@ -5192,10 +5222,13 @@ namespace dxvk { ctr = pUAVInitialCounts ? pUAVInitialCounts[i - UAVStartSlot] : ~0u; } - if (m_state.om.uavs[i] != uav || ctr != ~0u) { + if (ctr != ~0u && uav && uav->HasCounter()) + UpdateUnorderedAccessViewCounter(uav, ctr); + + if (m_state.om.uavs[i] != uav) { m_state.om.uavs[i] = uav; - BindUnorderedAccessView(DxbcProgramType::PixelShader, i, uav, ctr); + BindUnorderedAccessView(DxbcProgramType::PixelShader, i, uav); ResolveOmSrvHazards(uav); if (NumRTVs == D3D11_KEEP_RENDER_TARGETS_AND_DEPTH_STENCIL) @@ -5590,6 +5623,20 @@ namespace dxvk { } + template + void D3D11CommonContext::UpdateUnorderedAccessViewCounter( + D3D11UnorderedAccessView* pUav, + uint32_t CounterValue) { + EmitCs([ + cView = pUav->GetCounterView(), + cCounter = CounterValue + ] (DxvkContext* ctx) { + ctx->updateBuffer(cView->buffer(), + cView->info().offset, sizeof(cCounter), &cCounter); + }); + } + + template bool D3D11CommonContext::ValidateRenderTargets( UINT NumViews, diff --git a/src/d3d11/d3d11_context.h b/src/d3d11/d3d11_context.h index 5c376d195..91530c8d1 100644 --- a/src/d3d11/d3d11_context.h +++ b/src/d3d11/d3d11_context.h @@ -819,6 +819,11 @@ namespace dxvk { const DxbcBindingMask& BoundMask, DxbcBindingMask& DirtyMask); + void ApplyDirtyUnorderedAccessViews( + DxbcProgramType Stage, + const DxbcBindingMask& BoundMask, + DxbcBindingMask& DirtyMask); + void ApplyDirtyGraphicsBindings(); void ApplyDirtyComputeBindings(); @@ -904,8 +909,7 @@ namespace dxvk { void BindUnorderedAccessView( DxbcProgramType ShaderStage, UINT Slot, - D3D11UnorderedAccessView* pUav, - UINT Counter); + D3D11UnorderedAccessView* pUav); VkClearValue ConvertColorValue( const FLOAT Color[4], @@ -957,6 +961,10 @@ namespace dxvk { uint32_t Slot, bool IsNull); + bool DirtyComputeUnorderedAccessView( + uint32_t Slot, + bool IsNull); + void DiscardBuffer( ID3D11Resource* pResource); @@ -1115,6 +1123,10 @@ namespace dxvk { UINT SrcDepthPitch, UINT CopyFlags); + void UpdateUnorderedAccessViewCounter( + D3D11UnorderedAccessView* pUav, + uint32_t CounterValue); + bool ValidateRenderTargets( UINT NumViews, ID3D11RenderTargetView* const* ppRenderTargetViews, diff --git a/src/d3d11/d3d11_context_imm.cpp b/src/d3d11/d3d11_context_imm.cpp index 538e3789c..8f85f082e 100644 --- a/src/d3d11/d3d11_context_imm.cpp +++ b/src/d3d11/d3d11_context_imm.cpp @@ -1020,6 +1020,18 @@ namespace dxvk { for (uint32_t index : bit::BitMask(cDirtyState[dxStage].srvMask[m])) ctx->bindResourceImageView(vkStage, srvSlot + index + m * 64u, nullptr); } + + // Unbind all dirty unordered access views. Only consider compute + // here since we don't actually lazy-bind graphics UAVs. + if (dxStage == DxbcProgramType::ComputeShader) { + auto uavSlot = computeUavBinding(dxStage, 0); + auto ctrSlot = computeUavCounterBinding(dxStage, 0); + + for (uint32_t index : bit::BitMask(cDirtyState[dxStage].uavMask)) { + ctx->bindResourceImageView(vkStage, uavSlot + index, nullptr); + ctx->bindResourceBufferView(vkStage, ctrSlot + index, nullptr); + } + } } }); @@ -1045,6 +1057,13 @@ namespace dxvk { } } + if (stage == DxbcProgramType::ComputeShader) { + for (uint32_t index : bit::BitMask(dirtyState[stage].uavMask)) { + if (!m_state.uav.views[index].ptr()) + dirtyState[stage].uavMask &= ~(uint64_t(1u) << index); + } + } + if (dirtyState[stage].empty()) m_state.lazy.shadersDirty.clr(stage); } diff --git a/src/d3d11/d3d11_view_uav.h b/src/d3d11/d3d11_view_uav.h index c86eba5fa..8396fb8a1 100644 --- a/src/d3d11/d3d11_view_uav.h +++ b/src/d3d11/d3d11_view_uav.h @@ -43,6 +43,10 @@ namespace dxvk { return m_info.BindFlags & Flags; } + BOOL HasCounter() const { + return m_counterView != nullptr; + } + D3D11_RESOURCE_DIMENSION GetResourceType() const { D3D11_RESOURCE_DIMENSION type; m_resource->GetType(&type);