From 4e1f6e5efd407148f17c93ec7f7cb8b1c82dfde9 Mon Sep 17 00:00:00 2001 From: Philip Rebohle Date: Thu, 4 Aug 2022 16:23:47 +0200 Subject: [PATCH] [d3d11] Refactor unordered access view and output merger state --- src/d3d11/d3d11_context.cpp | 132 +++++++++++++------------------- src/d3d11/d3d11_context.h | 3 +- src/d3d11/d3d11_context_state.h | 68 +++++++++++++--- 3 files changed, 112 insertions(+), 91 deletions(-) diff --git a/src/d3d11/d3d11_context.cpp b/src/d3d11/d3d11_context.cpp index 244ee11f..72206278 100644 --- a/src/d3d11/d3d11_context.cpp +++ b/src/d3d11/d3d11_context.cpp @@ -2082,16 +2082,16 @@ namespace dxvk { uint32_t uavSlotId = computeUavBinding (DxbcProgramType::ComputeShader, 0); uint32_t ctrSlotId = computeUavCounterBinding(DxbcProgramType::ComputeShader, 0); - int32_t uavId = m_state.cs.uavMask.findNext(0); + int32_t uavId = m_state.uav.mask.findNext(0); while (uavId >= 0) { if (uint32_t(uavId) < StartSlot || uint32_t(uavId) >= StartSlot + NumUAVs) { for (uint32_t i = 0; i < NumUAVs; i++) { auto uav = static_cast(ppUnorderedAccessViews[i]); - if (CheckViewOverlap(uav, m_state.cs.unorderedAccessViews[uavId].ptr())) { - m_state.cs.unorderedAccessViews[uavId] = nullptr; - m_state.cs.uavMask.clr(uavId); + if (CheckViewOverlap(uav, m_state.uav.views[uavId].ptr())) { + m_state.uav.views[uavId] = nullptr; + m_state.uav.mask.clr(uavId); BindUnorderedAccessView( uavSlotId + uavId, nullptr, @@ -2099,9 +2099,9 @@ namespace dxvk { } } - uavId = m_state.cs.uavMask.findNext(uavId + 1); + uavId = m_state.uav.mask.findNext(uavId + 1); } else { - uavId = m_state.cs.uavMask.findNext(StartSlot + NumUAVs); + uavId = m_state.uav.mask.findNext(StartSlot + NumUAVs); } } @@ -2110,9 +2110,9 @@ namespace dxvk { auto uav = static_cast(ppUnorderedAccessViews[i]); auto ctr = pUAVInitialCounts ? pUAVInitialCounts[i] : ~0u; - if (m_state.cs.unorderedAccessViews[StartSlot + i] != uav || ctr != ~0u) { - m_state.cs.unorderedAccessViews[StartSlot + i] = uav; - m_state.cs.uavMask.set(StartSlot + i, uav != nullptr); + if (m_state.uav.views[StartSlot + i] != uav || ctr != ~0u) { + m_state.uav.views[StartSlot + i] = uav; + m_state.uav.mask.set(StartSlot + i, uav != nullptr); BindUnorderedAccessView( uavSlotId + StartSlot + i, uav, @@ -2199,8 +2199,8 @@ namespace dxvk { D3D10DeviceLock lock = LockContext(); for (uint32_t i = 0; i < NumUAVs; i++) { - ppUnorderedAccessViews[i] = StartSlot + i < m_state.cs.unorderedAccessViews.size() - ? m_state.cs.unorderedAccessViews[StartSlot + i].ref() + ppUnorderedAccessViews[i] = StartSlot + i < m_state.uav.views.size() + ? m_state.uav.views[StartSlot + i].ref() : nullptr; } } @@ -2311,19 +2311,19 @@ namespace dxvk { if (ppRenderTargetViews) { for (UINT i = 0; i < NumRTVs; i++) { - ppRenderTargetViews[i] = i < m_state.om.renderTargetViews.size() - ? m_state.om.renderTargetViews[i].ref() + ppRenderTargetViews[i] = i < m_state.om.rtvs.size() + ? m_state.om.rtvs[i].ref() : nullptr; } } if (ppDepthStencilView) - *ppDepthStencilView = m_state.om.depthStencilView.ref(); + *ppDepthStencilView = m_state.om.dsv.ref(); if (ppUnorderedAccessViews) { for (UINT i = 0; i < NumUAVs; i++) { - ppUnorderedAccessViews[i] = UAVStartSlot + i < m_state.ps.unorderedAccessViews.size() - ? m_state.ps.unorderedAccessViews[UAVStartSlot + i].ref() + ppUnorderedAccessViews[i] = UAVStartSlot + i < m_state.om.uavs.size() + ? m_state.om.uavs[UAVStartSlot + i].ref() : nullptr; } } @@ -3133,20 +3133,20 @@ namespace dxvk { // D3D11 doesn't have the concept of a framebuffer object, // so we'll just create a new one every time the render // target bindings are updated. Set up the attachments. - for (UINT i = 0; i < m_state.om.renderTargetViews.size(); i++) { - if (m_state.om.renderTargetViews[i] != nullptr) { + for (UINT i = 0; i < m_state.om.rtvs.size(); i++) { + if (m_state.om.rtvs[i] != nullptr) { attachments.color[i] = { - m_state.om.renderTargetViews[i]->GetImageView(), - m_state.om.renderTargetViews[i]->GetRenderLayout() }; - sampleCount = m_state.om.renderTargetViews[i]->GetSampleCount(); + m_state.om.rtvs[i]->GetImageView(), + m_state.om.rtvs[i]->GetRenderLayout() }; + sampleCount = m_state.om.rtvs[i]->GetSampleCount(); } } - if (m_state.om.depthStencilView != nullptr) { + if (m_state.om.dsv != nullptr) { attachments.depth = { - m_state.om.depthStencilView->GetImageView(), - m_state.om.depthStencilView->GetRenderLayout() }; - sampleCount = m_state.om.depthStencilView->GetSampleCount(); + m_state.om.dsv->GetImageView(), + m_state.om.dsv->GetRenderLayout() }; + sampleCount = m_state.om.dsv->GetSampleCount(); } // Create and bind the framebuffer object to the context @@ -3895,15 +3895,9 @@ namespace dxvk { // Reset resource bindings m_state.cbv.reset(); m_state.srv.reset(); + m_state.uav.reset(); m_state.samplers.reset(); - - // Default UAVs - for (uint32_t i = 0; i < D3D11_1_UAV_SLOT_COUNT; i++) { - m_state.ps.unorderedAccessViews[i] = nullptr; - m_state.cs.unorderedAccessViews[i] = nullptr; - } - - m_state.cs.uavMask.clear(); + m_state.om.reset(); // Default ID state m_state.id.argBuffer = nullptr; @@ -3922,24 +3916,6 @@ namespace dxvk { m_state.ia.indexBuffer.offset = 0; m_state.ia.indexBuffer.format = DXGI_FORMAT_UNKNOWN; - // Default OM State - for (uint32_t i = 0; i < D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT; i++) - m_state.om.renderTargetViews[i] = nullptr; - m_state.om.depthStencilView = nullptr; - - m_state.om.cbState = nullptr; - m_state.om.dsState = nullptr; - - for (uint32_t i = 0; i < 4; i++) - m_state.om.blendFactor[i] = 1.0f; - - m_state.om.sampleCount = 0; - m_state.om.sampleMask = D3D11_DEFAULT_SAMPLE_MASK; - m_state.om.stencilRef = D3D11_DEFAULT_STENCIL_REFERENCE; - - m_state.om.maxRtv = 0; - m_state.om.maxUav = 0; - // Default RS state m_state.rs.state = nullptr; m_state.rs.numViewports = 0; @@ -4029,14 +4005,14 @@ namespace dxvk { bool hazard = false; - if (CheckViewOverlap(pView, m_state.om.depthStencilView.ptr())) { - m_state.om.depthStencilView = nullptr; + if (CheckViewOverlap(pView, m_state.om.dsv.ptr())) { + m_state.om.dsv = nullptr; hazard = true; } for (uint32_t i = 0; i < m_state.om.maxRtv; i++) { - if (CheckViewOverlap(pView, m_state.om.renderTargetViews[i].ptr())) { - m_state.om.renderTargetViews[i] = nullptr; + if (CheckViewOverlap(pView, m_state.om.rtvs[i].ptr())) { + m_state.om.rtvs[i] = nullptr; hazard = true; } } @@ -4055,8 +4031,8 @@ namespace dxvk { uint32_t ctrSlotId = computeUavCounterBinding(DxbcProgramType::PixelShader, 0); for (uint32_t i = 0; i < m_state.om.maxUav; i++) { - if (CheckViewOverlap(pView, m_state.ps.unorderedAccessViews[i].ptr())) { - m_state.ps.unorderedAccessViews[i] = nullptr; + if (CheckViewOverlap(pView, m_state.om.uavs[i].ptr())) { + m_state.om.uavs[i] = nullptr; BindUnorderedAccessView( uavSlotId + i, nullptr, @@ -4120,8 +4096,8 @@ namespace dxvk { RestoreShaderResources(); RestoreShaderResources(); - RestoreUnorderedAccessViews (m_state.ps.unorderedAccessViews); - RestoreUnorderedAccessViews (m_state.cs.unorderedAccessViews); + RestoreUnorderedAccessViews(); + RestoreUnorderedAccessViews(); RestoreSamplers(); RestoreSamplers(); @@ -4169,15 +4145,17 @@ namespace dxvk { template template - void D3D11CommonContext::RestoreUnorderedAccessViews( - D3D11UnorderedAccessBindings& Bindings) { + void D3D11CommonContext::RestoreUnorderedAccessViews() { + const auto& views = Stage == DxbcProgramType::ComputeShader + ? m_state.uav.views + : m_state.om.uavs; + uint32_t uavSlotId = computeUavBinding (Stage, 0); uint32_t ctrSlotId = computeUavCounterBinding(Stage, 0); - for (uint32_t i = 0; i < Bindings.size(); i++) { + for (uint32_t i = 0; i < views.size(); i++) { BindUnorderedAccessView( - uavSlotId + i, - Bindings[i].ptr(), + uavSlotId + i, views[i].ptr(), ctrSlotId + i, ~0u); } } @@ -4355,13 +4333,13 @@ namespace dxvk { if (!ValidateRenderTargets(NumRTVs, ppRenderTargetViews, pDepthStencilView)) return; - for (uint32_t i = 0; i < m_state.om.renderTargetViews.size(); i++) { + for (uint32_t i = 0; i < m_state.om.rtvs.size(); i++) { auto rtv = i < NumRTVs ? static_cast(ppRenderTargetViews[i]) : nullptr; - if (m_state.om.renderTargetViews[i] != rtv) { - m_state.om.renderTargetViews[i] = rtv; + if (m_state.om.rtvs[i] != rtv) { + m_state.om.rtvs[i] = rtv; needsUpdate = true; ResolveOmSrvHazards(rtv); @@ -4372,8 +4350,8 @@ namespace dxvk { auto dsv = static_cast(pDepthStencilView); - if (m_state.om.depthStencilView != dsv) { - m_state.om.depthStencilView = dsv; + if (m_state.om.dsv != dsv) { + m_state.om.dsv = dsv; needsUpdate = true; ResolveOmSrvHazards(dsv); } @@ -4398,8 +4376,8 @@ namespace dxvk { ctr = pUAVInitialCounts ? pUAVInitialCounts[i - UAVStartSlot] : ~0u; } - if (m_state.ps.unorderedAccessViews[i] != uav || ctr != ~0u) { - m_state.ps.unorderedAccessViews[i] = uav; + if (m_state.om.uavs[i] != uav || ctr != ~0u) { + m_state.om.uavs[i] = uav; BindUnorderedAccessView( uavSlotId + i, uav, @@ -4487,20 +4465,20 @@ namespace dxvk { bool hazard = false; if (ShaderStage == DxbcProgramType::ComputeShader) { - int32_t uav = m_state.cs.uavMask.findNext(0); + int32_t uav = m_state.uav.mask.findNext(0); while (uav >= 0 && !hazard) { - hazard = CheckViewOverlap(pView, m_state.cs.unorderedAccessViews[uav].ptr()); - uav = m_state.cs.uavMask.findNext(uav + 1); + hazard = CheckViewOverlap(pView, m_state.uav.views[uav].ptr()); + uav = m_state.uav.mask.findNext(uav + 1); } } else { - hazard = CheckViewOverlap(pView, m_state.om.depthStencilView.ptr()); + hazard = CheckViewOverlap(pView, m_state.om.dsv.ptr()); for (uint32_t i = 0; !hazard && i < m_state.om.maxRtv; i++) - hazard = CheckViewOverlap(pView, m_state.om.renderTargetViews[i].ptr()); + hazard = CheckViewOverlap(pView, m_state.om.rtvs[i].ptr()); for (uint32_t i = 0; !hazard && i < m_state.om.maxUav; i++) - hazard = CheckViewOverlap(pView, m_state.ps.unorderedAccessViews[i].ptr()); + hazard = CheckViewOverlap(pView, m_state.om.uavs[i].ptr()); } return hazard; diff --git a/src/d3d11/d3d11_context.h b/src/d3d11/d3d11_context.h index 41df199e..39977776 100644 --- a/src/d3d11/d3d11_context.h +++ b/src/d3d11/d3d11_context.h @@ -942,8 +942,7 @@ namespace dxvk { void RestoreShaderResources(); template - void RestoreUnorderedAccessViews( - D3D11UnorderedAccessBindings& Bindings); + void RestoreUnorderedAccessViews(); template void SetConstantBuffers( diff --git a/src/d3d11/d3d11_context_state.h b/src/d3d11/d3d11_context_state.h index 7135990d..e13e557f 100644 --- a/src/d3d11/d3d11_context_state.h +++ b/src/d3d11/d3d11_context_state.h @@ -103,11 +103,28 @@ namespace dxvk { }; using D3D11SamplerBindings = D3D11ShaderStageState; + + /** + * \brief UAV bindings + * + * Stores bound UAVs. For compute shader UAVs, + * we also store a bit mask of bound UAVs. + */ + using D3D11ShaderStageUavBinding = std::array, D3D11_1_UAV_SLOT_COUNT>; - using D3D11UnorderedAccessBindings = std::array< - Com, D3D11_1_UAV_SLOT_COUNT>; - - + struct D3D11UavBindings { + D3D11ShaderStageUavBinding views = { }; + DxvkBindingSet mask = { }; + + void reset() { + for (uint32_t i = 0; i < views.size(); i++) + views[i] = nullptr; + + mask.clear(); + } + }; + + struct D3D11ContextStateVS { Com shader = nullptr; }; @@ -130,15 +147,11 @@ namespace dxvk { struct D3D11ContextStatePS { Com shader = nullptr; - D3D11UnorderedAccessBindings unorderedAccessViews = { }; }; struct D3D11ContextStateCS { Com shader = nullptr; - D3D11UnorderedAccessBindings unorderedAccessViews = { }; - - DxvkBindingSet uavMask = { }; }; @@ -170,10 +183,17 @@ namespace dxvk { D3D11IndexBufferBinding indexBuffer = { }; }; + /** + * \brief Output merger state + * + * Stores RTV, DSV, and graphics UAV bindings, as well as related state. + */ + using D3D11RenderTargetViewBinding = std::array, D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT>; struct D3D11ContextStateOM { - std::array, D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT> renderTargetViews = { }; - Com depthStencilView = { }; + D3D11ShaderStageUavBinding uavs = { }; + D3D11RenderTargetViewBinding rtvs = { }; + Com dsv = { }; D3D11BlendState* cbState = nullptr; D3D11DepthStencilState* dsState = nullptr; @@ -181,11 +201,34 @@ namespace dxvk { FLOAT blendFactor[4] = { 1.0f, 1.0f, 1.0f, 1.0f }; UINT sampleCount = 0u; - UINT sampleMask = 0xFFFFFFFFu; - UINT stencilRef = 0u; + UINT sampleMask = D3D11_DEFAULT_SAMPLE_MASK; + UINT stencilRef = D3D11_DEFAULT_STENCIL_REFERENCE; UINT maxRtv = 0u; UINT maxUav = 0u; + + void reset() { + for (uint32_t i = 0; i < maxUav; i++) + uavs[i] = nullptr; + + for (uint32_t i = 0; i < maxRtv; i++) + rtvs[i] = nullptr; + + dsv = nullptr; + + cbState = nullptr; + dsState = nullptr; + + for (uint32_t i = 0; i < 4; i++) + blendFactor[i] = 1.0f; + + sampleCount = 0u; + sampleMask = D3D11_DEFAULT_SAMPLE_MASK; + stencilRef = D3D11_DEFAULT_STENCIL_REFERENCE; + + maxRtv = 0; + maxUav = 0; + } }; @@ -237,6 +280,7 @@ namespace dxvk { D3D11CbvBindings cbv; D3D11SrvBindings srv; + D3D11UavBindings uav; D3D11SamplerBindings samplers; };