1
0
mirror of https://github.com/doitsujin/dxvk.git synced 2025-02-27 04:54:15 +01:00

[d3d11] Lazy-bind compute shader UAVs

And factor UAV counter updates out of binding.
This commit is contained in:
Philip Rebohle 2025-02-20 15:09:35 +01:00 committed by Philip Rebohle
parent 69171873fa
commit 9389456d20
4 changed files with 105 additions and 23 deletions

View File

@ -2169,7 +2169,8 @@ namespace dxvk {
m_state.uav.views[uavId] = nullptr;
m_state.uav.mask.clr(uavId);
BindUnorderedAccessView(DxbcProgramType::ComputeShader, uavId, nullptr, ~0u);
if (!DirtyComputeUnorderedAccessView(uavId, true))
BindUnorderedAccessView(DxbcProgramType::ComputeShader, uavId, nullptr);
}
}
@ -2184,11 +2185,16 @@ namespace dxvk {
auto uav = static_cast<D3D11UnorderedAccessView*>(ppUnorderedAccessViews[i]);
auto ctr = pUAVInitialCounts ? pUAVInitialCounts[i] : ~0u;
if (m_state.uav.views[StartSlot + i] != uav || ctr != ~0u) {
if (ctr != ~0u && uav && uav->HasCounter())
UpdateUnorderedAccessViewCounter(uav, ctr);
if (m_state.uav.views[StartSlot + i] != uav) {
m_state.uav.views[StartSlot + i] = uav;
m_state.uav.mask.set(StartSlot + i, uav != nullptr);
BindUnorderedAccessView(DxbcProgramType::ComputeShader, StartSlot + i, uav, ctr);
if (!DirtyComputeUnorderedAccessView(StartSlot + i, !uav))
BindUnorderedAccessView(DxbcProgramType::ComputeShader, StartSlot + i, uav);
ResolveCsSrvHazards(uav);
}
}
@ -3241,6 +3247,28 @@ namespace dxvk {
}
// Binds all unordered access views of the given shader stage whose
// slots are set in both BoundMask.uavMask and DirtyMask.uavMask, and
// removes those slots from DirtyMask.uavMask. Views are read from
// m_state.uav.views for the compute stage and from m_state.om.uavs
// for any other stage.
template<typename ContextType>
void D3D11CommonContext<ContextType>::ApplyDirtyUnorderedAccessViews(
        DxbcProgramType   Stage,
  const DxbcBindingMask&  BoundMask,
        DxbcBindingMask&  DirtyMask) {
  // Only slots that are flagged in both masks require any work
  uint64_t pendingSlots = BoundMask.uavMask & DirtyMask.uavMask;

  if (pendingSlots) {
    // Compute UAVs are tracked separately from the UAVs
    // that are part of the output merger state
    const auto& uavs = Stage == DxbcProgramType::ComputeShader
      ? m_state.uav.views
      : m_state.om.uavs;

    // Dirty bits have to be cleared before the views get bound
    DirtyMask.uavMask -= pendingSlots;

    for (uint32_t uavIndex : bit::BitMask(pendingSlots))
      BindUnorderedAccessView(Stage, uavIndex, uavs[uavIndex].ptr());
  }
}
template<typename ContextType>
void D3D11CommonContext<ContextType>::ApplyDirtyGraphicsBindings() {
auto dirtyMask = m_state.lazy.shadersDirty & m_state.lazy.shadersUsed;
@ -3271,6 +3299,7 @@ namespace dxvk {
ApplyDirtySamplers(stage, boundMask, dirtyMask);
ApplyDirtyConstantBuffers(stage, boundMask, dirtyMask);
ApplyDirtyShaderResources(stage, boundMask, dirtyMask);
ApplyDirtyUnorderedAccessViews(stage, boundMask, dirtyMask);
m_state.lazy.shadersDirty.clr(stage);
}
@ -3921,8 +3950,7 @@ namespace dxvk {
void D3D11CommonContext<ContextType>::BindUnorderedAccessView(
DxbcProgramType ShaderStage,
UINT Slot,
D3D11UnorderedAccessView* pUav,
UINT Counter) {
D3D11UnorderedAccessView* pUav) {
uint32_t uavSlotId = computeUavBinding(ShaderStage, Slot);
uint32_t ctrSlotId = computeUavCounterBinding(ShaderStage, Slot);
@ -3937,19 +3965,8 @@ namespace dxvk {
cCtrSlotId = ctrSlotId,
cStages = stages,
cBufferView = pUav->GetBufferView(),
cCounterView = pUav->GetCounterView(),
cCounterValue = Counter
cCounterView = pUav->GetCounterView()
] (DxvkContext* ctx) mutable {
if (cCounterView != nullptr && cCounterValue != ~0u) {
DxvkBufferSlice counterSlice(cCounterView);
ctx->updateBuffer(
counterSlice.buffer(),
counterSlice.offset(),
sizeof(uint32_t),
&cCounterValue);
}
ctx->bindResourceBufferView(cStages, cUavSlotId,
Forwarder::move(cBufferView));
ctx->bindResourceBufferView(cStages, cCtrSlotId,
@ -4432,6 +4449,19 @@ namespace dxvk {
}
// Forwards a compute shader UAV slot to DirtyBindingGeneric, using the
// lazy-binding "used" and "dirty" UAV masks of the compute stage and a
// single-bit mask for the given slot. The result of DirtyBindingGeneric
// is returned unchanged; the call sites in this change bind the view
// immediately when this function returns false.
template<typename ContextType>
bool D3D11CommonContext<ContextType>::DirtyComputeUnorderedAccessView(
        uint32_t  Slot,
        bool      IsNull) {
  constexpr DxbcProgramType ComputeStage = DxbcProgramType::ComputeShader;

  // Mask with only the bit for the requested UAV slot set
  const uint64_t slotBit = uint64_t(1u) << Slot;

  auto& lazyState = m_state.lazy;

  return DirtyBindingGeneric(ComputeStage,
    lazyState.bindingsUsed[ComputeStage].uavMask,
    lazyState.bindingsDirty[ComputeStage].uavMask,
    slotBit, IsNull);
}
template<typename ContextType>
void D3D11CommonContext<ContextType>::DiscardBuffer(
ID3D11Resource* pResource) {
@ -4854,7 +4884,7 @@ namespace dxvk {
if (CheckViewOverlap(pView, m_state.om.uavs[i].ptr())) {
m_state.om.uavs[i] = nullptr;
BindUnorderedAccessView(DxbcProgramType::PixelShader, i, nullptr, ~0u);
BindUnorderedAccessView(DxbcProgramType::PixelShader, i, nullptr);
}
}
}
@ -4962,7 +4992,7 @@ namespace dxvk {
: m_state.om.maxUav;
for (uint32_t i = 0; i < maxCount; i++)
BindUnorderedAccessView(Stage, i, views[i].ptr(), ~0u);
BindUnorderedAccessView(Stage, i, views[i].ptr());
}
@ -5192,10 +5222,13 @@ namespace dxvk {
ctr = pUAVInitialCounts ? pUAVInitialCounts[i - UAVStartSlot] : ~0u;
}
if (m_state.om.uavs[i] != uav || ctr != ~0u) {
if (ctr != ~0u && uav && uav->HasCounter())
UpdateUnorderedAccessViewCounter(uav, ctr);
if (m_state.om.uavs[i] != uav) {
m_state.om.uavs[i] = uav;
BindUnorderedAccessView(DxbcProgramType::PixelShader, i, uav, ctr);
BindUnorderedAccessView(DxbcProgramType::PixelShader, i, uav);
ResolveOmSrvHazards(uav);
if (NumRTVs == D3D11_KEEP_RENDER_TARGETS_AND_DEPTH_STENCIL)
@ -5590,6 +5623,20 @@ namespace dxvk {
}
// Emits a command via EmitCs that writes CounterValue into the buffer
// behind the UAV's counter view, at the offset reported by that view.
// The counter view is dereferenced without a check, so pUav must be
// non-null and must have a counter view; the call sites in this change
// test uav->HasCounter() before calling.
template<typename ContextType>
void D3D11CommonContext<ContextType>::UpdateUnorderedAccessViewCounter(
        D3D11UnorderedAccessView* pUav,
        uint32_t                  CounterValue) {
  EmitCs([
    cCounterView  = pUav->GetCounterView(),
    cCounterValue = CounterValue
  ] (DxvkContext* ctx) {
    // The captured value is the source data of the buffer update
    ctx->updateBuffer(
      cCounterView->buffer(),
      cCounterView->info().offset,
      sizeof(cCounterValue),
      &cCounterValue);
  });
}
template<typename ContextType>
bool D3D11CommonContext<ContextType>::ValidateRenderTargets(
UINT NumViews,

View File

@ -819,6 +819,11 @@ namespace dxvk {
const DxbcBindingMask& BoundMask,
DxbcBindingMask& DirtyMask);
void ApplyDirtyUnorderedAccessViews(
DxbcProgramType Stage,
const DxbcBindingMask& BoundMask,
DxbcBindingMask& DirtyMask);
void ApplyDirtyGraphicsBindings();
void ApplyDirtyComputeBindings();
@ -904,8 +909,7 @@ namespace dxvk {
void BindUnorderedAccessView(
DxbcProgramType ShaderStage,
UINT Slot,
D3D11UnorderedAccessView* pUav,
UINT Counter);
D3D11UnorderedAccessView* pUav);
VkClearValue ConvertColorValue(
const FLOAT Color[4],
@ -957,6 +961,10 @@ namespace dxvk {
uint32_t Slot,
bool IsNull);
bool DirtyComputeUnorderedAccessView(
uint32_t Slot,
bool IsNull);
void DiscardBuffer(
ID3D11Resource* pResource);
@ -1115,6 +1123,10 @@ namespace dxvk {
UINT SrcDepthPitch,
UINT CopyFlags);
void UpdateUnorderedAccessViewCounter(
D3D11UnorderedAccessView* pUav,
uint32_t CounterValue);
bool ValidateRenderTargets(
UINT NumViews,
ID3D11RenderTargetView* const* ppRenderTargetViews,

View File

@ -1020,6 +1020,18 @@ namespace dxvk {
for (uint32_t index : bit::BitMask(cDirtyState[dxStage].srvMask[m]))
ctx->bindResourceImageView(vkStage, srvSlot + index + m * 64u, nullptr);
}
// Unbind all dirty unordered access views. Only consider compute
// here since we don't actually lazy-bind graphics UAVs.
if (dxStage == DxbcProgramType::ComputeShader) {
auto uavSlot = computeUavBinding(dxStage, 0);
auto ctrSlot = computeUavCounterBinding(dxStage, 0);
for (uint32_t index : bit::BitMask(cDirtyState[dxStage].uavMask)) {
ctx->bindResourceImageView(vkStage, uavSlot + index, nullptr);
ctx->bindResourceBufferView(vkStage, ctrSlot + index, nullptr);
}
}
}
});
@ -1045,6 +1057,13 @@ namespace dxvk {
}
}
if (stage == DxbcProgramType::ComputeShader) {
for (uint32_t index : bit::BitMask(dirtyState[stage].uavMask)) {
if (!m_state.uav.views[index].ptr())
dirtyState[stage].uavMask &= ~(uint64_t(1u) << index);
}
}
if (dirtyState[stage].empty())
m_state.lazy.shadersDirty.clr(stage);
}

View File

@ -43,6 +43,10 @@ namespace dxvk {
return m_info.BindFlags & Flags;
}
// Returns whether this view has a counter view, i.e. whether
// m_counterView is non-null. Checked by the context before it
// updates a UAV counter.
BOOL HasCounter() const {
return m_counterView != nullptr;
}
D3D11_RESOURCE_DIMENSION GetResourceType() const {
D3D11_RESOURCE_DIMENSION type;
m_resource->GetType(&type);