1
0
mirror of https://github.com/doitsujin/dxvk.git synced 2025-01-19 05:52:11 +01:00

[d3d11] Optimize UAV binding

- UpdateBuffer is faster than ClearBuffer for small updates.
- We shouldn't dispatch *two* CS commands for each UAV; one is enough.
This commit is contained in:
Philip Rebohle 2018-09-27 16:50:34 +02:00
parent 518ab2ebdd
commit 161fb6215a
No known key found for this signature in database
GPG Key ID: C8CC613427A31C99
4 changed files with 26 additions and 63 deletions

View File

@ -2181,14 +2181,9 @@ namespace dxvk {
DxbcProgramType::ComputeShader, DxbcProgramType::ComputeShader,
m_state.cs.unorderedAccessViews, m_state.cs.unorderedAccessViews,
StartSlot, NumUAVs, StartSlot, NumUAVs,
ppUnorderedAccessViews); ppUnorderedAccessViews,
if (pUAVInitialCounts != nullptr) {
InitUnorderedAccessViewCounters(
NumUAVs, ppUnorderedAccessViews,
pUAVInitialCounts); pUAVInitialCounts);
} }
}
void STDMETHODCALLTYPE D3D11DeviceContext::CSGetShader( void STDMETHODCALLTYPE D3D11DeviceContext::CSGetShader(
@ -2289,17 +2284,12 @@ namespace dxvk {
// UAVs are made available to all shader stages in // UAVs are made available to all shader stages in
// the graphics pipeline even though this code may // the graphics pipeline even though this code may
// suggest that they are limited to the pixel shader. // suggest that they are limited to the pixel shader.
// This behaviour is only required for FL_11_1.
SetUnorderedAccessViews( SetUnorderedAccessViews(
DxbcProgramType::PixelShader, DxbcProgramType::PixelShader,
m_state.ps.unorderedAccessViews, m_state.ps.unorderedAccessViews,
UAVStartSlot, NumUAVs, UAVStartSlot, NumUAVs,
ppUnorderedAccessViews); ppUnorderedAccessViews,
pUAVInitialCounts);
if (pUAVInitialCounts != nullptr) {
InitUnorderedAccessViewCounters(NumUAVs,
ppUnorderedAccessViews, pUAVInitialCounts);
}
} }
BindFramebuffer(spillOnBind); BindFramebuffer(spillOnBind);
@ -2882,14 +2872,24 @@ namespace dxvk {
void D3D11DeviceContext::BindUnorderedAccessView( void D3D11DeviceContext::BindUnorderedAccessView(
UINT UavSlot, UINT UavSlot,
UINT CtrSlot, UINT CtrSlot,
D3D11UnorderedAccessView* pUav) { D3D11UnorderedAccessView* pUav,
UINT Counter) {
EmitCs([ EmitCs([
cUavSlotId = UavSlot, cUavSlotId = UavSlot,
cCtrSlotId = CtrSlot, cCtrSlotId = CtrSlot,
cImageView = pUav != nullptr ? pUav->GetImageView() : nullptr, cImageView = pUav != nullptr ? pUav->GetImageView() : nullptr,
cBufferView = pUav != nullptr ? pUav->GetBufferView() : nullptr, cBufferView = pUav != nullptr ? pUav->GetBufferView() : nullptr,
cCounterSlice = pUav != nullptr ? pUav->GetCounterSlice() : DxvkBufferSlice() cCounterSlice = pUav != nullptr ? pUav->GetCounterSlice() : DxvkBufferSlice(),
cCounterValue = Counter
] (DxvkContext* ctx) { ] (DxvkContext* ctx) {
if (cCounterSlice.defined() && cCounterValue != ~0u) {
ctx->updateBuffer(
cCounterSlice.buffer(),
cCounterSlice.offset(),
sizeof(uint32_t),
&cCounterValue);
}
ctx->bindResourceView (cUavSlotId, cImageView, cBufferView); ctx->bindResourceView (cUavSlotId, cImageView, cBufferView);
ctx->bindResourceBuffer (cCtrSlotId, cCounterSlice); ctx->bindResourceBuffer (cCtrSlotId, cCounterSlice);
}); });
@ -2989,7 +2989,8 @@ namespace dxvk {
D3D11UnorderedAccessBindings& Bindings, D3D11UnorderedAccessBindings& Bindings,
UINT StartSlot, UINT StartSlot,
UINT NumUAVs, UINT NumUAVs,
ID3D11UnorderedAccessView* const* ppUnorderedAccessViews) { ID3D11UnorderedAccessView* const* ppUnorderedAccessViews,
const UINT* pUAVInitialCounts) {
const uint32_t uavSlotId = computeResourceSlotId( const uint32_t uavSlotId = computeResourceSlotId(
ShaderStage, DxbcBindingType::UnorderedAccessView, ShaderStage, DxbcBindingType::UnorderedAccessView,
StartSlot); StartSlot);
@ -3003,7 +3004,8 @@ namespace dxvk {
if (Bindings[StartSlot + i] != uav) { if (Bindings[StartSlot + i] != uav) {
Bindings[StartSlot + i] = uav; Bindings[StartSlot + i] = uav;
BindUnorderedAccessView(uavSlotId + i, ctrSlotId + i, uav); BindUnorderedAccessView(uavSlotId + i, ctrSlotId + i, uav,
pUAVInitialCounts ? pUAVInitialCounts[i] : ~0u);
} }
} }
} }
@ -3028,31 +3030,6 @@ namespace dxvk {
} }
/**
 * \brief Writes initial values into UAV counter buffers
 *
 * Walks the given view array and, for every non-null view
 * that has a counter slice, emits one CS command which fills
 * that slice with the matching entry of \c pUAVInitialCounts.
 * \param [in] NumUAVs Number of entries in both arrays
 * \param [in] ppUnorderedAccessViews Views whose counters are initialized
 * \param [in] pUAVInitialCounts One value per view. Indexed without
 *    a null check; the callers visible in this diff test the
 *    pointer against \c nullptr before calling.
 */
void D3D11DeviceContext::InitUnorderedAccessViewCounters(
UINT NumUAVs,
ID3D11UnorderedAccessView* const* ppUnorderedAccessViews,
const UINT* pUAVInitialCounts) {
for (uint32_t i = 0; i < NumUAVs; i++) {
auto uav = static_cast<D3D11UnorderedAccessView*>(ppUnorderedAccessViews[i]);
// Null array entries are skipped
if (uav != nullptr) {
const DxvkBufferSlice counterSlice = uav->GetCounterSlice();
const D3D11UavCounter counterValue = { pUAVInitialCounts[i] };
// Nothing is emitted when the view has no counter slice or
// when the requested value is 0xFFFFFFFF, so the current
// counter contents are left as they are in those cases.
if (counterSlice.defined() && counterValue.atomicCtr != 0xFFFFFFFFu) {
// Slice and value are captured by copy into the lambda
EmitCs([counterSlice, counterValue] (DxvkContext* ctx) {
// Fills the whole slice (counterSlice.length() bytes,
// not just one 32-bit word) with the counter value
ctx->clearBuffer(
counterSlice.buffer(),
counterSlice.offset(),
counterSlice.length(),
counterValue.atomicCtr);
});
}
}
}
}
void D3D11DeviceContext::GetConstantBuffers( void D3D11DeviceContext::GetConstantBuffers(
const D3D11ConstantBufferBindings& Bindings, const D3D11ConstantBufferBindings& Bindings,
UINT StartSlot, UINT StartSlot,
@ -3175,7 +3152,7 @@ namespace dxvk {
for (uint32_t i = 0; i < Bindings.size(); i++) { for (uint32_t i = 0; i < Bindings.size(); i++) {
BindUnorderedAccessView( BindUnorderedAccessView(
uavSlotId + i, ctrSlotId + i, uavSlotId + i, ctrSlotId + i,
Bindings[i].ptr()); Bindings[i].ptr(), ~0u);
} }
} }

View File

@ -701,7 +701,8 @@ namespace dxvk {
void BindUnorderedAccessView( void BindUnorderedAccessView(
UINT UavSlot, UINT UavSlot,
UINT CtrSlot, UINT CtrSlot,
D3D11UnorderedAccessView* pUav); D3D11UnorderedAccessView* pUav,
UINT Counter);
void DiscardBuffer( void DiscardBuffer(
D3D11Buffer* pBuffer); D3D11Buffer* pBuffer);
@ -734,18 +735,14 @@ namespace dxvk {
D3D11UnorderedAccessBindings& Bindings, D3D11UnorderedAccessBindings& Bindings,
UINT StartSlot, UINT StartSlot,
UINT NumUAVs, UINT NumUAVs,
ID3D11UnorderedAccessView* const* ppUnorderedAccessViews); ID3D11UnorderedAccessView* const* ppUnorderedAccessViews,
const UINT* pUAVInitialCounts);
void SetRenderTargets( void SetRenderTargets(
UINT NumViews, UINT NumViews,
ID3D11RenderTargetView* const* ppRenderTargetViews, ID3D11RenderTargetView* const* ppRenderTargetViews,
ID3D11DepthStencilView* pDepthStencilView); ID3D11DepthStencilView* pDepthStencilView);
void InitUnorderedAccessViewCounters(
UINT NumUAVs,
ID3D11UnorderedAccessView* const* ppUnorderedAccessViews,
const UINT* pUAVInitialCounts);
void GetConstantBuffers( void GetConstantBuffers(
const D3D11ConstantBufferBindings& Bindings, const D3D11ConstantBufferBindings& Bindings,
UINT StartSlot, UINT StartSlot,

View File

@ -1404,7 +1404,7 @@ namespace dxvk {
const auto& devInfo = m_dxvkAdapter->deviceProperties(); const auto& devInfo = m_dxvkAdapter->deviceProperties();
VkDeviceSize uavCounterSliceLength = align<VkDeviceSize>( VkDeviceSize uavCounterSliceLength = align<VkDeviceSize>(
sizeof(D3D11UavCounter), devInfo.limits.minStorageBufferOffsetAlignment); sizeof(uint32_t), devInfo.limits.minStorageBufferOffsetAlignment);
DxvkBufferCreateInfo uavCounterInfo; DxvkBufferCreateInfo uavCounterInfo;
uavCounterInfo.size = 4096 * uavCounterSliceLength; uavCounterInfo.size = 4096 * uavCounterSliceLength;

View File

@ -8,17 +8,6 @@ namespace dxvk {
class D3D11Device; class D3D11Device;
/**
* \brief UAV counter structure
*
* Data structure passed to shaders that use
* append/consume buffer functionality.
*/
struct D3D11UavCounter {
// Sole member: the 32-bit counter value
uint32_t atomicCtr;
};
/** /**
* \brief Unordered access view * \brief Unordered access view
* *