mirror of https://github.com/doitsujin/dxvk.git synced 2025-02-23 19:54:16 +01:00

[dxvk] Rework graphics barrier tracking

Avoids having to insert redundant barriers when the app does UAV rendering.
Philip Rebohle 2025-02-14 21:24:46 +01:00
parent 767bf80b23
commit e45ea9f3bb
5 changed files with 181 additions and 145 deletions
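
For context: with UAV rendering, an application issues many consecutive draws that read and write the same storage buffers or images. Before this commit, every such draw could end the render pass and emit a barrier; the rework instead records the first write to each resource within a pass and only forces a barrier when a later draw actually touches an already-written resource. A minimal standalone sketch of that idea (hypothetical names, not DXVK's API):

#include <cstdint>
#include <unordered_set>

// Toy write-tracker: the first write to a resource is merely recorded;
// a barrier is only required once a previously written resource is
// accessed again within the same render pass.
struct HazardTracker {
  std::unordered_set<uint64_t> written;

  // Returns true if the render pass must be split with a barrier.
  bool access(uint64_t resourceId, bool isWrite) {
    if (written.count(resourceId))
      return true;                  // RAW or WAW hazard within the pass
    if (isWrite)
      written.insert(resourceId);   // first write: track it, no barrier yet
    return false;
  }

  void reset() { written.clear(); } // on spillRenderPass()
};

In the commit itself this role is played by hasGfxStores()/trackGfxStores() on buffers and images, gated by the new GpRenderPassSideEffects context flag.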

View File

@@ -5170,7 +5170,8 @@ namespace dxvk {
   void DxvkContext::spillRenderPass(bool suspend) {
     if (m_flags.test(DxvkContextFlag::GpRenderPassBound)) {
-      m_flags.clr(DxvkContextFlag::GpRenderPassBound);
+      m_flags.clr(DxvkContextFlag::GpRenderPassBound,
+        DxvkContextFlag::GpRenderPassSideEffects);
 
       this->pauseTransformFeedback();
@@ -5659,24 +5660,11 @@
     DxvkGraphicsPipelineFlags newFlags = newPipeline->flags();
     DxvkGraphicsPipelineFlags diffFlags = oldFlags ^ newFlags;
 
-    DxvkGraphicsPipelineFlags hazardMask(
-      DxvkGraphicsPipelineFlag::HasTransformFeedback,
-      DxvkGraphicsPipelineFlag::HasStorageDescriptors);
-
     m_state.gp.flags = newFlags;
 
-    if ((diffFlags & hazardMask) != 0) {
-      // Force-update vertex/index buffers for hazard checks
-      m_flags.set(DxvkContextFlag::GpDirtyIndexBuffer,
-                  DxvkContextFlag::GpDirtyVertexBuffers,
-                  DxvkContextFlag::GpDirtyXfbBuffers,
-                  DxvkContextFlag::DirtyDrawBuffer);
-
-      // This is necessary because we'll only do hazard
-      // tracking if the active pipeline has side effects
-      if (!m_barrierControl.test(DxvkBarrierControl::IgnoreGraphicsBarriers))
-        this->spillRenderPass(true);
-    }
+    if (newFlags.any(DxvkGraphicsPipelineFlag::HasTransformFeedback,
+                     DxvkGraphicsPipelineFlag::HasStorageDescriptors))
+      m_flags.set(DxvkContextFlag::GpRenderPassSideEffects);
 
     if (diffFlags.test(DxvkGraphicsPipelineFlag::HasSampleMaskExport))
       m_flags.set(DxvkContextFlag::GpDirtyMultisampleState);
@@ -6644,32 +6632,33 @@
     if (m_flags.test(DxvkContextFlag::GpDirtyFramebuffer))
       this->updateFramebuffer();
 
-    if (!m_flags.test(DxvkContextFlag::GpRenderPassBound))
-      this->startRenderPass();
-
-    if (m_state.gp.flags.any(
-          DxvkGraphicsPipelineFlag::HasStorageDescriptors,
-          DxvkGraphicsPipelineFlag::HasTransformFeedback)) {
-      this->commitGraphicsBarriers<Indexed, Indirect, false>();
-
+    if (m_flags.test(DxvkContextFlag::GpXfbActive)) {
       // If transform feedback is active and there is a chance that we might
       // need to rebind the pipeline, we need to end transform feedback and
       // issue a barrier. End the render pass to do that. Ignore dirty vertex
       // buffers here since non-dynamic vertex strides are such an extreme
       // edge case that it's likely irrelevant in practice.
-      if (m_flags.test(DxvkContextFlag::GpXfbActive)
-       && m_flags.any(DxvkContextFlag::GpDirtyPipelineState,
-                      DxvkContextFlag::GpDirtySpecConstants))
+      if (m_flags.any(DxvkContextFlag::GpDirtyPipelineState,
+                      DxvkContextFlag::GpDirtySpecConstants,
+                      DxvkContextFlag::GpDirtyXfbBuffers))
         this->spillRenderPass(true);
-
-      // This can only happen if the render pass was active before,
-      // so we'll never begin the render pass twice in one draw
-      if (!m_flags.test(DxvkContextFlag::GpRenderPassBound))
-        this->startRenderPass();
-
-      this->commitGraphicsBarriers<Indexed, Indirect, true>();
     }
 
+    if (m_flags.test(DxvkContextFlag::GpRenderPassSideEffects)) {
+      // If either the current pipeline has side effects or if there are pending
+      // writes from previous draws, check for hazards. This also tracks any
+      // resources written for the first time, but does not emit any barriers
+      // on its own so calling this outside a render pass is safe. This also
+      // implicitly dirties all state for which we need to track resource access.
+      if (this->checkGraphicsHazards<Indexed, Indirect>())
+        this->spillRenderPass(true);
+    }
+
+    // Start the render pass. This must happen before any render state
+    // is set up so that we can safely use secondary command buffers.
+    if (!m_flags.test(DxvkContextFlag::GpRenderPassBound))
+      this->startRenderPass();
+
     if (m_flags.test(DxvkContextFlag::GpDirtyIndexBuffer) && Indexed) {
       if (unlikely(!this->updateIndexBufferBinding()))
         return false;
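
The key structural change in this hunk: hazards are now checked before the render pass is started, checkGraphicsHazards() emits no barriers itself, and spilling the pass is the barrier. As a result, startRenderPass() runs at most once per draw, whereas the old path could begin the pass and then spill and restart it. A self-contained toy illustration of that ordering (all types are stand-ins, not DXVK classes):

#include <cstdio>

struct ToyContext {
  bool renderPassBound = false;
  bool sideEffects     = true;   // analogue of GpRenderPassSideEffects

  bool checkHazards() { return renderPassBound; }  // pretend: hazard only inside a pass
  void spillRenderPass() { renderPassBound = false; std::puts("spill -> barrier"); }
  void startRenderPass() { renderPassBound = true;  std::puts("begin pass"); }

  void commitDrawState() {
    // 1. Check hazards first; this performs no GPU work on its own.
    if (sideEffects && checkHazards())
      spillRenderPass();           // the pass boundary acts as the barrier
    // 2. Begin the pass exactly once, after all potential spills.
    if (!renderPassBound)
      startRenderPass();
  }
};

int main() {
  ToyContext ctx;
  ctx.commitDrawState();           // begin pass
  ctx.commitDrawState();           // spill -> barrier, begin pass
}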
@@ -6771,81 +6760,122 @@
       }
     }
   }
 
-  template<bool Indexed, bool Indirect, bool DoEmit>
-  void DxvkContext::commitGraphicsBarriers() {
+  template<VkPipelineBindPoint BindPoint>
+  bool DxvkContext::checkResourceHazards(
+    const DxvkBindingLayout&        layout,
+          uint32_t                  setMask) {
+    constexpr bool IsGraphics = BindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS;
+
+    // For graphics, if we are not currently inside a render pass, we'll issue
+    // a barrier anyway so checking hazards is not meaningful. Avoid some overhead
+    // and only track written resources in that case.
+    bool requiresBarrier = IsGraphics && !m_flags.test(DxvkContextFlag::GpRenderPassBound);
+
+    for (auto setIndex : bit::BitMask(setMask)) {
+      uint32_t bindingCount = layout.getBindingCount(setIndex);
+
+      for (uint32_t j = 0; j < bindingCount; j++) {
+        const DxvkBindingInfo& binding = layout.getBinding(setIndex, j);
+        const DxvkShaderResourceSlot& slot = m_rc[binding.resourceBinding];
+
+        // Skip read-only bindings if we already know that we need a barrier
+        if (requiresBarrier && !(binding.access & vk::AccessWriteMask))
+          continue;
+
+        switch (binding.descriptorType) {
+          case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: {
+            if (slot.bufferView) {
+              if (!IsGraphics || slot.bufferView->buffer()->hasGfxStores())
+                requiresBarrier |= checkBufferViewBarrier<false>(slot.bufferView, util::pipelineStages(binding.stage), binding.access);
+              else if (binding.access & vk::AccessWriteMask)
+                requiresBarrier |= !slot.bufferView->buffer()->trackGfxStores();
+            }
+          } break;
+
+          case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: {
+            if (slot.bufferView && (!IsGraphics || slot.bufferView->buffer()->hasGfxStores()))
+              requiresBarrier |= checkBufferViewBarrier<false>(slot.bufferView, util::pipelineStages(binding.stage), binding.access);
+          } break;
+
+          case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: {
+            if (slot.bufferSlice.length() && (!IsGraphics || slot.bufferSlice.buffer()->hasGfxStores()))
+              requiresBarrier |= checkBufferBarrier<false>(slot.bufferSlice, util::pipelineStages(binding.stage), binding.access);
+          } break;
+
+          case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: {
+            if (slot.bufferSlice.length()) {
+              if (!IsGraphics || slot.bufferSlice.buffer()->hasGfxStores())
+                requiresBarrier |= checkBufferBarrier<false>(slot.bufferSlice, util::pipelineStages(binding.stage), binding.access);
+              else if (binding.access & vk::AccessWriteMask)
+                requiresBarrier |= !slot.bufferSlice.buffer()->trackGfxStores();
+            }
+          } break;
+
+          case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: {
+            if (slot.imageView) {
+              if (!IsGraphics || slot.imageView->image()->hasGfxStores())
+                requiresBarrier |= checkImageViewBarrier<false>(slot.imageView, util::pipelineStages(binding.stage), binding.access);
+              else if (binding.access & vk::AccessWriteMask)
+                requiresBarrier |= !slot.imageView->image()->trackGfxStores();
+            }
+          } break;
+
+          case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+          case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: {
+            if (slot.imageView && (!IsGraphics || slot.imageView->image()->hasGfxStores()))
+              requiresBarrier |= checkImageViewBarrier<false>(slot.imageView, util::pipelineStages(binding.stage), binding.access);
+          } break;
+
+          default:
+            /* nothing to do */;
+        }
+
+        // We don't need to do any extra tracking for compute here, exit early
+        if (requiresBarrier && !IsGraphics)
+          return true;
+      }
+    }
+
+    return requiresBarrier;
+  }
+
+
+  template<bool Indexed, bool Indirect>
+  bool DxvkContext::checkGraphicsHazards() {
     if (m_barrierControl.test(DxvkBarrierControl::IgnoreGraphicsBarriers))
-      return;
+      return false;
 
-    constexpr auto storageBufferAccess = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT;
-    constexpr auto storageImageAccess = VK_ACCESS_SHADER_WRITE_BIT;
-
-    bool requiresBarrier = false;
-
-    // Check the draw buffer for indirect draw calls
-    if (m_flags.test(DxvkContextFlag::DirtyDrawBuffer) && Indirect) {
-      std::array<DxvkBufferSlice*, 2> slices = {{
-        &m_state.id.argBuffer,
-        &m_state.id.cntBuffer,
-      }};
-
-      for (uint32_t i = 0; i < slices.size() && !requiresBarrier; i++) {
-        if ((slices[i]->length())
-         && (slices[i]->buffer()->info().access & storageBufferAccess)) {
-          requiresBarrier = this->checkBufferBarrier<DoEmit>(*slices[i],
-            VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT,
-            VK_ACCESS_INDIRECT_COMMAND_READ_BIT);
-        }
-      }
-    }
-
-    // Read-only stage, so we only have to check this if
-    // the bindings have actually changed between draws
-    if (m_flags.test(DxvkContextFlag::GpDirtyIndexBuffer) && !requiresBarrier && Indexed) {
-      const auto& indexBufferSlice = m_state.vi.indexBuffer;
-
-      if ((indexBufferSlice.length())
-       && (indexBufferSlice.bufferInfo().access & storageBufferAccess)) {
-        requiresBarrier = this->checkBufferBarrier<DoEmit>(indexBufferSlice,
-          VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
-          VK_ACCESS_INDEX_READ_BIT);
-      }
-    }
-
-    // Same here, also ignore unused vertex bindings
-    if (m_flags.test(DxvkContextFlag::GpDirtyVertexBuffers)) {
-      uint32_t bindingCount = m_state.gp.state.il.bindingCount();
-
-      for (uint32_t i = 0; i < bindingCount && !requiresBarrier; i++) {
-        uint32_t binding = m_state.gp.state.ilBindings[i].binding();
-        const auto& vertexBufferSlice = m_state.vi.vertexBuffers[binding];
-
-        if ((vertexBufferSlice.length())
-         && (vertexBufferSlice.bufferInfo().access & storageBufferAccess)) {
-          requiresBarrier = this->checkBufferBarrier<DoEmit>(vertexBufferSlice,
-            VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
-            VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT);
-        }
-      }
-    }
-
-    // Transform feedback buffer writes won't overlap, so we
-    // also only need to check those when they are rebound
+    // Check shader resources on every draw to handle WAW hazards, and to make
+    // sure that writes are handled properly. If the pipeline does not have any
+    // storage descriptors, we only need to check dirty resources.
+    const auto& layout = m_state.gp.pipeline->getBindings()->layout();
+    uint32_t setMask = layout.getSetMask();
+
+    if (!m_state.gp.flags.test(DxvkGraphicsPipelineFlag::HasStorageDescriptors))
+      setMask &= m_descriptorState.getDirtyGraphicsSets();
+
+    bool requiresBarrier = checkResourceHazards<VK_PIPELINE_BIND_POINT_GRAPHICS>(layout, setMask);
+
+    // Transform feedback buffer writes won't overlap, so we also only need to
+    // check those if dirty.
     if (m_flags.test(DxvkContextFlag::GpDirtyXfbBuffers)
      && m_state.gp.flags.test(DxvkGraphicsPipelineFlag::HasTransformFeedback)) {
-      for (uint32_t i = 0; i < MaxNumXfbBuffers && !requiresBarrier; i++) {
+      for (uint32_t i = 0; i < MaxNumXfbBuffers; i++) {
         const auto& xfbBufferSlice = m_state.xfb.buffers[i];
         const auto& xfbCounterSlice = m_state.xfb.activeCounters[i];
 
         if (xfbBufferSlice.length()) {
-          requiresBarrier = this->checkBufferBarrier<DoEmit>(xfbBufferSlice,
-            VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT,
-            VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT);
+          requiresBarrier |= !xfbBufferSlice.buffer()->trackGfxStores();
+          requiresBarrier |= checkBufferBarrier<false>(xfbBufferSlice,
+            VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT,
+            VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT);
 
           if (xfbCounterSlice.length()) {
-            requiresBarrier |= this->checkBufferBarrier<DoEmit>(xfbCounterSlice,
-              VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT |
-              VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT,
-              VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT |
-              VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT);
+            requiresBarrier |= !xfbCounterSlice.buffer()->trackGfxStores();
+            requiresBarrier |= checkBufferBarrier<false>(xfbCounterSlice,
+              VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT,
+              VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT |
+              VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT);
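
The hasGfxStores()/trackGfxStores() helpers used above are introduced by this commit, but their definitions are not part of this page. Judging purely from the call sites (an assumption, not confirmed here), a std::exchange-style contract fits: trackGfxStores() marks the resource as written inside the current pass and returns the previous state, so !trackGfxStores() is true exactly once, on the first write:

#include <utility>

// Hypothetical per-resource tracking state, inferred from the call sites:
// hasGfxStores() is the cheap "was ever written in-pass" query, and
// trackGfxStores() records a write while reporting whether one was
// already tracked (std::exchange semantics).
class TrackedResource {
  bool m_hasGfxStores = false;

public:
  bool hasGfxStores() const { return m_hasGfxStores; }
  bool trackGfxStores() { return std::exchange(m_hasGfxStores, true); }
};

Under this reading, the first in-pass write still reports one hazard (forcing a single spill), after which later draws take the cheaper hasGfxStores() path with fine-grained barrier checks.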
@@ -6854,56 +6884,53 @@
           }
         }
       }
     }
 
-    // Check shader resources on every draw to handle WAW hazards
-    auto layout = m_state.gp.pipeline->getBindings()->layout();
-
-    for (uint32_t i = 0; i < DxvkDescriptorSets::SetCount && !requiresBarrier; i++) {
-      uint32_t bindingCount = layout.getBindingCount(i);
-
-      for (uint32_t j = 0; j < bindingCount && !requiresBarrier; j++) {
-        const DxvkBindingInfo& binding = layout.getBinding(i, j);
-        const DxvkShaderResourceSlot& slot = m_rc[binding.resourceBinding];
-
-        switch (binding.descriptorType) {
-          case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
-          case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
-            if ((slot.bufferSlice.length())
-             && (slot.bufferSlice.bufferInfo().access & storageBufferAccess)) {
-              requiresBarrier = this->checkBufferBarrier<DoEmit>(slot.bufferSlice,
-                util::pipelineStages(binding.stage), binding.access);
-            }
-            break;
-
-          case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
-          case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
-            if ((slot.bufferView != nullptr)
-             && (slot.bufferView->buffer()->info().access & storageBufferAccess)) {
-              requiresBarrier = this->checkBufferViewBarrier<DoEmit>(slot.bufferView,
-                util::pipelineStages(binding.stage), binding.access);
-            }
-            break;
-
-          case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
-          case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
-          case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
-            if ((slot.imageView != nullptr)
-             && (slot.imageView->image()->info().access & storageImageAccess)) {
-              requiresBarrier = this->checkImageViewBarrier<DoEmit>(slot.imageView,
-                util::pipelineStages(binding.stage), binding.access);
-            }
-            break;
-
-          default:
-            /* nothing to do */;
-        }
-      }
-    }
-
-    // External subpass dependencies serve as full memory
-    // and execution barriers, so we can use this to allow
-    // inter-stage synchronization.
-    if (requiresBarrier)
-      this->spillRenderPass(true);
+    // From now on, we only have read-only resources to check and can
+    // exit early if we find a hazard.
+    if (requiresBarrier)
+      return true;
+
+    // Check the draw buffer for indirect draw calls
+    if (m_flags.test(DxvkContextFlag::DirtyDrawBuffer) && Indirect) {
+      std::array<DxvkBufferSlice*, 2> slices = {{
+        &m_state.id.argBuffer,
+        &m_state.id.cntBuffer,
+      }};
+
+      for (uint32_t i = 0; i < slices.size(); i++) {
+        if (slices[i]->length() && slices[i]->buffer()->hasGfxStores()) {
+          if (checkBufferBarrier<false>(*slices[i], VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT, VK_ACCESS_INDIRECT_COMMAND_READ_BIT))
+            return true;
+        }
+      }
+    }
+
+    // Read-only stage, so we only have to check this if
+    // the bindings have actually changed between draws
+    if (m_flags.test(DxvkContextFlag::GpDirtyIndexBuffer) && Indexed) {
+      const auto& indexBufferSlice = m_state.vi.indexBuffer;
+
+      if (indexBufferSlice.length() && indexBufferSlice.buffer()->hasGfxStores()) {
+        if (checkBufferBarrier<false>(indexBufferSlice, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, VK_ACCESS_INDEX_READ_BIT))
+          return true;
+      }
+    }
+
+    // Same here, also ignore unused vertex bindings
+    if (m_flags.test(DxvkContextFlag::GpDirtyVertexBuffers)) {
+      uint32_t bindingCount = m_state.gp.state.il.bindingCount();
+
+      for (uint32_t i = 0; i < bindingCount; i++) {
+        uint32_t binding = m_state.gp.state.ilBindings[i].binding();
+        const auto& vertexBufferSlice = m_state.vi.vertexBuffers[binding];
+
+        if (vertexBufferSlice.length() && vertexBufferSlice.buffer()->hasGfxStores()) {
+          if (checkBufferBarrier<false>(vertexBufferSlice, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT))
+            return true;
+        }
+      }
+    }
+
+    return false;
   }
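
One readability win visible in this file: the old code threaded requiresBarrier through every loop condition (i < n && !requiresBarrier) and kept scanning read-only bindings even once the answer was known, while the new code returns early as soon as a hazard is found. The same pattern in isolation (generic C++, not DXVK code):

#include <vector>

// Illustrative only: a stand-in predicate for "this resource has a hazard".
static bool hasHazard(int r) { return r < 0; }

// Old style: result threaded through the loop condition.
bool scanOldStyle(const std::vector<int>& res) {
  bool requiresBarrier = false;
  for (size_t i = 0; i < res.size() && !requiresBarrier; i++)
    requiresBarrier = hasHazard(res[i]);
  return requiresBarrier;
}

// New style: short-circuit with an early return.
bool scanNewStyle(const std::vector<int>& res) {
  for (int r : res) {
    if (hasHazard(r))
      return true;
  }
  return false;
}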

View File

@@ -1757,13 +1757,16 @@
     template<bool Indexed, bool Indirect>
     bool commitGraphicsState();
 
+    template<VkPipelineBindPoint BindPoint>
+    bool checkResourceHazards(
+      const DxvkBindingLayout&      layout,
+            uint32_t                setMask);
+
     template<bool DoEmit>
     void commitComputeBarriers();
 
-    void commitComputePostBarriers();
-
-    template<bool Indexed, bool Indirect, bool DoEmit>
-    void commitGraphicsBarriers();
+    template<bool Indexed, bool Indirect>
+    bool checkGraphicsHazards();
 
     template<bool DoEmit>
     bool checkBufferBarrier(

View File

@@ -20,10 +20,11 @@
    * of the graphics and compute pipelines
    * has changed and/or needs to be updated.
    */
-  enum class DxvkContextFlag : uint32_t {
+  enum class DxvkContextFlag : uint64_t {
     GpRenderPassBound,          ///< Render pass is currently bound
     GpRenderPassSuspended,      ///< Render pass is currently suspended
     GpRenderPassSecondaryCmd,   ///< Render pass uses secondary command buffer
+    GpRenderPassSideEffects,    ///< Render pass has side effects
     GpXfbActive,                ///< Transform feedback is enabled
     GpDirtyFramebuffer,         ///< Framebuffer binding is out of date
     GpDirtyPipeline,            ///< Graphics pipeline binding is out of date
@@ -59,7 +60,7 @@
     Count
   };
 
-  static_assert(uint32_t(DxvkContextFlag::Count) <= 32u);
+  static_assert(uint32_t(DxvkContextFlag::Count) <= 64u);
 
   using DxvkContextFlags = Flags<DxvkContextFlag>;
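
The underlying type grows here because adding GpRenderPassSideEffects can push the flag count past 32, and the bitmask behind DxvkContextFlags presumably derives its storage from the enum's underlying type; the paired uint32_t-to-uint64_t and 32-to-64 changes suggest as much. A minimal sketch of a Flags-style wrapper along those lines (an approximation, not a copy of DXVK's util Flags):

#include <cstdint>
#include <type_traits>

// Bitmask whose raw storage width follows the enum's underlying type.
template<typename T>
class Flags {
  using IntType = std::underlying_type_t<T>;
  IntType m_raw = 0;

  static IntType bit(T b) { return IntType(1) << static_cast<IntType>(b); }

public:
  template<typename... Tx>
  void set(Tx... bits) { m_raw |= (bit(bits) | ...); }

  template<typename... Tx>
  void clr(Tx... bits) { m_raw &= ~(bit(bits) | ...); }

  bool test(T b) const { return (m_raw & bit(b)) != 0; }

  template<typename... Tx>
  bool any(Tx... bits) const { return (m_raw & (bit(bits) | ...)) != 0; }
};

// With 33+ enumerators, bit 32 only fits if the underlying type is 64-bit,
// hence the uint32_t -> uint64_t change and the relaxed static_assert.
enum class ExampleFlag : uint64_t { A = 0, B = 32 };

static_assert(sizeof(std::underlying_type_t<ExampleFlag>) == 8);

This also matches the multi-flag calls in the diff, such as m_flags.clr(DxvkContextFlag::GpRenderPassBound, DxvkContextFlag::GpRenderPassSideEffects).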

View File

@@ -590,4 +590,4 @@
   };
 }

View File

@@ -40,6 +40,11 @@ namespace dxvk::vk {
     = VK_ACCESS_HOST_READ_BIT
     | VK_ACCESS_HOST_WRITE_BIT;
 
+  constexpr static VkAccessFlags AccessGfxSideEffectMask
+    = VK_ACCESS_SHADER_WRITE_BIT
+    | VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT
+    | VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT;
+
   constexpr static VkPipelineStageFlags StageDeviceMask
     = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT
     | VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT
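
None of the hunks shown on this page actually use the new mask; presumably it classifies resources whose declared access flags permit graphics-stage writes, i.e. the resources the new side-effect tracking cares about. A hedged usage sketch (the helper below is illustrative, not DXVK API):

#include <vulkan/vulkan.h>

// All access bits that count as graphics-pipeline side effects.
constexpr VkAccessFlags AccessGfxSideEffectMask
  = VK_ACCESS_SHADER_WRITE_BIT
  | VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT
  | VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT;

// Hypothetical classification helper: true if a resource created with the
// given access flags could be written from the graphics pipeline.
bool mayHaveGfxSideEffects(VkAccessFlags resourceAccess) {
  return (resourceAccess & AccessGfxSideEffectMask) != 0;
}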