diff --git a/src/d3d11/d3d11_context.cpp b/src/d3d11/d3d11_context.cpp index 97af10d3a..16529a217 100644 --- a/src/d3d11/d3d11_context.cpp +++ b/src/d3d11/d3d11_context.cpp @@ -1121,7 +1121,7 @@ namespace dxvk { } else { cmdData = EmitCsCmd( [] (DxvkContext* ctx, const D3D11CmdDrawIndirectData* data) { - ctx->drawIndexedIndirect(data->offset, data->count, data->stride); + ctx->drawIndexedIndirect(data->offset, data->count, data->stride, true); }); cmdData->type = D3D11CmdType::DrawIndirectIndexed; @@ -1156,7 +1156,7 @@ namespace dxvk { } else { cmdData = EmitCsCmd( [] (DxvkContext* ctx, const D3D11CmdDrawIndirectData* data) { - ctx->drawIndirect(data->offset, data->count, data->stride); + ctx->drawIndirect(data->offset, data->count, data->stride, true); }); cmdData->type = D3D11CmdType::DrawIndirect; diff --git a/src/d3d11/d3d11_context_ext.cpp b/src/d3d11/d3d11_context_ext.cpp index 051610167..5254f480f 100644 --- a/src/d3d11/d3d11_context_ext.cpp +++ b/src/d3d11/d3d11_context_ext.cpp @@ -53,7 +53,7 @@ namespace dxvk { cOffset = ByteOffsetForArgs, cStride = ByteStrideForArgs ] (DxvkContext* ctx) { - ctx->drawIndirect(cOffset, cCount, cStride); + ctx->drawIndirect(cOffset, cCount, cStride, false); }); } @@ -72,7 +72,7 @@ namespace dxvk { cOffset = ByteOffsetForArgs, cStride = ByteStrideForArgs ] (DxvkContext* ctx) { - ctx->drawIndexedIndirect(cOffset, cCount, cStride); + ctx->drawIndexedIndirect(cOffset, cCount, cStride, false); }); } diff --git a/src/dxvk/dxvk_context.cpp b/src/dxvk/dxvk_context.cpp index f35ea2fbf..3ccf3835c 100644 --- a/src/dxvk/dxvk_context.cpp +++ b/src/dxvk/dxvk_context.cpp @@ -937,18 +937,9 @@ namespace dxvk { void DxvkContext::drawIndirect( VkDeviceSize offset, uint32_t count, - uint32_t stride) { - if (this->commitGraphicsState()) { - auto descriptor = m_state.id.argBuffer.getDescriptor(); - - m_cmd->cmdDrawIndirect( - descriptor.buffer.buffer, - descriptor.buffer.offset + offset, - count, stride); - - if 
(unlikely(m_state.id.argBuffer.buffer()->hasGfxStores())) - accessDrawBuffer(offset, count, stride, sizeof(VkDrawIndirectCommand)); - } + uint32_t stride, + bool unroll) { + drawIndirectGeneric<false>(offset, count, stride, unroll); } @@ -995,18 +986,9 @@ namespace dxvk { void DxvkContext::drawIndexedIndirect( VkDeviceSize offset, uint32_t count, - uint32_t stride) { - if (this->commitGraphicsState()) { - auto descriptor = m_state.id.argBuffer.getDescriptor(); - - m_cmd->cmdDrawIndexedIndirect( - descriptor.buffer.buffer, - descriptor.buffer.offset + offset, - count, stride); - - if (unlikely(m_state.id.argBuffer.buffer()->hasGfxStores())) - accessDrawBuffer(offset, count, stride, sizeof(VkDrawIndexedIndirectCommand)); - } + uint32_t stride, + bool unroll) { + drawIndirectGeneric<true>(offset, count, stride, unroll); } @@ -1739,6 +1721,52 @@ namespace dxvk { } + template<bool Indexed> + void DxvkContext::drawIndirectGeneric( + VkDeviceSize offset, + uint32_t count, + uint32_t stride, + bool unroll) { + if (this->commitGraphicsState()) { + auto descriptor = m_state.id.argBuffer.getDescriptor(); + + if (unroll) { + // Need to do this check after initially setting up the pipeline + unroll = m_state.gp.flags.test(DxvkGraphicsPipelineFlag::UnrollMergedDraws) + && !m_barrierControl.test(DxvkBarrierControl::GraphicsAllowReadWriteOverlap); + } + + // If draws are merged but the pipeline has order-dependent stores, submit + // one draw at a time as well as barriers in between. Otherwise, keep the + // draws merged. + uint32_t step = unroll ?
1u : count; + + for (uint32_t i = 0; i < count; i += step) { + if (unlikely(i)) { + // Insert barrier after the first iteration + this->commitGraphicsState(); + } + + if (Indexed) { + m_cmd->cmdDrawIndexedIndirect(descriptor.buffer.buffer, + descriptor.buffer.offset + offset, step, stride); + } else { + m_cmd->cmdDrawIndirect(descriptor.buffer.buffer, + descriptor.buffer.offset + offset, step, stride); + } + + if (unlikely(m_state.id.argBuffer.buffer()->hasGfxStores())) { + accessDrawBuffer(offset, step, stride, Indexed + ? sizeof(VkDrawIndexedIndirectCommand) + : sizeof(VkDrawIndirectCommand)); + } + + offset += step * stride; + } + } + } + + void DxvkContext::resolveImage( const Rc& dstImage, const Rc& srcImage, diff --git a/src/dxvk/dxvk_context.h b/src/dxvk/dxvk_context.h index 4b836d763..b27af8405 100644 --- a/src/dxvk/dxvk_context.h +++ b/src/dxvk/dxvk_context.h @@ -763,11 +763,14 @@ namespace dxvk { * \param [in] offset Draw buffer offset * \param [in] count Number of draws * \param [in] stride Stride between dispatch calls + * \param [in] unroll Whether to unroll multiple draws if + * there are any potential data dependencies between them. */ void drawIndirect( VkDeviceSize offset, uint32_t count, - uint32_t stride); + uint32_t stride, + bool unroll); /** * \brief Indirect draw call @@ -809,12 +812,15 @@ namespace dxvk { * \param [in] offset Draw buffer offset * \param [in] count Number of draws * \param [in] stride Stride between dispatch calls + * \param [in] unroll Whether to unroll multiple draws if + * there are any potential data dependencies between them. 
*/ void drawIndexedIndirect( VkDeviceSize offset, uint32_t count, - uint32_t stride); - + uint32_t stride, + bool unroll); + /** * \brief Indirect indexed draw call * @@ -1589,6 +1595,13 @@ namespace dxvk { const Rc& buffer, VkDeviceSize offset); + template<bool Indexed> + void drawIndirectGeneric( + VkDeviceSize offset, + uint32_t count, + uint32_t stride, + bool unroll); + void resolveImageHw( const Rc& dstImage, const Rc& srcImage,