From bbc3b3fb2b5e26f402b19a1c9b134071f1d05cee Mon Sep 17 00:00:00 2001 From: Philip Rebohle Date: Thu, 10 Jan 2019 16:58:01 +0100 Subject: [PATCH] [d3d11] Use multiDrawIndirect for subsequent indirect draw calls Significantly improves performance in AC:Odyssey when CPU bound. Only has an effect when no state changes between draw calls, and when the draw parameter buffer is tightly packed. --- src/d3d11/d3d11_cmd.h | 21 +++++++++++++++ src/d3d11/d3d11_context.cpp | 53 ++++++++++++++++++++++++++++++------- src/d3d11/d3d11_context.h | 22 +++++++++++++++ 3 files changed, 87 insertions(+), 9 deletions(-) create mode 100644 src/d3d11/d3d11_cmd.h diff --git a/src/d3d11/d3d11_cmd.h b/src/d3d11/d3d11_cmd.h new file mode 100644 index 00000000..48dd2587 --- /dev/null +++ b/src/d3d11/d3d11_cmd.h @@ -0,0 +1,21 @@ +#pragma once + +#include "d3d11_include.h" + +namespace dxvk { + + enum class D3D11CmdType { /* Kind of command a D3D11CmdData block was recorded for */ + DrawIndirect, + DrawIndirectIndexed, + }; + + struct D3D11CmdData { /* Common header of a command data block; the context keeps a pointer to the most recent one in m_cmdData */ + D3D11CmdType type; + }; + + struct D3D11CmdDrawIndirectData : public D3D11CmdData { /* Arguments of a (possibly batched) indirect draw */ + uint32_t offset; /* byte offset of the first draw's arguments, taken from AlignedByteOffsetForArgs */ + uint32_t count; /* number of draws merged into this command; starts at 1 and grows by one per batched call */ + }; + +} \ No newline at end of file diff --git a/src/d3d11/d3d11_context.cpp b/src/d3d11/d3d11_context.cpp index dc6ced40..20a33124 100644 --- a/src/d3d11/d3d11_context.cpp +++ b/src/d3d11/d3d11_context.cpp @@ -18,7 +18,8 @@ namespace dxvk { m_multithread(this, false), m_device (Device), m_csFlags (CsFlags), - m_csChunk (AllocCsChunk()) { + m_csChunk (AllocCsChunk()), + m_cmdData (nullptr) { // Create default state objects. We won't ever return them // to the application, but we'll use them to apply state.
Com<ID3D11BlendState> defaultBlendState; @@ -1401,10 +1402,27 @@ namespace dxvk { SetDrawBuffer(pBufferForArgs); - EmitCs([cOffset = AlignedByteOffsetForArgs] - (DxvkContext* ctx) { - ctx->drawIndexedIndirect(cOffset, 1, 0); - }); + // Batch consecutive indexed indirect draws into one single + // multiDrawIndirect call; only extend a DrawIndirectIndexed command + constexpr VkDeviceSize stride = sizeof(VkDrawIndexedIndirectCommand); + auto cmdData = static_cast<D3D11CmdDrawIndirectData*>(m_cmdData); + + bool useMultiDraw = cmdData && cmdData->type == D3D11CmdType::DrawIndirectIndexed + && cmdData->offset + cmdData->count * stride == AlignedByteOffsetForArgs + && m_device->features().core.features.multiDrawIndirect; + + if (useMultiDraw) { + cmdData->count += 1; + } else { + cmdData = EmitCsCmd<D3D11CmdDrawIndirectData>( + [] (DxvkContext* ctx, const D3D11CmdDrawIndirectData* data) { + ctx->drawIndexedIndirect(data->offset, data->count, stride); + }); + + cmdData->type = D3D11CmdType::DrawIndirectIndexed; + cmdData->offset = AlignedByteOffsetForArgs; + cmdData->count = 1; + } } @@ -1414,11 +1432,28 @@ namespace dxvk { D3D10DeviceLock lock = LockContext(); SetDrawBuffer(pBufferForArgs); + + // Batch consecutive non-indexed indirect draws into one single + // multiDrawIndirect call; only extend a DrawIndirect command + constexpr VkDeviceSize stride = sizeof(VkDrawIndirectCommand); + auto cmdData = static_cast<D3D11CmdDrawIndirectData*>(m_cmdData); + + bool useMultiDraw = cmdData && cmdData->type == D3D11CmdType::DrawIndirect /* must equal the type recorded below, otherwise this draw could be appended to an indexed command */ + && cmdData->offset + cmdData->count * stride == AlignedByteOffsetForArgs + && m_device->features().core.features.multiDrawIndirect; - EmitCs([cOffset = AlignedByteOffsetForArgs] - (DxvkContext* ctx) { - ctx->drawIndirect(cOffset, 1, 0); - }); + if (useMultiDraw) { + cmdData->count += 1; + } else { + cmdData = EmitCsCmd<D3D11CmdDrawIndirectData>( + [] (DxvkContext* ctx, const D3D11CmdDrawIndirectData* data) { + ctx->drawIndirect(data->offset, data->count, stride); + }); + + cmdData->type = D3D11CmdType::DrawIndirect; + cmdData->offset = AlignedByteOffsetForArgs; + cmdData->count = 1; + } }
diff --git a/src/d3d11/d3d11_context.h b/src/d3d11/d3d11_context.h index 43f1badf..fd49ae8e 100644 --- a/src/d3d11/d3d11_context.h +++ b/src/d3d11/d3d11_context.h @@ -7,6 +7,7 @@ #include "../d3d10/d3d10_multithread.h" #include "d3d11_annotation.h" +#include "d3d11_cmd.h" #include "d3d11_context_state.h" #include "d3d11_device_child.h" #include "d3d11_texture.h" @@ -662,6 +663,7 @@ namespace dxvk { Com<ID3D11RasterizerState> m_defaultRasterizerState; D3D11ContextState m_state; + D3D11CmdData* m_cmdData; /* data block of the most recent EmitCsCmd command, or nullptr if it can no longer be extended */ void ApplyInputLayout(); @@ -814,6 +816,8 @@ namespace dxvk { template<typename Cmd> void EmitCs(Cmd&& command) { + m_cmdData = nullptr; /* any ordinary command ends the current batch */ + if (!m_csChunk->push(command)) { EmitCsChunk(std::move(m_csChunk)); @@ -821,11 +825,29 @@ namespace dxvk { m_csChunk->push(command); } } + + template<typename M, typename Cmd, typename... Args> + M* EmitCsCmd(Cmd&& command, Args&&... args) { /* pushes a command with a data block of type M and returns that block; retries once on a fresh chunk if the push fails */ + M* data = m_csChunk->pushCmd<M, Cmd, Args...>( + command, std::forward<Args>(args)...); + + if (!data) { + EmitCsChunk(std::move(m_csChunk)); + + m_csChunk = AllocCsChunk(); + data = m_csChunk->pushCmd<M, Cmd, Args...>( + command, std::forward<Args>(args)...); + } + + m_cmdData = data; /* remembered so a following compatible call can extend this command */ + return data; + } void FlushCsChunk() { if (m_csChunk->commandCount() != 0) { EmitCsChunk(std::move(m_csChunk)); m_csChunk = AllocCsChunk(); + m_cmdData = nullptr; /* the previous chunk has been handed off, so its command must not be modified anymore */ } }