1
0
mirror of https://github.com/doitsujin/dxvk.git synced 2024-12-11 19:24:11 +01:00

[d3d11] Use multiDrawIndirect for subsequent indirect draw calls

Significantly improves performance in AC:Odyssey when CPU bound.
Only has an effect when no state changes between draw calls, and
when the draw parameter buffer is tightly packed.
This commit is contained in:
Philip Rebohle 2019-01-10 16:58:01 +01:00
parent ad6233f74c
commit bbc3b3fb2b
No known key found for this signature in database
GPG Key ID: C8CC613427A31C99
3 changed files with 87 additions and 9 deletions

21
src/d3d11/d3d11_cmd.h Normal file
View File

@ -0,0 +1,21 @@
#pragma once
#include "d3d11_include.h"
namespace dxvk {

  // Tag stored in every command data block so that code holding
  // only a base pointer can tell which command the block is for.
  enum class D3D11CmdType {
    DrawIndirect        = 0,
    DrawIndirectIndexed = 1,
  };

  // Common header shared by all command data blocks.
  struct D3D11CmdData {
    D3D11CmdType type;
  };

  // Parameters of an indirect draw command: where the draw
  // arguments start and how many draws are to be issued.
  struct D3D11CmdDrawIndirectData : D3D11CmdData {
    uint32_t offset;
    uint32_t count;
  };

}

View File

@ -18,7 +18,8 @@ namespace dxvk {
m_multithread(this, false),
m_device (Device),
m_csFlags (CsFlags),
m_csChunk (AllocCsChunk()) {
m_csChunk (AllocCsChunk()),
m_cmdData (nullptr) {
// Create default state objects. We won't ever return them
// to the application, but we'll use them to apply state.
Com<ID3D11BlendState> defaultBlendState;
@ -1401,10 +1402,27 @@ namespace dxvk {
SetDrawBuffer(pBufferForArgs);
EmitCs([cOffset = AlignedByteOffsetForArgs]
(DxvkContext* ctx) {
ctx->drawIndexedIndirect(cOffset, 1, 0);
});
// If possible, batch up multiple indirect draw calls of
// the same type into one single multiDrawIndirect call
constexpr VkDeviceSize stride = sizeof(VkDrawIndexedIndirectCommand);
// m_cmdData may be null; the pointer is only dereferenced
// behind the null check in the condition below.
auto cmdData = static_cast<D3D11CmdDrawIndirectData*>(m_cmdData);
// Merging requires a pending command of the indexed type whose
// argument range ends exactly where the new arguments begin
// (offset + count * stride), plus multiDrawIndirect support.
bool useMultiDraw = cmdData && cmdData->type == D3D11CmdType::DrawIndirectIndexed
&& cmdData->offset + cmdData->count * stride == AlignedByteOffsetForArgs
&& m_device->features().core.features.multiDrawIndirect;
if (useMultiDraw) {
// Extend the pending command by one draw. The recorded lambda
// reads the count from the data block rather than capturing it.
cmdData->count += 1;
} else {
// Record a new command and fill in its data block
// so that a following draw call may extend it.
cmdData = EmitCsCmd<D3D11CmdDrawIndirectData>(
[] (DxvkContext* ctx, const D3D11CmdDrawIndirectData* data) {
ctx->drawIndexedIndirect(data->offset, data->count, stride);
});
cmdData->type = D3D11CmdType::DrawIndirectIndexed;
cmdData->offset = AlignedByteOffsetForArgs;
cmdData->count = 1;
}
}
@ -1414,11 +1432,28 @@ namespace dxvk {
D3D10DeviceLock lock = LockContext();
SetDrawBuffer(pBufferForArgs);
// If possible, batch up multiple indirect draw calls of
// the same type into one single multiDrawIndirect call.
// Only a pending non-indexed command may be extended here:
// indexed commands use a different argument layout and
// stride, and are executed through a different draw call.
constexpr VkDeviceSize stride = sizeof(VkDrawIndirectCommand);
// m_cmdData may be null; the pointer is only dereferenced
// behind the null check in the condition below.
auto cmdData = static_cast<D3D11CmdDrawIndirectData*>(m_cmdData);
// The new arguments must start right where the pending
// command's argument range ends (offset + count * stride),
// and the device must support multiDrawIndirect.
bool useMultiDraw = cmdData && cmdData->type == D3D11CmdType::DrawIndirect
&& cmdData->offset + cmdData->count * stride == AlignedByteOffsetForArgs
&& m_device->features().core.features.multiDrawIndirect;
EmitCs([cOffset = AlignedByteOffsetForArgs]
(DxvkContext* ctx) {
ctx->drawIndirect(cOffset, 1, 0);
});
if (useMultiDraw) {
// Extend the pending command by one draw. The recorded lambda
// reads the count from the data block rather than capturing it.
cmdData->count += 1;
} else {
// Record a new non-indexed command and fill in its data
// block so that a following draw call may extend it.
cmdData = EmitCsCmd<D3D11CmdDrawIndirectData>(
[] (DxvkContext* ctx, const D3D11CmdDrawIndirectData* data) {
ctx->drawIndirect(data->offset, data->count, stride);
});
cmdData->type = D3D11CmdType::DrawIndirect;
cmdData->offset = AlignedByteOffsetForArgs;
cmdData->count = 1;
}
}

View File

@ -7,6 +7,7 @@
#include "../d3d10/d3d10_multithread.h"
#include "d3d11_annotation.h"
#include "d3d11_cmd.h"
#include "d3d11_context_state.h"
#include "d3d11_device_child.h"
#include "d3d11_texture.h"
@ -662,6 +663,7 @@ namespace dxvk {
Com<D3D11RasterizerState> m_defaultRasterizerState;
D3D11ContextState m_state;
D3D11CmdData* m_cmdData;
void ApplyInputLayout();
@ -814,6 +816,8 @@ namespace dxvk {
template<typename Cmd>
void EmitCs(Cmd&& command) {
m_cmdData = nullptr;
if (!m_csChunk->push(command)) {
EmitCsChunk(std::move(m_csChunk));
@ -821,11 +825,29 @@ namespace dxvk {
m_csChunk->push(command);
}
}
// Pushes a command with an attached data block of type M to the
// current CS chunk. If the chunk rejects the command, the chunk
// is emitted, a new one is allocated, and the push is repeated.
// The returned data block is also stored in m_cmdData.
template<typename M, typename Cmd, typename... Args>
M* EmitCsCmd(Cmd&& command, Args&&... args) {
  M* cmdBlock = m_csChunk->pushCmd<M, Cmd, Args...>(
    command, std::forward<Args>(args)...);

  if (cmdBlock == nullptr) {
    // Current chunk did not take the command: hand it
    // off and retry once on a freshly allocated chunk
    EmitCsChunk(std::move(m_csChunk));
    m_csChunk = AllocCsChunk();

    cmdBlock = m_csChunk->pushCmd<M, Cmd, Args...>(
      command, std::forward<Args>(args)...);
  }

  m_cmdData = cmdBlock;
  return cmdBlock;
}
// Emits the current CS chunk if it holds any commands and
// replaces it with a newly allocated one. The data pointer
// of the last recorded command is reset in that case.
void FlushCsChunk() {
  // Nothing recorded, nothing to emit
  if (m_csChunk->commandCount() == 0)
    return;

  EmitCsChunk(std::move(m_csChunk));
  m_csChunk = AllocCsChunk();
  m_cmdData = nullptr;
}