1
0
mirror of https://github.com/doitsujin/dxvk.git synced 2025-02-27 04:54:15 +01:00

[dxvk] Implement draw batching via VK_EXT_multi_draw

This commit is contained in:
Philip Rebohle 2025-02-21 15:52:32 +01:00 committed by Philip Rebohle
parent 7e503fa053
commit 016f05a770
7 changed files with 186 additions and 80 deletions

View File

@ -3578,12 +3578,7 @@ namespace dxvk {
EmitCsCmd<VkDrawIndirectCommand>(D3D11CmdType::Draw, 1u,
[] (DxvkContext* ctx, const VkDrawIndirectCommand* draws, size_t count) {
for (size_t i = 0; i < count; i++) {
ctx->draw(draws[i].vertexCount,
draws[i].instanceCount,
draws[i].firstVertex,
draws[i].firstInstance);
}
ctx->draw(count, draws);
});
new (m_csData->first()) VkDrawIndirectCommand(draw);
@ -3608,13 +3603,7 @@ namespace dxvk {
EmitCsCmd<VkDrawIndexedIndirectCommand>(D3D11CmdType::DrawIndexed, 1u,
[] (DxvkContext* ctx, const VkDrawIndexedIndirectCommand* draws, size_t count) {
for (size_t i = 0; i < count; i++) {
ctx->drawIndexed(draws[i].indexCount,
draws[i].instanceCount,
draws[i].firstIndex,
draws[i].vertexOffset,
draws[i].firstInstance);
}
ctx->drawIndexed(count, draws);
});
new (m_csData->first()) VkDrawIndexedIndirectCommand(draw);

View File

@ -1312,7 +1312,11 @@ namespace dxvk {
for (uint32_t i = 0; i < cViews.size(); i++)
ctx->bindResourceImageView(VK_SHADER_STAGE_FRAGMENT_BIT, 1 + i, Rc<DxvkImageView>(cViews[i]));
ctx->draw(3, 1, 0, 0);
VkDrawIndirectCommand draw = { };
draw.vertexCount = 3u;
draw.instanceCount = 1u;
ctx->draw(1, &draw);
for (uint32_t i = 0; i < cViews.size(); i++)
ctx->bindResourceImageView(VK_SHADER_STAGE_FRAGMENT_BIT, 1 + i, nullptr);

View File

@ -2890,9 +2890,12 @@ namespace dxvk {
// Tests on Windows show that D3D9 does not do non-indexed instanced draws.
ctx->draw(
vertexCount, 1,
cStartVertex, 0);
VkDrawIndirectCommand draw = { };
draw.vertexCount = vertexCount;
draw.instanceCount = 1u;
draw.firstVertex = cStartVertex;
ctx->draw(1u, &draw);
});
return D3D_OK;
@ -2939,10 +2942,13 @@ namespace dxvk {
ApplyPrimitiveType(ctx, cPrimType);
ctx->drawIndexed(
drawInfo.vertexCount, drawInfo.instanceCount,
cStartIndex,
cBaseVertexIndex, 0);
VkDrawIndexedIndirectCommand draw = { };
draw.indexCount = drawInfo.vertexCount;
draw.instanceCount = drawInfo.instanceCount;
draw.firstIndex = cStartIndex;
draw.vertexOffset = cBaseVertexIndex;
ctx->drawIndexed(1u, &draw);
});
return D3D_OK;
@ -2981,11 +2987,12 @@ namespace dxvk {
ApplyPrimitiveType(ctx, cPrimType);
// Tests on Windows show that D3D9 does not do non-indexed instanced draws.
VkDrawIndirectCommand draw = { };
draw.vertexCount = cVertexCount;
draw.instanceCount = 1u;
ctx->bindVertexBuffer(0, std::move(cBufferSlice), cStride);
ctx->draw(
cVertexCount, 1,
0, 0);
ctx->draw(1u, &draw);
ctx->bindVertexBuffer(0, DxvkBufferSlice(), 0);
});
@ -3045,12 +3052,13 @@ namespace dxvk {
ApplyPrimitiveType(ctx, cPrimType);
VkDrawIndexedIndirectCommand draw = { };
draw.indexCount = drawInfo.vertexCount;
draw.instanceCount = drawInfo.instanceCount;
ctx->bindVertexBuffer(0, cBufferSlice.subSlice(0, cVertexSize), cStride);
ctx->bindIndexBuffer(cBufferSlice.subSlice(cVertexSize, cBufferSlice.length() - cVertexSize), cIndexType);
ctx->drawIndexed(
drawInfo.vertexCount, drawInfo.instanceCount,
0,
0, 0);
ctx->drawIndexed(1u, &draw);
ctx->bindVertexBuffer(0, DxvkBufferSlice(), 0);
ctx->bindIndexBuffer(DxvkBufferSlice(), VK_INDEX_TYPE_UINT32);
});
@ -3162,11 +3170,14 @@ namespace dxvk {
// to avoid val errors / UB.
ctx->bindShader<VK_SHADER_STAGE_FRAGMENT_BIT>(nullptr);
VkDrawIndirectCommand draw = { };
draw.vertexCount = drawInfo.vertexCount;
draw.instanceCount = drawInfo.instanceCount;
draw.firstVertex = cStartIndex;
ctx->bindShader<VK_SHADER_STAGE_GEOMETRY_BIT>(std::move(shader));
ctx->bindUniformBuffer(VK_SHADER_STAGE_GEOMETRY_BIT, getSWVPBufferSlot(), std::move(cBufferSlice));
ctx->draw(
drawInfo.vertexCount, drawInfo.instanceCount,
cStartIndex, 0);
ctx->draw(1u, &draw);
ctx->bindUniformBuffer(VK_SHADER_STAGE_GEOMETRY_BIT, getSWVPBufferSlot(), DxvkBufferSlice());
ctx->bindShader<VK_SHADER_STAGE_GEOMETRY_BIT>(nullptr);
});

View File

@ -49,6 +49,11 @@ namespace dxvk {
if (m_device->features().khrMaintenance5.maintenance5)
m_features.set(DxvkContextFeature::IndexBufferRobustness);
// Check whether we can batch direct draws
if (m_device->features().extMultiDraw.multiDraw
&& m_device->properties().extMultiDraw.maxMultiDrawCount >= DirectMultiDrawBatchSize)
m_features.set(DxvkContextFeature::DirectMultiDraw);
// Add a fast path to query debug utils support
if (m_device->isDebugEnabled())
m_features.set(DxvkContextFeature::DebugUtils);
@ -922,15 +927,9 @@ namespace dxvk {
void DxvkContext::draw(
uint32_t vertexCount,
uint32_t instanceCount,
uint32_t firstVertex,
uint32_t firstInstance) {
if (this->commitGraphicsState<false, false>()) {
m_cmd->cmdDraw(
vertexCount, instanceCount,
firstVertex, firstInstance);
}
uint32_t count,
const VkDrawIndirectCommand* draws) {
drawGeneric<false>(count, draws);
}
@ -953,17 +952,9 @@ namespace dxvk {
void DxvkContext::drawIndexed(
uint32_t indexCount,
uint32_t instanceCount,
uint32_t firstIndex,
int32_t vertexOffset,
uint32_t firstInstance) {
if (this->commitGraphicsState<true, false>()) {
m_cmd->cmdDrawIndexed(
indexCount, instanceCount,
firstIndex, vertexOffset,
firstInstance);
}
uint32_t count,
const VkDrawIndexedIndirectCommand* draws) {
drawGeneric<true>(count, draws);
}
@ -1689,6 +1680,116 @@ namespace dxvk {
}
/**
 * \brief Records one or more direct draws
 *
 * Commits graphics state and, if that succeeds, records \c count draws
 * using one of three strategies:
 *  - A single draw is recorded directly.
 *  - If \c needsDrawBarriers returns \c true, draws are recorded one at
 *    a time and graphics state is committed again before each draw after
 *    the first.
 *  - Otherwise, consecutive draws that share the same instance count and
 *    first instance are gathered into batches of up to
 *    \c DirectMultiDrawBatchSize entries. Each batch is recorded as a
 *    multi-draw if the \c DirectMultiDraw feature is set, or as a plain
 *    sequence of draws if it is not.
 *
 * Nothing is recorded if the initial state commit fails.
 *
 * \tparam Indexed Whether to record indexed draws
 * \tparam T Draw parameter type. The indexed path reads \c indexCount,
 *    \c firstIndex and \c vertexOffset, the non-indexed path reads
 *    \c vertexCount and \c firstVertex; both read \c instanceCount
 *    and \c firstInstance.
 * \param [in] count Number of draws
 * \param [in] draws Draw parameters
 */
template<bool Indexed, typename T>
void DxvkContext::drawGeneric(
        uint32_t                  count,
  const T*                        draws) {
  if (this->commitGraphicsState<Indexed, false>()) {
    if (count == 1u) {
      // Most common case, just emit a single draw
      if constexpr (Indexed) {
        m_cmd->cmdDrawIndexed(draws->indexCount, draws->instanceCount,
          draws->firstIndex, draws->vertexOffset, draws->firstInstance);
      } else {
        m_cmd->cmdDraw(draws->vertexCount, draws->instanceCount,
          draws->firstVertex, draws->firstInstance);
      }
    } else if (unlikely(needsDrawBarriers())) {
      // If the current pipeline has storage resource hazards,
      // unroll draws and insert a barrier after each one.
      for (uint32_t i = 0; i < count; i++) {
        // State for the first draw was already committed above.
        // NOTE(review): the result of the repeated commit is ignored,
        // presumably because the initial commit already succeeded
        // with the same state; confirm.
        if (i)
          this->commitGraphicsState<Indexed, false>();

        if constexpr (Indexed) {
          m_cmd->cmdDrawIndexed(draws[i].indexCount, draws[i].instanceCount,
            draws[i].firstIndex, draws[i].vertexOffset, draws[i].firstInstance);
        } else {
          m_cmd->cmdDraw(draws[i].vertexCount, draws[i].instanceCount,
            draws[i].firstVertex, draws[i].firstInstance);
        }
      }
    } else {
      using MultiDrawInfo = std::conditional_t<Indexed,
        VkMultiDrawIndexedInfoEXT, VkMultiDrawInfoEXT>;

      // Intentionally don't initialize this; we'll probably not use
      // the full batch size anyway, so doing so would be wasteful.
      std::array<MultiDrawInfo, DirectMultiDrawBatchSize> batch;

      // Instance parameters shared by every draw in the current batch
      uint32_t instanceCount = 0u;
      uint32_t instanceIndex = 0u;
      uint32_t batchSize = 0u;

      for (uint32_t i = 0; i < count; i++) {
        // The first draw of a batch defines its instance parameters
        if (!batchSize) {
          instanceCount = draws[i].instanceCount;
          instanceIndex = draws[i].firstInstance;
        }

        auto& drawInfo = batch[batchSize++];

        if constexpr (Indexed) {
          drawInfo.firstIndex = draws[i].firstIndex;
          drawInfo.indexCount = draws[i].indexCount;
          drawInfo.vertexOffset = draws[i].vertexOffset;
        } else {
          drawInfo.firstVertex = draws[i].firstVertex;
          drawInfo.vertexCount = draws[i].vertexCount;
        }

        // Flush on the last draw, when the batch is full, or when
        // the next draw uses different instance parameters.
        bool emitDraw = i + 1u == count || batchSize == DirectMultiDrawBatchSize;

        if (!emitDraw) {
          const auto& next = draws[i + 1u];

          emitDraw = instanceCount != next.instanceCount
                  || instanceIndex != next.firstInstance;
        }

        if (emitDraw) {
          if (m_features.test(DxvkContextFeature::DirectMultiDraw)) {
            if constexpr (Indexed) {
              m_cmd->cmdDrawMultiIndexed(batchSize, batch.data(),
                instanceCount, instanceIndex);
            } else {
              m_cmd->cmdDrawMulti(batchSize, batch.data(),
                instanceCount, instanceIndex);
            }
          } else {
            // This path only really exists for consistency reasons; all drivers
            // we care about support MultiDraw natively, but debug tools may not.
            if (unlikely(m_features.test(DxvkContextFeature::DebugUtils))) {
              const char* procName = Indexed ? "vkCmdDrawMultiIndexedEXT" : "vkCmdDrawMultiEXT";
              m_cmd->cmdBeginDebugUtilsLabel(DxvkCmdBuffer::ExecBuffer,
                vk::makeLabel(0u, str::format(procName, "(", batchSize, ")").c_str()));
            }

            // Use a distinct index so that the outer
            // draw loop counter is not shadowed here
            for (uint32_t j = 0; j < batchSize; j++) {
              const auto& entry = batch[j];

              if constexpr (Indexed) {
                m_cmd->cmdDrawIndexed(entry.indexCount, instanceCount,
                  entry.firstIndex, entry.vertexOffset, instanceIndex);
              } else {
                m_cmd->cmdDraw(entry.vertexCount, instanceCount,
                  entry.firstVertex, instanceIndex);
              }
            }

            if (unlikely(m_features.test(DxvkContextFeature::DebugUtils)))
              m_cmd->cmdEndDebugUtilsLabel(DxvkCmdBuffer::ExecBuffer);
          }

          batchSize = 0u;
        }
      }
    }
  }
}
template<bool Indexed>
void DxvkContext::drawIndirectGeneric(
VkDeviceSize offset,
@ -1698,11 +1799,8 @@ namespace dxvk {
if (this->commitGraphicsState<Indexed, true>()) {
auto descriptor = m_state.id.argBuffer.getDescriptor();
if (unroll) {
// Need to do this check after initially setting up the pipeline
unroll = m_state.gp.flags.test(DxvkGraphicsPipelineFlag::UnrollMergedDraws)
&& !m_barrierControl.test(DxvkBarrierControl::GraphicsAllowReadWriteOverlap);
}
if (unroll)
unroll = needsDrawBarriers();
// If draws are merged but the pipeline has order-dependent stores, submit
// one draw at a time as well as barriers in between. Otherwise, keep the
@ -2854,6 +2952,12 @@ namespace dxvk {
}
/**
 * \brief Checks whether merged draws must be unrolled
 *
 * \returns \c true if the current graphics pipeline state has the
 *    \c UnrollMergedDraws flag set and barrier control does not
 *    have \c GraphicsAllowReadWriteOverlap set.
 */
bool DxvkContext::needsDrawBarriers() {
  // Read-write overlap being explicitly allowed overrides the pipeline flag
  if (m_barrierControl.test(DxvkBarrierControl::GraphicsAllowReadWriteOverlap))
    return false;

  return m_state.gp.flags.test(DxvkGraphicsPipelineFlag::UnrollMergedDraws);
}
void DxvkContext::beginRenderPassDebugRegion() {
bool hasColorAttachments = false;
bool hasDepthAttachment = m_state.om.renderTargets.depth.view != nullptr;

View File

@ -32,6 +32,8 @@ namespace dxvk {
class DxvkContext : public RcObject {
constexpr static VkDeviceSize MaxDiscardSizeInRp = 256u << 10u;
constexpr static VkDeviceSize MaxDiscardSize = 16u << 10u;
constexpr static uint32_t DirectMultiDrawBatchSize = 256u;
public:
DxvkContext(const Rc<DxvkDevice>& device);
@ -744,16 +746,12 @@ namespace dxvk {
/**
* \brief Draws primitive without using an index buffer
*
* \param [in] vertexCount Number of vertices to draw
* \param [in] instanceCount Number of instances to render
* \param [in] firstVertex First vertex in vertex buffer
* \param [in] firstInstance First instance ID
* \param [in] count Number of draws
* \param [in] draws Draw parameters
*/
void draw(
uint32_t vertexCount,
uint32_t instanceCount,
uint32_t firstVertex,
uint32_t firstInstance);
uint32_t count,
const VkDrawIndirectCommand* draws);
/**
* \brief Indirect draw call
@ -791,18 +789,12 @@ namespace dxvk {
/**
* \brief Draws primitives using an index buffer
*
* \param [in] indexCount Number of indices to draw
* \param [in] instanceCount Number of instances to render
* \param [in] firstIndex First index within the index buffer
* \param [in] vertexOffset Vertex ID that corresponds to index 0
* \param [in] firstInstance First instance ID
* \param [in] count Number of draws
* \param [in] draws Draw parameters
*/
void drawIndexed(
uint32_t indexCount,
uint32_t instanceCount,
uint32_t firstIndex,
int32_t vertexOffset,
uint32_t firstInstance);
uint32_t count,
const VkDrawIndexedIndirectCommand* draws);
/**
* \brief Indirect indexed draw call
@ -1595,6 +1587,11 @@ namespace dxvk {
const Rc<DxvkBuffer>& buffer,
VkDeviceSize offset);
template<bool Indexed, typename T>
void drawGeneric(
uint32_t count,
const T* draws);
template<bool Indexed>
void drawIndirectGeneric(
VkDeviceSize offset,
@ -2103,7 +2100,7 @@ namespace dxvk {
return pred(DxvkAccess::Read);
}
void invalidateWriteAfterWriteTracking();
bool needsDrawBarriers();
void beginRenderPassDebugRegion();

View File

@ -75,6 +75,7 @@ namespace dxvk {
VariableMultisampleRate,
IndexBufferRobustness,
DebugUtils,
DirectMultiDraw,
FeatureCount
};

View File

@ -244,7 +244,7 @@ namespace dxvk::vk {
label.color[0] = ((color >> 16u) & 0xffu) / 255.0f;
label.color[1] = ((color >> 8u) & 0xffu) / 255.0f;
label.color[2] = ((color >> 0u) & 0xffu) / 255.0f;
label.color[3] = 1.0f;
label.color[3] = color ? 1.0f : 0.0f;
label.pLabelName = text;
return label;
}