mirror of
https://github.com/doitsujin/dxvk.git
synced 2025-02-27 04:54:15 +01:00
[dxvk] Implement draw batching via VK_EXT_multi_draw
This commit is contained in:
parent
7e503fa053
commit
016f05a770
@ -3578,12 +3578,7 @@ namespace dxvk {
|
||||
|
||||
EmitCsCmd<VkDrawIndirectCommand>(D3D11CmdType::Draw, 1u,
|
||||
[] (DxvkContext* ctx, const VkDrawIndirectCommand* draws, size_t count) {
|
||||
for (size_t i = 0; i < count; i++) {
|
||||
ctx->draw(draws[i].vertexCount,
|
||||
draws[i].instanceCount,
|
||||
draws[i].firstVertex,
|
||||
draws[i].firstInstance);
|
||||
}
|
||||
ctx->draw(count, draws);
|
||||
});
|
||||
|
||||
new (m_csData->first()) VkDrawIndirectCommand(draw);
|
||||
@ -3608,13 +3603,7 @@ namespace dxvk {
|
||||
|
||||
EmitCsCmd<VkDrawIndexedIndirectCommand>(D3D11CmdType::DrawIndexed, 1u,
|
||||
[] (DxvkContext* ctx, const VkDrawIndexedIndirectCommand* draws, size_t count) {
|
||||
for (size_t i = 0; i < count; i++) {
|
||||
ctx->drawIndexed(draws[i].indexCount,
|
||||
draws[i].instanceCount,
|
||||
draws[i].firstIndex,
|
||||
draws[i].vertexOffset,
|
||||
draws[i].firstInstance);
|
||||
}
|
||||
ctx->drawIndexed(count, draws);
|
||||
});
|
||||
|
||||
new (m_csData->first()) VkDrawIndexedIndirectCommand(draw);
|
||||
|
@ -1312,7 +1312,11 @@ namespace dxvk {
|
||||
for (uint32_t i = 0; i < cViews.size(); i++)
|
||||
ctx->bindResourceImageView(VK_SHADER_STAGE_FRAGMENT_BIT, 1 + i, Rc<DxvkImageView>(cViews[i]));
|
||||
|
||||
ctx->draw(3, 1, 0, 0);
|
||||
VkDrawIndirectCommand draw = { };
|
||||
draw.vertexCount = 3u;
|
||||
draw.instanceCount = 1u;
|
||||
|
||||
ctx->draw(1, &draw);
|
||||
|
||||
for (uint32_t i = 0; i < cViews.size(); i++)
|
||||
ctx->bindResourceImageView(VK_SHADER_STAGE_FRAGMENT_BIT, 1 + i, nullptr);
|
||||
|
@ -2890,9 +2890,12 @@ namespace dxvk {
|
||||
|
||||
// Tests on Windows show that D3D9 does not do non-indexed instanced draws.
|
||||
|
||||
ctx->draw(
|
||||
vertexCount, 1,
|
||||
cStartVertex, 0);
|
||||
VkDrawIndirectCommand draw = { };
|
||||
draw.vertexCount = vertexCount;
|
||||
draw.instanceCount = 1u;
|
||||
draw.firstVertex = cStartVertex;
|
||||
|
||||
ctx->draw(1u, &draw);
|
||||
});
|
||||
|
||||
return D3D_OK;
|
||||
@ -2939,10 +2942,13 @@ namespace dxvk {
|
||||
|
||||
ApplyPrimitiveType(ctx, cPrimType);
|
||||
|
||||
ctx->drawIndexed(
|
||||
drawInfo.vertexCount, drawInfo.instanceCount,
|
||||
cStartIndex,
|
||||
cBaseVertexIndex, 0);
|
||||
VkDrawIndexedIndirectCommand draw = { };
|
||||
draw.indexCount = drawInfo.vertexCount;
|
||||
draw.instanceCount = drawInfo.instanceCount;
|
||||
draw.firstIndex = cStartIndex;
|
||||
draw.vertexOffset = cBaseVertexIndex;
|
||||
|
||||
ctx->drawIndexed(1u, &draw);
|
||||
});
|
||||
|
||||
return D3D_OK;
|
||||
@ -2981,11 +2987,12 @@ namespace dxvk {
|
||||
ApplyPrimitiveType(ctx, cPrimType);
|
||||
|
||||
// Tests on Windows show that D3D9 does not do non-indexed instanced draws.
|
||||
VkDrawIndirectCommand draw = { };
|
||||
draw.vertexCount = cVertexCount;
|
||||
draw.instanceCount = 1u;
|
||||
|
||||
ctx->bindVertexBuffer(0, std::move(cBufferSlice), cStride);
|
||||
ctx->draw(
|
||||
cVertexCount, 1,
|
||||
0, 0);
|
||||
ctx->draw(1u, &draw);
|
||||
ctx->bindVertexBuffer(0, DxvkBufferSlice(), 0);
|
||||
});
|
||||
|
||||
@ -3045,12 +3052,13 @@ namespace dxvk {
|
||||
|
||||
ApplyPrimitiveType(ctx, cPrimType);
|
||||
|
||||
VkDrawIndexedIndirectCommand draw = { };
|
||||
draw.indexCount = drawInfo.vertexCount;
|
||||
draw.instanceCount = drawInfo.instanceCount;
|
||||
|
||||
ctx->bindVertexBuffer(0, cBufferSlice.subSlice(0, cVertexSize), cStride);
|
||||
ctx->bindIndexBuffer(cBufferSlice.subSlice(cVertexSize, cBufferSlice.length() - cVertexSize), cIndexType);
|
||||
ctx->drawIndexed(
|
||||
drawInfo.vertexCount, drawInfo.instanceCount,
|
||||
0,
|
||||
0, 0);
|
||||
ctx->drawIndexed(1u, &draw);
|
||||
ctx->bindVertexBuffer(0, DxvkBufferSlice(), 0);
|
||||
ctx->bindIndexBuffer(DxvkBufferSlice(), VK_INDEX_TYPE_UINT32);
|
||||
});
|
||||
@ -3162,11 +3170,14 @@ namespace dxvk {
|
||||
// to avoid val errors / UB.
|
||||
ctx->bindShader<VK_SHADER_STAGE_FRAGMENT_BIT>(nullptr);
|
||||
|
||||
VkDrawIndirectCommand draw = { };
|
||||
draw.vertexCount = drawInfo.vertexCount;
|
||||
draw.instanceCount = drawInfo.instanceCount;
|
||||
draw.firstVertex = cStartIndex;
|
||||
|
||||
ctx->bindShader<VK_SHADER_STAGE_GEOMETRY_BIT>(std::move(shader));
|
||||
ctx->bindUniformBuffer(VK_SHADER_STAGE_GEOMETRY_BIT, getSWVPBufferSlot(), std::move(cBufferSlice));
|
||||
ctx->draw(
|
||||
drawInfo.vertexCount, drawInfo.instanceCount,
|
||||
cStartIndex, 0);
|
||||
ctx->draw(1u, &draw);
|
||||
ctx->bindUniformBuffer(VK_SHADER_STAGE_GEOMETRY_BIT, getSWVPBufferSlot(), DxvkBufferSlice());
|
||||
ctx->bindShader<VK_SHADER_STAGE_GEOMETRY_BIT>(nullptr);
|
||||
});
|
||||
|
@ -49,6 +49,11 @@ namespace dxvk {
|
||||
if (m_device->features().khrMaintenance5.maintenance5)
|
||||
m_features.set(DxvkContextFeature::IndexBufferRobustness);
|
||||
|
||||
// Check whether we can batch direct draws
|
||||
if (m_device->features().extMultiDraw.multiDraw
|
||||
&& m_device->properties().extMultiDraw.maxMultiDrawCount >= DirectMultiDrawBatchSize)
|
||||
m_features.set(DxvkContextFeature::DirectMultiDraw);
|
||||
|
||||
// Add a fast path to query debug utils support
|
||||
if (m_device->isDebugEnabled())
|
||||
m_features.set(DxvkContextFeature::DebugUtils);
|
||||
@ -922,15 +927,9 @@ namespace dxvk {
|
||||
|
||||
|
||||
void DxvkContext::draw(
|
||||
uint32_t vertexCount,
|
||||
uint32_t instanceCount,
|
||||
uint32_t firstVertex,
|
||||
uint32_t firstInstance) {
|
||||
if (this->commitGraphicsState<false, false>()) {
|
||||
m_cmd->cmdDraw(
|
||||
vertexCount, instanceCount,
|
||||
firstVertex, firstInstance);
|
||||
}
|
||||
uint32_t count,
|
||||
const VkDrawIndirectCommand* draws) {
|
||||
drawGeneric<false>(count, draws);
|
||||
}
|
||||
|
||||
|
||||
@ -953,17 +952,9 @@ namespace dxvk {
|
||||
|
||||
|
||||
void DxvkContext::drawIndexed(
|
||||
uint32_t indexCount,
|
||||
uint32_t instanceCount,
|
||||
uint32_t firstIndex,
|
||||
int32_t vertexOffset,
|
||||
uint32_t firstInstance) {
|
||||
if (this->commitGraphicsState<true, false>()) {
|
||||
m_cmd->cmdDrawIndexed(
|
||||
indexCount, instanceCount,
|
||||
firstIndex, vertexOffset,
|
||||
firstInstance);
|
||||
}
|
||||
uint32_t count,
|
||||
const VkDrawIndexedIndirectCommand* draws) {
|
||||
drawGeneric<true>(count, draws);
|
||||
}
|
||||
|
||||
|
||||
@ -1689,6 +1680,116 @@ namespace dxvk {
|
||||
}
|
||||
|
||||
|
||||
template<bool Indexed, typename T>
|
||||
void DxvkContext::drawGeneric(
|
||||
uint32_t count,
|
||||
const T* draws) {
|
||||
if (this->commitGraphicsState<Indexed, false>()) {
|
||||
if (count == 1u) {
|
||||
// Most common case, just emit a single draw
|
||||
if constexpr (Indexed) {
|
||||
m_cmd->cmdDrawIndexed(draws->indexCount, draws->instanceCount,
|
||||
draws->firstIndex, draws->vertexOffset, draws->firstInstance);
|
||||
} else {
|
||||
m_cmd->cmdDraw(draws->vertexCount, draws->instanceCount,
|
||||
draws->firstVertex, draws->firstInstance);
|
||||
}
|
||||
} else if (unlikely(needsDrawBarriers())) {
|
||||
// If the current pipeline has storage resource hazards,
|
||||
// unroll draws and insert a barrier after each one.
|
||||
for (uint32_t i = 0; i < count; i++) {
|
||||
if (i)
|
||||
this->commitGraphicsState<Indexed, false>();
|
||||
|
||||
if constexpr (Indexed) {
|
||||
m_cmd->cmdDrawIndexed(draws[i].indexCount, draws[i].instanceCount,
|
||||
draws[i].firstIndex, draws[i].vertexOffset, draws[i].firstInstance);
|
||||
} else {
|
||||
m_cmd->cmdDraw(draws[i].vertexCount, draws[i].instanceCount,
|
||||
draws[i].firstVertex, draws[i].firstInstance);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
using MultiDrawInfo = std::conditional_t<Indexed,
|
||||
VkMultiDrawIndexedInfoEXT, VkMultiDrawInfoEXT>;
|
||||
|
||||
// Intentially don't initialize this; we'll probably not use
|
||||
// the full batch size anyway, so doing so would be wasteful.
|
||||
std::array<MultiDrawInfo, DirectMultiDrawBatchSize> batch;
|
||||
|
||||
uint32_t instanceCount = 0u;
|
||||
uint32_t instanceIndex = 0u;
|
||||
|
||||
uint32_t batchSize = 0u;
|
||||
|
||||
for (uint32_t i = 0; i < count; i++) {
|
||||
if (!batchSize) {
|
||||
instanceCount = draws[i].instanceCount;
|
||||
instanceIndex = draws[i].firstInstance;
|
||||
}
|
||||
|
||||
if constexpr (Indexed) {
|
||||
auto& drawInfo = batch[batchSize++];
|
||||
drawInfo.firstIndex = draws[i].firstIndex;
|
||||
drawInfo.indexCount = draws[i].indexCount;
|
||||
drawInfo.vertexOffset = draws[i].vertexOffset;
|
||||
} else {
|
||||
auto& drawInfo = batch[batchSize++];
|
||||
drawInfo.firstVertex = draws[i].firstVertex;
|
||||
drawInfo.vertexCount = draws[i].vertexCount;
|
||||
}
|
||||
|
||||
bool emitDraw = i + 1u == count || batchSize == DirectMultiDrawBatchSize;
|
||||
|
||||
if (!emitDraw) {
|
||||
const auto& next = draws[i + 1u];
|
||||
|
||||
emitDraw = instanceCount != next.instanceCount
|
||||
|| instanceIndex != next.firstInstance;
|
||||
}
|
||||
|
||||
if (emitDraw) {
|
||||
if (m_features.test(DxvkContextFeature::DirectMultiDraw)) {
|
||||
if constexpr (Indexed) {
|
||||
m_cmd->cmdDrawMultiIndexed(batchSize, batch.data(),
|
||||
instanceCount, instanceIndex);
|
||||
} else {
|
||||
m_cmd->cmdDrawMulti(batchSize, batch.data(),
|
||||
instanceCount, instanceIndex);
|
||||
}
|
||||
} else {
|
||||
// This path only really exists for consistency reasons; all drivers
|
||||
// we care about support MultiDraw natively, but debug tools may not.
|
||||
if (unlikely(m_features.test(DxvkContextFeature::DebugUtils))) {
|
||||
const char* procName = Indexed ? "vkCmdDrawMultiIndexedEXT" : "vkCmdDrawMultiEXT";
|
||||
m_cmd->cmdBeginDebugUtilsLabel(DxvkCmdBuffer::ExecBuffer,
|
||||
vk::makeLabel(0u, str::format(procName, "(", batchSize, ")").c_str()));
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < batchSize; i++) {
|
||||
const auto& entry = batch[i];
|
||||
|
||||
if constexpr (Indexed) {
|
||||
m_cmd->cmdDrawIndexed(entry.indexCount, instanceCount,
|
||||
entry.firstIndex, entry.vertexOffset, instanceIndex);
|
||||
} else {
|
||||
m_cmd->cmdDraw(entry.vertexCount, instanceCount,
|
||||
entry.firstVertex, instanceIndex);
|
||||
}
|
||||
}
|
||||
|
||||
if (unlikely(m_features.test(DxvkContextFeature::DebugUtils)))
|
||||
m_cmd->cmdEndDebugUtilsLabel(DxvkCmdBuffer::ExecBuffer);
|
||||
}
|
||||
|
||||
batchSize = 0u;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<bool Indexed>
|
||||
void DxvkContext::drawIndirectGeneric(
|
||||
VkDeviceSize offset,
|
||||
@ -1698,11 +1799,8 @@ namespace dxvk {
|
||||
if (this->commitGraphicsState<Indexed, true>()) {
|
||||
auto descriptor = m_state.id.argBuffer.getDescriptor();
|
||||
|
||||
if (unroll) {
|
||||
// Need to do this check after initially setting up the pipeline
|
||||
unroll = m_state.gp.flags.test(DxvkGraphicsPipelineFlag::UnrollMergedDraws)
|
||||
&& !m_barrierControl.test(DxvkBarrierControl::GraphicsAllowReadWriteOverlap);
|
||||
}
|
||||
if (unroll)
|
||||
unroll = needsDrawBarriers();
|
||||
|
||||
// If draws are merged but the pipeline has order-dependent stores, submit
|
||||
// one draw at a time as well as barriers in between. Otherwise, keep the
|
||||
@ -2854,6 +2952,12 @@ namespace dxvk {
|
||||
}
|
||||
|
||||
|
||||
bool DxvkContext::needsDrawBarriers() {
|
||||
return m_state.gp.flags.test(DxvkGraphicsPipelineFlag::UnrollMergedDraws)
|
||||
&& !m_barrierControl.test(DxvkBarrierControl::GraphicsAllowReadWriteOverlap);
|
||||
}
|
||||
|
||||
|
||||
void DxvkContext::beginRenderPassDebugRegion() {
|
||||
bool hasColorAttachments = false;
|
||||
bool hasDepthAttachment = m_state.om.renderTargets.depth.view != nullptr;
|
||||
|
@ -32,6 +32,8 @@ namespace dxvk {
|
||||
class DxvkContext : public RcObject {
|
||||
constexpr static VkDeviceSize MaxDiscardSizeInRp = 256u << 10u;
|
||||
constexpr static VkDeviceSize MaxDiscardSize = 16u << 10u;
|
||||
|
||||
constexpr static uint32_t DirectMultiDrawBatchSize = 256u;
|
||||
public:
|
||||
|
||||
DxvkContext(const Rc<DxvkDevice>& device);
|
||||
@ -744,16 +746,12 @@ namespace dxvk {
|
||||
/**
|
||||
* \brief Draws primitive without using an index buffer
|
||||
*
|
||||
* \param [in] vertexCount Number of vertices to draw
|
||||
* \param [in] instanceCount Number of instances to render
|
||||
* \param [in] firstVertex First vertex in vertex buffer
|
||||
* \param [in] firstInstance First instance ID
|
||||
* \param [in] count Number of draws
|
||||
* \param [in] draws Draw parameters
|
||||
*/
|
||||
void draw(
|
||||
uint32_t vertexCount,
|
||||
uint32_t instanceCount,
|
||||
uint32_t firstVertex,
|
||||
uint32_t firstInstance);
|
||||
uint32_t count,
|
||||
const VkDrawIndirectCommand* draws);
|
||||
|
||||
/**
|
||||
* \brief Indirect draw call
|
||||
@ -791,18 +789,12 @@ namespace dxvk {
|
||||
/**
|
||||
* \brief Draws primitives using an index buffer
|
||||
*
|
||||
* \param [in] indexCount Number of indices to draw
|
||||
* \param [in] instanceCount Number of instances to render
|
||||
* \param [in] firstIndex First index within the index buffer
|
||||
* \param [in] vertexOffset Vertex ID that corresponds to index 0
|
||||
* \param [in] firstInstance First instance ID
|
||||
* \param [in] count Number of draws
|
||||
* \param [in] draws Draw parameters
|
||||
*/
|
||||
void drawIndexed(
|
||||
uint32_t indexCount,
|
||||
uint32_t instanceCount,
|
||||
uint32_t firstIndex,
|
||||
int32_t vertexOffset,
|
||||
uint32_t firstInstance);
|
||||
uint32_t count,
|
||||
const VkDrawIndexedIndirectCommand* draws);
|
||||
|
||||
/**
|
||||
* \brief Indirect indexed draw call
|
||||
@ -1595,6 +1587,11 @@ namespace dxvk {
|
||||
const Rc<DxvkBuffer>& buffer,
|
||||
VkDeviceSize offset);
|
||||
|
||||
template<bool Indexed, typename T>
|
||||
void drawGeneric(
|
||||
uint32_t count,
|
||||
const T* draws);
|
||||
|
||||
template<bool Indexed>
|
||||
void drawIndirectGeneric(
|
||||
VkDeviceSize offset,
|
||||
@ -2103,7 +2100,7 @@ namespace dxvk {
|
||||
return pred(DxvkAccess::Read);
|
||||
}
|
||||
|
||||
void invalidateWriteAfterWriteTracking();
|
||||
bool needsDrawBarriers();
|
||||
|
||||
void beginRenderPassDebugRegion();
|
||||
|
||||
|
@ -75,6 +75,7 @@ namespace dxvk {
|
||||
VariableMultisampleRate,
|
||||
IndexBufferRobustness,
|
||||
DebugUtils,
|
||||
DirectMultiDraw,
|
||||
FeatureCount
|
||||
};
|
||||
|
||||
|
@ -244,7 +244,7 @@ namespace dxvk::vk {
|
||||
label.color[0] = ((color >> 16u) & 0xffu) / 255.0f;
|
||||
label.color[1] = ((color >> 8u) & 0xffu) / 255.0f;
|
||||
label.color[2] = ((color >> 0u) & 0xffu) / 255.0f;
|
||||
label.color[3] = 1.0f;
|
||||
label.color[3] = color ? 1.0f : 0.0f;
|
||||
label.pLabelName = text;
|
||||
return label;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user