mirror of
https://github.com/doitsujin/dxvk.git
synced 2025-03-13 19:29:14 +01:00
[dxvk] Optimize buffer memory barrier batching
Instead of inserting a barrier after every single buffer copy, update, or clear operation, we batch the barriers up and only execute them right before the first command that uses a dirty buffer. This significantly reduces the number of pipeline barriers in some games, e.g. Final Fantasy XV.
This commit is contained in:
parent
8f8340c2d1
commit
162c465e95
@ -11,12 +11,12 @@ namespace dxvk {
|
||||
VkAccessFlags srcAccess,
|
||||
VkPipelineStageFlags dstStages,
|
||||
VkAccessFlags dstAccess) {
|
||||
DxvkAccessFlags accessTypes = this->getAccessTypes(srcAccess);
|
||||
DxvkAccessFlags access = this->getAccessTypes(srcAccess);
|
||||
|
||||
m_srcStages |= srcStages;
|
||||
m_dstStages |= dstStages;
|
||||
|
||||
if (accessTypes.test(DxvkAccess::Write)) {
|
||||
if (access.test(DxvkAccess::Write)) {
|
||||
VkBufferMemoryBarrier barrier;
|
||||
barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
|
||||
barrier.pNext = nullptr;
|
||||
@ -29,6 +29,8 @@ namespace dxvk {
|
||||
barrier.size = bufSlice.length();
|
||||
m_bufBarriers.push_back(barrier);
|
||||
}
|
||||
|
||||
m_bufSlices.push_back({ bufSlice, access });
|
||||
}
|
||||
|
||||
|
||||
@ -41,12 +43,12 @@ namespace dxvk {
|
||||
VkImageLayout dstLayout,
|
||||
VkPipelineStageFlags dstStages,
|
||||
VkAccessFlags dstAccess) {
|
||||
DxvkAccessFlags accessTypes = this->getAccessTypes(srcAccess);
|
||||
DxvkAccessFlags access = this->getAccessTypes(srcAccess);
|
||||
|
||||
m_srcStages |= srcStages;
|
||||
m_dstStages |= dstStages;
|
||||
|
||||
if ((srcLayout != dstLayout) || accessTypes.test(DxvkAccess::Write)) {
|
||||
if ((srcLayout != dstLayout) || access.test(DxvkAccess::Write)) {
|
||||
VkImageMemoryBarrier barrier;
|
||||
barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
|
||||
barrier.pNext = nullptr;
|
||||
@ -64,6 +66,20 @@ namespace dxvk {
|
||||
}
|
||||
|
||||
|
||||
bool DxvkBarrierSet::isBufferDirty(
|
||||
const DxvkPhysicalBufferSlice& bufSlice,
|
||||
DxvkAccessFlags bufAccess) {
|
||||
bool result = false;
|
||||
|
||||
for (uint32_t i = 0; i < m_bufSlices.size() && !result; i++) {
|
||||
result = (bufSlice.overlaps(m_bufSlices[i].slice))
|
||||
&& (bufAccess | m_bufSlices[i].access).test(DxvkAccess::Write);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
void DxvkBarrierSet::recordCommands(const Rc<DxvkCommandList>& commandList) {
|
||||
if ((m_srcStages | m_dstStages) != 0) {
|
||||
VkPipelineStageFlags srcFlags = m_srcStages;
|
||||
@ -90,6 +106,8 @@ namespace dxvk {
|
||||
m_memBarriers.resize(0);
|
||||
m_bufBarriers.resize(0);
|
||||
m_imgBarriers.resize(0);
|
||||
|
||||
m_bufSlices.resize(0);
|
||||
}
|
||||
|
||||
|
||||
|
@ -37,12 +37,21 @@ namespace dxvk {
|
||||
VkPipelineStageFlags dstStages,
|
||||
VkAccessFlags dstAccess);
|
||||
|
||||
/**
 * \brief Checks whether a buffer slice has a conflicting tracked access
 *
 * Compares the given slice against the buffer slices stored in this
 * barrier set. The slice counts as dirty if it overlaps a stored
 * slice and either the stored access or \c bufAccess is a write.
 * \param [in] bufSlice The buffer slice to check
 * \param [in] bufAccess Access types of the upcoming operation
 * \returns \c true if the slice is dirty
 */
bool isBufferDirty(
|
||||
const DxvkPhysicalBufferSlice& bufSlice,
|
||||
DxvkAccessFlags bufAccess);
|
||||
|
||||
void recordCommands(
|
||||
const Rc<DxvkCommandList>& commandList);
|
||||
|
||||
void reset();
|
||||
|
||||
private:
|
||||
|
||||
// Record of one buffer access added through accessBuffer. Entries are
// aggregate-initialized as { slice, access }, so member order matters.
struct BufSlice {
|
||||
// Physical buffer range that was accessed
DxvkPhysicalBufferSlice slice;
|
||||
// Access types derived from the source access mask of that access
DxvkAccessFlags access;
|
||||
};
|
||||
|
||||
VkPipelineStageFlags m_srcStages = 0;
|
||||
VkPipelineStageFlags m_dstStages = 0;
|
||||
@ -50,6 +59,8 @@ namespace dxvk {
|
||||
std::vector<VkMemoryBarrier> m_memBarriers;
|
||||
std::vector<VkBufferMemoryBarrier> m_bufBarriers;
|
||||
std::vector<VkImageMemoryBarrier> m_imgBarriers;
|
||||
|
||||
std::vector<BufSlice> m_bufSlices;
|
||||
|
||||
DxvkAccessFlags getAccessTypes(VkAccessFlags flags) const;
|
||||
|
||||
|
@ -189,6 +189,18 @@ namespace dxvk {
|
||||
return m_buffer;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Checks whether this slice overlaps with another
|
||||
*
|
||||
* \param [in] other The buffer slice to check
|
||||
* \returns \c true if the two slices overlap
|
||||
*/
|
||||
bool overlaps(const DxvkPhysicalBufferSlice& other) const {
|
||||
return this->m_buffer == other.m_buffer
|
||||
&& this->m_offset + this->m_length > other.m_offset
|
||||
&& this->m_offset < other.m_offset + other.m_length;
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
Rc<DxvkPhysicalBuffer> m_buffer = nullptr;
|
||||
|
@ -52,6 +52,8 @@ namespace dxvk {
|
||||
this->trackQueryPool(m_queryPools[VK_QUERY_TYPE_PIPELINE_STATISTICS]);
|
||||
this->trackQueryPool(m_queryPools[VK_QUERY_TYPE_TIMESTAMP]);
|
||||
|
||||
m_barriers.recordCommands(m_cmd);
|
||||
|
||||
m_cmd->endRecording();
|
||||
return std::exchange(m_cmd, nullptr);
|
||||
}
|
||||
@ -239,6 +241,9 @@ namespace dxvk {
|
||||
length = align(length, 4);
|
||||
|
||||
auto slice = buffer->subSlice(offset, length);
|
||||
|
||||
if (m_barriers.isBufferDirty(slice, DxvkAccess::Write))
|
||||
m_barriers.recordCommands(m_cmd);
|
||||
|
||||
m_cmd->cmdFillBuffer(
|
||||
slice.handle(),
|
||||
@ -251,7 +256,6 @@ namespace dxvk {
|
||||
VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
buffer->info().stages,
|
||||
buffer->info().access);
|
||||
m_barriers.recordCommands(m_cmd);
|
||||
|
||||
m_cmd->trackResource(slice.resource());
|
||||
}
|
||||
@ -264,6 +268,11 @@ namespace dxvk {
|
||||
VkClearColorValue value) {
|
||||
this->spillRenderPass();
|
||||
this->unbindComputePipeline();
|
||||
|
||||
auto bufferSlice = bufferView->physicalSlice();
|
||||
|
||||
if (m_barriers.isBufferDirty(bufferSlice, DxvkAccess::Write))
|
||||
m_barriers.recordCommands(m_cmd);
|
||||
|
||||
// Query pipeline objects to use for this clear operation
|
||||
DxvkMetaClearPipeline pipeInfo = m_metaClear->getClearBufferPipeline(
|
||||
@ -313,12 +322,11 @@ namespace dxvk {
|
||||
workgroups.depth);
|
||||
|
||||
m_barriers.accessBuffer(
|
||||
bufferView->physicalSlice(),
|
||||
bufferSlice,
|
||||
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
||||
VK_ACCESS_SHADER_WRITE_BIT,
|
||||
bufferView->bufferInfo().stages,
|
||||
bufferView->bufferInfo().access);
|
||||
m_barriers.recordCommands(m_cmd);
|
||||
|
||||
m_cmd->trackResource(bufferView->viewResource());
|
||||
m_cmd->trackResource(bufferView->bufferResource());
|
||||
@ -574,6 +582,10 @@ namespace dxvk {
|
||||
auto dstSlice = dstBuffer->subSlice(dstOffset, numBytes);
|
||||
auto srcSlice = srcBuffer->subSlice(srcOffset, numBytes);
|
||||
|
||||
if (m_barriers.isBufferDirty(srcSlice, DxvkAccess::Read)
|
||||
|| m_barriers.isBufferDirty(dstSlice, DxvkAccess::Write))
|
||||
m_barriers.recordCommands(m_cmd);
|
||||
|
||||
VkBufferCopy bufferRegion;
|
||||
bufferRegion.srcOffset = srcSlice.offset();
|
||||
bufferRegion.dstOffset = dstSlice.offset();
|
||||
@ -596,8 +608,6 @@ namespace dxvk {
|
||||
dstBuffer->info().stages,
|
||||
dstBuffer->info().access);
|
||||
|
||||
m_barriers.recordCommands(m_cmd);
|
||||
|
||||
m_cmd->trackResource(dstBuffer->resource());
|
||||
m_cmd->trackResource(srcBuffer->resource());
|
||||
}
|
||||
@ -614,7 +624,7 @@ namespace dxvk {
|
||||
this->spillRenderPass();
|
||||
|
||||
auto srcSlice = srcBuffer->subSlice(srcOffset, 0);
|
||||
|
||||
|
||||
VkImageSubresourceRange dstSubresourceRange = {
|
||||
dstSubresource.aspectMask,
|
||||
dstSubresource.mipLevel, 1,
|
||||
@ -631,6 +641,7 @@ namespace dxvk {
|
||||
dstImage->pickLayout(VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL),
|
||||
VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
VK_ACCESS_TRANSFER_WRITE_BIT);
|
||||
|
||||
m_barriers.recordCommands(m_cmd);
|
||||
|
||||
VkBufferImageCopy copyRegion;
|
||||
@ -655,11 +666,13 @@ namespace dxvk {
|
||||
dstImage->info().layout,
|
||||
dstImage->info().stages,
|
||||
dstImage->info().access);
|
||||
|
||||
m_barriers.accessBuffer(srcSlice,
|
||||
VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
VK_ACCESS_TRANSFER_READ_BIT,
|
||||
srcBuffer->info().stages,
|
||||
srcBuffer->info().access);
|
||||
|
||||
m_barriers.recordCommands(m_cmd);
|
||||
|
||||
m_cmd->trackResource(dstImage);
|
||||
@ -699,6 +712,7 @@ namespace dxvk {
|
||||
dstImage->pickLayout(VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL),
|
||||
VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
VK_ACCESS_TRANSFER_WRITE_BIT);
|
||||
|
||||
m_barriers.accessImage(
|
||||
srcImage, srcSubresourceRange,
|
||||
srcImage->info().layout,
|
||||
@ -707,6 +721,7 @@ namespace dxvk {
|
||||
srcImage->pickLayout(VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL),
|
||||
VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
VK_ACCESS_TRANSFER_READ_BIT);
|
||||
|
||||
m_barriers.recordCommands(m_cmd);
|
||||
|
||||
if (dstSubresource.aspectMask == srcSubresource.aspectMask) {
|
||||
@ -758,6 +773,7 @@ namespace dxvk {
|
||||
VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
VK_ACCESS_TRANSFER_READ_BIT);
|
||||
|
||||
m_barriers.recordCommands(m_cmd);
|
||||
|
||||
bufferImageCopy.imageSubresource = dstSubresource;
|
||||
@ -785,6 +801,7 @@ namespace dxvk {
|
||||
dstImage->info().layout,
|
||||
dstImage->info().stages,
|
||||
dstImage->info().access);
|
||||
|
||||
m_barriers.accessImage(
|
||||
srcImage, srcSubresourceRange,
|
||||
srcImage->pickLayout(VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL),
|
||||
@ -793,6 +810,7 @@ namespace dxvk {
|
||||
srcImage->info().layout,
|
||||
srcImage->info().stages,
|
||||
srcImage->info().access);
|
||||
|
||||
m_barriers.recordCommands(m_cmd);
|
||||
|
||||
m_cmd->trackResource(dstImage);
|
||||
@ -811,7 +829,7 @@ namespace dxvk {
|
||||
this->spillRenderPass();
|
||||
|
||||
auto dstSlice = dstBuffer->subSlice(dstOffset, 0);
|
||||
|
||||
|
||||
VkImageSubresourceRange srcSubresourceRange = {
|
||||
srcSubresource.aspectMask,
|
||||
srcSubresource.mipLevel, 1,
|
||||
@ -850,11 +868,13 @@ namespace dxvk {
|
||||
srcImage->info().layout,
|
||||
srcImage->info().stages,
|
||||
srcImage->info().access);
|
||||
|
||||
m_barriers.accessBuffer(dstSlice,
|
||||
VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
dstBuffer->info().stages,
|
||||
dstBuffer->info().access);
|
||||
|
||||
m_barriers.recordCommands(m_cmd);
|
||||
|
||||
m_cmd->trackResource(srcImage);
|
||||
@ -1253,6 +1273,9 @@ namespace dxvk {
|
||||
// We'll limit the size to 4kB in order to keep command buffers
|
||||
// reasonably small, we do not know how much data apps may upload.
|
||||
auto physicalSlice = buffer->subSlice(offset, size);
|
||||
|
||||
if (m_barriers.isBufferDirty(physicalSlice, DxvkAccess::Write))
|
||||
m_barriers.recordCommands(m_cmd);
|
||||
|
||||
if ((size <= 4096) && ((size & 0x3) == 0) && ((offset & 0x3) == 0)) {
|
||||
m_cmd->cmdUpdateBuffer(
|
||||
@ -1277,7 +1300,6 @@ namespace dxvk {
|
||||
VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
buffer->info().stages,
|
||||
buffer->info().access);
|
||||
m_barriers.recordCommands(m_cmd);
|
||||
|
||||
m_cmd->trackResource(buffer->resource());
|
||||
}
|
||||
@ -1532,6 +1554,8 @@ namespace dxvk {
|
||||
&& (m_state.om.framebuffer != nullptr)) {
|
||||
m_flags.set(DxvkContextFlag::GpRenderPassBound);
|
||||
m_flags.clr(DxvkContextFlag::GpClearRenderTargets);
|
||||
|
||||
m_barriers.recordCommands(m_cmd);
|
||||
|
||||
this->renderPassBindFramebuffer(
|
||||
m_state.om.framebuffer,
|
||||
@ -2044,6 +2068,7 @@ namespace dxvk {
|
||||
this->updateComputeShaderResources();
|
||||
this->updateComputePipelineState();
|
||||
this->updateComputeShaderDescriptors();
|
||||
m_barriers.recordCommands(m_cmd);
|
||||
}
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user