mirror of
https://github.com/doitsujin/dxvk.git
synced 2025-03-15 07:29:17 +01:00
[dxvk] Optimize buffer memory barrier batching
Instead of inserting a barrier after every single buffer copy, update or clear operation, we batch them up and execute the barrier when the first dirty buffer is used by a command. This significantly reduces the number of pipeline barriers in some games, e.g. Final Fantasy XV.
This commit is contained in:
parent
8f8340c2d1
commit
162c465e95
@ -11,12 +11,12 @@ namespace dxvk {
|
|||||||
VkAccessFlags srcAccess,
|
VkAccessFlags srcAccess,
|
||||||
VkPipelineStageFlags dstStages,
|
VkPipelineStageFlags dstStages,
|
||||||
VkAccessFlags dstAccess) {
|
VkAccessFlags dstAccess) {
|
||||||
DxvkAccessFlags accessTypes = this->getAccessTypes(srcAccess);
|
DxvkAccessFlags access = this->getAccessTypes(srcAccess);
|
||||||
|
|
||||||
m_srcStages |= srcStages;
|
m_srcStages |= srcStages;
|
||||||
m_dstStages |= dstStages;
|
m_dstStages |= dstStages;
|
||||||
|
|
||||||
if (accessTypes.test(DxvkAccess::Write)) {
|
if (access.test(DxvkAccess::Write)) {
|
||||||
VkBufferMemoryBarrier barrier;
|
VkBufferMemoryBarrier barrier;
|
||||||
barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
|
barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
|
||||||
barrier.pNext = nullptr;
|
barrier.pNext = nullptr;
|
||||||
@ -29,6 +29,8 @@ namespace dxvk {
|
|||||||
barrier.size = bufSlice.length();
|
barrier.size = bufSlice.length();
|
||||||
m_bufBarriers.push_back(barrier);
|
m_bufBarriers.push_back(barrier);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
m_bufSlices.push_back({ bufSlice, access });
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -41,12 +43,12 @@ namespace dxvk {
|
|||||||
VkImageLayout dstLayout,
|
VkImageLayout dstLayout,
|
||||||
VkPipelineStageFlags dstStages,
|
VkPipelineStageFlags dstStages,
|
||||||
VkAccessFlags dstAccess) {
|
VkAccessFlags dstAccess) {
|
||||||
DxvkAccessFlags accessTypes = this->getAccessTypes(srcAccess);
|
DxvkAccessFlags access = this->getAccessTypes(srcAccess);
|
||||||
|
|
||||||
m_srcStages |= srcStages;
|
m_srcStages |= srcStages;
|
||||||
m_dstStages |= dstStages;
|
m_dstStages |= dstStages;
|
||||||
|
|
||||||
if ((srcLayout != dstLayout) || accessTypes.test(DxvkAccess::Write)) {
|
if ((srcLayout != dstLayout) || access.test(DxvkAccess::Write)) {
|
||||||
VkImageMemoryBarrier barrier;
|
VkImageMemoryBarrier barrier;
|
||||||
barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
|
barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
|
||||||
barrier.pNext = nullptr;
|
barrier.pNext = nullptr;
|
||||||
@ -64,6 +66,20 @@ namespace dxvk {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
bool DxvkBarrierSet::isBufferDirty(
|
||||||
|
const DxvkPhysicalBufferSlice& bufSlice,
|
||||||
|
DxvkAccessFlags bufAccess) {
|
||||||
|
bool result = false;
|
||||||
|
|
||||||
|
for (uint32_t i = 0; i < m_bufSlices.size() && !result; i++) {
|
||||||
|
result = (bufSlice.overlaps(m_bufSlices[i].slice))
|
||||||
|
&& (bufAccess | m_bufSlices[i].access).test(DxvkAccess::Write);
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void DxvkBarrierSet::recordCommands(const Rc<DxvkCommandList>& commandList) {
|
void DxvkBarrierSet::recordCommands(const Rc<DxvkCommandList>& commandList) {
|
||||||
if ((m_srcStages | m_dstStages) != 0) {
|
if ((m_srcStages | m_dstStages) != 0) {
|
||||||
VkPipelineStageFlags srcFlags = m_srcStages;
|
VkPipelineStageFlags srcFlags = m_srcStages;
|
||||||
@ -90,6 +106,8 @@ namespace dxvk {
|
|||||||
m_memBarriers.resize(0);
|
m_memBarriers.resize(0);
|
||||||
m_bufBarriers.resize(0);
|
m_bufBarriers.resize(0);
|
||||||
m_imgBarriers.resize(0);
|
m_imgBarriers.resize(0);
|
||||||
|
|
||||||
|
m_bufSlices.resize(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -37,6 +37,10 @@ namespace dxvk {
|
|||||||
VkPipelineStageFlags dstStages,
|
VkPipelineStageFlags dstStages,
|
||||||
VkAccessFlags dstAccess);
|
VkAccessFlags dstAccess);
|
||||||
|
|
||||||
|
/**
 * \brief Checks whether a buffer slice has a pending access
 *
 * Tests the given slice against all buffer slices that were
 * passed to \c accessBuffer since the barrier set was last
 * recorded into a command list.
 * \param [in] bufSlice The buffer slice to check
 * \param [in] bufAccess Access types of the upcoming use
 * \returns \c true if a recorded slice overlaps \c bufSlice and
 *          either the recorded or the given access is a write
 */
bool isBufferDirty(
|
||||||
|
const DxvkPhysicalBufferSlice& bufSlice,
|
||||||
|
DxvkAccessFlags bufAccess);
|
||||||
|
|
||||||
void recordCommands(
|
void recordCommands(
|
||||||
const Rc<DxvkCommandList>& commandList);
|
const Rc<DxvkCommandList>& commandList);
|
||||||
|
|
||||||
@ -44,6 +48,11 @@ namespace dxvk {
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
|
// Entry of m_bufSlices: one buffer slice handed to accessBuffer,
// kept until recordCommands clears the list. Initialized
// positionally as { slice, access }, so member order matters.
struct BufSlice {
|
||||||
|
// Buffer range that was accessed
DxvkPhysicalBufferSlice slice;
|
||||||
|
// Access types derived from the source access mask of that access
DxvkAccessFlags access;
|
||||||
|
};
|
||||||
|
|
||||||
VkPipelineStageFlags m_srcStages = 0;
|
VkPipelineStageFlags m_srcStages = 0;
|
||||||
VkPipelineStageFlags m_dstStages = 0;
|
VkPipelineStageFlags m_dstStages = 0;
|
||||||
|
|
||||||
@ -51,6 +60,8 @@ namespace dxvk {
|
|||||||
std::vector<VkBufferMemoryBarrier> m_bufBarriers;
|
std::vector<VkBufferMemoryBarrier> m_bufBarriers;
|
||||||
std::vector<VkImageMemoryBarrier> m_imgBarriers;
|
std::vector<VkImageMemoryBarrier> m_imgBarriers;
|
||||||
|
|
||||||
|
std::vector<BufSlice> m_bufSlices;
|
||||||
|
|
||||||
DxvkAccessFlags getAccessTypes(VkAccessFlags flags) const;
|
DxvkAccessFlags getAccessTypes(VkAccessFlags flags) const;
|
||||||
|
|
||||||
};
|
};
|
||||||
|
@ -189,6 +189,18 @@ namespace dxvk {
|
|||||||
return m_buffer;
|
return m_buffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Checks whether this slice overlaps with another
|
||||||
|
*
|
||||||
|
* \param [in] other The buffer slice to check
|
||||||
|
* \returns \c true if the two slices overlap
|
||||||
|
*/
|
||||||
|
bool overlaps(const DxvkPhysicalBufferSlice& other) const {
|
||||||
|
return this->m_buffer == other.m_buffer
|
||||||
|
&& this->m_offset + this->m_length > other.m_offset
|
||||||
|
&& this->m_offset < other.m_offset + other.m_length;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
Rc<DxvkPhysicalBuffer> m_buffer = nullptr;
|
Rc<DxvkPhysicalBuffer> m_buffer = nullptr;
|
||||||
|
@ -52,6 +52,8 @@ namespace dxvk {
|
|||||||
this->trackQueryPool(m_queryPools[VK_QUERY_TYPE_PIPELINE_STATISTICS]);
|
this->trackQueryPool(m_queryPools[VK_QUERY_TYPE_PIPELINE_STATISTICS]);
|
||||||
this->trackQueryPool(m_queryPools[VK_QUERY_TYPE_TIMESTAMP]);
|
this->trackQueryPool(m_queryPools[VK_QUERY_TYPE_TIMESTAMP]);
|
||||||
|
|
||||||
|
m_barriers.recordCommands(m_cmd);
|
||||||
|
|
||||||
m_cmd->endRecording();
|
m_cmd->endRecording();
|
||||||
return std::exchange(m_cmd, nullptr);
|
return std::exchange(m_cmd, nullptr);
|
||||||
}
|
}
|
||||||
@ -240,6 +242,9 @@ namespace dxvk {
|
|||||||
|
|
||||||
auto slice = buffer->subSlice(offset, length);
|
auto slice = buffer->subSlice(offset, length);
|
||||||
|
|
||||||
|
if (m_barriers.isBufferDirty(slice, DxvkAccess::Write))
|
||||||
|
m_barriers.recordCommands(m_cmd);
|
||||||
|
|
||||||
m_cmd->cmdFillBuffer(
|
m_cmd->cmdFillBuffer(
|
||||||
slice.handle(),
|
slice.handle(),
|
||||||
slice.offset(),
|
slice.offset(),
|
||||||
@ -251,7 +256,6 @@ namespace dxvk {
|
|||||||
VK_ACCESS_TRANSFER_WRITE_BIT,
|
VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||||
buffer->info().stages,
|
buffer->info().stages,
|
||||||
buffer->info().access);
|
buffer->info().access);
|
||||||
m_barriers.recordCommands(m_cmd);
|
|
||||||
|
|
||||||
m_cmd->trackResource(slice.resource());
|
m_cmd->trackResource(slice.resource());
|
||||||
}
|
}
|
||||||
@ -265,6 +269,11 @@ namespace dxvk {
|
|||||||
this->spillRenderPass();
|
this->spillRenderPass();
|
||||||
this->unbindComputePipeline();
|
this->unbindComputePipeline();
|
||||||
|
|
||||||
|
auto bufferSlice = bufferView->physicalSlice();
|
||||||
|
|
||||||
|
if (m_barriers.isBufferDirty(bufferSlice, DxvkAccess::Write))
|
||||||
|
m_barriers.recordCommands(m_cmd);
|
||||||
|
|
||||||
// Query pipeline objects to use for this clear operation
|
// Query pipeline objects to use for this clear operation
|
||||||
DxvkMetaClearPipeline pipeInfo = m_metaClear->getClearBufferPipeline(
|
DxvkMetaClearPipeline pipeInfo = m_metaClear->getClearBufferPipeline(
|
||||||
imageFormatInfo(bufferView->info().format)->flags);
|
imageFormatInfo(bufferView->info().format)->flags);
|
||||||
@ -313,12 +322,11 @@ namespace dxvk {
|
|||||||
workgroups.depth);
|
workgroups.depth);
|
||||||
|
|
||||||
m_barriers.accessBuffer(
|
m_barriers.accessBuffer(
|
||||||
bufferView->physicalSlice(),
|
bufferSlice,
|
||||||
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
||||||
VK_ACCESS_SHADER_WRITE_BIT,
|
VK_ACCESS_SHADER_WRITE_BIT,
|
||||||
bufferView->bufferInfo().stages,
|
bufferView->bufferInfo().stages,
|
||||||
bufferView->bufferInfo().access);
|
bufferView->bufferInfo().access);
|
||||||
m_barriers.recordCommands(m_cmd);
|
|
||||||
|
|
||||||
m_cmd->trackResource(bufferView->viewResource());
|
m_cmd->trackResource(bufferView->viewResource());
|
||||||
m_cmd->trackResource(bufferView->bufferResource());
|
m_cmd->trackResource(bufferView->bufferResource());
|
||||||
@ -574,6 +582,10 @@ namespace dxvk {
|
|||||||
auto dstSlice = dstBuffer->subSlice(dstOffset, numBytes);
|
auto dstSlice = dstBuffer->subSlice(dstOffset, numBytes);
|
||||||
auto srcSlice = srcBuffer->subSlice(srcOffset, numBytes);
|
auto srcSlice = srcBuffer->subSlice(srcOffset, numBytes);
|
||||||
|
|
||||||
|
if (m_barriers.isBufferDirty(srcSlice, DxvkAccess::Read)
|
||||||
|
|| m_barriers.isBufferDirty(dstSlice, DxvkAccess::Write))
|
||||||
|
m_barriers.recordCommands(m_cmd);
|
||||||
|
|
||||||
VkBufferCopy bufferRegion;
|
VkBufferCopy bufferRegion;
|
||||||
bufferRegion.srcOffset = srcSlice.offset();
|
bufferRegion.srcOffset = srcSlice.offset();
|
||||||
bufferRegion.dstOffset = dstSlice.offset();
|
bufferRegion.dstOffset = dstSlice.offset();
|
||||||
@ -596,8 +608,6 @@ namespace dxvk {
|
|||||||
dstBuffer->info().stages,
|
dstBuffer->info().stages,
|
||||||
dstBuffer->info().access);
|
dstBuffer->info().access);
|
||||||
|
|
||||||
m_barriers.recordCommands(m_cmd);
|
|
||||||
|
|
||||||
m_cmd->trackResource(dstBuffer->resource());
|
m_cmd->trackResource(dstBuffer->resource());
|
||||||
m_cmd->trackResource(srcBuffer->resource());
|
m_cmd->trackResource(srcBuffer->resource());
|
||||||
}
|
}
|
||||||
@ -631,6 +641,7 @@ namespace dxvk {
|
|||||||
dstImage->pickLayout(VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL),
|
dstImage->pickLayout(VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL),
|
||||||
VK_PIPELINE_STAGE_TRANSFER_BIT,
|
VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||||
VK_ACCESS_TRANSFER_WRITE_BIT);
|
VK_ACCESS_TRANSFER_WRITE_BIT);
|
||||||
|
|
||||||
m_barriers.recordCommands(m_cmd);
|
m_barriers.recordCommands(m_cmd);
|
||||||
|
|
||||||
VkBufferImageCopy copyRegion;
|
VkBufferImageCopy copyRegion;
|
||||||
@ -655,11 +666,13 @@ namespace dxvk {
|
|||||||
dstImage->info().layout,
|
dstImage->info().layout,
|
||||||
dstImage->info().stages,
|
dstImage->info().stages,
|
||||||
dstImage->info().access);
|
dstImage->info().access);
|
||||||
|
|
||||||
m_barriers.accessBuffer(srcSlice,
|
m_barriers.accessBuffer(srcSlice,
|
||||||
VK_PIPELINE_STAGE_TRANSFER_BIT,
|
VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||||
VK_ACCESS_TRANSFER_READ_BIT,
|
VK_ACCESS_TRANSFER_READ_BIT,
|
||||||
srcBuffer->info().stages,
|
srcBuffer->info().stages,
|
||||||
srcBuffer->info().access);
|
srcBuffer->info().access);
|
||||||
|
|
||||||
m_barriers.recordCommands(m_cmd);
|
m_barriers.recordCommands(m_cmd);
|
||||||
|
|
||||||
m_cmd->trackResource(dstImage);
|
m_cmd->trackResource(dstImage);
|
||||||
@ -699,6 +712,7 @@ namespace dxvk {
|
|||||||
dstImage->pickLayout(VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL),
|
dstImage->pickLayout(VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL),
|
||||||
VK_PIPELINE_STAGE_TRANSFER_BIT,
|
VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||||
VK_ACCESS_TRANSFER_WRITE_BIT);
|
VK_ACCESS_TRANSFER_WRITE_BIT);
|
||||||
|
|
||||||
m_barriers.accessImage(
|
m_barriers.accessImage(
|
||||||
srcImage, srcSubresourceRange,
|
srcImage, srcSubresourceRange,
|
||||||
srcImage->info().layout,
|
srcImage->info().layout,
|
||||||
@ -707,6 +721,7 @@ namespace dxvk {
|
|||||||
srcImage->pickLayout(VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL),
|
srcImage->pickLayout(VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL),
|
||||||
VK_PIPELINE_STAGE_TRANSFER_BIT,
|
VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||||
VK_ACCESS_TRANSFER_READ_BIT);
|
VK_ACCESS_TRANSFER_READ_BIT);
|
||||||
|
|
||||||
m_barriers.recordCommands(m_cmd);
|
m_barriers.recordCommands(m_cmd);
|
||||||
|
|
||||||
if (dstSubresource.aspectMask == srcSubresource.aspectMask) {
|
if (dstSubresource.aspectMask == srcSubresource.aspectMask) {
|
||||||
@ -758,6 +773,7 @@ namespace dxvk {
|
|||||||
VK_ACCESS_TRANSFER_WRITE_BIT,
|
VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||||
VK_PIPELINE_STAGE_TRANSFER_BIT,
|
VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||||
VK_ACCESS_TRANSFER_READ_BIT);
|
VK_ACCESS_TRANSFER_READ_BIT);
|
||||||
|
|
||||||
m_barriers.recordCommands(m_cmd);
|
m_barriers.recordCommands(m_cmd);
|
||||||
|
|
||||||
bufferImageCopy.imageSubresource = dstSubresource;
|
bufferImageCopy.imageSubresource = dstSubresource;
|
||||||
@ -785,6 +801,7 @@ namespace dxvk {
|
|||||||
dstImage->info().layout,
|
dstImage->info().layout,
|
||||||
dstImage->info().stages,
|
dstImage->info().stages,
|
||||||
dstImage->info().access);
|
dstImage->info().access);
|
||||||
|
|
||||||
m_barriers.accessImage(
|
m_barriers.accessImage(
|
||||||
srcImage, srcSubresourceRange,
|
srcImage, srcSubresourceRange,
|
||||||
srcImage->pickLayout(VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL),
|
srcImage->pickLayout(VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL),
|
||||||
@ -793,6 +810,7 @@ namespace dxvk {
|
|||||||
srcImage->info().layout,
|
srcImage->info().layout,
|
||||||
srcImage->info().stages,
|
srcImage->info().stages,
|
||||||
srcImage->info().access);
|
srcImage->info().access);
|
||||||
|
|
||||||
m_barriers.recordCommands(m_cmd);
|
m_barriers.recordCommands(m_cmd);
|
||||||
|
|
||||||
m_cmd->trackResource(dstImage);
|
m_cmd->trackResource(dstImage);
|
||||||
@ -850,11 +868,13 @@ namespace dxvk {
|
|||||||
srcImage->info().layout,
|
srcImage->info().layout,
|
||||||
srcImage->info().stages,
|
srcImage->info().stages,
|
||||||
srcImage->info().access);
|
srcImage->info().access);
|
||||||
|
|
||||||
m_barriers.accessBuffer(dstSlice,
|
m_barriers.accessBuffer(dstSlice,
|
||||||
VK_PIPELINE_STAGE_TRANSFER_BIT,
|
VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||||
VK_ACCESS_TRANSFER_WRITE_BIT,
|
VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||||
dstBuffer->info().stages,
|
dstBuffer->info().stages,
|
||||||
dstBuffer->info().access);
|
dstBuffer->info().access);
|
||||||
|
|
||||||
m_barriers.recordCommands(m_cmd);
|
m_barriers.recordCommands(m_cmd);
|
||||||
|
|
||||||
m_cmd->trackResource(srcImage);
|
m_cmd->trackResource(srcImage);
|
||||||
@ -1254,6 +1274,9 @@ namespace dxvk {
|
|||||||
// reasonably small, we do not know how much data apps may upload.
|
// reasonably small, we do not know how much data apps may upload.
|
||||||
auto physicalSlice = buffer->subSlice(offset, size);
|
auto physicalSlice = buffer->subSlice(offset, size);
|
||||||
|
|
||||||
|
if (m_barriers.isBufferDirty(physicalSlice, DxvkAccess::Write))
|
||||||
|
m_barriers.recordCommands(m_cmd);
|
||||||
|
|
||||||
if ((size <= 4096) && ((size & 0x3) == 0) && ((offset & 0x3) == 0)) {
|
if ((size <= 4096) && ((size & 0x3) == 0) && ((offset & 0x3) == 0)) {
|
||||||
m_cmd->cmdUpdateBuffer(
|
m_cmd->cmdUpdateBuffer(
|
||||||
physicalSlice.handle(),
|
physicalSlice.handle(),
|
||||||
@ -1277,7 +1300,6 @@ namespace dxvk {
|
|||||||
VK_ACCESS_TRANSFER_WRITE_BIT,
|
VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||||
buffer->info().stages,
|
buffer->info().stages,
|
||||||
buffer->info().access);
|
buffer->info().access);
|
||||||
m_barriers.recordCommands(m_cmd);
|
|
||||||
|
|
||||||
m_cmd->trackResource(buffer->resource());
|
m_cmd->trackResource(buffer->resource());
|
||||||
}
|
}
|
||||||
@ -1533,6 +1555,8 @@ namespace dxvk {
|
|||||||
m_flags.set(DxvkContextFlag::GpRenderPassBound);
|
m_flags.set(DxvkContextFlag::GpRenderPassBound);
|
||||||
m_flags.clr(DxvkContextFlag::GpClearRenderTargets);
|
m_flags.clr(DxvkContextFlag::GpClearRenderTargets);
|
||||||
|
|
||||||
|
m_barriers.recordCommands(m_cmd);
|
||||||
|
|
||||||
this->renderPassBindFramebuffer(
|
this->renderPassBindFramebuffer(
|
||||||
m_state.om.framebuffer,
|
m_state.om.framebuffer,
|
||||||
m_state.om.renderPassOps,
|
m_state.om.renderPassOps,
|
||||||
@ -2044,6 +2068,7 @@ namespace dxvk {
|
|||||||
this->updateComputeShaderResources();
|
this->updateComputeShaderResources();
|
||||||
this->updateComputePipelineState();
|
this->updateComputePipelineState();
|
||||||
this->updateComputeShaderDescriptors();
|
this->updateComputeShaderDescriptors();
|
||||||
|
m_barriers.recordCommands(m_cmd);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user