diff --git a/src/dxvk/dxvk_cmdlist.cpp b/src/dxvk/dxvk_cmdlist.cpp index 1db26cca..2d8e313b 100644 --- a/src/dxvk/dxvk_cmdlist.cpp +++ b/src/dxvk/dxvk_cmdlist.cpp @@ -139,16 +139,19 @@ namespace dxvk { void DxvkCommandList::stagedBufferCopy( + DxvkCmdBuffer cmdBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize dataSize, const DxvkStagingBufferSlice& dataSlice) { + m_cmdBuffersUsed.set(cmdBuffer); + VkBufferCopy region; region.srcOffset = dataSlice.offset; region.dstOffset = dstOffset; region.size = dataSize; - m_vkd->vkCmdCopyBuffer(m_execBuffer, + m_vkd->vkCmdCopyBuffer(getCmdBuffer(cmdBuffer), dataSlice.buffer, dstBuffer, 1, ®ion); } diff --git a/src/dxvk/dxvk_cmdlist.h b/src/dxvk/dxvk_cmdlist.h index 2bf4329b..fa8387c3 100644 --- a/src/dxvk/dxvk_cmdlist.h +++ b/src/dxvk/dxvk_cmdlist.h @@ -620,11 +620,14 @@ namespace dxvk { void cmdUpdateBuffer( + DxvkCmdBuffer cmdBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize dataSize, const void* pData) { - m_vkd->vkCmdUpdateBuffer(m_execBuffer, + m_cmdBuffersUsed.set(cmdBuffer); + + m_vkd->vkCmdUpdateBuffer(getCmdBuffer(cmdBuffer), dstBuffer, dstOffset, dataSize, pData); } @@ -692,6 +695,7 @@ namespace dxvk { void stagedBufferCopy( + DxvkCmdBuffer cmdBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize dataSize, diff --git a/src/dxvk/dxvk_context.cpp b/src/dxvk/dxvk_context.cpp index 494c7852..3f4ff48f 100644 --- a/src/dxvk/dxvk_context.cpp +++ b/src/dxvk/dxvk_context.cpp @@ -28,7 +28,7 @@ namespace dxvk { m_queryManager(gpuQueryPool) { } - + DxvkContext::~DxvkContext() { @@ -68,6 +68,7 @@ namespace dxvk { Rc DxvkContext::endRecording() { this->spillRenderPass(); + m_transfers.recordCommands(m_cmd); m_barriers.recordCommands(m_cmd); m_cmd->endRecording(); @@ -342,6 +343,7 @@ namespace dxvk { data[i] = value; m_cmd->cmdUpdateBuffer( + DxvkCmdBuffer::ExecBuffer, slice.handle, slice.offset, slice.length, @@ -1719,20 +1721,39 @@ namespace dxvk { VkDeviceSize offset, VkDeviceSize size, const void* data) { - this->spillRenderPass(); + bool replaceBuffer = (size == buffer->info().size) + && (size <= (1 << 20)) /* 1 MB */ + && (m_flags.test(DxvkContextFlag::GpRenderPassBound)); + DxvkBufferSliceHandle bufferSlice; + DxvkCmdBuffer cmdBuffer; + + if (replaceBuffer) { + // As an optimization, allocate a free slice and perform + // the copy in the initialization command buffer instead + // interrupting the render pass and stalling the pipeline. + bufferSlice = buffer->allocSlice(); + cmdBuffer = DxvkCmdBuffer::InitBuffer; + + this->invalidateBuffer(buffer, bufferSlice); + } else { + this->spillRenderPass(); + + bufferSlice = buffer->getSliceHandle(offset, size); + cmdBuffer = DxvkCmdBuffer::ExecBuffer; + + if (m_barriers.isBufferDirty(bufferSlice, DxvkAccess::Write)) + m_barriers.recordCommands(m_cmd); + } + // Vulkan specifies that small amounts of data (up to 64kB) can // be copied to a buffer directly if the size is a multiple of // four. Anything else must be copied through a staging buffer. // We'll limit the size to 4kB in order to keep command buffers // reasonably small, we do not know how much data apps may upload. - auto bufferSlice = buffer->getSliceHandle(offset, size); - - if (m_barriers.isBufferDirty(bufferSlice, DxvkAccess::Write)) - m_barriers.recordCommands(m_cmd); - if ((size <= 4096) && ((size & 0x3) == 0) && ((offset & 0x3) == 0)) { m_cmd->cmdUpdateBuffer( + cmdBuffer, bufferSlice.handle, bufferSlice.offset, bufferSlice.length, @@ -1742,13 +1763,18 @@ namespace dxvk { std::memcpy(slice.mapPtr, data, size); m_cmd->stagedBufferCopy( + cmdBuffer, bufferSlice.handle, bufferSlice.offset, bufferSlice.length, slice); } - m_barriers.accessBuffer( + auto& barriers = replaceBuffer + ? m_transfers + : m_barriers; + + barriers.accessBuffer( bufferSlice, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT,