From 1fe5b7476253d5075f879ee7d4a15c12e0021aff Mon Sep 17 00:00:00 2001 From: Philip Rebohle <philip.rebohle@tu-dortmund.de> Date: Sat, 16 Dec 2017 18:10:55 +0100 Subject: [PATCH] Optimized command submission Command submission now does not synchronize with the device every single time. Instead, the command list and the fence that was created for it are added to a queue. A separate thread will then wait for the execution to complete and return the command list to the device. --- src/dxvk/dxvk_device.cpp | 16 ++++++----- src/dxvk/dxvk_device.h | 8 ++++++ src/dxvk/dxvk_memory.cpp | 20 ++++++------- src/dxvk/dxvk_memory.h | 3 +- src/dxvk/dxvk_queue.cpp | 62 ++++++++++++++++++++++++++++++++++++++++ src/dxvk/dxvk_queue.h | 52 +++++++++++++++++++++++++++++++++ src/dxvk/meson.build | 1 + 7 files changed, 143 insertions(+), 19 deletions(-) create mode 100644 src/dxvk/dxvk_queue.cpp create mode 100644 src/dxvk/dxvk_queue.h diff --git a/src/dxvk/dxvk_device.cpp b/src/dxvk/dxvk_device.cpp index 72a69eb0b..a3950de7c 100644 --- a/src/dxvk/dxvk_device.cpp +++ b/src/dxvk/dxvk_device.cpp @@ -12,7 +12,8 @@ namespace dxvk { m_features (features), m_memory (new DxvkMemoryAllocator(adapter, vkd)), m_renderPassPool (new DxvkRenderPassPool (vkd)), - m_pipelineManager (new DxvkPipelineManager(vkd)) { + m_pipelineManager (new DxvkPipelineManager(vkd)), + m_submissionQueue (this) { m_vkd->vkGetDeviceQueue(m_vkd->device(), m_adapter->graphicsQueueFamily(), 0, &m_graphicsQueue); @@ -214,12 +215,8 @@ namespace dxvk { waitSemaphore, wakeSemaphore, fence->handle()); } - // TODO Delay synchronization by putting these into a ring buffer - fence->wait(std::numeric_limits<uint64_t>::max()); - commandList->reset(); - - // FIXME this must go away once the ring buffer is implemented - m_recycledCommandLists.returnObject(commandList); + // Add this to the set of running submissions + m_submissionQueue.submit(fence, commandList); m_statCounters.increment(DxvkStat::DevQueueSubmissions, 1); return fence; } @@ -232,4 +229,9 @@ namespace dxvk { 
throw DxvkError("DxvkDevice::waitForIdle: Operation failed"); } + + void DxvkDevice::recycleCommandList(const Rc<DxvkCommandList>& cmdList) { + m_recycledCommandLists.returnObject(cmdList); + } + } \ No newline at end of file diff --git a/src/dxvk/dxvk_device.h b/src/dxvk/dxvk_device.h index c85c9c2b8..852a3d013 100644 --- a/src/dxvk/dxvk_device.h +++ b/src/dxvk/dxvk_device.h @@ -9,6 +9,7 @@ #include "dxvk_image.h" #include "dxvk_memory.h" #include "dxvk_pipemanager.h" +#include "dxvk_queue.h" #include "dxvk_recycler.h" #include "dxvk_renderpass.h" #include "dxvk_sampler.h" @@ -30,6 +31,8 @@ namespace dxvk { * contexts. Multiple contexts can be created for a device. */ class DxvkDevice : public RcObject { + friend class DxvkSubmissionQueue; + constexpr static VkDeviceSize DefaultStagingBufferSize = 64 * 1024 * 1024; public: @@ -308,6 +311,11 @@ namespace dxvk { DxvkStatCounters m_statCounters; + DxvkSubmissionQueue m_submissionQueue; + + void recycleCommandList( + const Rc<DxvkCommandList>& cmdList); + }; } \ No newline at end of file diff --git a/src/dxvk/dxvk_memory.cpp b/src/dxvk/dxvk_memory.cpp index ac60622ec..183704ea1 100644 --- a/src/dxvk/dxvk_memory.cpp +++ b/src/dxvk/dxvk_memory.cpp @@ -58,23 +58,20 @@ namespace dxvk { : m_heap (heap), m_memory(memory), m_mapPtr(mapPtr), - m_size (size), - m_free (size) { - TRACE(this); + m_size (size) { // Mark the entire chunk as free m_freeList.push_back(FreeSlice { 0, size }); } DxvkMemoryChunk::~DxvkMemoryChunk() { - TRACE(this); m_heap->freeDeviceMemory(m_memory); } DxvkMemory DxvkMemoryChunk::alloc(VkDeviceSize size, VkDeviceSize align) { - // Fast exit if the chunk is full already - if (size > m_free) + // If the chunk is full, return + if (m_freeList.size() == 0) return DxvkMemory(); // Select the slice to allocate from in a worst-fit @@ -82,8 +79,12 @@ auto bestSlice = m_freeList.begin(); for (auto slice = m_freeList.begin(); slice != m_freeList.end(); slice++) { - if (slice->length > bestSlice->length) + if (slice->length == 
size) { bestSlice = slice; + break; + } else if (slice->length > bestSlice->length) { + bestSlice = slice; + } } // We need to align the allocation to the requested alignment @@ -99,7 +100,6 @@ // We can use this slice, but we'll have to add // the unused parts of it back to the free list. m_freeList.erase(bestSlice); - m_free -= size; if (allocStart != sliceStart) m_freeList.push_back({ sliceStart, allocStart - sliceStart }); @@ -108,6 +108,7 @@ m_freeList.push_back({ allocEnd, sliceEnd - allocEnd }); // Create the memory object with the aligned slice + m_delta++; return DxvkMemory(this, m_heap, m_memory, allocStart, allocEnd - allocStart, reinterpret_cast<char*>(m_mapPtr) + allocStart); @@ -117,8 +118,6 @@ void DxvkMemoryChunk::free( VkDeviceSize offset, VkDeviceSize length) { - m_free += length; - // Remove adjacent entries from the free list and then add // a new slice that covers all those entries. Without doing // so, the slice could not be reused for larger allocations. 
@@ -137,6 +136,7 @@ namespace dxvk { } } + m_delta--; m_freeList.push_back({ offset, length }); } diff --git a/src/dxvk/dxvk_memory.h b/src/dxvk/dxvk_memory.h index 4dda2b149..2227e9316 100644 --- a/src/dxvk/dxvk_memory.h +++ b/src/dxvk/dxvk_memory.h @@ -130,8 +130,7 @@ namespace dxvk { VkDeviceMemory const m_memory; void* const m_mapPtr; VkDeviceSize const m_size; - VkDeviceSize m_free = 0; - + size_t m_delta = 0; std::vector<FreeSlice> m_freeList; }; diff --git a/src/dxvk/dxvk_queue.cpp b/src/dxvk/dxvk_queue.cpp new file mode 100644 index 000000000..e7d71b94c --- /dev/null +++ b/src/dxvk/dxvk_queue.cpp @@ -0,0 +1,62 @@ +#include "dxvk_device.h" +#include "dxvk_queue.h" + +namespace dxvk { + + DxvkSubmissionQueue::DxvkSubmissionQueue(DxvkDevice* device) + : m_device(device), + m_thread([this] () { this->threadFunc(); }) { + + } + + + DxvkSubmissionQueue::~DxvkSubmissionQueue() { + m_stopped.store(true); + m_condOnAdd.notify_one(); + m_thread.join(); + } + + + void DxvkSubmissionQueue::submit( + const Rc<DxvkFence>& fence, + const Rc<DxvkCommandList>& cmdList) { + { std::unique_lock<std::mutex> lock(m_mutex); + + m_condOnTake.wait(lock, [this] { + return m_entries.size() < 4; + }); + + m_entries.push({ fence, cmdList }); + } + + m_condOnAdd.notify_one(); + } + + + void DxvkSubmissionQueue::threadFunc() { + while (!m_stopped.load()) { + Entry entry; + + { std::unique_lock<std::mutex> lock(m_mutex); + + m_condOnAdd.wait(lock, [this] { + return m_stopped.load() || (m_entries.size() != 0); + }); + + if (m_entries.size() != 0) { + entry = std::move(m_entries.front()); + m_entries.pop(); + } + } + + m_condOnTake.notify_one(); + + if (entry.fence != nullptr) { + entry.fence->wait(std::numeric_limits<uint64_t>::max()); + entry.cmdList->reset(); + m_device->recycleCommandList(entry.cmdList); + } + } + } + +} \ No newline at end of file diff --git a/src/dxvk/dxvk_queue.h b/src/dxvk/dxvk_queue.h new file mode 100644 index 000000000..69941d5d5 --- /dev/null +++ b/src/dxvk/dxvk_queue.h @@ -0,0 +1,52 @@ +#pragma once + +#include <condition_variable> +#include <mutex> +#include <queue> 
+#include <thread> + +#include "dxvk_cmdlist.h" +#include "dxvk_sync.h" + +namespace dxvk { + + class DxvkDevice; + + /** + * \brief Submission queue + * + * + */ + class DxvkSubmissionQueue { + + public: + + DxvkSubmissionQueue(DxvkDevice* device); + ~DxvkSubmissionQueue(); + + void submit( + const Rc<DxvkFence>& fence, + const Rc<DxvkCommandList>& cmdList); + + private: + + struct Entry { + Rc<DxvkFence> fence; + Rc<DxvkCommandList> cmdList; + }; + + DxvkDevice* m_device; + + std::atomic<bool> m_stopped = { false }; + + std::mutex m_mutex; + std::condition_variable m_condOnAdd; + std::condition_variable m_condOnTake; + std::queue<Entry> m_entries; + std::thread m_thread; + + void threadFunc(); + + }; + +} \ No newline at end of file diff --git a/src/dxvk/meson.build b/src/dxvk/meson.build index cb20499ee..d91c9916e 100644 --- a/src/dxvk/meson.build +++ b/src/dxvk/meson.build @@ -18,6 +18,7 @@ dxvk_src = files([ 'dxvk_memory.cpp', 'dxvk_pipelayout.cpp', 'dxvk_pipemanager.cpp', + 'dxvk_queue.cpp', 'dxvk_renderpass.cpp', 'dxvk_resource.cpp', 'dxvk_sampler.cpp',