
Optimized command submission

Command submission no longer synchronizes with the device on every call.
Instead, the command list and the fence created for it are added to a
queue, and a separate thread waits for execution to complete before
returning the command list to the device.
Philip Rebohle 2017-12-16 18:10:55 +01:00
parent d5a49698b4
commit 1fe5b74762
7 changed files with 143 additions and 19 deletions
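In outline, the change replaces the inline fence wait with a small bounded producer/consumer queue: the submitting thread enqueues the fence and command list, blocking once a few submissions are already in flight, while a worker thread waits on each fence, resets the command list, and returns it to the device for reuse. Below is a minimal, self-contained sketch of that pattern using only the C++ standard library. Job, BoundedSubmissionQueue, and MaxPending are illustrative stand-ins rather than DXVK API; the actual implementation added by this commit lives in src/dxvk/dxvk_queue.h and dxvk_queue.cpp further down.

#include <condition_variable>
#include <cstddef>
#include <functional>
#include <mutex>
#include <queue>
#include <thread>

// Stand-in for one pending submission: "wait for the GPU, then recycle".
struct Job {
  std::function<void()> waitForGpu;  // e.g. wait on the submission fence
  std::function<void()> recycle;     // e.g. reset and return the command list
};

class BoundedSubmissionQueue {
public:
  BoundedSubmissionQueue()
  : m_thread([this] { this->threadFunc(); }) { }

  ~BoundedSubmissionQueue() {
    // Take the lock while setting the flag so the worker cannot miss
    // the wake-up between its predicate check and its wait.
    { std::lock_guard<std::mutex> lock(m_mutex);
      m_stopped = true;
    }
    m_condOnAdd.notify_one();
    m_thread.join();
  }

  // Producer side: blocks once MaxPending jobs are in flight, which
  // caps how far the CPU can run ahead of the GPU.
  void submit(Job job) {
    { std::unique_lock<std::mutex> lock(m_mutex);
      m_condOnTake.wait(lock, [this] { return m_jobs.size() < MaxPending; });
      m_jobs.push(std::move(job));
    }
    m_condOnAdd.notify_one();
  }

private:
  static constexpr std::size_t MaxPending = 4;

  bool                    m_stopped = false;
  std::mutex              m_mutex;
  std::condition_variable m_condOnAdd;
  std::condition_variable m_condOnTake;
  std::queue<Job>         m_jobs;
  std::thread             m_thread;

  // Consumer side: runs on the worker thread until the queue is destroyed,
  // draining any jobs that are still pending at shutdown.
  void threadFunc() {
    while (true) {
      Job job;

      { std::unique_lock<std::mutex> lock(m_mutex);
        m_condOnAdd.wait(lock, [this] { return m_stopped || !m_jobs.empty(); });

        // The queue can only be empty here if we were asked to stop.
        if (m_jobs.empty())
          return;

        job = std::move(m_jobs.front());
        m_jobs.pop();
      }
      m_condOnTake.notify_one();

      job.waitForGpu();  // synchronize with the GPU off the main thread
      job.recycle();     // hand the command list back for reuse
    }
  }
};

A caller would package the fence wait and the recycle step into a Job right after queue submission; the submitting thread then only stalls when more than MaxPending submissions are outstanding, instead of once per submission as before.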

src/dxvk/dxvk_device.cpp

@@ -12,7 +12,8 @@ namespace dxvk {
     m_features        (features),
     m_memory          (new DxvkMemoryAllocator(adapter, vkd)),
     m_renderPassPool  (new DxvkRenderPassPool (vkd)),
-    m_pipelineManager (new DxvkPipelineManager(vkd)) {
+    m_pipelineManager (new DxvkPipelineManager(vkd)),
+    m_submissionQueue (this) {
     m_vkd->vkGetDeviceQueue(m_vkd->device(),
       m_adapter->graphicsQueueFamily(), 0,
       &m_graphicsQueue);
@@ -214,12 +215,8 @@ namespace dxvk {
         waitSemaphore, wakeSemaphore, fence->handle());
     }
     
-    // TODO Delay synchronization by putting these into a ring buffer
-    fence->wait(std::numeric_limits<uint64_t>::max());
-    commandList->reset();
-    
-    // FIXME this must go away once the ring buffer is implemented
-    m_recycledCommandLists.returnObject(commandList);
+    // Add this to the set of running submissions
+    m_submissionQueue.submit(fence, commandList);
     m_statCounters.increment(DxvkStat::DevQueueSubmissions, 1);
     return fence;
   }
@@ -232,4 +229,9 @@ namespace dxvk {
       throw DxvkError("DxvkDevice::waitForIdle: Operation failed");
   }
   
+  
+  void DxvkDevice::recycleCommandList(const Rc<DxvkCommandList>& cmdList) {
+    m_recycledCommandLists.returnObject(cmdList);
+  }
+  
 }

src/dxvk/dxvk_device.h

@@ -9,6 +9,7 @@
 #include "dxvk_image.h"
 #include "dxvk_memory.h"
 #include "dxvk_pipemanager.h"
+#include "dxvk_queue.h"
 #include "dxvk_recycler.h"
 #include "dxvk_renderpass.h"
 #include "dxvk_sampler.h"
@@ -30,6 +31,8 @@ namespace dxvk {
    * contexts. Multiple contexts can be created for a device.
    */
   class DxvkDevice : public RcObject {
+    friend class DxvkSubmissionQueue;
+    
     constexpr static VkDeviceSize DefaultStagingBufferSize = 64 * 1024 * 1024;
     
   public:
@@ -308,6 +311,11 @@ namespace dxvk {
     
     DxvkStatCounters    m_statCounters;
     
+    DxvkSubmissionQueue m_submissionQueue;
+    
+    void recycleCommandList(
+      const Rc<DxvkCommandList>& cmdList);
+    
   };
   
 }

src/dxvk/dxvk_memory.cpp

@@ -58,23 +58,20 @@ namespace dxvk {
   : m_heap  (heap),
     m_memory(memory),
     m_mapPtr(mapPtr),
-    m_size  (size),
-    m_free  (size) {
-    TRACE(this);
+    m_size  (size) {
     // Mark the entire chunk as free
     m_freeList.push_back(FreeSlice { 0, size });
   }
   
   
   DxvkMemoryChunk::~DxvkMemoryChunk() {
-    TRACE(this);
     m_heap->freeDeviceMemory(m_memory);
   }
   
   
   DxvkMemory DxvkMemoryChunk::alloc(VkDeviceSize size, VkDeviceSize align) {
-    // Fast exit if the chunk is full already
-    if (size > m_free)
+    // If the chunk is full, return
+    if (m_freeList.size() == 0)
       return DxvkMemory();
     
     // Select the slice to allocate from in a worst-fit
@@ -82,8 +79,12 @@ namespace dxvk {
     auto bestSlice = m_freeList.begin();
     
     for (auto slice = m_freeList.begin(); slice != m_freeList.end(); slice++) {
-      if (slice->length > bestSlice->length)
-        bestSlice = slice;
+      if (slice->length == size) {
+        bestSlice = slice;
+        break;
+      } else if (slice->length > bestSlice->length) {
+        bestSlice = slice;
+      }
     }
     
     // We need to align the allocation to the requested alignment
@@ -99,7 +100,6 @@ namespace dxvk {
     // We can use this slice, but we'll have to add
     // the unused parts of it back to the free list.
     m_freeList.erase(bestSlice);
-    m_free -= size;
     
     if (allocStart != sliceStart)
       m_freeList.push_back({ sliceStart, allocStart - sliceStart });
@@ -108,6 +108,7 @@ namespace dxvk {
       m_freeList.push_back({ allocEnd, sliceEnd - allocEnd });
     
     // Create the memory object with the aligned slice
+    m_delta++;
     return DxvkMemory(this, m_heap,
       m_memory, allocStart, allocEnd - allocStart,
       reinterpret_cast<char*>(m_mapPtr) + allocStart);
@@ -117,8 +118,6 @@ namespace dxvk {
   void DxvkMemoryChunk::free(
           VkDeviceSize  offset,
           VkDeviceSize  length) {
-    m_free += length;
-    
     // Remove adjacent entries from the free list and then add
     // a new slice that covers all those entries. Without doing
     // so, the slice could not be reused for larger allocations.
@@ -137,6 +136,7 @@ namespace dxvk {
       }
     }
     
+    m_delta--;
     m_freeList.push_back({ offset, length });
   }

src/dxvk/dxvk_memory.h

@@ -130,8 +130,7 @@ namespace dxvk {
     VkDeviceMemory const  m_memory;
     void*          const  m_mapPtr;
     VkDeviceSize   const  m_size;
-    VkDeviceSize          m_free = 0;
+    size_t                m_delta = 0;
     
     std::vector<FreeSlice> m_freeList;
   };
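As a side note on the allocator change above: slice selection now prefers an exact-size free slice and otherwise falls back to the largest slice (worst fit), and a full chunk is detected by an empty free list rather than the removed m_free byte counter. The following standalone sketch restates just the selection logic; FreeSlice, pickSlice, and the plain vector are stand-ins for the DXVK types, not the actual code.

#include <cstdint>
#include <vector>

struct FreeSlice {
  uint64_t offset;
  uint64_t length;
};

// Returns the slice to allocate from: an exact-size match if one exists,
// otherwise the largest slice seen (worst fit). end() means the list is empty.
std::vector<FreeSlice>::iterator pickSlice(
        std::vector<FreeSlice>& freeList,
        uint64_t                size) {
  auto best = freeList.begin();

  for (auto it = freeList.begin(); it != freeList.end(); it++) {
    if (it->length == size)
      return it;               // exact fit, stop searching
    if (it->length > best->length)
      best = it;               // otherwise remember the largest slice so far
  }
  return best;
}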

src/dxvk/dxvk_queue.cpp (new file, 62 lines)

#include "dxvk_device.h"
#include "dxvk_queue.h"

namespace dxvk {
  
  DxvkSubmissionQueue::DxvkSubmissionQueue(DxvkDevice* device)
  : m_device(device),
    m_thread([this] () { this->threadFunc(); }) {
  }
  
  
  DxvkSubmissionQueue::~DxvkSubmissionQueue() {
    m_stopped.store(true);
    m_condOnAdd.notify_one();
    m_thread.join();
  }
  
  
  void DxvkSubmissionQueue::submit(
    const Rc<DxvkFence>&       fence,
    const Rc<DxvkCommandList>& cmdList) {
    { std::unique_lock<std::mutex> lock(m_mutex);
      
      m_condOnTake.wait(lock, [this] {
        return m_entries.size() < 4;
      });
      
      m_entries.push({ fence, cmdList });
    }
    
    m_condOnAdd.notify_one();
  }
  
  
  void DxvkSubmissionQueue::threadFunc() {
    while (!m_stopped.load()) {
      Entry entry;
      
      { std::unique_lock<std::mutex> lock(m_mutex);
        
        m_condOnAdd.wait(lock, [this] {
          return m_stopped.load() || (m_entries.size() != 0);
        });
        
        if (m_entries.size() != 0) {
          entry = std::move(m_entries.front());
          m_entries.pop();
        }
      }
      
      m_condOnTake.notify_one();
      
      if (entry.fence != nullptr) {
        entry.fence->wait(std::numeric_limits<uint64_t>::max());
        entry.cmdList->reset();
        
        m_device->recycleCommandList(entry.cmdList);
      }
    }
  }
  
}

src/dxvk/dxvk_queue.h (new file, 52 lines)

#pragma once

#include <condition_variable>
#include <mutex>
#include <queue>
#include <thread>

#include "dxvk_cmdlist.h"
#include "dxvk_sync.h"

namespace dxvk {
  
  class DxvkDevice;
  
  /**
   * \brief Submission queue
   * 
   * Holds command lists that have been submitted to the device
   * and returns them to the device for reuse once the GPU has
   * finished executing them.
   */
  class DxvkSubmissionQueue {
    
  public:
    
    DxvkSubmissionQueue(DxvkDevice* device);
    ~DxvkSubmissionQueue();
    
    void submit(
      const Rc<DxvkFence>&       fence,
      const Rc<DxvkCommandList>& cmdList);
    
  private:
    
    struct Entry {
      Rc<DxvkFence>       fence;
      Rc<DxvkCommandList> cmdList;
    };
    
    DxvkDevice*             m_device;
    
    std::atomic<bool>       m_stopped = { false };
    
    std::mutex              m_mutex;
    std::condition_variable m_condOnAdd;
    std::condition_variable m_condOnTake;
    std::queue<Entry>       m_entries;
    std::thread             m_thread;
    
    void threadFunc();
    
  };
  
}

src/dxvk/meson.build

@@ -18,6 +18,7 @@ dxvk_src = files([
   'dxvk_memory.cpp',
   'dxvk_pipelayout.cpp',
   'dxvk_pipemanager.cpp',
+  'dxvk_queue.cpp',
   'dxvk_renderpass.cpp',
   'dxvk_resource.cpp',
   'dxvk_sampler.cpp',