From c978e62ec8616031a028f2ffa2b5bf3a6b3662cc Mon Sep 17 00:00:00 2001 From: Philip Rebohle Date: Fri, 13 Jan 2023 14:20:27 +0100 Subject: [PATCH] [dxvk] Implement better priority system for background shader compiles Reduces the number of workers that perform background optimization, which may reduce the performance impact when encountering a large number of new pipelines at once. --- src/dxvk/dxvk_graphics.cpp | 2 +- src/dxvk/dxvk_pipemanager.cpp | 178 +++++++++++++++------------------- src/dxvk/dxvk_pipemanager.h | 39 +++++--- src/dxvk/dxvk_state_cache.cpp | 2 +- 4 files changed, 103 insertions(+), 118 deletions(-) diff --git a/src/dxvk/dxvk_graphics.cpp b/src/dxvk/dxvk_graphics.cpp index ea34dd01..a6cba785 100644 --- a/src/dxvk/dxvk_graphics.cpp +++ b/src/dxvk/dxvk_graphics.cpp @@ -938,7 +938,7 @@ namespace dxvk { // If necessary, compile an optimized pipeline variant if (!instance->fastHandle.load()) - m_workers->compileGraphicsPipeline(this, state); + m_workers->compileGraphicsPipeline(this, state, DxvkPipelinePriority::Low); // Only store pipelines in the state cache that cannot benefit // from pipeline libraries, or if that feature is disabled. diff --git a/src/dxvk/dxvk_pipemanager.cpp b/src/dxvk/dxvk_pipemanager.cpp index cbcc162d..b6443975 100644 --- a/src/dxvk/dxvk_pipemanager.cpp +++ b/src/dxvk/dxvk_pipemanager.cpp @@ -21,40 +21,28 @@ namespace dxvk { void DxvkPipelineWorkers::compilePipelineLibrary( DxvkShaderPipelineLibrary* library, DxvkPipelinePriority priority) { - std::unique_lock lock(m_queueLock); + std::unique_lock lock(m_lock); this->startWorkers(); m_pendingTasks += 1; - PipelineLibraryEntry e = { }; - e.pipelineLibrary = library; - - if (priority == DxvkPipelinePriority::High) { - m_queuedLibrariesPrioritized.push(e); - m_queueCondPrioritized.notify_one(); - } else { - m_queuedLibraries.push(e); - } - - m_queueCond.notify_one(); + m_buckets[uint32_t(priority)].queue.emplace(library); + notifyWorkers(priority); } void DxvkPipelineWorkers::compileGraphicsPipeline( DxvkGraphicsPipeline* pipeline, - const DxvkGraphicsPipelineStateInfo& state) { - std::unique_lock lock(m_queueLock); + const DxvkGraphicsPipelineStateInfo& state, + DxvkPipelinePriority priority) { + std::unique_lock lock(m_lock); this->startWorkers(); pipeline->acquirePipeline(); m_pendingTasks += 1; - PipelineEntry e = { }; - e.graphicsPipeline = pipeline; - e.graphicsState = state; - - m_queuedPipelines.push(e); - m_queueCond.notify_one(); + m_buckets[uint32_t(priority)].queue.emplace(pipeline, state); + notifyWorkers(priority); } @@ -64,14 +52,15 @@ namespace dxvk { void DxvkPipelineWorkers::stopWorkers() { - { std::unique_lock lock(m_queueLock); + { std::unique_lock lock(m_lock); if (!m_workersRunning) return; m_workersRunning = false; - m_queueCond.notify_all(); - m_queueCondPrioritized.notify_all(); + + for (uint32_t i = 0; i < m_buckets.size(); i++) + m_buckets[i].cond.notify_all(); } for (auto& worker : m_workers) @@ -81,8 +70,23 @@ namespace dxvk { } + void DxvkPipelineWorkers::notifyWorkers(DxvkPipelinePriority priority) { + uint32_t index = uint32_t(priority); + + // If any workers are idle in a suitable set, notify the corresponding + // condition variable. If all workers are busy anyway, we know that the + // job is going to be picked up at some point anyway. + for (uint32_t i = index; i < m_buckets.size(); i++) { + if (m_buckets[i].idleWorkers) { + m_buckets[i].cond.notify_one(); + break; + } + } + } + + void DxvkPipelineWorkers::startWorkers() { - if (!m_workersRunning) { + if (!std::exchange(m_workersRunning, true)) { // Use all available cores by default uint32_t workerCount = dxvk::thread::hardware_concurrency(); @@ -98,102 +102,74 @@ namespace dxvk { // Number of workers that can process pipeline pipelines with normal // priority. Any other workers can only build high-priority pipelines. - uint32_t npWorkerCount = m_device->canUseGraphicsPipelineLibrary() - ? std::max(((workerCount - 1) * 5) / 7, 1u) - : workerCount; - uint32_t hpWorkerCount = workerCount - npWorkerCount; + uint32_t npWorkerCount = std::max(((workerCount - 1) * 5) / 7, 1u); + uint32_t lpWorkerCount = std::max(((workerCount - 1) * 2) / 7, 1u); - Logger::info(str::format("DXVK: Using ", npWorkerCount, " + ", hpWorkerCount, " compiler threads")); - m_workers.resize(npWorkerCount + hpWorkerCount); + m_workers.reserve(workerCount); - // Set worker flag so that they don't exit immediately - m_workersRunning = true; + for (size_t i = 0; i < workerCount; i++) { + DxvkPipelinePriority priority = DxvkPipelinePriority::Normal; - for (size_t i = 0; i < m_workers.size(); i++) { - m_workers[i] = i >= npWorkerCount - ? dxvk::thread([this] { runWorkerPrioritized(); }) - : dxvk::thread([this] { runWorker(); }); - m_workers[i].set_priority(ThreadPriority::Lowest); + if (m_device->canUseGraphicsPipelineLibrary()) { + if (i >= npWorkerCount) + priority = DxvkPipelinePriority::High; + else if (i < lpWorkerCount) + priority = DxvkPipelinePriority::Low; + } + + m_workers.emplace_back([this, priority] { + runWorker(priority); + }); } + + Logger::info(str::format("DXVK: Using ", workerCount, " compiler threads")); } } - void DxvkPipelineWorkers::runWorker() { - env::setThreadName("dxvk-shader"); + void DxvkPipelineWorkers::runWorker(DxvkPipelinePriority maxPriority) { + static const std::array suffixes = { 'h', 'n', 'l' }; + + const uint32_t maxPriorityIndex = uint32_t(maxPriority); + env::setThreadName(str::format("dxvk-shader-", suffixes.at(maxPriorityIndex))); while (true) { - std::optional p; - std::optional l; + PipelineEntry entry; - { std::unique_lock lock(m_queueLock); + { std::unique_lock lock(m_lock); + auto& bucket = m_buckets[maxPriorityIndex]; - m_queueCond.wait(lock, [this] { - return !m_workersRunning - || !m_queuedLibrariesPrioritized.empty() - || !m_queuedLibraries.empty() - || !m_queuedPipelines.empty(); + bucket.idleWorkers += 1; + bucket.cond.wait(lock, [this, maxPriorityIndex, &entry] { + // Attempt to fetch a work item from the + // highest-priority queue that is not empty + for (uint32_t i = 0; i <= maxPriorityIndex; i++) { + if (!m_buckets[i].queue.empty()) { + entry = m_buckets[i].queue.front(); + m_buckets[i].queue.pop(); + return true; + } + } + + return !m_workersRunning; }); - if (!m_workersRunning) { - // Skip pending work, exiting early is - // more important in this case. - break; - } else if (!m_queuedLibrariesPrioritized.empty()) { - l = m_queuedLibrariesPrioritized.front(); - m_queuedLibrariesPrioritized.pop(); - } else if (!m_queuedLibraries.empty()) { - l = m_queuedLibraries.front(); - m_queuedLibraries.pop(); - } else if (!m_queuedPipelines.empty()) { - p = m_queuedPipelines.front(); - m_queuedPipelines.pop(); - } - } - - if (l) { - if (l->pipelineLibrary) - l->pipelineLibrary->compilePipeline(); - - m_pendingTasks -= 1; - } - - if (p) { - if (p->graphicsPipeline) { - p->graphicsPipeline->compilePipeline(p->graphicsState); - p->graphicsPipeline->releasePipeline(); - } - - m_pendingTasks -= 1; - } - } - } - - - void DxvkPipelineWorkers::runWorkerPrioritized() { - env::setThreadName("dxvk-shader-p"); - - while (true) { - PipelineLibraryEntry l = { }; - - { std::unique_lock lock(m_queueLock); - - m_queueCondPrioritized.wait(lock, [this] { - return !m_workersRunning - || !m_queuedLibrariesPrioritized.empty(); - }); + bucket.idleWorkers -= 1; + // Skip pending work, exiting early is + // more important in this case. if (!m_workersRunning) break; - - l = m_queuedLibrariesPrioritized.front(); - m_queuedLibrariesPrioritized.pop(); } - if (l.pipelineLibrary) - l.pipelineLibrary->compilePipeline(); - - m_pendingTasks -= 1; + if (entry.pipelineLibrary) { + entry.pipelineLibrary->compilePipeline(); + m_pendingTasks -= 1; + } else if (entry.graphicsPipeline) { + entry.graphicsPipeline->compilePipeline(entry.graphicsState); + entry.graphicsPipeline->releasePipeline(); + m_pendingTasks -= 1; + } } } diff --git a/src/dxvk/dxvk_pipemanager.h b/src/dxvk/dxvk_pipemanager.h index 49768d4b..033e37ee 100644 --- a/src/dxvk/dxvk_pipemanager.h +++ b/src/dxvk/dxvk_pipemanager.h @@ -38,8 +38,9 @@ namespace dxvk { * \brief Pipeline priority */ enum class DxvkPipelinePriority : uint32_t { - Normal = 0, - High = 1, + High = 0, + Normal = 1, + Low = 2, }; /** @@ -78,7 +79,8 @@ namespace dxvk { */ void compileGraphicsPipeline( DxvkGraphicsPipeline* pipeline, - const DxvkGraphicsPipelineStateInfo& state); + const DxvkGraphicsPipelineStateInfo& state, + DxvkPipelinePriority priority); /** * \brief Checks whether workers are busy @@ -97,34 +99,41 @@ namespace dxvk { private: struct PipelineEntry { + PipelineEntry() + : pipelineLibrary(nullptr), graphicsPipeline(nullptr) { } + + PipelineEntry(DxvkShaderPipelineLibrary* l) + : pipelineLibrary(l), graphicsPipeline(nullptr) { } + + PipelineEntry(DxvkGraphicsPipeline* p, const DxvkGraphicsPipelineStateInfo& s) + : pipelineLibrary(nullptr), graphicsPipeline(p), graphicsState(s) { } + + DxvkShaderPipelineLibrary* pipelineLibrary; DxvkGraphicsPipeline* graphicsPipeline; DxvkGraphicsPipelineStateInfo graphicsState; }; - struct PipelineLibraryEntry { - DxvkShaderPipelineLibrary* pipelineLibrary; + struct PipelineBucket { + dxvk::condition_variable cond; + std::queue queue; + uint32_t idleWorkers = 0; }; DxvkDevice* m_device; std::atomic m_pendingTasks = { 0ull }; - dxvk::mutex m_queueLock; - dxvk::condition_variable m_queueCond; - dxvk::condition_variable m_queueCondPrioritized; - - std::queue m_queuedLibrariesPrioritized; - std::queue m_queuedLibraries; - std::queue m_queuedPipelines; + dxvk::mutex m_lock; + std::array m_buckets; bool m_workersRunning = false; std::vector m_workers; + void notifyWorkers(DxvkPipelinePriority priority); + void startWorkers(); - void runWorker(); - - void runWorkerPrioritized(); + void runWorker(DxvkPipelinePriority maxPriority); }; diff --git a/src/dxvk/dxvk_state_cache.cpp b/src/dxvk/dxvk_state_cache.cpp index b0c1f131..1bd31e67 100644 --- a/src/dxvk/dxvk_state_cache.cpp +++ b/src/dxvk/dxvk_state_cache.cpp @@ -453,7 +453,7 @@ namespace dxvk { if (!pipeline) pipeline = m_pipeManager->createGraphicsPipeline(item.gp); - m_pipeWorkers->compileGraphicsPipeline(pipeline, entry.gpState); + m_pipeWorkers->compileGraphicsPipeline(pipeline, entry.gpState, DxvkPipelinePriority::Normal); } break; case DxvkStateCacheEntryType::PipelineLibrary: {