From c3a53127d746a597d2cfab68d833d134976d6fbd Mon Sep 17 00:00:00 2001 From: Philip Rebohle Date: Thu, 11 Aug 2022 02:37:36 +0200 Subject: [PATCH] [dxvk] Add high-priority queue for shader compiles As well as an API to queue shaders as high priority. --- src/dxvk/dxvk_device.cpp | 6 ++ src/dxvk/dxvk_device.h | 7 +++ src/dxvk/dxvk_pipemanager.cpp | 109 ++++++++++++++++++++++++++++------ src/dxvk/dxvk_pipemanager.h | 34 +++++++++-- 4 files changed, 133 insertions(+), 23 deletions(-) diff --git a/src/dxvk/dxvk_device.cpp b/src/dxvk/dxvk_device.cpp index d8bd98a0..08289cee 100644 --- a/src/dxvk/dxvk_device.cpp +++ b/src/dxvk/dxvk_device.cpp @@ -213,6 +213,12 @@ namespace dxvk { } + void DxvkDevice::requestCompileShader( + const Rc& shader) { + m_objects.pipelineManager().requestCompileShader(shader); + } + + void DxvkDevice::presentImage( const Rc& presenter, DxvkSubmitStatus* status) { diff --git a/src/dxvk/dxvk_device.h b/src/dxvk/dxvk_device.h index f4785010..e8f44ee6 100644 --- a/src/dxvk/dxvk_device.h +++ b/src/dxvk/dxvk_device.h @@ -380,6 +380,13 @@ namespace dxvk { void registerShader( const Rc& shader); + /** + * \brief Prioritizes compilation of a given shader + * \param [in] shader Shader to start compiling + */ + void requestCompileShader( + const Rc& shader); + /** * \brief Presents a swap chain image * diff --git a/src/dxvk/dxvk_pipemanager.cpp b/src/dxvk/dxvk_pipemanager.cpp index 426443d7..ffd52f2b 100644 --- a/src/dxvk/dxvk_pipemanager.cpp +++ b/src/dxvk/dxvk_pipemanager.cpp @@ -7,17 +7,9 @@ namespace dxvk { DxvkPipelineWorkers::DxvkPipelineWorkers( - DxvkDevice* device) { - // Use a reasonably large number of threads for compiling, but - // leave some cores to the application to avoid excessive stutter - uint32_t numCpuCores = dxvk::thread::hardware_concurrency(); - m_workerCount = ((std::max(1u, numCpuCores) - 1) * 5) / 7; + DxvkDevice* device) + : m_device(device) { - if (m_workerCount < 1) m_workerCount = 1; - if (m_workerCount > 32) m_workerCount = 
32; - - if (device->config().numCompilerThreads > 0) - m_workerCount = device->config().numCompilerThreads; } @@ -27,7 +19,8 @@ namespace dxvk { void DxvkPipelineWorkers::compilePipelineLibrary( - DxvkShaderPipelineLibrary* library) { + DxvkShaderPipelineLibrary* library, + DxvkPipelinePriority priority) { std::unique_lock lock(m_queueLock); this->startWorkers(); @@ -36,7 +29,13 @@ namespace dxvk { PipelineLibraryEntry e = { }; e.pipelineLibrary = library; - m_queuedLibraries.push(e); + if (priority == DxvkPipelinePriority::High) { + m_queuedLibrariesPrioritized.push(e); + m_queueCondPrioritized.notify_one(); + } else { + m_queuedLibraries.push(e); + } + m_queueCond.notify_one(); } @@ -100,14 +99,37 @@ namespace dxvk { void DxvkPipelineWorkers::startWorkers() { if (!m_workersRunning) { + // Use all available cores by default + uint32_t workerCount = dxvk::thread::hardware_concurrency(); + + if (workerCount < 1) workerCount = 1; + if (workerCount > 64) workerCount = 64; + + // Reduce worker count on 32-bit to save address space + if (env::is32BitHostPlatform()) + workerCount = std::min(workerCount, 16u); + + if (m_device->config().numCompilerThreads > 0) + workerCount = m_device->config().numCompilerThreads; + + // Number of workers that can process pipelines with normal + // priority. Any other workers can only build high-priority pipelines. + uint32_t npWorkerCount = m_device->canUseGraphicsPipelineLibrary() + ? 
std::max(((workerCount - 1) * 5) / 7, 1u) + : workerCount; + uint32_t hpWorkerCount = workerCount - npWorkerCount; + + Logger::info(str::format("DXVK: Using ", npWorkerCount, " + ", hpWorkerCount, " compiler threads")); + m_workers.resize(npWorkerCount + hpWorkerCount); + + // Set worker flag so that they don't exit immediately m_workersRunning = true; - Logger::info(str::format("DXVK: Using ", m_workerCount, " compiler threads")); - m_workers.resize(m_workerCount); - - for (auto& worker : m_workers) { - worker = dxvk::thread([this] { runWorker(); }); - worker.set_priority(ThreadPriority::Lowest); + for (size_t i = 0; i < m_workers.size(); i++) { + m_workers[i] = i >= npWorkerCount + ? dxvk::thread([this] { runWorkerPrioritized(); }) + : dxvk::thread([this] { runWorker(); }); + m_workers[i].set_priority(ThreadPriority::Lowest); } } } @@ -124,6 +146,7 @@ namespace dxvk { m_queueCond.wait(lock, [this] { return !m_workersRunning + || !m_queuedLibrariesPrioritized.empty() || !m_queuedLibraries.empty() || !m_queuedPipelines.empty(); }); @@ -132,6 +155,9 @@ namespace dxvk { // Skip pending work, exiting early is // more important in this case. 
break; + } else if (!m_queuedLibrariesPrioritized.empty()) { + l = m_queuedLibrariesPrioritized.front(); + m_queuedLibrariesPrioritized.pop(); } else if (!m_queuedLibraries.empty()) { l = m_queuedLibraries.front(); m_queuedLibraries.pop(); @@ -162,6 +188,34 @@ namespace dxvk { } + void DxvkPipelineWorkers::runWorkerPrioritized() { + env::setThreadName("dxvk-shader-p"); + + while (true) { + PipelineLibraryEntry l = { }; + + { std::unique_lock lock(m_queueLock); + + m_queueCondPrioritized.wait(lock, [this] { + return !m_workersRunning + || !m_queuedLibrariesPrioritized.empty(); + }); + + if (!m_workersRunning) + break; + + l = m_queuedLibrariesPrioritized.front(); + m_queuedLibrariesPrioritized.pop(); + } + + if (l.pipelineLibrary) + l.pipelineLibrary->compilePipeline(); + + m_pendingTasks -= 1; + } + } + + DxvkPipelineManager::DxvkPipelineManager( DxvkDevice* device) : m_device (device), @@ -285,13 +339,30 @@ namespace dxvk { const Rc& shader) { if (canPrecompileShader(shader)) { auto library = createPipelineLibrary(shader); - m_workers.compilePipelineLibrary(library); + m_workers.compilePipelineLibrary(library, DxvkPipelinePriority::Normal); } m_stateCache.registerShader(shader); } + void DxvkPipelineManager::requestCompileShader( + const Rc& shader) { + if (!shader->needsLibraryCompile()) + return; + + // Dispatch high-priority compile job + auto library = findPipelineLibrary(shader); + + if (library) + m_workers.compilePipelineLibrary(library, DxvkPipelinePriority::High); + + // Notify immediately so that this only gets called + // once, even if compilation does not start immediately + shader->notifyLibraryCompile(); + } + + DxvkPipelineCount DxvkPipelineManager::getPipelineCount() const { DxvkPipelineCount result; result.numGraphicsPipelines = m_stats.numGraphicsPipelines.load(); diff --git a/src/dxvk/dxvk_pipemanager.h b/src/dxvk/dxvk_pipemanager.h index 5661dd86..b71751dd 100644 --- a/src/dxvk/dxvk_pipemanager.h +++ b/src/dxvk/dxvk_pipemanager.h @@ -34,6 +34,14 
@@ namespace dxvk { std::atomic numComputePipelines = { 0u }; }; + /** + * \brief Pipeline priority + */ + enum class DxvkPipelinePriority : uint32_t { + Normal = 0, + High = 1, + }; + /** * \brief Pipeline manager worker threads * @@ -56,9 +64,11 @@ namespace dxvk { * the pipeline with default compile arguments. * Note that pipeline libraries are high priority. * \param [in] library The pipeline library + * \param [in] priority Pipeline priority */ void compilePipelineLibrary( - DxvkShaderPipelineLibrary* library); + DxvkShaderPipelineLibrary* library, + DxvkPipelinePriority priority); /** * \brief Compiles an optimized compute pipeline @@ -107,15 +117,18 @@ namespace dxvk { DxvkShaderPipelineLibrary* pipelineLibrary; }; + DxvkDevice* m_device; + std::atomic m_pendingTasks = { 0ull }; dxvk::mutex m_queueLock; dxvk::condition_variable m_queueCond; + dxvk::condition_variable m_queueCondPrioritized; + std::queue m_queuedLibrariesPrioritized; std::queue m_queuedLibraries; std::queue m_queuedPipelines; - uint32_t m_workerCount = 0; bool m_workersRunning = false; std::vector m_workers; @@ -123,6 +136,8 @@ namespace dxvk { void runWorker(); + void runWorkerPrioritized(); + }; @@ -188,7 +203,7 @@ namespace dxvk { DxvkGraphicsPipelineFragmentOutputLibrary* createFragmentOutputLibrary( const DxvkGraphicsPipelineFragmentOutputState& state); - /* + /** * \brief Registers a shader * * Starts compiling pipelines asynchronously @@ -198,7 +213,18 @@ namespace dxvk { */ void registerShader( const Rc& shader); - + + /** + * \brief Prioritizes compilation of a given shader + * + * Adds the pipeline library for the given shader + * to the high-priority queue of the background + * workers to make sure it gets compiled quickly. + * \param [in] shader Shader to start compiling + */ + void requestCompileShader( + const Rc& shader); + /** * \brief Retrieves total pipeline count * \returns Number of compute/graphics pipelines