[dxvk] Add high-priority queue for shader compiles

Also adds an API to queue shaders for compilation at high priority.
2025-01-07 07:46:19 +01:00 · 2022-08-11 02:37:36 +02:00 · 2022-08-11 02:37:36 +02:00 · c3a53127d7
commit c3a53127d7
parent f09f11aad0
4 changed files with 133 additions and 23 deletions
--- a/src/dxvk/dxvk_device.cpp
+++ b/src/dxvk/dxvk_device.cpp
@ -213,6 +213,12 @@ namespace dxvk {
  }
  void DxvkDevice::requestCompileShader(
    const Rc<DxvkShader>&           shader) {
    // The device does no work of its own here: the request is
    // handed to the pipeline manager obtained from m_objects.
    auto&& manager = m_objects.pipelineManager();
    manager.requestCompileShader(shader);
  }
  void DxvkDevice::presentImage(
    const Rc<vk::Presenter>&        presenter,
          DxvkSubmitStatus*         status) {
--- a/src/dxvk/dxvk_device.h
+++ b/src/dxvk/dxvk_device.h
@ -380,6 +380,13 @@ namespace dxvk {
    void registerShader(
      const Rc<DxvkShader>&         shader);
    /**
     * \brief Prioritizes compilation of a given shader
     *
     * Forwards the request to the pipeline manager, which
     * handles the actual high-priority scheduling.
     * \param [in] shader Shader to start compiling
     */
    void requestCompileShader(
      const Rc<DxvkShader>&         shader);
    /**
     * \brief Presents a swap chain image
     * 
--- a/src/dxvk/dxvk_pipemanager.cpp
+++ b/src/dxvk/dxvk_pipemanager.cpp
@ -7,17 +7,9 @@
 namespace dxvk {
  // Constructs the worker pool object for the given device.
  // NOTE(review): this span is rendered from a diff, lines prefixed with
  // '-' / '+' are removed / added lines; confirm which body lines survive.
  // As shown, the body derives m_workerCount as 5/7 of (CPU cores - 1),
  // clamps it to [1, 32], and lets a positive numCompilerThreads config
  // value replace the computed count.
  DxvkPipelineWorkers::DxvkPipelineWorkers(
-          DxvkDevice*                     device) {
+          DxvkDevice*                     device)
-    // Use a reasonably large number of threads for compiling, but
+  : m_device(device) {
    // leave some cores to the application to avoid excessive stutter
    uint32_t numCpuCores = dxvk::thread::hardware_concurrency();
    // max(1u, ...) keeps the subtraction from wrapping if 0 cores are reported
    m_workerCount = ((std::max(1u, numCpuCores) - 1) * 5) / 7;
    if (m_workerCount <  1) m_workerCount =  1;
    if (m_workerCount > 32) m_workerCount = 32;
    // An explicit thread count from the config is taken as-is, unclamped
    if (device->config().numCompilerThreads > 0)
      m_workerCount = device->config().numCompilerThreads;
  }
@ -27,7 +19,8 @@ namespace dxvk {
  void DxvkPipelineWorkers::compilePipelineLibrary(
-          DxvkShaderPipelineLibrary*      library) {
+          DxvkShaderPipelineLibrary*      library,
          DxvkPipelinePriority            priority) {
    std::unique_lock lock(m_queueLock);
    this->startWorkers();
@ -36,7 +29,13 @@ namespace dxvk {
    PipelineLibraryEntry e = { };
    e.pipelineLibrary = library;
    if (priority == DxvkPipelinePriority::High) {
      m_queuedLibrariesPrioritized.push(e);
      m_queueCondPrioritized.notify_one();
    } else {
      m_queuedLibraries.push(e);
    }
    m_queueCond.notify_one();
  }
@ -100,14 +99,37 @@ namespace dxvk {
  // Spawns the worker threads on first use; does nothing if the workers
  // are already running. The pool is split into workers that service all
  // queues (runWorker) and workers that only service the high-priority
  // library queue (runWorkerPrioritized). compilePipelineLibrary calls
  // this while holding m_queueLock.
  void DxvkPipelineWorkers::startWorkers() {
    if (!m_workersRunning) {
      // Use all available cores by default
      uint32_t workerCount = dxvk::thread::hardware_concurrency();
      if (workerCount <  1) workerCount =  1;
      if (workerCount > 64) workerCount = 64;
      // Reduce worker count on 32-bit to save address space
      if (env::is32BitHostPlatform())
        workerCount = std::min(workerCount, 16u);
      // An explicit thread count from the config replaces the
      // computed value, including the caps applied above
      if (m_device->config().numCompilerThreads > 0)
        workerCount = m_device->config().numCompilerThreads;
      // Number of workers that can process pipelines with normal
      // priority. Any other workers can only build high-priority pipelines.
      // Without graphics pipeline library support, all workers are normal
      // workers and hpWorkerCount ends up as zero.
      uint32_t npWorkerCount = m_device->canUseGraphicsPipelineLibrary()
        ? std::max(((workerCount - 1) * 5) / 7, 1u)
        : workerCount;
      uint32_t hpWorkerCount = workerCount - npWorkerCount;
      Logger::info(str::format("DXVK: Using ", npWorkerCount, " + ", hpWorkerCount, " compiler threads"));
      m_workers.resize(npWorkerCount + hpWorkerCount);
      // Set worker flag so that they don't exit immediately
      m_workersRunning = true;
      // The first npWorkerCount threads are normal workers, the
      // remaining ones only process the prioritized queue
-      Logger::info(str::format("DXVK: Using ", m_workerCount, " compiler threads"));
+      for (size_t i = 0; i < m_workers.size(); i++) {
-      m_workers.resize(m_workerCount);
+        m_workers[i] = i >= npWorkerCount
-
+          ? dxvk::thread([this] { runWorkerPrioritized(); })
-      for (auto& worker : m_workers) {
+          : dxvk::thread([this] { runWorker(); });
-        worker = dxvk::thread([this] { runWorker(); });
+        m_workers[i].set_priority(ThreadPriority::Lowest);
        worker.set_priority(ThreadPriority::Lowest);
      }
    }
  }
@ -124,6 +146,7 @@ namespace dxvk {
        m_queueCond.wait(lock, [this] {
          return !m_workersRunning
              || !m_queuedLibrariesPrioritized.empty()
              || !m_queuedLibraries.empty()
              || !m_queuedPipelines.empty();
        });
@ -132,6 +155,9 @@ namespace dxvk {
          // Skip pending work, exiting early is
          // more important in this case.
          break;
        } else if (!m_queuedLibrariesPrioritized.empty()) {
          l = m_queuedLibrariesPrioritized.front();
          m_queuedLibrariesPrioritized.pop();
        } else if (!m_queuedLibraries.empty()) {
          l = m_queuedLibraries.front();
          m_queuedLibraries.pop();
@ -162,6 +188,34 @@ namespace dxvk {
  }
  void DxvkPipelineWorkers::runWorkerPrioritized() {
    env::setThreadName("dxvk-shader-p");
    while (true) {
      PipelineLibraryEntry l = { };
      { std::unique_lock lock(m_queueLock);
        m_queueCondPrioritized.wait(lock, [this] {
          return !m_workersRunning
              || !m_queuedLibrariesPrioritized.empty();
        });
        if (!m_workersRunning)
          break;
        l = m_queuedLibrariesPrioritized.front();
        m_queuedLibrariesPrioritized.pop();
      }
      if (l.pipelineLibrary)
        l.pipelineLibrary->compilePipeline();
      m_pendingTasks -= 1;
    }
  }
  DxvkPipelineManager::DxvkPipelineManager(
          DxvkDevice*         device)
  : m_device    (device),
@ -285,13 +339,30 @@ namespace dxvk {
    const Rc<DxvkShader>&         shader) {
    if (canPrecompileShader(shader)) {
      auto library = createPipelineLibrary(shader);
-      m_workers.compilePipelineLibrary(library);
+      m_workers.compilePipelineLibrary(library, DxvkPipelinePriority::Normal);
    }
    m_stateCache.registerShader(shader);
  }
  void DxvkPipelineManager::requestCompileShader(
    const Rc<DxvkShader>&         shader) {
    // Nothing to do unless the shader still asks for a library compile
    if (shader->needsLibraryCompile()) {
      // Queue the shader's pipeline library, if one
      // can be found, as a high-priority compile job
      if (auto library = findPipelineLibrary(shader))
        m_workers.compilePipelineLibrary(library, DxvkPipelinePriority::High);

      // Notify right away so that this path is only taken once per
      // shader, even if the compile job does not start immediately
      shader->notifyLibraryCompile();
    }
  }
  DxvkPipelineCount DxvkPipelineManager::getPipelineCount() const {
    DxvkPipelineCount result;
    result.numGraphicsPipelines = m_stats.numGraphicsPipelines.load();
--- a/src/dxvk/dxvk_pipemanager.h
+++ b/src/dxvk/dxvk_pipemanager.h
@ -34,6 +34,14 @@ namespace dxvk {
    std::atomic<uint32_t> numComputePipelines   = { 0u };
  };
  /**
   * \brief Pipeline priority
   *
   * Passed along with a pipeline library compile
   * request to choose how urgently it is processed.
   */
  enum class DxvkPipelinePriority : uint32_t {
    /// Regular background compile job
    Normal  = 0u,
    /// Compile job that should be picked up first
    High    = 1u,
  };
  /**
   * \brief Pipeline manager worker threads
   *
@ -56,9 +64,11 @@ namespace dxvk {
     * the pipeline with default compile arguments.
     * Note that pipeline libraries are high priority.
     * \param [in] library The pipeline library
     * \param [in] priority Pipeline priority
     */
    void compilePipelineLibrary(
-            DxvkShaderPipelineLibrary*      library);
+            DxvkShaderPipelineLibrary*      library,
            DxvkPipelinePriority            priority);
    /**
     * \brief Compiles an optimized compute pipeline
@ -107,15 +117,18 @@ namespace dxvk {
      DxvkShaderPipelineLibrary*    pipelineLibrary;
    };
    // Device, queried for config options and graphics
    // pipeline library support when starting workers
    DxvkDevice*                       m_device;
    // Task counter; each worker decrements it after processing an entry
    std::atomic<uint64_t>             m_pendingTasks = { 0ull };
    // Lock taken while queueing work and while workers fetch it
    dxvk::mutex                       m_queueLock;
    // Signalled whenever a pipeline library is queued,
    // regardless of priority; normal workers wait on it
    dxvk::condition_variable          m_queueCond;
    // Signalled only when a high-priority pipeline library
    // is queued; prioritized workers wait on it
    dxvk::condition_variable          m_queueCondPrioritized;
    // High-priority libraries, picked up before any other work
    std::queue<PipelineLibraryEntry>  m_queuedLibrariesPrioritized;
    // Normal-priority libraries
    std::queue<PipelineLibraryEntry>  m_queuedLibraries;
    std::queue<PipelineEntry>         m_queuedPipelines;
    // NOTE(review): startWorkers computes its own local worker
    // count; confirm whether this member is still needed
    uint32_t                          m_workerCount = 0;
    // Set once workers are started; workers exit when it is false
    bool                              m_workersRunning = false;
    // Normal workers first, followed by prioritized workers
    std::vector<dxvk::thread>         m_workers;
@ -123,6 +136,8 @@ namespace dxvk {
    // Thread function of normal workers. Wakes up when any queue has
    // work or the workers are stopped, and takes prioritized libraries
    // before normal-priority libraries.
    void runWorker();
    // Thread function of prioritized workers. Only processes entries
    // from the high-priority library queue until workers are stopped.
    void runWorkerPrioritized();
  };
@ -188,7 +203,7 @@ namespace dxvk {
    DxvkGraphicsPipelineFragmentOutputLibrary* createFragmentOutputLibrary(
      const DxvkGraphicsPipelineFragmentOutputState& state);
-    /*
+    /**
     * \brief Registers a shader
     * 
     * Starts compiling pipelines asynchronously
@ -199,6 +214,17 @@ namespace dxvk {
    void registerShader(
      const Rc<DxvkShader>&         shader);
    /**
     * \brief Prioritizes compilation of a given shader
     *
     * Adds the pipeline library for the given shader
     * to the high-priority queue of the background
     * workers to make sure it gets compiled quickly.
     * Does nothing if the shader does not report that
     * it needs its library to be compiled.
     * \param [in] shader Shader to prioritize
     */
    void requestCompileShader(
      const Rc<DxvkShader>&         shader);
    /**
     * \brief Retrieves total pipeline count
     * \returns Number of compute/graphics pipelines