[util] Add helper class for frame latency control

2025-04-04 01:25:27 +02:00 · 2024-10-25 18:12:28 +02:00 · 2024-10-25 18:12:28 +02:00 · f57306a1a0
commit f57306a1a0
parent 763780fb4c
3 changed files with 219 additions and 0 deletions
--- a/src/util/meson.build
+++ b/src/util/meson.build
@ -4,6 +4,7 @@ util_src = files([
  'util_fps_limiter.cpp',
  'util_flush.cpp',
  'util_gdi.cpp',
+  'util_latency.cpp',
  'util_luid.cpp',
  'util_matrix.cpp',
  'util_shared_res.cpp',
--- a/src/util/util_latency.cpp
+++ b/src/util/util_latency.cpp
@ -0,0 +1,94 @@
+#include <cmath>
+#include <stdexcept>
+
+#include "util_env.h"
+#include "util_latency.h"
+#include "util_string.h"
+
+#include "./log/log.h"
+
+#include "./sync/sync_spinlock.h"
+
+namespace dxvk {
+
+  // Reads the DXVK_FRAME_RATE environment variable and, if it contains
+  // a finite number, stores it as the frame rate override applied in
+  // sleep(). Malformed or non-finite values leave the override disabled.
+  DxvkLatencyControl::DxvkLatencyControl() {
+    std::string env = env::getEnvVar("DXVK_FRAME_RATE");
+
+    if (env.empty())
+      return;
+
+    try {
+      // std::stod throws std::invalid_argument if no conversion can be
+      // performed and std::out_of_range if the value is not representable.
+      // Both derive from std::logic_error.
+      double limit = std::stod(env);
+
+      // A NaN would end up in the present interval computation in sleep(),
+      // where it is converted to an integer, so reject it here.
+      if (std::isfinite(limit))
+        m_frameRateLimit = limit;
+    } catch (const std::logic_error&) {
+      // Ignore the variable rather than terminating the process
+    }
+  }
+
+
+  // No explicit cleanup is performed here
+  DxvkLatencyControl::~DxvkLatencyControl() = default;
+
+
+  // Stalls the calling thread until the estimated point in time at which
+  // the next frame should start on the CPU.
+  //
+  // frameId:   Current frame ID. Selects the entries for the current and
+  //            previous frames from the ring of frame entries.
+  // frameRate: Target frame rate. Only the magnitude is used, and zero
+  //            means that no present interval is enforced. If the
+  //            DXVK_FRAME_RATE override is set, the lower of both applies.
+  void DxvkLatencyControl::sleep(
+        uint64_t                      frameId,
+        double                        frameRate) {
+    // Apply environment override as necessary. Without a frame rate from
+    // the caller the override is used as-is, otherwise the lower one wins.
+    if (m_frameRateLimit != 0.0) {
+      frameRate = frameRate == 0.0 ? std::abs(m_frameRateLimit)
+        : std::min(std::abs(frameRate), std::abs(m_frameRateLimit));
+    }
+
+    // Wait until the GPU start marker of the current frame and the GPU
+    // present marker of the previous frame have been set. The acquire
+    // loads pair with the release operations in setMarker, so the time
+    // stamps read below are visible once the bits are observed.
+    auto& currFrame = m_frames[(frameId - 0u) % m_frames.size()];
+    auto& prevFrame = m_frames[(frameId - 1u) % m_frames.size()];
+
+    sync::spin(-1u, [&currFrame, &prevFrame] {
+      return bool(currFrame.markerMask.load(std::memory_order_acquire) & (1u << uint32_t(DxvkLatencyMarker::GpuFrameStart)))
+          && bool(prevFrame.markerMask.load(std::memory_order_acquire) & (1u << uint32_t(DxvkLatencyMarker::GpuPresentEnd)));
+    });
+
+    // Estimate GPU execution time. Use the minimum from the past frames
+    // to avoid creating a feedback loop with oscillating frame times.
+    // The current frame is skipped since it has not finished on the GPU.
+    // The initial value of ~0u nanoseconds (roughly 4.29 seconds) acts as
+    // an upper bound for the estimate.
+    auto gpuFrameInterval = std::chrono::nanoseconds(~0u);
+
+    for (uint32_t i = 1; i < m_frames.size(); i++) {
+      auto& frame = m_frames[(frameId - i) % m_frames.size()];
+
+      gpuFrameInterval = std::min(gpuFrameInterval,
+        std::chrono::duration_cast<std::chrono::nanoseconds>(
+          frame.timestamps[uint32_t(DxvkLatencyMarker::GpuFrameEnd)] -
+          frame.timestamps[uint32_t(DxvkLatencyMarker::GpuFrameStart)]));
+    }
+
+    // If the minimum present interval is higher than the GPU execution time,
+    // we need to delay the next frame even further to reduce frame latency.
+    // The interval stays at zero if no frame rate is requested.
+    auto presentInterval = std::chrono::nanoseconds(0);
+
+    if (frameRate != 0.0)
+      presentInterval = std::chrono::nanoseconds(int64_t(1000000000.0 / std::abs(frameRate)));
+
+    // Estimate simulation time from the CPU frame start marker to the first
+    // submission. Use the maximum of the past few frames, including the
+    // current one, to account for fluctuations.
+    auto cpuSubmitDelay = std::chrono::nanoseconds(0);
+
+    for (uint32_t i = 0; i < m_frames.size(); i++) {
+      auto& frame = m_frames[(frameId - i) % m_frames.size()];
+      cpuSubmitDelay = std::max(cpuSubmitDelay, std::chrono::duration_cast<std::chrono::nanoseconds>(
+        frame.timestamps[uint32_t(DxvkLatencyMarker::CpuFirstSubmit)] -
+        frame.timestamps[uint32_t(DxvkLatencyMarker::CpuFrameStart)]));
+    }
+
+    // Leave some headroom between the first CPU submit within a frame and
+    // the GPU starting to process that submission so that the GPU is not
+    // starved: 1ms plus one sixteenth of the estimated GPU frame time,
+    // which amounts to roughly 2ms for a 16ms GPU frame.
+    auto tolerance = std::chrono::nanoseconds(1000000) + gpuFrameInterval / 16u;
+
+    // Compute time when to start the next frame. The GPU deadline is the
+    // later of the GPU-bound estimate and the present-interval-bound one,
+    // and the CPU needs to start early enough to submit work by then.
+    auto nextGpuStartDeadline = std::max(
+      currFrame.timestamps[uint32_t(DxvkLatencyMarker::GpuFrameStart)] + gpuFrameInterval,
+      prevFrame.timestamps[uint32_t(DxvkLatencyMarker::GpuPresentEnd)] + (presentInterval + presentInterval - gpuFrameInterval));
+    auto nextCpuStartDeadline = nextGpuStartDeadline - (cpuSubmitDelay + tolerance);
+
+    // Sleep if necessary. The reference time is taken before sleeping.
+    auto now = high_resolution_clock::now();
+    Sleep::sleepUntil(now, nextCpuStartDeadline);
+
+    // Store the requested sleep duration for HUD statistics. This is
+    // zero if the deadline had already passed when the time was taken.
+    std::lock_guard lock(m_statLock);
+    m_statSleepDuration = std::max(std::chrono::nanoseconds(0),
+      std::chrono::duration_cast<std::chrono::nanoseconds>(nextCpuStartDeadline - now));
+  }
+
+}
--- a/src/util/util_latency.h
+++ b/src/util/util_latency.h
@ -0,0 +1,124 @@
+#pragma once
+
+#include <array>
+#include <atomic>
+#include <chrono>
+#include <cmath>
+#include <cstdint>
+
+#include "./sync/sync_spinlock.h"
+
+#include "util_flags.h"
+#include "util_sleep.h"
+#include "util_time.h"
+
+namespace dxvk {
+
+  /**
+   * \brief Internal latency marker
+   *
+   * Enumerators are numbered consecutively starting at zero. The
+   * values are used as indices into per-frame time stamp arrays
+   * and as bit positions within per-frame marker masks, so their
+   * order must not change. \c Count is the number of markers.
+   */
+  enum class DxvkLatencyMarker : uint32_t {
+    CpuFrameStart,    ///< Value 0
+    CpuFirstSubmit,   ///< Value 1
+    CpuPresent,       ///< Value 2
+    GpuFrameStart,    ///< Value 3
+    GpuFrameEnd,      ///< Value 4
+    GpuPresentEnd,    ///< Value 5
+
+    Count             ///< Number of markers, not a marker itself
+  };
+
+  /**
+   * \brief Latency marker flags
+   *
+   * Instantiation of the \c Flags template for latency markers.
+   * NOTE(review): \c Flags comes from util_flags.h and is presumably
+   * a bit set with one bit per enumerator; confirm against that header.
+   */
+  using DxvkLatencyMarkerFlags = Flags<DxvkLatencyMarker>;
+
+
+  /**
+   * \brief Latency control helper
+   *
+   * Records time stamps for CPU and GPU markers in a ring of
+   * per-frame entries and uses them in \ref sleep to decide how
+   * long to stall the calling thread before the next frame. The
+   * object is reference counted and deletes itself when the last
+   * reference is released.
+   */
+  class DxvkLatencyControl {
+
+  public:
+
+    DxvkLatencyControl();
+
+    ~DxvkLatencyControl();
+
+    /**
+     * \brief Increments reference count
+     */
+    void incRef() {
+      m_refCount.fetch_add(1u, std::memory_order_acquire);
+    }
+
+    /**
+     * \brief Decrements reference count
+     *
+     * Frees the object as necessary.
+     */
+    void decRef() {
+      // The decrement releases this thread's writes to the object, and
+      // the thread that drops the last reference must acquire the writes
+      // of all other threads before running the destructor.
+      if (m_refCount.fetch_sub(1u, std::memory_order_acq_rel) == 1u)
+        delete this;
+    }
+
+    /**
+     * \brief Sets latency marker
+     *
+     * Sets the time stamp for the given marker to the current time
+     * and then publishes the marker bit in the frame's marker mask.
+     * Setting \c CpuFrameStart replaces the entire mask, which drops
+     * any bits left over from an older frame in the same ring entry.
+     * \param [in] frameId Current frame ID
+     * \param [in] marker Marker to set
+     */
+    void setMarker(
+            uint64_t                    frameId,
+            DxvkLatencyMarker           marker) {
+      auto& frame = m_frames[frameId % m_frames.size()];
+      frame.timestamps[uint32_t(marker)] = high_resolution_clock::now();
+
+      // Write the time stamp first so that a thread which observes
+      // the bit with acquire semantics also sees the time stamp
+      if (marker == DxvkLatencyMarker::CpuFrameStart)
+        frame.markerMask.store(1u << uint32_t(marker), std::memory_order_release);
+      else
+        frame.markerMask.fetch_or(1u << uint32_t(marker), std::memory_order_release);
+    }
+
+    /**
+     * \brief Stalls the calling thread to reduce latency
+     *
+     * Uses markers from the current and previous frames to determine
+     * when to give control back to the application in order to reduce
+     * overall frame latency without starving the GPU.
+     * \param [in] frameId Current frame ID. All CPU timeline
+     *    markers for this frame must be up to date.
+     * \param [in] frameRate Target frame rate
+     */
+    void sleep(
+          uint64_t                      frameId,
+          double                        frameRate);
+
+    /**
+     * \brief Queries last sleep duration
+     *
+     * Reads the value stored by the most recent call
+     * to \ref sleep while holding the statistics lock.
+     * \returns Last sleep duration
+     */
+    auto getLastSleepDuration() {
+      std::lock_guard lock(m_statLock);
+      return m_statSleepDuration;
+    }
+
+  private:
+
+    /**
+     * \brief Per-frame marker data
+     *
+     * The mask has one bit per marker and starts out with all
+     * bits set, so checks against an entry that has not been
+     * written yet succeed. Time stamps are indexed by marker.
+     */
+    struct FrameEntry {
+      std::atomic<uint32_t> markerMask = { ~0u };
+      std::array<high_resolution_clock::time_point,
+        uint32_t(DxvkLatencyMarker::Count)> timestamps = { };
+    };
+
+    std::atomic<uint32_t>       m_refCount = { 0u };
+
+    // Ring of frame entries, indexed by frame ID modulo the array size
+    std::array<FrameEntry, 8u>  m_frames   = { };
+
+    // Frame rate override, zero if there is none
+    double m_frameRateLimit = 0.0;
+
+    // Guards the sleep statistics below
+    sync::Spinlock            m_statLock;
+    std::chrono::nanoseconds  m_statSleepDuration = { };
+
+  };
+
+}