1
0
mirror of https://github.com/doitsujin/dxvk.git synced 2025-02-27 13:54:16 +01:00

[util] Add helper class for frame latency control

This commit is contained in:
Philip Rebohle 2024-10-25 18:12:28 +02:00
parent 763780fb4c
commit f57306a1a0
3 changed files with 219 additions and 0 deletions

View File

@ -4,6 +4,7 @@ util_src = files([
'util_fps_limiter.cpp',
'util_flush.cpp',
'util_gdi.cpp',
'util_latency.cpp',
'util_luid.cpp',
'util_matrix.cpp',
'util_shared_res.cpp',

94
src/util/util_latency.cpp Normal file
View File

@ -0,0 +1,94 @@
#include <cmath>
#include <stdexcept>
#include <string>

#include "util_env.h"
#include "util_latency.h"
#include "util_string.h"
#include "./log/log.h"
#include "./sync/sync_spinlock.h"
namespace dxvk {
/**
 * Initializes the frame rate override from the environment.
 *
 * Reads the DXVK_FRAME_RATE environment variable and, if it parses
 * to a finite number, stores it in m_frameRateLimit. An empty,
 * malformed, out-of-range or non-finite value leaves the override
 * at its default of 0.0, which sleep() treats as "no override".
 */
DxvkLatencyControl::DxvkLatencyControl() {
  std::string env = env::getEnvVar("DXVK_FRAME_RATE");

  if (env.empty())
    return;

  // std::stod reports bad input by throwing. A typo in an environment
  // variable must not escape the constructor, so bad values are
  // deliberately ignored and the override simply stays disabled.
  try {
    double limit = std::stod(env);

    // "nan" parses successfully but would end up in the double to
    // integer conversion inside sleep(), which is undefined for NaN.
    // Infinity behaves like having no limit at all, so dropping it
    // does not change the resulting frame pacing.
    if (std::isfinite(limit))
      m_frameRateLimit = limit;
  } catch (const std::invalid_argument&) {
    // Not a number: keep the override disabled
  } catch (const std::out_of_range&) {
    // Not representable as double: keep the override disabled
  }
}
// Nothing to release explicitly; members clean up after themselves.
DxvkLatencyControl::~DxvkLatencyControl() = default;
/**
 * Delays the calling thread until the next frame should be started.
 *
 * Waits until the GpuFrameStart marker of the current frame and the
 * GpuPresentEnd marker of the previous frame are set, estimates GPU
 * execution time and CPU submit delay from the recorded time stamps,
 * and sleeps until the resulting CPU start deadline. The computed
 * sleep duration is stored for getLastSleepDuration().
 *
 * \param [in] frameId Current frame ID
 * \param [in] frameRate Target frame rate. Zero means no limit,
 *    and only the absolute value is used.
 */
void DxvkLatencyControl::sleep(
        uint64_t frameId,
        double   frameRate) {
  // Apply environment override as necessary: use the override alone if
  // the caller did not request a limit, otherwise the lower of the two.
  if (m_frameRateLimit != 0.0) {
    frameRate = frameRate == 0.0 ? std::abs(m_frameRateLimit)
      : std::min(std::abs(frameRate), std::abs(m_frameRateLimit));
  }

  // Wait for the current frame's first submission to become available
  // and for the previous frame's present to complete on the GPU.
  auto& currFrame = m_frames[(frameId - 0u) % m_frames.size()];
  auto& prevFrame = m_frames[(frameId - 1u) % m_frames.size()];

  // Use the namespace-scope constant here. The spelling
  // std::memory_order::memory_order_acquire only compiles while
  // std::memory_order is an unscoped enum, i.e. before C++20.
  sync::spin(-1u, [&currFrame, &prevFrame] {
    return bool(currFrame.markerMask.load(std::memory_order_acquire) & (1u << uint32_t(DxvkLatencyMarker::GpuFrameStart)))
        && bool(prevFrame.markerMask.load(std::memory_order_acquire) & (1u << uint32_t(DxvkLatencyMarker::GpuPresentEnd)));
  });

  // Estimate GPU execution time. Use the minimum from the past frames
  // to avoid creating a feedback loop with oscillating frame times.
  // The loop starts at 1 because the current frame has not necessarily
  // finished on the GPU yet. The initial value only serves as an upper
  // bound for std::min (roughly 4.29s with a 32-bit unsigned int).
  auto gpuFrameInterval = std::chrono::nanoseconds(~0u);

  for (uint32_t i = 1; i < m_frames.size(); i++) {
    auto& frame = m_frames[(frameId - i) % m_frames.size()];

    gpuFrameInterval = std::min(gpuFrameInterval,
      std::chrono::duration_cast<std::chrono::nanoseconds>(
        frame.timestamps[uint32_t(DxvkLatencyMarker::GpuFrameEnd)] -
        frame.timestamps[uint32_t(DxvkLatencyMarker::GpuFrameStart)]));
  }

  // If the minimum present interval is higher than the GPU execution time,
  // we need to delay the next frame even further to reduce frame latency
  auto presentInterval = std::chrono::nanoseconds(0);

  if (frameRate != 0.0)
    presentInterval = std::chrono::nanoseconds(int64_t(1000000000.0 / std::abs(frameRate)));

  // Estimate simulation time from end of present to first submission. Use
  // the maximum of the past few frames here to account for fluctuations.
  auto cpuSubmitDelay = std::chrono::nanoseconds(0);

  for (uint32_t i = 0; i < m_frames.size(); i++) {
    auto& frame = m_frames[(frameId - i) % m_frames.size()];

    cpuSubmitDelay = std::max(cpuSubmitDelay, std::chrono::duration_cast<std::chrono::nanoseconds>(
      frame.timestamps[uint32_t(DxvkLatencyMarker::CpuFirstSubmit)] -
      frame.timestamps[uint32_t(DxvkLatencyMarker::CpuFrameStart)]));
  }

  // Leave some headroom between the first CPU submit within a frame and
  // the GPU starting to process that submission so that the GPU is not
  // starved: 1ms plus 1/16 of the estimated GPU frame time, which works
  // out to roughly 2ms for a 16ms frame.
  auto tolerance = std::chrono::nanoseconds(1000000) + gpuFrameInterval / 16u;

  // Compute time when to start the next frame
  auto nextGpuStartDeadline = std::max(
    currFrame.timestamps[uint32_t(DxvkLatencyMarker::GpuFrameStart)] + gpuFrameInterval,
    prevFrame.timestamps[uint32_t(DxvkLatencyMarker::GpuPresentEnd)] + (presentInterval + presentInterval - gpuFrameInterval));

  auto nextCpuStartDeadline = nextGpuStartDeadline - (cpuSubmitDelay + tolerance);

  // Sleep if necessary. A deadline in the past yields a negative
  // duration below, which is clamped to zero for the statistics.
  auto now = high_resolution_clock::now();

  Sleep::sleepUntil(now, nextCpuStartDeadline);

  // Store sleep duration for HUD statistics
  std::lock_guard lock(m_statLock);

  m_statSleepDuration = std::max(std::chrono::nanoseconds(0),
    std::chrono::duration_cast<std::chrono::nanoseconds>(nextCpuStartDeadline - now));
}
}

124
src/util/util_latency.h Normal file
View File

@ -0,0 +1,124 @@
#pragma once
#include <array>
#include <atomic>
#include <chrono>
#include <cmath>
#include <cstdint>
#include "./sync/sync_spinlock.h"
#include "util_flags.h"
#include "util_sleep.h"
#include "util_time.h"
namespace dxvk {
/**
 * \brief Internal latency marker
 *
 * Each enumerator doubles as an index into the per-frame time
 * stamp array and as a bit position in the per-frame marker mask,
 * so the numeric values must stay dense and start at zero.
 */
enum class DxvkLatencyMarker : uint32_t {
  CpuFrameStart,    // index 0
  CpuFirstSubmit,   // index 1
  CpuPresent,       // index 2
  GpuFrameStart,    // index 3
  GpuFrameEnd,      // index 4
  GpuPresentEnd,    // index 5
  Count             // number of markers, not a marker itself
};
// Flags instantiation for latency markers. NOTE(review): presumably a
// bit set keyed by DxvkLatencyMarker; confirm against util_flags.h.
using DxvkLatencyMarkerFlags = Flags<DxvkLatencyMarker>;
/**
 * \brief Latency control helper
 *
 * Records time stamps for a fixed set of CPU and GPU markers in a
 * ring of per-frame entries, and uses them in \c sleep to decide
 * how long to stall the calling thread before the next frame.
 *
 * Objects are reference counted through \c incRef and \c decRef
 * and delete themselves once the last reference is dropped.
 */
class DxvkLatencyControl {

public:

  DxvkLatencyControl();

  ~DxvkLatencyControl();

  /**
   * \brief Increments reference count
   */
  void incRef() {
    m_refCount.fetch_add(1u, std::memory_order_acquire);
  }

  /**
   * \brief Decrements reference count
   *
   * Frees the object as necessary.
   */
  void decRef() {
    // The decrement must both publish this thread's prior accesses
    // (release) and, on the thread that ends up deleting the object,
    // observe everything the other owners did before dropping their
    // references (acquire). Release alone would let the destructor
    // race with those accesses.
    if (m_refCount.fetch_sub(1u, std::memory_order_acq_rel) == 1u)
      delete this;
  }

  /**
   * \brief Sets latency marker
   *
   * Sets the time stamp for the given marker to the current time
   * and then flags the marker as set in the frame's marker mask.
   * \c CpuFrameStart replaces the entire mask with its own bit,
   * which clears the bits left over from the frame that last
   * used the same ring slot.
   * \param [in] frameId Current frame ID
   * \param [in] marker Marker to set
   */
  void setMarker(
          uint64_t          frameId,
          DxvkLatencyMarker marker) {
    auto& frame = m_frames[frameId % m_frames.size()];

    // Write the time stamp first; the release operation on the mask
    // below pairs with the acquire loads in sleep().
    frame.timestamps[uint32_t(marker)] = high_resolution_clock::now();

    if (marker == DxvkLatencyMarker::CpuFrameStart)
      frame.markerMask.store(1u << uint32_t(marker), std::memory_order_release);
    else
      frame.markerMask.fetch_or(1u << uint32_t(marker), std::memory_order_release);
  }

  /**
   * \brief Stalls the calling thread to reduce latency
   *
   * Uses markers from the current and previous frames to determine
   * when to give control back to the application in order to reduce
   * overall frame latency without starving the GPU.
   * \param [in] frameId Current frame ID. All CPU timeline
   *    markers for this frame must be up to date.
   * \param [in] frameRate Target frame rate
   */
  void sleep(
          uint64_t          frameId,
          double            frameRate);

  /**
   * \brief Queries last sleep duration
   *
   * Reads the value under the statistics lock.
   * \returns Last sleep duration, in nanoseconds
   */
  auto getLastSleepDuration() {
    std::lock_guard lock(m_statLock);
    return m_statSleepDuration;
  }

private:

  // Marker state of a single frame within the ring
  struct FrameEntry {
    // One bit per DxvkLatencyMarker. All bits start out set so that
    // sleep() does not wait on ring slots no frame has written yet.
    std::atomic<uint32_t> markerMask = { ~0u };
    // Time stamp of each marker, indexed by marker value
    std::array<high_resolution_clock::time_point,
      uint32_t(DxvkLatencyMarker::Count)> timestamps = { };
  };

  std::atomic<uint32_t>     m_refCount = { 0u };

  // Ring of frame entries, indexed by frame ID modulo the ring size
  std::array<FrameEntry, 8u> m_frames = { };

  // Frame rate override; 0.0 means no override is active
  double                    m_frameRateLimit = 0.0;

  // Guards m_statSleepDuration
  sync::Spinlock            m_statLock;
  std::chrono::nanoseconds  m_statSleepDuration = { };

};
}