mirror of
https://github.com/doitsujin/dxvk.git
synced 2025-02-27 04:54:15 +01:00
[util] Add helper class for frame latency control
This commit is contained in:
parent
763780fb4c
commit
f57306a1a0
@ -4,6 +4,7 @@ util_src = files([
|
||||
'util_fps_limiter.cpp',
|
||||
'util_flush.cpp',
|
||||
'util_gdi.cpp',
|
||||
'util_latency.cpp',
|
||||
'util_luid.cpp',
|
||||
'util_matrix.cpp',
|
||||
'util_shared_res.cpp',
|
||||
|
94
src/util/util_latency.cpp
Normal file
94
src/util/util_latency.cpp
Normal file
@ -0,0 +1,94 @@
|
||||
#include <cmath>
#include <stdexcept>
#include <string>

#include "util_env.h"
#include "util_latency.h"
#include "util_string.h"

#include "./log/log.h"

#include "./sync/sync_spinlock.h"
|
||||
|
||||
namespace dxvk {
|
||||
|
||||
/**
 * \brief Initializes latency control
 *
 * Reads the \c DXVK_FRAME_RATE environment variable and, if it
 * holds a finite number, stores it as the frame rate override.
 * Values that cannot be parsed are ignored so that a malformed
 * environment variable cannot abort object creation.
 */
DxvkLatencyControl::DxvkLatencyControl() {
  std::string env = env::getEnvVar("DXVK_FRAME_RATE");

  if (env.empty())
    return;

  try {
    double limit = std::stod(env);

    // std::stod accepts "nan" and "inf". A NaN limit would later be
    // converted to an integer interval in sleep(), so reject anything
    // that is not a finite number and keep the default of 0.0.
    if (std::isfinite(limit))
      m_frameRateLimit = limit;
  } catch (const std::logic_error&) {
    // std::stod throws std::invalid_argument or std::out_of_range,
    // both derived from std::logic_error. Leave the override disabled.
  }
}
|
||||
|
||||
|
||||
// Nothing to release explicitly; all members clean up after themselves.
DxvkLatencyControl::~DxvkLatencyControl() = default;
|
||||
|
||||
|
||||
void DxvkLatencyControl::sleep(
|
||||
uint64_t frameId,
|
||||
double frameRate) {
|
||||
// Apply environment override as necessary
|
||||
if (m_frameRateLimit != 0.0) {
|
||||
frameRate = frameRate == 0.0f ? std::abs(m_frameRateLimit)
|
||||
: std::min(std::abs(frameRate), std::abs(m_frameRateLimit));
|
||||
}
|
||||
|
||||
// Wait for the current frame's first submission to become available
|
||||
auto& currFrame = m_frames[(frameId - 0u) % m_frames.size()];
|
||||
auto& prevFrame = m_frames[(frameId - 1u) % m_frames.size()];
|
||||
|
||||
sync::spin(-1u, [&currFrame, &prevFrame] {
|
||||
return bool(currFrame.markerMask.load(std::memory_order::memory_order_acquire) & (1u << uint32_t(DxvkLatencyMarker::GpuFrameStart)))
|
||||
&& bool(prevFrame.markerMask.load(std::memory_order::memory_order_acquire) & (1u << uint32_t(DxvkLatencyMarker::GpuPresentEnd)));
|
||||
});
|
||||
|
||||
// Estimate GPU execution time. Use the minimum from the past frames
|
||||
// to avoid creating a feedback loop with oscillating frame times.
|
||||
auto gpuFrameInterval = std::chrono::nanoseconds(~0u);
|
||||
|
||||
for (uint32_t i = 1; i < m_frames.size(); i++) {
|
||||
auto& frame = m_frames[(frameId - i) % m_frames.size()];
|
||||
|
||||
gpuFrameInterval = std::min(gpuFrameInterval,
|
||||
std::chrono::duration_cast<std::chrono::nanoseconds>(
|
||||
frame.timestamps[uint32_t(DxvkLatencyMarker::GpuFrameEnd)] -
|
||||
frame.timestamps[uint32_t(DxvkLatencyMarker::GpuFrameStart)]));
|
||||
}
|
||||
|
||||
// If the minimum present interval is higher than the GPU execution time,
|
||||
// we need to delay the next frame even further to reduce frame latency
|
||||
auto presentInterval = std::chrono::nanoseconds(0);
|
||||
|
||||
if (frameRate != 0.0)
|
||||
presentInterval = std::chrono::nanoseconds(int64_t(1000000000.0 / std::abs(frameRate)));
|
||||
|
||||
// Estimate simulation time from end of present to first submission. Use
|
||||
// the maximum of the past few frames here to account for fluctuations.
|
||||
auto cpuSubmitDelay = std::chrono::nanoseconds(0);
|
||||
|
||||
for (uint32_t i = 0; i < m_frames.size(); i++) {
|
||||
auto& frame = m_frames[(frameId - i) % m_frames.size()];
|
||||
cpuSubmitDelay = std::max(cpuSubmitDelay, std::chrono::duration_cast<std::chrono::nanoseconds>(
|
||||
frame.timestamps[uint32_t(DxvkLatencyMarker::CpuFirstSubmit)] -
|
||||
frame.timestamps[uint32_t(DxvkLatencyMarker::CpuFrameStart)]));
|
||||
}
|
||||
|
||||
// Aim for roughly 2ms of delay between the first CPU submit within a
|
||||
// frame and the GPU starting to process that submission. This gives
|
||||
// us some headroom to not starve the GPU.
|
||||
auto tolerance = std::chrono::nanoseconds(1000000) + gpuFrameInterval / 16u;
|
||||
|
||||
// Compute time when to start the next frame
|
||||
auto nextGpuStartDeadline = std::max(
|
||||
currFrame.timestamps[uint32_t(DxvkLatencyMarker::GpuFrameStart)] + gpuFrameInterval,
|
||||
prevFrame.timestamps[uint32_t(DxvkLatencyMarker::GpuPresentEnd)] + (presentInterval + presentInterval - gpuFrameInterval));
|
||||
auto nextCpuStartDeadline = nextGpuStartDeadline - (cpuSubmitDelay + tolerance);
|
||||
|
||||
// Sleep if necessary, and return the amount of time spent sleeping
|
||||
auto now = high_resolution_clock::now();
|
||||
Sleep::sleepUntil(now, nextCpuStartDeadline);
|
||||
|
||||
// Store sleep duration for HUD statistics
|
||||
std::lock_guard lock(m_statLock);
|
||||
m_statSleepDuration = std::max(std::chrono::nanoseconds(0),
|
||||
std::chrono::duration_cast<std::chrono::nanoseconds>(nextCpuStartDeadline - now));
|
||||
}
|
||||
|
||||
}
|
124
src/util/util_latency.h
Normal file
124
src/util/util_latency.h
Normal file
@ -0,0 +1,124 @@
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <atomic>
|
||||
#include <chrono>
|
||||
#include <cmath>
|
||||
#include <cstdint>
|
||||
|
||||
#include "./sync/sync_spinlock.h"
|
||||
|
||||
#include "util_flags.h"
|
||||
#include "util_sleep.h"
|
||||
#include "util_time.h"
|
||||
|
||||
namespace dxvk {
|
||||
|
||||
/**
 * \brief Internal latency marker
 *
 * Identifies one time stamp slot within a frame. The numeric
 * value of each marker is used both as the index into the
 * per-frame time stamp array and as the bit position within
 * the per-frame marker mask of \c DxvkLatencyControl.
 */
enum class DxvkLatencyMarker : uint32_t {
  // Markers with a Cpu prefix
  CpuFrameStart   = 0,
  CpuFirstSubmit  = 1,
  CpuPresent      = 2,

  // Markers with a Gpu prefix
  GpuFrameStart   = 3,
  GpuFrameEnd     = 4,
  GpuPresentEnd   = 5,

  // Number of markers, not a marker itself
  Count           = 6
};
|
||||
|
||||
// Flag set over latency markers. Presumably based on the Flags
// template from util_flags.h; confirm against that header.
using DxvkLatencyMarkerFlags = Flags<DxvkLatencyMarker>;
|
||||
|
||||
|
||||
/**
|
||||
* \brief Latency control helper
|
||||
*
|
||||
*
|
||||
*/
|
||||
class DxvkLatencyControl {
|
||||
|
||||
public:
|
||||
|
||||
DxvkLatencyControl();
|
||||
|
||||
~DxvkLatencyControl();
|
||||
|
||||
/**
|
||||
* \brief Increments reference count
|
||||
*/
|
||||
void incRef() {
|
||||
m_refCount.fetch_add(1u, std::memory_order_acquire);
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Decrements reference count
|
||||
*
|
||||
* Frees the object as necessary.
|
||||
*/
|
||||
void decRef() {
|
||||
if (m_refCount.fetch_sub(1u, std::memory_order_release) == 1u)
|
||||
delete this;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Sets latency marker
|
||||
*
|
||||
* Sets the time stamp for the given marker to the current time.
|
||||
* \param [in] frameId Current frame ID
|
||||
* \param [in] marker Marker to set
|
||||
*/
|
||||
void setMarker(
|
||||
uint64_t frameId,
|
||||
DxvkLatencyMarker marker) {
|
||||
auto& frame = m_frames[frameId % m_frames.size()];
|
||||
frame.timestamps[uint32_t(marker)] = high_resolution_clock::now();
|
||||
|
||||
if (marker == DxvkLatencyMarker::CpuFrameStart)
|
||||
frame.markerMask.store(1u << uint32_t(marker), std::memory_order_release);
|
||||
else
|
||||
frame.markerMask.fetch_or(1u << uint32_t(marker), std::memory_order_release);
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Stalls the calling thread to reduce latency
|
||||
*
|
||||
* Uses markers from the current and previous frames to determine
|
||||
* when to give control back to the application in order to reduce
|
||||
* overall frame latency without starving the GPU.
|
||||
* \param [in] frameId Current frame ID. All CPU timeline
|
||||
* markers for this frame must be up to date.
|
||||
* \param [in] frameRate Target frame rate
|
||||
*/
|
||||
void sleep(
|
||||
uint64_t frameId,
|
||||
double frameRate);
|
||||
|
||||
/**
|
||||
* \brief Queries last sleep duration
|
||||
* \returns Last sleep duration
|
||||
*/
|
||||
auto getLastSleepDuration() {
|
||||
std::lock_guard lock(m_statLock);
|
||||
return m_statSleepDuration;
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
struct FrameEntry {
|
||||
std::atomic<uint32_t> markerMask = { ~0u };
|
||||
std::array<high_resolution_clock::time_point,
|
||||
uint32_t(DxvkLatencyMarker::Count)> timestamps = { };
|
||||
};
|
||||
|
||||
std::atomic<uint32_t> m_refCount = { 0u };
|
||||
std::array<FrameEntry, 8u> m_frames = { };
|
||||
|
||||
double m_frameRateLimit = 0.0;
|
||||
|
||||
sync::Spinlock m_statLock;
|
||||
std::chrono::nanoseconds m_statSleepDuration = { };
|
||||
|
||||
};
|
||||
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user