mirror of
https://github.com/doitsujin/dxvk.git
synced 2025-02-24 04:54:14 +01:00
[dxvk] Add latency tracker
Implements a basic latency sleep solution that is intended to work without requiring games to support any related vendor features. This alone is not enough to expose the Reflex API to applications via dxvk-nvapi, but since that relies on NV_low_latency2 specifics anyway, we are going to add an implementation based on that extension later with an extended interface.
This commit is contained in:
parent
bf722ccc57
commit
33febe9859
30
dxvk.conf
30
dxvk.conf
@ -76,6 +76,36 @@
|
||||
# d3d9.maxFrameRate = 0
|
||||
|
||||
|
||||
# Controls latency sleep and Nvidia Reflex support.
|
||||
#
|
||||
# Supported values:
|
||||
# - Auto: By default, DXVK only supports latency sleep in D3D11 games that
|
||||
# use Reflex if the graphics driver supports VK_NV_low_latency2,
|
||||
# and if dxvk-nvapi is enabled in Proton.
|
||||
# - True: Enables built-in latency reduction based on internal timings.
|
||||
# This assumes that input sampling for any given frame happens after
|
||||
# the D3D9 or DXGI Present call returns; games that render and present
|
||||
# asynchronously will not behave as intended.
|
||||
# Similarly, this will not have any effect in games with built-in frame
|
||||
# rate limiters, or if an external limiter (such as MangoHud) is used.
|
||||
# In some games, enabling this may reduce performance or lead to less
|
||||
# consistent frame pacing.
|
||||
# The implementation will either use VK_NV_low_latency2 if supported
|
||||
# by the driver, or a custom algorithm.
|
||||
# - False: Disable Reflex support as well as built-in latency reduction.
|
||||
|
||||
# dxvk.latencySleep = Auto
|
||||
|
||||
|
||||
# Tolerance for the latency sleep heuristic, in microseconds. Higher values
|
||||
# increase latency, but may lead to better frame pacing in some cases. Does
|
||||
# not have any effect if NV_low_latency2 is used.
|
||||
#
|
||||
# Supported values: Any non-negative number
|
||||
|
||||
# dxvk.latencyTolerance = 1000
|
||||
|
||||
|
||||
# Override PCI vendor and device IDs reported to the application. Can
|
||||
# cause the app to adjust behaviour depending on the selected values.
|
||||
#
|
||||
|
@ -1,5 +1,6 @@
|
||||
#include "dxvk_device.h"
|
||||
#include "dxvk_instance.h"
|
||||
#include "dxvk_latency_builtin.h"
|
||||
|
||||
namespace dxvk {
|
||||
|
||||
@ -305,6 +306,16 @@ namespace dxvk {
|
||||
}
|
||||
|
||||
|
||||
// Creates the latency tracker for a presenter. A tracker is only
// handed out if latency sleep was explicitly enabled in the device
// options; in every other case the caller receives a null reference.
Rc<DxvkLatencyTracker> DxvkDevice::createLatencyTracker(
  const Rc<Presenter>&            presenter) {
  const bool useBuiltIn = (m_options.latencySleep == Tristate::True);

  if (useBuiltIn) {
    return new DxvkBuiltInLatencyTracker(
      m_options.latencyTolerance);
  }

  return nullptr;
}
|
||||
|
||||
|
||||
void DxvkDevice::presentImage(
|
||||
const Rc<Presenter>& presenter,
|
||||
uint64_t frameId,
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include "dxvk_framebuffer.h"
|
||||
#include "dxvk_image.h"
|
||||
#include "dxvk_instance.h"
|
||||
#include "dxvk_latency.h"
|
||||
#include "dxvk_memory.h"
|
||||
#include "dxvk_meta_clear.h"
|
||||
#include "dxvk_objects.h"
|
||||
@ -478,6 +479,16 @@ namespace dxvk {
|
||||
void requestCompileShader(
|
||||
const Rc<DxvkShader>& shader);
|
||||
|
||||
/**
|
||||
* \brief Creates latency tracker for a presenter
|
||||
*
|
||||
* The specific implementation and parameters used
|
||||
* depend on user configuration.
|
||||
* \param [in] presenter Presenter instance
|
||||
*/
|
||||
Rc<DxvkLatencyTracker> createLatencyTracker(
|
||||
const Rc<Presenter>& presenter);
|
||||
|
||||
/**
|
||||
* \brief Presents a swap chain image
|
||||
*
|
||||
|
185
src/dxvk/dxvk_latency.h
Normal file
185
src/dxvk/dxvk_latency.h
Normal file
@ -0,0 +1,185 @@
|
||||
#pragma once
|
||||
|
||||
#include <atomic>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
#include "../util/util_likely.h"
|
||||
#include "../util/util_time.h"
|
||||
|
||||
#include "../util/rc/util_rc_ptr.h"
|
||||
|
||||
#include "../vulkan/vulkan_loader.h"
|
||||
|
||||
namespace dxvk {
|
||||
|
||||
/**
|
||||
* \brief Latency tracker statistics
|
||||
*/
|
||||
struct DxvkLatencyStats {
  /// Time between the start of the frame and the point
  /// where its presentation completed on the GPU.
  std::chrono::microseconds frameLatency  = std::chrono::microseconds(0);
  /// Time the application thread spent in latency
  /// sleep before the frame was started.
  std::chrono::microseconds sleepDuration = std::chrono::microseconds(0);
};
|
||||
|
||||
|
||||
/**
|
||||
* \brief Latency tracker
|
||||
*
|
||||
* Accumulates time stamps of certain parts of a frame.
|
||||
*/
|
||||
class DxvkLatencyTracker {
|
||||
|
||||
public:
|
||||
|
||||
virtual ~DxvkLatencyTracker() { }
|
||||
|
||||
/**
|
||||
* \brief Increments ref count
|
||||
*/
|
||||
void incRef() {
|
||||
m_refCount.fetch_add(1, std::memory_order_acquire);
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Decrements ref count
|
||||
*
|
||||
* Destroys the object when there are no users left.
|
||||
*/
|
||||
void decRef() {
|
||||
if (m_refCount.fetch_sub(1, std::memory_order_release) == 1u)
|
||||
delete this;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Called when presentation begins on the CPU timeline
|
||||
*
|
||||
* Must happen before acquiring an image from the presenter.
|
||||
* \param [in] frameId Current frame ID
|
||||
*/
|
||||
virtual void notifyCpuPresentBegin(
|
||||
uint64_t frameId) = 0;
|
||||
|
||||
/**
|
||||
* \brief Called when the CS thread reaches a given frame
|
||||
*
|
||||
* Should be recorded into the CS thread after completing
|
||||
* the previous frame on the application's CPU timeline.
|
||||
* \param [in] frameId Current frame ID
|
||||
*/
|
||||
virtual void notifyCsRenderBegin(
|
||||
uint64_t frameId) = 0;
|
||||
|
||||
/**
|
||||
* \brief Called when the CS thread completes a frame
|
||||
*
|
||||
* Should be recorded into the CS thread after recording
|
||||
* presentation commands for that frame.
|
||||
* \param [in] frameId Current frame ID
|
||||
*/
|
||||
virtual void notifyCsRenderEnd(
|
||||
uint64_t frameId) = 0;
|
||||
|
||||
/**
|
||||
* \brief Called when presentation ends on the CPU timeline
|
||||
*
|
||||
* Must happen after acquiring an image for presentation, but
|
||||
* before synchronizing with previous frames or performing
|
||||
* latency sleep. The intention is to measure acquire delays.
|
||||
* \param [in] frameId Current frame ID
|
||||
*/
|
||||
virtual void notifyCpuPresentEnd(
|
||||
uint64_t frameId) = 0;
|
||||
|
||||
/**
|
||||
* \brief Called when a command list is submitted to the GPU
|
||||
*
|
||||
* \param [in] frameId Associated frame ID
|
||||
*/
|
||||
virtual void notifyQueueSubmit(
|
||||
uint64_t frameId) = 0;
|
||||
|
||||
/**
|
||||
* \brief Called when a frame is queued for presentation
|
||||
*
|
||||
* \param [in] frameId Associated frame ID
|
||||
*/
|
||||
virtual void notifyQueuePresentBegin(
|
||||
uint64_t frameId) = 0;
|
||||
|
||||
/**
|
||||
* \brief Called after a frame has been queued for presentation
|
||||
*
|
||||
* \param [in] frameId Associated frame ID
|
||||
* \param [in] status Result of the present operation
|
||||
*/
|
||||
virtual void notifyQueuePresentEnd(
|
||||
uint64_t frameId,
|
||||
VkResult status) = 0;
|
||||
|
||||
/**
|
||||
* \brief Called when a submission begins execution on the GPU
|
||||
*
|
||||
* Any previous submissions will have completed by this time. This
|
||||
* can be used to measure GPU idle time throughout a frame.
|
||||
* \param [in] frameId Associated frame ID
|
||||
*/
|
||||
virtual void notifyGpuExecutionBegin(
|
||||
uint64_t frameId) = 0;
|
||||
|
||||
/**
|
||||
* \brief Called when a submission completes execution on the GPU
|
||||
*
|
||||
* The previous submission will have completed by the time this
|
||||
* gets called. This may be used to measure GPU idle time.
|
||||
* \param [in] frameId Associated frame ID
|
||||
*/
|
||||
virtual void notifyGpuExecutionEnd(
|
||||
uint64_t frameId) = 0;
|
||||
|
||||
/**
|
||||
* \brief Called when presentation of a given frame finishes on the GPU
|
||||
*
|
||||
* This is generally the last thing that happens within a frame.
|
||||
* \param [in] frameId Associated frame ID
|
||||
*/
|
||||
virtual void notifyGpuPresentEnd(
|
||||
uint64_t frameId) = 0;
|
||||
|
||||
/**
|
||||
* \brief Performs latency sleep and begins next frame
|
||||
*
|
||||
* Uses latency data from previous frames to estimate when to wake
|
||||
* up the application thread in order to minimize input latency.
|
||||
* \param [in] frameId Frame ID of the upcoming frame
|
||||
* \param [in] maxFrameRate Maximum frame rate or refresh rate
|
||||
*/
|
||||
virtual void sleepAndBeginFrame(
|
||||
uint64_t frameId,
|
||||
double maxFrameRate) = 0;
|
||||
|
||||
/**
|
||||
* \brief Discards all current timing data
|
||||
*
|
||||
* Should be called to reset latency tracking in case
|
||||
* presentation failed for any given frame.
|
||||
*/
|
||||
virtual void discardTimings() = 0;
|
||||
|
||||
/**
|
||||
* \brief Queries statistics for the given frame
|
||||
*
|
||||
* Returns statistics for the frame closest to \c frameId for
|
||||
* which data is available. If no such frame exists, the stat
|
||||
* counters will return 0.
|
||||
* \param [in] frameId Frame to query
|
||||
*/
|
||||
virtual DxvkLatencyStats getStatistics(
|
||||
uint64_t frameId) = 0;
|
||||
|
||||
private:
|
||||
|
||||
std::atomic<uint64_t> m_refCount = { 0u };
|
||||
|
||||
};
|
||||
|
||||
}
|
317
src/dxvk/dxvk_latency_builtin.cpp
Normal file
317
src/dxvk/dxvk_latency_builtin.cpp
Normal file
@ -0,0 +1,317 @@
|
||||
#include <cmath>
|
||||
|
||||
#include "dxvk_latency_builtin.h"
|
||||
|
||||
#include "../util/log/log.h"
|
||||
|
||||
#include "../util/util_fps_limiter.h"
|
||||
#include "../util/util_string.h"
|
||||
|
||||
namespace dxvk {
|
||||
|
||||
// Sets up the built-in tracker. Negative tolerance values are
// treated as zero, and a frame rate limit override provided by
// the environment is stored for later frame interval computations.
DxvkBuiltInLatencyTracker::DxvkBuiltInLatencyTracker(
        int32_t                   toleranceUs)
: m_tolerance(std::chrono::duration_cast<duration>(
    std::chrono::microseconds(toleranceUs > 0 ? toleranceUs : 0))) {
  Logger::info("Latency control enabled, using built-in algorithm");

  if (auto envLimit = FpsLimiter::getEnvironmentOverride())
    m_envFpsLimit = *envLimit;
}
|
||||
|
||||
|
||||
// No explicit cleanup is required, all members release themselves.
DxvkBuiltInLatencyTracker::~DxvkBuiltInLatencyTracker() = default;
|
||||
|
||||
|
||||
// Intentionally empty: the built-in algorithm does
// not record the start of the CPU-side present call.
void DxvkBuiltInLatencyTracker::notifyCpuPresentBegin(
        uint64_t                  frameId) {
}
|
||||
|
||||
|
||||
void DxvkBuiltInLatencyTracker::notifyCpuPresentEnd(
|
||||
uint64_t frameId) {
|
||||
std::unique_lock lock(m_mutex);
|
||||
auto frame = findFrame(frameId);
|
||||
|
||||
if (frame)
|
||||
frame->cpuPresentEnd = dxvk::high_resolution_clock::now();
|
||||
}
|
||||
|
||||
|
||||
// Intentionally empty: CS thread timings are
// not used by the built-in algorithm.
void DxvkBuiltInLatencyTracker::notifyCsRenderBegin(
        uint64_t                  frameId) {
}
|
||||
|
||||
|
||||
// Intentionally empty: CS thread timings are
// not used by the built-in algorithm.
void DxvkBuiltInLatencyTracker::notifyCsRenderEnd(
        uint64_t                  frameId) {
}
|
||||
|
||||
|
||||
void DxvkBuiltInLatencyTracker::notifyQueueSubmit(
|
||||
uint64_t frameId) {
|
||||
std::unique_lock lock(m_mutex);
|
||||
auto frame = findFrame(frameId);
|
||||
|
||||
if (frame && frame->queueSubmit == time_point())
|
||||
frame->queueSubmit = dxvk::high_resolution_clock::now();
|
||||
}
|
||||
|
||||
|
||||
void DxvkBuiltInLatencyTracker::notifyQueuePresentBegin(
|
||||
uint64_t frameId) {
|
||||
std::unique_lock lock(m_mutex);
|
||||
auto frame = findFrame(frameId);
|
||||
|
||||
if (frame)
|
||||
frame->queuePresent = dxvk::high_resolution_clock::now();
|
||||
}
|
||||
|
||||
|
||||
void DxvkBuiltInLatencyTracker::notifyQueuePresentEnd(
|
||||
uint64_t frameId,
|
||||
VkResult status) {
|
||||
// Not interesting
|
||||
}
|
||||
|
||||
|
||||
void DxvkBuiltInLatencyTracker::notifyGpuExecutionBegin(
|
||||
uint64_t frameId) {
|
||||
std::unique_lock lock(m_mutex);
|
||||
auto frame = findFrame(frameId);
|
||||
|
||||
if (frame) {
|
||||
auto now = dxvk::high_resolution_clock::now();
|
||||
|
||||
if (frame->gpuExecStart == time_point())
|
||||
frame->gpuExecStart = now;
|
||||
|
||||
if (frame->gpuIdleStart != time_point()) {
|
||||
frame->gpuIdleTime += now - frame->gpuIdleStart;
|
||||
frame->gpuIdleEnd = now;
|
||||
}
|
||||
}
|
||||
|
||||
m_cond.notify_one();
|
||||
}
|
||||
|
||||
|
||||
void DxvkBuiltInLatencyTracker::notifyGpuExecutionEnd(
|
||||
uint64_t frameId) {
|
||||
std::unique_lock lock(m_mutex);
|
||||
auto frame = findFrame(frameId);
|
||||
|
||||
if (frame) {
|
||||
auto now = dxvk::high_resolution_clock::now();
|
||||
|
||||
frame->gpuExecEnd = now;
|
||||
frame->gpuIdleStart = now;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void DxvkBuiltInLatencyTracker::notifyGpuPresentEnd(
|
||||
uint64_t frameId) {
|
||||
std::unique_lock lock(m_mutex);
|
||||
auto frame = findFrame(frameId);
|
||||
|
||||
if (frame)
|
||||
frame->gpuPresent = dxvk::high_resolution_clock::now();
|
||||
|
||||
m_cond.notify_one();
|
||||
}
|
||||
|
||||
|
||||
void DxvkBuiltInLatencyTracker::sleepAndBeginFrame(
|
||||
uint64_t frameId,
|
||||
double maxFrameRate) {
|
||||
auto duration = sleep(frameId, maxFrameRate);
|
||||
|
||||
std::unique_lock lock(m_mutex);
|
||||
|
||||
auto next = initFrame(frameId);
|
||||
next->frameStart = dxvk::high_resolution_clock::now();
|
||||
next->sleepDuration = duration;
|
||||
}
|
||||
|
||||
|
||||
void DxvkBuiltInLatencyTracker::discardTimings() {
|
||||
std::unique_lock lock(m_mutex);
|
||||
m_validRangeBegin = m_validRangeEnd + 1u;
|
||||
}
|
||||
|
||||
|
||||
// Returns latency and sleep statistics for the most recent frame
// at or before frameId whose presentation has completed. If no
// such frame is tracked, the returned counters are zero.
DxvkLatencyStats DxvkBuiltInLatencyTracker::getStatistics(
        uint64_t                  frameId) {
  std::unique_lock lock(m_mutex);

  DxvkLatencyStats stats = { };

  // Frames past the end of the valid range can never be found, so
  // start at the newest tracked frame. This bounds the scan by the
  // size of the tracked range instead of walking down one frame ID
  // at a time while holding the lock.
  frameId = std::min(frameId, m_validRangeEnd);

  while (frameId && frameId >= m_validRangeBegin) {
    auto f = findFrame(frameId--);

    // Skip frames that have not been presented yet
    if (f && f->gpuPresent != time_point()) {
      stats.frameLatency = std::chrono::duration_cast<std::chrono::microseconds>(f->gpuPresent - f->frameStart);
      stats.sleepDuration = std::chrono::duration_cast<std::chrono::microseconds>(f->sleepDuration);
      break;
    }
  }

  return stats;
}
|
||||
|
||||
|
||||
// Estimates when the application should start working on the next
// frame and blocks the calling thread until then. Returns the time
// span that the sleep was scheduled for, or zero if the frame
// history is incomplete and no sleep was performed.
DxvkBuiltInLatencyTracker::duration DxvkBuiltInLatencyTracker::sleep(
        uint64_t                  frameId,
        double                    maxFrameRate) {
  // All frames in the history window need to have finished
  // presenting before their timings can be used. With a maximum
  // frame latency of 1 enforced by the swap chain, these waits
  // are expected to be short.
  std::unique_lock<dxvk::mutex> guard(m_mutex);

  for (uint32_t age = 2; age <= FrameCount; age++) {
    DxvkLatencyFrameData* entry = findFrame(frameId - age);

    const bool usable = entry && entry->cpuPresentEnd != time_point();

    if (!usable)
      return duration(0u);

    m_cond.wait(guard, [entry] {
      return entry->gpuPresent != time_point();
    });
  }

  // Most recent frame that is known to be fully complete
  DxvkLatencyFrameData* latest = findFrame(frameId - 2u);

  // How subsequent frames get aligned depends on whether the GPU or
  // the display refresh is the limiting factor. Either way, the CPU
  // time needed before GPU work can start is estimated as the delay
  // between frame start and first submission, plus any GPU idle time
  // within the frame. Forced GPU sync points make this inaccurate,
  // but there is no meaningful way to work around that.
  constexpr size_t HistorySize = FrameCount - 1u;

  std::array<duration, HistorySize> cpuHistory = { };
  std::array<duration, HistorySize> gpuHistory = { };

  for (uint32_t slot = 0; slot < HistorySize; slot++) {
    DxvkLatencyFrameData* entry = findFrame(frameId - (slot + 2u));

    cpuHistory[slot] = (entry->queueSubmit - entry->frameStart) + entry->gpuIdleTime;
    gpuHistory[slot] = (entry->gpuExecEnd - entry->gpuExecStart) - entry->gpuIdleTime;
  }

  duration cpuEstimate = estimateTime(cpuHistory.data(), cpuHistory.size());
  duration gpuEstimate = estimateTime(gpuHistory.data(), gpuHistory.size());

  // Start out with a deadline derived from GPU execution times
  time_point deadline = latest->gpuExecEnd + 2u * gpuEstimate;

  // When rendering faster than the refresh or frame rate limit, base
  // the deadline on the present times of previous frames instead, and
  // average them to smooth out potentially erratic present_wait delays.
  duration interval = computeFrameInterval(maxFrameRate);

  if (interval.count() != 0) {
    duration accumulated = duration(0u);

    for (uint32_t age = 2; age <= FrameCount; age++) {
      DxvkLatencyFrameData* entry = findFrame(frameId - age);

      time_point projected = entry->gpuPresent + age * interval - m_tolerance;
      accumulated += projected - latest->gpuPresent;
    }

    time_point wsiDeadline = latest->gpuPresent + (accumulated / int32_t(FrameCount - 1u));
    deadline = std::max(deadline, wsiDeadline);
  }

  // Schedule the next frame so that its first GPU submission happens
  // just before the final submission of the current frame completes
  time_point gpuStart = deadline - gpuEstimate;
  time_point wakeTime = gpuStart - cpuEstimate - m_tolerance;

  time_point now = dxvk::high_resolution_clock::now();

  // Holding the lock while sleeping would stall the threads
  // that record timings and thus skew the measurements
  guard.unlock();

  Sleep::sleepUntil(now, wakeTime);
  return std::max(duration(0u), wakeTime - now);
}
|
||||
|
||||
|
||||
// Resets the ring buffer slot for the given frame and updates the
// range of valid frame IDs. If the frame does not directly follow
// the previous one, all older entries are dropped from the range.
DxvkLatencyFrameData* DxvkBuiltInLatencyTracker::initFrame(
        uint64_t                  frameId) {
  const bool isConsecutive = (frameId == m_validRangeEnd + 1u);

  if (!isConsecutive)
    m_validRangeBegin = frameId;

  // The range must never cover more frames than there are slots
  if (frameId >= m_validRangeBegin + FrameCount)
    m_validRangeBegin = frameId + 1u - FrameCount;

  m_validRangeEnd = frameId;

  DxvkLatencyFrameData* slot = &m_frames[frameId % FrameCount];
  *slot = DxvkLatencyFrameData();
  slot->frameId = frameId;
  return slot;
}
|
||||
|
||||
|
||||
// Looks up the ring buffer slot of a frame. Returns null
// if the frame ID lies outside the currently valid range.
DxvkLatencyFrameData* DxvkBuiltInLatencyTracker::findFrame(
        uint64_t                  frameId) {
  if (frameId < m_validRangeBegin || frameId > m_validRangeEnd)
    return nullptr;

  return &m_frames[frameId % FrameCount];
}
|
||||
|
||||
|
||||
// Computes the target frame interval. A positive frame rate
// limit taken from the environment takes precedence over the
// frame rate passed in by the caller.
DxvkBuiltInLatencyTracker::duration DxvkBuiltInLatencyTracker::computeFrameInterval(
        double                    maxFrameRate) {
  const double effectiveRate = m_envFpsLimit > 0.0
    ? m_envFpsLimit
    : maxFrameRate;

  return computeIntervalFromRate(effectiveRate);
}
|
||||
|
||||
|
||||
// Converts a frame rate in frames per second to a frame interval.
// Returns zero for rates that are not positive, normal numbers.
DxvkBuiltInLatencyTracker::duration DxvkBuiltInLatencyTracker::computeIntervalFromRate(
        double                    frameRate) {
  const bool isUsable = std::isnormal(frameRate) && frameRate > 0.0;

  if (!isUsable)
    return duration(0u);

  // Fractional nanoseconds are truncated
  const std::chrono::nanoseconds interval(uint64_t(1'000'000'000.0 / frameRate));
  return std::chrono::duration_cast<duration>(interval);
}
|
||||
|
||||
|
||||
// Produces a conservative time estimate from a series of samples.
// Takes the median of every window of three consecutive samples
// and returns the largest of those medians. Returns zero if fewer
// than three samples are provided.
DxvkBuiltInLatencyTracker::duration DxvkBuiltInLatencyTracker::estimateTime(
  const duration*                 frames,
        size_t                    frameCount) {
  duration result = duration(0u);

  // Written as i + 2 < frameCount rather than i < frameCount - 2
  // so that the unsigned bound cannot wrap around for short inputs
  for (size_t i = 0u; i + 2u < frameCount; i++) {
    duration a = frames[i];
    duration b = frames[i + 1];
    duration c = frames[i + 2];

    duration min = std::min(std::min(a, b), c);
    duration max = std::max(std::max(a, b), c);

    // Sum minus smallest and largest leaves the median
    result = std::max(result, a + b + c - min - max);
  }

  return result;
}
|
||||
}
|
134
src/dxvk/dxvk_latency_builtin.h
Normal file
134
src/dxvk/dxvk_latency_builtin.h
Normal file
@ -0,0 +1,134 @@
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
|
||||
#include "dxvk_latency.h"
|
||||
|
||||
#include "../util/thread.h"
|
||||
|
||||
#include "../util/util_sleep.h"
|
||||
#include "../util/util_time.h"
|
||||
|
||||
#include "../util/config/config.h"
|
||||
|
||||
#include "../util/sync/sync_spinlock.h"
|
||||
|
||||
namespace dxvk {
|
||||
|
||||
/**
|
||||
* \brief Timings for a single tracked frame
|
||||
*/
|
||||
struct DxvkLatencyFrameData {
|
||||
using time_point = dxvk::high_resolution_clock::time_point;
|
||||
using duration = dxvk::high_resolution_clock::duration;
|
||||
|
||||
uint64_t frameId = 0u;
|
||||
time_point frameStart = time_point();
|
||||
time_point cpuPresentEnd = time_point();
|
||||
time_point queueSubmit = time_point();
|
||||
time_point queuePresent = time_point();
|
||||
time_point gpuExecStart = time_point();
|
||||
time_point gpuExecEnd = time_point();
|
||||
time_point gpuIdleStart = time_point();
|
||||
time_point gpuIdleEnd = time_point();
|
||||
duration gpuIdleTime = duration(0u);
|
||||
time_point gpuPresent = time_point();
|
||||
duration sleepDuration = duration(0u);
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* \brief Built-in latency tracker
|
||||
*
|
||||
* Implements a simple latency reduction algorithm
|
||||
* based on CPU timestamps received from the backend.
|
||||
*/
|
||||
class DxvkBuiltInLatencyTracker : public DxvkLatencyTracker {
|
||||
using time_point = typename DxvkLatencyFrameData::time_point;
|
||||
using duration = typename DxvkLatencyFrameData::duration;
|
||||
|
||||
constexpr static size_t FrameCount = 8u;
|
||||
public:
|
||||
|
||||
DxvkBuiltInLatencyTracker(
|
||||
int32_t toleranceUs);
|
||||
|
||||
~DxvkBuiltInLatencyTracker();
|
||||
|
||||
void notifyCpuPresentBegin(
|
||||
uint64_t frameId);
|
||||
|
||||
void notifyCpuPresentEnd(
|
||||
uint64_t frameId);
|
||||
|
||||
void notifyCsRenderBegin(
|
||||
uint64_t frameId);
|
||||
|
||||
void notifyCsRenderEnd(
|
||||
uint64_t frameId);
|
||||
|
||||
void notifyQueueSubmit(
|
||||
uint64_t frameId);
|
||||
|
||||
void notifyQueuePresentBegin(
|
||||
uint64_t frameId);
|
||||
|
||||
void notifyQueuePresentEnd(
|
||||
uint64_t frameId,
|
||||
VkResult status);
|
||||
|
||||
void notifyGpuExecutionBegin(
|
||||
uint64_t frameId);
|
||||
|
||||
void notifyGpuExecutionEnd(
|
||||
uint64_t frameId);
|
||||
|
||||
void notifyGpuPresentEnd(
|
||||
uint64_t frameId);
|
||||
|
||||
void sleepAndBeginFrame(
|
||||
uint64_t frameId,
|
||||
double maxFrameRate);
|
||||
|
||||
void discardTimings();
|
||||
|
||||
DxvkLatencyStats getStatistics(
|
||||
uint64_t frameId);
|
||||
|
||||
private:
|
||||
|
||||
dxvk::mutex m_mutex;
|
||||
dxvk::condition_variable m_cond;
|
||||
|
||||
duration m_tolerance;
|
||||
|
||||
double m_envFpsLimit = 0.0;
|
||||
|
||||
std::array<DxvkLatencyFrameData, FrameCount> m_frames = { };
|
||||
|
||||
uint64_t m_validRangeBegin = 0u;
|
||||
uint64_t m_validRangeEnd = 0u;
|
||||
|
||||
duration sleep(
|
||||
uint64_t frameId,
|
||||
double maxFrameRate);
|
||||
|
||||
DxvkLatencyFrameData* initFrame(
|
||||
uint64_t frameId);
|
||||
|
||||
DxvkLatencyFrameData* findFrame(
|
||||
uint64_t frameId);
|
||||
|
||||
duration computeFrameInterval(
|
||||
double maxFrameRate);
|
||||
|
||||
static duration computeIntervalFromRate(
|
||||
double frameRate);
|
||||
|
||||
static duration estimateTime(
|
||||
const duration* frames,
|
||||
size_t frameCount);
|
||||
|
||||
};
|
||||
|
||||
}
|
@ -12,6 +12,8 @@ namespace dxvk {
|
||||
useRawSsbo = config.getOption<Tristate>("dxvk.useRawSsbo", Tristate::Auto);
|
||||
hud = config.getOption<std::string>("dxvk.hud", "");
|
||||
tearFree = config.getOption<Tristate>("dxvk.tearFree", Tristate::Auto);
|
||||
latencySleep = config.getOption<Tristate>("dxvk.latencySleep", Tristate::Auto);
|
||||
latencyTolerance = config.getOption<int32_t> ("dxvk.latencyTolerance", 1000);
|
||||
hideIntegratedGraphics = config.getOption<bool> ("dxvk.hideIntegratedGraphics", false);
|
||||
zeroMappedMemory = config.getOption<bool> ("dxvk.zeroMappedMemory", false);
|
||||
allowFse = config.getOption<bool> ("dxvk.allowFse", false);
|
||||
|
@ -37,6 +37,12 @@ namespace dxvk {
|
||||
/// or FIFO_RELAXED (if false) present mode
|
||||
Tristate tearFree = Tristate::Auto;
|
||||
|
||||
/// Enables latency sleep
|
||||
Tristate latencySleep = Tristate::Auto;
|
||||
|
||||
/// Latency tolerance, in microseconds
|
||||
int32_t latencyTolerance = 0u;
|
||||
|
||||
// Hides integrated GPUs if dedicated GPUs are
|
||||
// present. May be necessary for some games that
|
||||
// incorrectly assume monitor layouts.
|
||||
|
@ -90,6 +90,7 @@ dxvk_src = [
|
||||
'dxvk_graphics.cpp',
|
||||
'dxvk_image.cpp',
|
||||
'dxvk_instance.cpp',
|
||||
'dxvk_latency_builtin.cpp',
|
||||
'dxvk_memory.cpp',
|
||||
'dxvk_meta_blit.cpp',
|
||||
'dxvk_meta_clear.cpp',
|
||||
|
Loading…
x
Reference in New Issue
Block a user