
Merge 8e2a509eb6711afe20f2a5426ca5b111add82373 into c04410ca00f33162d0875bc8500d3f8185bc73df

This commit is contained in:
netborg-afps 2025-02-28 20:52:05 +07:00 committed by GitHub
commit 46aeecc4e4
26 changed files with 1144 additions and 59 deletions

View File

@ -18,6 +18,51 @@
# dxgi.enableHDR = True
# Frame pacing mode managing CPU-GPU synchronization.
# Defaults to "low-latency" in the draft-PR for demonstration purposes.
#
# "max-frame-latency" provides stable latency in the GPU-limit as long as
# GPU render times are stable. Latency generally is higher but offers great
# visual smoothness.
#
# "low-latency" provides lower latency in the GPU-limit and can be fine-tuned
# via dxvk.lowLatencyOffset and dxvk.lowLatencyAllowCpuFramesOverlap.
#
# "min-latency" possibly provides the lowest latency (low-latency can be
# quicker in some situations), and offers less fps in the GPU-limit
# due to stalling the GPU between frames. Generally not recommended,
# but helpful to get insights to fine-tune the low-latency mode and
# possibly is useful for running games in the CPU-limit.
#
# "low/min-latency" also supports its own fps-limiting enabled via common
# variables.
#
# Supported values: "max-frame-latency", "low-latency", "min-latency"
# dxvk.framePacing = ""
# Allows fine-tuning the low-latency frame pacing mode.
# Positive values make a frame begin later, which might improve responsiveness,
# although only very slightly; this may still be relevant for edge cases.
# Negative values make a frame begin earlier, which might improve fps.
# Values are given in microseconds. Defaults to 0.
#
# Supported values: -10000 to 10000
# dxvk.lowLatencyOffset = 0
# Determines whether a frame is allowed to begin before the CPU part of the
# previous one has finished processing, when low-latency frame pacing is used.
# Snappiness may be improved when disallowing overlap. On the other hand, this
# might also decrease fps in certain cases. Defaults to True.
#
# Supported values: True, False
# dxvk.lowLatencyAllowCpuFramesOverlap = True
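#
# Illustrative example (not a shipped default): a typical low-latency setup
# might look like the following, either via this file or via the equivalent
# environment variables DXVK_FRAME_PACE, DXVK_LOW_LATENCY_OFFSET and
# DXVK_LOW_LATENCY_ALLOW_CPU_FRAMES_OVERLAP:
#
# dxvk.framePace = "low-latency"
# dxvk.lowLatencyOffset = 0
# dxvk.lowLatencyAllowCpuFramesOverlap = True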
# Expose support for dcomp swap chains with a dummy window.
#
# This is not a valid implementation of DirectComposition swapchains,
@ -104,8 +149,13 @@
# The implementation will either use VK_NV_low_latency2 if supported
# by the driver, or a custom algorithm.
# - False: Disable Reflex support as well as built-in latency reduction.
# This build defaults to False in order to enable frame pacing (dxvk.framePace).
# You need to enable Reflex manually (Auto) until switching back and forth
# between Reflex and the low-latency frame pacing is supported, for example
# via the in-game options, and, more critically, until low-latency frame
# pacing can be enabled automatically when the game doesn't support Reflex.
# dxvk.latencySleep = Auto
# dxvk.latencySleep = False
# Tolerance for the latency sleep heuristic, in microseconds. Higher values

View File

@ -3,6 +3,7 @@
#include "d3d11_swapchain.h"
#include "../dxvk/dxvk_latency_builtin.h"
#include "../dxvk/framepacer/dxvk_framepacer.h"
#include "../util/util_win32_compat.h"
@ -294,6 +295,9 @@ namespace dxvk {
if (m_latencyHud)
m_latencyHud->accumulateStats(latencyStats);
if (m_renderLatencyHud)
m_renderLatencyHud->updateLatencyTracker(m_latency);
return hr;
}
@ -354,6 +358,10 @@ namespace dxvk {
if (m_presenter != nullptr)
m_presenter->setFrameRateLimit(m_targetFrameRate, GetActualFrameLatency());
FramePacer* framePacer = dynamic_cast<FramePacer*>(m_latency.ptr());
if (framePacer != nullptr)
framePacer->setTargetFrameRate(FrameRate);
}
@ -599,8 +607,14 @@ namespace dxvk {
if (hud) {
hud->addItem<hud::HudClientApiItem>("api", 1, GetApiName());
if (m_latency)
if (m_latency) {
m_latencyHud = hud->addItem<hud::HudLatencyItem>("latency", 4);
FramePacer* framePacer = dynamic_cast<FramePacer*>(m_latency.ptr());
if (framePacer) {
int32_t fpsItemPos = hud->getItemPos<hud::HudFpsItem>();
m_renderLatencyHud = hud->addItem<hud::HudRenderLatencyItem>("renderlatency", fpsItemPos+1);
}
}
}
m_blitter = new DxvkSwapchainBlitter(m_device, std::move(hud));

View File

@ -125,7 +125,8 @@ namespace dxvk {
dxvk::mutex m_frameStatisticsLock;
DXGI_VK_FRAME_STATISTICS m_frameStatistics = { };
Rc<hud::HudLatencyItem> m_latencyHud;
Rc<hud::HudLatencyItem> m_latencyHud;
Rc<hud::HudRenderLatencyItem> m_renderLatencyHud;
Rc<DxvkImageView> GetBackBufferView();

View File

@ -5,6 +5,8 @@
#include "d3d9_hud.h"
#include "d3d9_window.h"
#include "../dxvk/framepacer/dxvk_framepacer.h"
namespace dxvk {
static uint16_t MapGammaControlPoint(float x) {
@ -923,6 +925,9 @@ namespace dxvk {
if (m_latencyHud)
m_latencyHud->accumulateStats(latencyStats);
if (m_renderLatencyHud)
m_renderLatencyHud->updateLatencyTracker(m_latencyTracker);
// Rotate swap chain buffers so that the back
// buffer at index 0 becomes the front buffer.
for (uint32_t i = 1; i < m_backBuffers.size(); i++)
@ -1060,8 +1065,14 @@ namespace dxvk {
if (hud) {
m_apiHud = hud->addItem<hud::HudClientApiItem>("api", 1, GetApiName());
if (m_latencyTracking)
if (m_latencyTracking) {
m_latencyHud = hud->addItem<hud::HudLatencyItem>("latency", 4);
FramePacer* framePacer = dynamic_cast<FramePacer*>(m_latencyTracker.ptr());
if (framePacer) {
int32_t fpsItemPos = hud->getItemPos<hud::HudFpsItem>();
m_renderLatencyHud = hud->addItem<hud::HudRenderLatencyItem>("renderlatency", fpsItemPos+1);
}
}
hud->addItem<hud::HudSamplerCount>("samplers", -1, m_parent);
hud->addItem<hud::HudFixedFunctionShaders>("ffshaders", -1, m_parent);
@ -1112,6 +1123,9 @@ namespace dxvk {
}
m_wctx->presenter->setFrameRateLimit(frameRate, GetActualFrameLatency());
FramePacer* framePacer = dynamic_cast<FramePacer*>(m_latencyTracker.ptr());
if (framePacer != nullptr)
framePacer->setTargetFrameRate(frameRate);
m_targetFrameRate = frameRate;
}

View File

@ -183,8 +183,9 @@ namespace dxvk {
bool m_latencyTracking = false;
Rc<DxvkLatencyTracker> m_latencyTracker = nullptr;
Rc<hud::HudClientApiItem> m_apiHud;
Rc<hud::HudLatencyItem> m_latencyHud;
Rc<hud::HudClientApiItem> m_apiHud;
Rc<hud::HudLatencyItem> m_latencyHud;
Rc<hud::HudRenderLatencyItem> m_renderLatencyHud;
std::optional<VkHdrMetadataEXT> m_hdrMetadata;
bool m_unlockAdditionalFormats = false;

View File

@ -110,7 +110,7 @@ namespace dxvk {
void DxvkContext::beginLatencyTracking(
const Rc<DxvkLatencyTracker>& tracker,
uint64_t frameId) {
if (tracker && (!m_latencyTracker || m_latencyTracker == tracker)) {
if (tracker && m_latencyTracker != tracker) {
tracker->notifyCsRenderBegin(frameId);
m_latencyTracker = tracker;

View File

@ -2,6 +2,7 @@
#include "dxvk_instance.h"
#include "dxvk_latency_builtin.h"
#include "dxvk_latency_reflex.h"
#include "framepacer/dxvk_framepacer.h"
namespace dxvk {
@ -310,13 +311,13 @@ namespace dxvk {
Rc<DxvkLatencyTracker> DxvkDevice::createLatencyTracker(
const Rc<Presenter>& presenter) {
if (m_options.latencySleep == Tristate::False)
return nullptr;
return new FramePacer(m_options);
if (m_options.latencySleep == Tristate::Auto) {
if (m_features.nvLowLatency2)
return new DxvkReflexLatencyTrackerNv(presenter);
else
return nullptr;
return new FramePacer(m_options);
}
return new DxvkBuiltInLatencyTracker(presenter,

View File

@ -128,6 +128,10 @@ namespace dxvk {
virtual void notifyCpuPresentEnd(
uint64_t frameId) = 0;
virtual void notifySubmit() { }
virtual void notifyPresent(
uint64_t frameId) { }
/**
* \brief Called when a command list is submitted to the GPU
*
@ -174,6 +178,9 @@ namespace dxvk {
virtual void notifyGpuExecutionEnd(
uint64_t frameId) = 0;
virtual void notifyGpuPresentBegin(
uint64_t frameId) { }
/**
* \brief Called when presentation of a given frame finishes on the GPU
*

View File

@ -12,12 +12,16 @@ namespace dxvk {
useRawSsbo = config.getOption<Tristate>("dxvk.useRawSsbo", Tristate::Auto);
hud = config.getOption<std::string>("dxvk.hud", "");
tearFree = config.getOption<Tristate>("dxvk.tearFree", Tristate::Auto);
latencySleep = config.getOption<Tristate>("dxvk.latencySleep", Tristate::Auto);
latencySleep = config.getOption<Tristate>("dxvk.latencySleep", Tristate::False);
latencyTolerance = config.getOption<int32_t> ("dxvk.latencyTolerance", 1000);
disableNvLowLatency2 = config.getOption<Tristate>("dxvk.disableNvLowLatency2", Tristate::Auto);
hideIntegratedGraphics = config.getOption<bool> ("dxvk.hideIntegratedGraphics", false);
zeroMappedMemory = config.getOption<bool> ("dxvk.zeroMappedMemory", false);
allowFse = config.getOption<bool> ("dxvk.allowFse", false);
framePace = config.getOption<std::string>("dxvk.framePace", "");
lowLatencyOffset = config.getOption<int32_t> ("dxvk.lowLatencyOffset", 0);
lowLatencyAllowCpuFramesOverlap
= config.getOption<bool> ("dxvk.lowLatencyAllowCpuFramesOverlap", true);
deviceFilter = config.getOption<std::string>("dxvk.deviceFilter", "");
tilerMode = config.getOption<Tristate>("dxvk.tilerMode", Tristate::Auto);
}

View File

@ -38,7 +38,9 @@ namespace dxvk {
Tristate tearFree = Tristate::Auto;
/// Enables latency sleep
Tristate latencySleep = Tristate::Auto;
/// Defaults to false in this build to activate the FramePacer,
/// especially for the case when the game doesn't support Reflex
Tristate latencySleep = Tristate::False;
/// Latency tolerance, in microseconds
int32_t latencyTolerance = 0u;
@ -61,6 +63,18 @@ namespace dxvk {
/// Whether to enable tiler optimizations
Tristate tilerMode = Tristate::Auto;
/// Frame pacing
std::string framePace;
/// A value in microseconds to fine-tune the low-latency frame pacing.
/// Positive values make a frame begin later, which might improve responsiveness.
/// Negative values make a frame begin earlier, which might improve fps.
int32_t lowLatencyOffset;
/// Determines whether a frame is allowed to begin before the CPU part of the
/// previous one has finished processing, when low-latency frame pacing is used.
bool lowLatencyAllowCpuFramesOverlap;
// Device name
std::string deviceFilter;
};

View File

@ -259,18 +259,11 @@ namespace dxvk {
return;
if (m_device->features().khrPresentWait.presentWait) {
bool canSignal = false;
{ std::unique_lock lock(m_frameMutex);
m_lastSignaled = frameId;
canSignal = m_lastCompleted >= frameId;
}
if (canSignal)
m_signal->signal(frameId);
std::lock_guard lock(m_frameMutex);
m_lastSignaled = frameId;
m_frameCond.notify_one();
} else {
m_fpsLimiter.delay();
m_fpsLimiter.delay(tracker);
m_signal->signal(frameId);
if (tracker)
@ -1210,26 +1203,25 @@ namespace dxvk {
void Presenter::runFrameThread() {
env::setThreadName("dxvk-frame");
while (true) {
PresenterFrame frame = { };
std::unique_lock lock(m_frameMutex);
while (true) {
// Wait for all GPU work for this frame to complete in order to maintain
// ordering guarantees of the frame signal w.r.t. objects being released
{ std::unique_lock lock(m_frameMutex);
m_frameCond.wait(lock, [this] {
return !m_frameQueue.empty() && m_frameQueue.front().frameId <= m_lastSignaled;
});
m_frameCond.wait(lock, [this] {
return !m_frameQueue.empty();
});
// Use a frame ID of 0 as an exit condition
PresenterFrame frame = m_frameQueue.front();
// Use a frame ID of 0 as an exit condition
frame = m_frameQueue.front();
if (!frame.frameId) {
m_frameQueue.pop();
return;
}
if (!frame.frameId) {
m_frameQueue.pop();
return;
}
lock.unlock();
// If the present operation has succeeded, actually wait for it to complete.
// Don't bother with it on MAILBOX / IMMEDIATE modes since doing so would
// restrict us to the display refresh rate on some platforms (XWayland).
@ -1243,32 +1235,24 @@ namespace dxvk {
// Signal latency tracker right away to get more accurate
// measurements if the frame rate limiter is enabled.
if (frame.tracker) {
if (frame.tracker)
frame.tracker->notifyGpuPresentEnd(frame.frameId);
frame.tracker = nullptr;
}
// Apply FPS limiter here to align it as closely with scanout as we can,
// Apply FPS limiter here to align it as closely with scanout as we can,
// and delay signaling the frame latency event to emulate behaviour of a
// low refresh rate display as closely as we can.
m_fpsLimiter.delay();
// Wake up any thread that may be waiting for the queue to become empty
bool canSignal = false;
{ std::unique_lock lock(m_frameMutex);
m_frameQueue.pop();
m_frameDrain.notify_one();
m_lastCompleted = frame.frameId;
canSignal = m_lastSignaled >= frame.frameId;
}
m_fpsLimiter.delay(frame.tracker);
frame.tracker = nullptr;
// Always signal even on error, since failures here
// are transparent to the front-end.
if (canSignal)
m_signal->signal(frame.frameId);
m_signal->signal(frame.frameId);
// Wake up any thread that may be waiting for the queue to become empty
lock.lock();
m_frameQueue.pop();
m_frameDrain.notify_one();
}
}

View File

@ -315,7 +315,6 @@ namespace dxvk {
std::queue<PresenterFrame> m_frameQueue;
uint64_t m_lastSignaled = 0u;
uint64_t m_lastCompleted = 0u;
alignas(CACHE_LINE_SIZE)
FpsLimiter m_fpsLimiter;

View File

@ -1,5 +1,6 @@
#include "dxvk_device.h"
#include "dxvk_queue.h"
#include "framepacer/dxvk_framepacer.h"
namespace dxvk {
@ -46,6 +47,8 @@ namespace dxvk {
DxvkSubmitInfo submitInfo,
DxvkLatencyInfo latencyInfo,
DxvkSubmitStatus* status) {
if (latencyInfo.tracker)
latencyInfo.tracker->notifySubmit();
std::unique_lock<dxvk::mutex> lock(m_mutex);
m_finishCond.wait(lock, [this] {
@ -66,6 +69,8 @@ namespace dxvk {
DxvkPresentInfo presentInfo,
DxvkLatencyInfo latencyInfo,
DxvkSubmitStatus* status) {
if (latencyInfo.tracker)
latencyInfo.tracker->notifyPresent(presentInfo.frameId);
std::unique_lock<dxvk::mutex> lock(m_mutex);
DxvkSubmitEntry entry = { };
@ -274,7 +279,9 @@ namespace dxvk {
} else if (entry.present.presenter != nullptr) {
// Signal the frame and then immediately destroy the reference.
// This is necessary since the front-end may want to explicitly
// destroy the presenter object.
// destroy the presenter object.
if (entry.latency.tracker)
entry.latency.tracker->notifyGpuPresentBegin(entry.present.frameId);
entry.present.presenter->signalFrame(entry.present.frameId, entry.latency.tracker);
entry.present.presenter = nullptr;
}

View File

@ -0,0 +1,64 @@
#include "dxvk_framepacer.h"
#include "dxvk_framepacer_mode_low_latency.h"
#include "dxvk_framepacer_mode_min_latency.h"
#include "dxvk_options.h"
#include "../../util/util_env.h"
#include "../../util/log/log.h"
namespace dxvk {
FramePacer::FramePacer( const DxvkOptions& options ) {
// We default to LOW_LATENCY in the draft-PR for now, for demonstration purposes,
// highlighting the generally much better input lag and medium-term time consistency,
// although MAX_FRAME_LATENCY has advantages in many games and is likely the better
// default due to its higher fps throughput and lower susceptibility to short-term
// time inconsistencies. Which mode is smoother depends on the game.
FramePacerMode::Mode mode = FramePacerMode::LOW_LATENCY;
std::string configStr = env::getEnvVar("DXVK_FRAME_PACE");
if (configStr.find("max-frame-latency") != std::string::npos) {
mode = FramePacerMode::MAX_FRAME_LATENCY;
} else if (configStr.find("low-latency") != std::string::npos) {
mode = FramePacerMode::LOW_LATENCY;
} else if (configStr.find("min-latency") != std::string::npos) {
mode = FramePacerMode::MIN_LATENCY;
} else if (options.framePace.find("max-frame-latency") != std::string::npos) {
mode = FramePacerMode::MAX_FRAME_LATENCY;
} else if (options.framePace.find("low-latency") != std::string::npos) {
mode = FramePacerMode::LOW_LATENCY;
} else if (options.framePace.find("min-latency") != std::string::npos) {
mode = FramePacerMode::MIN_LATENCY;
}
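// Note: the DXVK_FRAME_PACE environment variable takes precedence over the
// dxvk.framePace config option.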
switch (mode) {
case FramePacerMode::MAX_FRAME_LATENCY:
Logger::info( "Frame pace: max-frame-latency" );
m_mode = std::make_unique<FramePacerMode>(FramePacerMode::MAX_FRAME_LATENCY, &m_latencyMarkersStorage);
break;
case FramePacerMode::LOW_LATENCY:
Logger::info( "Frame pace: low-latency" );
m_mode = std::make_unique<LowLatencyMode>(mode, &m_latencyMarkersStorage, options);
break;
case FramePacerMode::MIN_LATENCY:
Logger::info( "Frame pace: min-latency" );
m_mode = std::make_unique<MinLatencyMode>(mode, &m_latencyMarkersStorage);
break;
}
for (auto& gpuStart: m_gpuStarts) {
gpuStart.store(0);
}
// Ensure consistency: every frame has a gpuReady event carried over from the previous frame.
LatencyMarkers* m = m_latencyMarkersStorage.getMarkers(DXGI_MAX_SWAP_CHAIN_BUFFERS+1);
m->gpuReady.push_back(high_resolution_clock::now());
}
FramePacer::~FramePacer() {}
}

View File

@ -0,0 +1,191 @@
#pragma once
#include "dxvk_framepacer_mode.h"
#include "dxvk_latency_markers.h"
#include "../dxvk_latency.h"
#include "../../util/util_time.h"
#include <dxgi.h>
namespace dxvk {
struct DxvkOptions;
/* \brief Frame pacer interface managing the CPU-GPU synchronization.
*
* GPUs render frames asynchronously to the game's and dxvk's CPU-side work
* in order to improve fps throughput. Aligning the CPU work to chosen time
* points allows tuning certain characteristics of the video presentation,
* like smoothness and latency.
*/
class FramePacer : public DxvkLatencyTracker {
using microseconds = std::chrono::microseconds;
public:
FramePacer( const DxvkOptions& options );
~FramePacer();
void sleepAndBeginFrame(
uint64_t frameId,
double maxFrameRate) override {
// wait for finished rendering of a previous frame, typically the one before last
m_mode->waitRenderFinished(frameId);
// potentially wait some more if the cpu gets too much ahead
m_mode->startFrame(frameId);
m_latencyMarkersStorage.registerFrameStart(frameId);
m_gpuStarts[ frameId % m_gpuStarts.size() ].store(0);
}
void notifyGpuPresentEnd( uint64_t frameId ) override {
// the frame has been displayed to the screen
m_latencyMarkersStorage.registerFrameEnd(frameId);
m_mode->endFrame(frameId);
}
void notifyCsRenderBegin( uint64_t frameId ) override {
auto now = high_resolution_clock::now();
LatencyMarkers* m = m_latencyMarkersStorage.getMarkers(frameId);
m->csStart = std::chrono::duration_cast<microseconds>(now - m->start).count();
}
void notifyCsRenderEnd( uint64_t frameId ) override {
auto now = high_resolution_clock::now();
LatencyMarkers* m = m_latencyMarkersStorage.getMarkers(frameId);
m->csFinished = std::chrono::duration_cast<microseconds>(now - m->start).count();
m_mode->signalCsFinished( frameId );
}
void notifySubmit() override {
LatencyMarkers* m = m_latencyMarkersStorage.getMarkers(m_lastSubmitFrameId+1);
m->gpuSubmit.push_back(high_resolution_clock::now());
}
void notifyPresent( uint64_t frameId ) override {
// dx to vk translation is finished
if (frameId != 0) {
auto now = high_resolution_clock::now();
m_lastSubmitFrameId = frameId;
LatencyMarkers* m = m_latencyMarkersStorage.getMarkers(frameId);
LatencyMarkers* next = m_latencyMarkersStorage.getMarkers(frameId+1);
m->gpuSubmit.push_back(now);
m->cpuFinished = std::chrono::duration_cast<microseconds>(now - m->start).count();
next->gpuSubmit.clear();
m_latencyMarkersStorage.m_timeline.cpuFinished.store(frameId);
}
}
void notifyQueueSubmit( uint64_t frameId ) override {
assert( frameId == m_lastQueueSubmitFrameId + 1 );
auto now = high_resolution_clock::now();
LatencyMarkers* m = m_latencyMarkersStorage.getMarkers(frameId);
m->gpuQueueSubmit.push_back(now);
queueSubmitCheckGpuStart(frameId, m, now);
}
void notifyQueuePresentBegin( uint64_t frameId ) override {
if (frameId != 0) {
auto now = high_resolution_clock::now();
m_lastQueueSubmitFrameId = frameId;
LatencyMarkers* m = m_latencyMarkersStorage.getMarkers(frameId);
LatencyMarkers* next = m_latencyMarkersStorage.getMarkers(frameId+1);
m->gpuQueueSubmit.push_back(now);
next->gpuQueueSubmit.clear();
queueSubmitCheckGpuStart(frameId, m, now);
}
}
void notifyGpuExecutionBegin( uint64_t frameId ) override {
assert( frameId == m_lastFinishedFrameId+1 );
LatencyMarkers* m = m_latencyMarkersStorage.getMarkers(m_lastFinishedFrameId+1);
gpuExecutionCheckGpuStart(frameId, m, high_resolution_clock::now());
}
void notifyGpuExecutionEnd( uint64_t frameId ) override {
auto now = high_resolution_clock::now();
LatencyMarkers* m = m_latencyMarkersStorage.getMarkers(m_lastFinishedFrameId+1);
m->gpuReady.push_back(now);
}
virtual void notifyGpuPresentBegin( uint64_t frameId ) override {
// we get frameId == 0 for repeated presents (SyncInterval)
if (frameId != 0) {
m_lastFinishedFrameId = frameId;
auto now = high_resolution_clock::now();
LatencyMarkers* m = m_latencyMarkersStorage.getMarkers(frameId);
LatencyMarkers* next = m_latencyMarkersStorage.getMarkers(frameId+1);
m->gpuReady.push_back(now);
m->gpuFinished = std::chrono::duration_cast<microseconds>(now - m->start).count();
next->gpuReady.clear();
next->gpuReady.push_back(now);
gpuExecutionCheckGpuStart(frameId, m, now);
m_latencyMarkersStorage.m_timeline.gpuFinished.store(frameId);
m_mode->finishRender(frameId);
m_mode->signalRenderFinished(frameId);
}
}
FramePacerMode::Mode getMode() const {
return m_mode->m_mode;
}
void setTargetFrameRate( double frameRate ) {
m_mode->setTargetFrameRate(frameRate);
}
bool needsAutoMarkers() override {
return true;
}
LatencyMarkersStorage m_latencyMarkersStorage;
// not implemented methods
void notifyCpuPresentBegin( uint64_t frameId) override { }
void notifyCpuPresentEnd( uint64_t frameId ) override { }
void notifyQueuePresentEnd( uint64_t frameId, VkResult status) override { }
void discardTimings() override { }
DxvkLatencyStats getStatistics( uint64_t frameId ) override
{ return DxvkLatencyStats(); }
private:
void signalGpuStart( uint64_t frameId, LatencyMarkers* m, const high_resolution_clock::time_point& t ) {
m->gpuStart = std::chrono::duration_cast<microseconds>(t - m->start).count();
m_latencyMarkersStorage.m_timeline.gpuStart.store(frameId);
m_mode->signalGpuStart(frameId);
}
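// A frame's gpuStart is signalled once both of its trigger events have occurred:
// a submission reaching the Vulkan queue (queueSubmitBit) and the GPU becoming
// ready to execute the frame's work (gpuReadyBit). Each check function below sets
// its bit atomically; whichever runs second observes the other bit already set and
// performs the signalling. The bits are cleared again in sleepAndBeginFrame().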
void queueSubmitCheckGpuStart( uint64_t frameId, LatencyMarkers* m, const high_resolution_clock::time_point& t ) {
auto& gpuStart = m_gpuStarts[ frameId % m_gpuStarts.size() ];
uint16_t val = gpuStart.fetch_or(queueSubmitBit);
if (val == gpuReadyBit)
signalGpuStart( frameId, m, t );
}
void gpuExecutionCheckGpuStart( uint64_t frameId, LatencyMarkers* m, const high_resolution_clock::time_point& t ) {
auto& gpuStart = m_gpuStarts[ frameId % m_gpuStarts.size() ];
uint16_t val = gpuStart.fetch_or(gpuReadyBit);
if (val == queueSubmitBit)
signalGpuStart( frameId, m, t );
}
std::unique_ptr<FramePacerMode> m_mode;
uint64_t m_lastSubmitFrameId = { DXGI_MAX_SWAP_CHAIN_BUFFERS };
uint64_t m_lastQueueSubmitFrameId = { DXGI_MAX_SWAP_CHAIN_BUFFERS };
uint64_t m_lastFinishedFrameId = { DXGI_MAX_SWAP_CHAIN_BUFFERS };
std::array< std::atomic< uint16_t >, 16 > m_gpuStarts = { };
static constexpr uint16_t queueSubmitBit = 1;
static constexpr uint16_t gpuReadyBit = 2;
};
}

View File

@ -0,0 +1,117 @@
#pragma once
#include "dxvk_latency_markers.h"
#include "../../util/sync/sync_signal.h"
#include "../../util/util_env.h"
#include <dxgi.h>
namespace dxvk {
/*
* \brief Abstract frame pacer mode, supporting different synchronization strategies.
*/
class FramePacerMode {
public:
enum Mode {
MAX_FRAME_LATENCY = 0,
LOW_LATENCY,
MIN_LATENCY
};
FramePacerMode( Mode mode, LatencyMarkersStorage* markerStorage, uint32_t maxFrameLatency=1 )
: m_mode( mode ),
m_waitLatency( maxFrameLatency+1 ),
m_latencyMarkersStorage( markerStorage ) {
setFpsLimitFrametimeFromEnv();
}
virtual ~FramePacerMode() { }
virtual void startFrame( uint64_t frameId ) { }
virtual void endFrame( uint64_t frameId ) { }
virtual void finishRender( uint64_t frameId ) { }
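// MAX_FRAME_LATENCY (mode 0) relies on the presenter's regular frame-latency
// handling, so the if (m_mode) checks below bypass these fences for it; only
// the low-latency and min-latency modes use them. waitRenderFinished(frameId)
// blocks until frame (frameId - m_waitLatency) has finished on the GPU.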
void waitRenderFinished( uint64_t frameId ) {
if (m_mode) m_fenceGpuFinished.wait(frameId-m_waitLatency); }
void signalRenderFinished( uint64_t frameId ) {
if (m_mode) m_fenceGpuFinished.signal(frameId); }
void signalGpuStart( uint64_t frameId ) {
if (m_mode) m_fenceGpuStart.signal(frameId); }
void signalCsFinished( uint64_t frameId ) {
if (m_mode) m_fenceCsFinished.signal(frameId); }
void setTargetFrameRate( double frameRate ) {
if (!m_fpsLimitEnvOverride && frameRate > 1.0)
m_fpsLimitFrametime.store( 1'000'000/frameRate );
}
const Mode m_mode;
static bool getDoubleFromEnv( const char* name, double* result );
static bool getIntFromEnv( const char* name, int* result );
protected:
void setFpsLimitFrametimeFromEnv();
const uint32_t m_waitLatency;
LatencyMarkersStorage* m_latencyMarkersStorage;
std::atomic<int32_t> m_fpsLimitFrametime = { 0 };
bool m_fpsLimitEnvOverride = { false };
sync::Fence m_fenceGpuStart = { sync::Fence(DXGI_MAX_SWAP_CHAIN_BUFFERS) };
sync::Fence m_fenceGpuFinished = { sync::Fence(DXGI_MAX_SWAP_CHAIN_BUFFERS) };
sync::Fence m_fenceCsFinished = { sync::Fence(DXGI_MAX_SWAP_CHAIN_BUFFERS+50) };
};
inline bool FramePacerMode::getDoubleFromEnv( const char* name, double* result ) {
std::string env = env::getEnvVar(name);
if (env.empty())
return false;
try {
*result = std::stod(env);
return true;
} catch (const std::invalid_argument&) {
return false;
}
}
inline bool FramePacerMode::getIntFromEnv( const char* name, int* result ) {
std::string env = env::getEnvVar(name);
if (env.empty())
return false;
try {
*result = std::stoi(env);
return true;
} catch (const std::invalid_argument&) {
return false;
}
}
inline void FramePacerMode::setFpsLimitFrametimeFromEnv() {
double fpsLimit;
if (!getDoubleFromEnv("DXVK_FRAME_RATE", &fpsLimit))
return;
m_fpsLimitEnvOverride = true;
if (fpsLimit < 1.0)
return;
m_fpsLimitFrametime = 1'000'000/fpsLimit;
}
}

View File

@ -0,0 +1,43 @@
#include "dxvk_framepacer_mode_low_latency.h"
namespace dxvk {
bool getLowLatencyOffsetFromEnv( int32_t& offset ) {
if (!FramePacerMode::getIntFromEnv("DXVK_LOW_LATENCY_OFFSET", &offset))
return false;
return true;
}
bool getLowLatencyAllowCpuFramesOverlapFromEnv( bool& allowOverlap ) {
int32_t o;
if (!FramePacerMode::getIntFromEnv("DXVK_LOW_LATENCY_ALLOW_CPU_FRAMES_OVERLAP", &o))
return false;
allowOverlap = (bool) o;
return true;
}
int32_t LowLatencyMode::getLowLatencyOffset( const DxvkOptions& options ) {
int32_t offset = options.lowLatencyOffset;
int32_t o;
if (getLowLatencyOffsetFromEnv(o))
offset = o;
offset = std::max( -10000, offset );
offset = std::min( 10000, offset );
return offset;
}
bool LowLatencyMode::getLowLatencyAllowCpuFramesOverlap( const DxvkOptions& options ) {
bool allowOverlap = options.lowLatencyAllowCpuFramesOverlap;
bool o;
if (getLowLatencyAllowCpuFramesOverlapFromEnv(o))
allowOverlap = o;
return allowOverlap;
}
}

View File

@ -0,0 +1,255 @@
#pragma once
#include "dxvk_framepacer_mode.h"
#include "../dxvk_options.h"
#include "../../util/log/log.h"
#include "../../util/util_string.h"
#include <assert.h>
namespace dxvk {
/*
* This low-latency mode aims to reduce latency with minimal impact on fps.
* Effective when operating in the GPU-limit, and efficient in the CPU-limit as well.
*
* Greatly reduces input lag variations when switching between CPU- and GPU-limit, and
* compared to the max-frame-latency approach, it has a much more stable input lag when
* GPU running times change dramatically, which can happen for example when rotating within a scene.
*
* The current implementation, however, tends to generate fluctuations alternating
* frame-by-frame, depending on the game's and dxvk's CPU-time variations. This might
* be visible as a loss in smoothness, which is an area in which this implementation
* can be further improved. Unsuitable smoothing, however, might degrade the input-lag
* feel, so none is implemented for now; more advanced smoothing techniques will be
* investigated in the future. In some situations this low-latency pacing actually
* improves smoothness; it will depend on the game.
*
* An interesting observation while playtesting was that not only was the input lag
* affected, but the generated video also progressed more cleanly in time with regard
* to medium-term time consistency; in other words, the video playback speed remained steadier.
*
* Optimized for VRR and VK_PRESENT_MODE_IMMEDIATE_KHR. It also comes with its own fps-limiter
* which is typically used to prevent the game's fps exceeding the monitor's refresh rate,
* and which is tightly integrated into the pacing logic.
*
* Can be fine-tuned via the dxvk.lowLatencyOffset and dxvk.lowLatencyAllowCpuFramesOverlap
* variables (or their respective environment variables).
* Compared to maxFrameLatency = 3, render-latency reductions of up to 67% are achieved.
*/
class LowLatencyMode : public FramePacerMode {
using microseconds = std::chrono::microseconds;
using time_point = high_resolution_clock::time_point;
public:
LowLatencyMode(Mode mode, LatencyMarkersStorage* storage, const DxvkOptions& options)
: FramePacerMode(mode, storage),
m_lowLatencyOffset(getLowLatencyOffset(options)),
m_allowCpuFramesOverlap(getLowLatencyAllowCpuFramesOverlap(options)) {
Logger::info( str::format("Using lowLatencyOffset: ", m_lowLatencyOffset) );
Logger::info( str::format("Using lowLatencyAllowCpuFramesOverlap: ", m_allowCpuFramesOverlap) );
}
~LowLatencyMode() {}
void startFrame( uint64_t frameId ) override {
using std::chrono::duration_cast;
if (!m_allowCpuFramesOverlap)
m_fenceCsFinished.wait( frameId-1 );
m_fenceGpuStart.wait( frameId-1 );
time_point now = high_resolution_clock::now();
uint64_t finishedId = m_latencyMarkersStorage->getTimeline()->gpuFinished.load();
if (finishedId <= DXGI_MAX_SWAP_CHAIN_BUFFERS+1ull)
return;
if (finishedId == frameId-1) {
// we are the only frame in flight; nothing to do other than to apply the fps limiter if needed
m_lastStart = sleepFor( now, 0 );
return;
}
if (finishedId != frameId-2) {
Logger::err( str::format("internal error during low-latency frame pacing: expected finished frameId=",
frameId-2, ", got: ", finishedId) );
}
const LatencyMarkers* m = m_latencyMarkersStorage->getConstMarkers(frameId-1);
// estimate the target gpu sync point for this frame
// and calculate backwards when we want to start this frame
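// Roughly: gpuReadyPrediction estimates how many microseconds from now the GPU will
// have finished the previous frame's work, props.gpuSync is the predicted GPU time
// from this frame's gpuStart to its sync point (the start of its last submission on
// the GPU), and props.cpuUntilGpuSync is the CPU time needed from frame start until
// that submission is issued. The frame start is then delayed so that this submission
// is predicted to arrive just as the GPU reaches the sync point, shifted by lowLatencyOffset.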
const SyncProps props = getSyncPrediction();
int32_t gpuReadyPrediction = duration_cast<microseconds>(
m->start + microseconds(m->gpuStart+getGpuStartToFinishPrediction()) - now).count();
int32_t targetGpuSync = gpuReadyPrediction + props.gpuSync;
int32_t delay = targetGpuSync - props.cpuUntilGpuSync + m_lowLatencyOffset;
m_lastStart = sleepFor( now, delay );
}
void finishRender( uint64_t frameId ) override {
using std::chrono::duration_cast;
const LatencyMarkers* m = m_latencyMarkersStorage->getConstMarkers(frameId);
int32_t numLoop = (int32_t)(m->gpuReady.size())-1;
if (numLoop <= 1) {
m_props[frameId % m_props.size()] = SyncProps();
m_props[frameId % m_props.size()].isOutlier = true;
m_propsFinished.store( frameId );
return;
}
// estimates the optimal overlap for cpu/gpu work by optimizing gpu scheduling first
// such that the gpu doesn't go into idle for this frame, and then aligning cpu submits
// where gpuSubmit[i] <= gpuRun[i] for all i
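// First pass (below): pack the GPU execution chunks back-to-back into gpuRun, giving
// the minimal GPU timeline for this frame. Second pass: walk the submits backwards and
// accumulate how much earlier the CPU work would have needed to start so that no submit
// arrives after the point where the packed schedule needs it; that offset feeds into
// cpuUntilGpuSync.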
std::vector<int32_t>& gpuRun = m_tempGpuRun;
std::vector<int32_t>& gpuRunDurations = m_tempGpuRunDurations;
gpuRun.clear();
gpuRunDurations.clear();
int32_t optimizedGpuTime = 0;
gpuRun.push_back(optimizedGpuTime);
for (int i=0; i<numLoop; ++i) {
time_point _gpuRun = std::max( m->gpuReady[i], m->gpuQueueSubmit[i] );
int32_t duration = duration_cast<microseconds>( m->gpuReady[i+1] - _gpuRun ).count();
optimizedGpuTime += duration;
gpuRun.push_back(optimizedGpuTime);
gpuRunDurations.push_back(duration);
}
int32_t alignment = duration_cast<microseconds>( m->gpuSubmit[numLoop-1] - m->gpuSubmit[0] ).count()
- gpuRun[numLoop-1];
int32_t offset = 0;
for (int i=numLoop-2; i>=0; --i) {
int32_t curSubmit = duration_cast<microseconds>( m->gpuSubmit[i] - m->gpuSubmit[0] ).count();
int32_t diff = curSubmit - gpuRun[i] - alignment;
diff = std::max( 0, diff );
offset += diff;
alignment += diff;
}
SyncProps& props = m_props[frameId % m_props.size()];
props.gpuSync = gpuRun[numLoop-1];
props.cpuUntilGpuSync = offset + duration_cast<microseconds>( m->gpuSubmit[numLoop-1] - m->start ).count();
props.optimizedGpuTime = optimizedGpuTime;
props.isOutlier = isOutlier(frameId);
m_propsFinished.store( frameId );
}
Sleep::TimePoint sleepFor( const Sleep::TimePoint t, int32_t delay ) {
// account for the fps limit and ensure we won't sleep too long, just in case
int32_t frametime = std::chrono::duration_cast<microseconds>( t - m_lastStart ).count();
int32_t frametimeDiff = std::max( 0, m_fpsLimitFrametime.load() - frametime );
delay = std::max( delay, frametimeDiff );
delay = std::max( 0, std::min( delay, 20000 ) );
Sleep::TimePoint nextStart = t + microseconds(delay);
Sleep::sleepUntil( t, nextStart );
return nextStart;
}
private:
struct SyncProps {
int32_t optimizedGpuTime; // gpu executing packed submits in one go
int32_t gpuSync; // us after gpuStart
int32_t cpuUntilGpuSync;
bool isOutlier;
};
SyncProps getSyncPrediction() {
// In the future we might use more samples to get a prediction.
// However, simple averaging gives slightly artificial-feeling mouse input,
// so more advanced methods will be investigated.
SyncProps res = {};
uint64_t id = m_propsFinished;
if (id < DXGI_MAX_SWAP_CHAIN_BUFFERS+7)
return res;
for (size_t i=0; i<7; ++i) {
const SyncProps& props = m_props[ (id-i) % m_props.size() ];
if (!props.isOutlier) {
id = id-i;
break;
}
}
return m_props[ id % m_props.size() ];
};
int32_t getGpuStartToFinishPrediction() {
uint64_t id = m_propsFinished;
if (id < DXGI_MAX_SWAP_CHAIN_BUFFERS+7)
return 0;
for (size_t i=0; i<7; ++i) {
const SyncProps& props = m_props[ (id-i) % m_props.size() ];
if (!props.isOutlier) {
const LatencyMarkers* m = m_latencyMarkersStorage->getConstMarkers(id-i);
if (m->gpuReady.empty() || m->gpuSubmit.empty())
return m->gpuFinished - m->gpuStart;
time_point t = std::max( m->gpuReady[0], m->gpuSubmit[0] );
return std::chrono::duration_cast<microseconds>( t - m->start ).count()
+ props.optimizedGpuTime
- m->gpuStart;
}
}
const LatencyMarkers* m = m_latencyMarkersStorage->getConstMarkers(id);
return m->gpuFinished - m->gpuStart;
};
bool isOutlier( uint64_t frameId ) {
constexpr size_t numLoop = 7;
int32_t totalCpuTime = 0;
for (size_t i=0; i<numLoop; ++i) {
const LatencyMarkers* m = m_latencyMarkersStorage->getConstMarkers(frameId-i);
totalCpuTime += m->cpuFinished;
}
int32_t avgCpuTime = totalCpuTime / numLoop;
const LatencyMarkers* m = m_latencyMarkersStorage->getConstMarkers(frameId);
if (m->cpuFinished > 1.7*avgCpuTime || m->gpuSubmit.empty() || m->gpuReady.size() != (m->gpuSubmit.size()+1) )
return true;
return false;
}
int32_t getLowLatencyOffset( const DxvkOptions& options );
bool getLowLatencyAllowCpuFramesOverlap( const DxvkOptions& options );
const int32_t m_lowLatencyOffset;
const bool m_allowCpuFramesOverlap;
Sleep::TimePoint m_lastStart = { high_resolution_clock::now() };
std::array<SyncProps, 16> m_props;
std::atomic<uint64_t> m_propsFinished = { 0 };
std::vector<int32_t> m_tempGpuRun;
std::vector<int32_t> m_tempGpuRunDurations;
};
}

View File

@ -0,0 +1,45 @@
#pragma once
#include "dxvk_framepacer_mode.h"
namespace dxvk {
/*
* Minimal latency is achieved here by waiting for the previous
* frame to complete, which results in greatly reduced fps.
* Generally not recommended, but helpful for gaining insights to fine-tune
* the low-latency mode, and possibly useful for running games
* in the CPU-limit.
*/
class MinLatencyMode : public FramePacerMode {
public:
MinLatencyMode(Mode mode, LatencyMarkersStorage* storage)
: FramePacerMode(mode, storage, 0) {}
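// maxFrameLatency = 0 gives m_waitLatency = 1 in the base class, so each frame
// waits for the immediately preceding frame to finish on the GPU before it begins.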
~MinLatencyMode() {}
void startFrame( uint64_t frameId ) override {
Sleep::TimePoint now = high_resolution_clock::now();
int32_t frametime = std::chrono::duration_cast<std::chrono::microseconds>(
now - m_lastStart ).count();
int32_t frametimeDiff = std::max( 0, m_fpsLimitFrametime.load() - frametime );
int32_t delay = std::max( 0, frametimeDiff );
delay = std::min( delay, 20000 );
Sleep::TimePoint nextStart = now + std::chrono::microseconds(delay);
Sleep::sleepUntil( now, nextStart );
m_lastStart = nextStart;
}
private:
Sleep::TimePoint m_lastStart = { high_resolution_clock::now() };
};
}

View File

@ -0,0 +1,148 @@
#pragma once
#include <atomic>
#include <dxgi.h>
#include <vector>
#include <array>
#include <assert.h>
#include "../../util/util_sleep.h"
#include "../../util/log/log.h"
#include "../../util/util_string.h"
namespace dxvk {
class FramePacer;
class LatencyMarkersStorage;
struct LatencyMarkers {
using time_point = high_resolution_clock::time_point;
time_point start;
time_point end;
int32_t csStart;
int32_t csFinished;
int32_t cpuFinished;
int32_t gpuStart;
int32_t gpuFinished;
int32_t presentFinished;
std::vector<time_point> gpuReady;
std::vector<time_point> gpuSubmit;
std::vector<time_point> gpuQueueSubmit;
};
/*
* Stores which information is accessible for which frame.
*/
struct LatencyMarkersTimeline {
std::atomic<uint64_t> cpuFinished = { DXGI_MAX_SWAP_CHAIN_BUFFERS };
std::atomic<uint64_t> gpuStart = { DXGI_MAX_SWAP_CHAIN_BUFFERS };
std::atomic<uint64_t> gpuFinished = { DXGI_MAX_SWAP_CHAIN_BUFFERS };
std::atomic<uint64_t> frameFinished = { DXGI_MAX_SWAP_CHAIN_BUFFERS };
};
class LatencyMarkersReader {
public:
LatencyMarkersReader( const LatencyMarkersStorage* storage, uint32_t numEntries );
bool getNext( const LatencyMarkers*& result );
private:
const LatencyMarkersStorage* m_storage;
uint64_t m_index;
};
class LatencyMarkersStorage {
friend class LatencyMarkersReader;
friend class FramePacer;
public:
LatencyMarkersStorage() { }
~LatencyMarkersStorage() { }
LatencyMarkersReader getReader( uint32_t numEntries ) const {
return LatencyMarkersReader(this, numEntries);
}
void registerFrameStart( uint64_t frameId ) {
if (frameId <= m_timeline.frameFinished.load()) {
Logger::warn( str::format("internal error during registerFrameStart: expected frameId=",
m_timeline.frameFinished.load()+1, ", got: ", frameId) );
}
auto now = high_resolution_clock::now();
LatencyMarkers* markers = getMarkers(frameId);
markers->start = now;
}
void registerFrameEnd( uint64_t frameId ) {
if (frameId <= m_timeline.frameFinished.load()) {
Logger::warn( str::format("internal error during registerFrameEnd: expected frameId=",
m_timeline.frameFinished.load()+1, ", got: ", frameId) );
}
auto now = high_resolution_clock::now();
LatencyMarkers* markers = getMarkers(frameId);
markers->presentFinished = std::chrono::duration_cast<std::chrono::microseconds>(
now - markers->start).count();
markers->end = now;
m_timeline.frameFinished.store(frameId);
}
const LatencyMarkersTimeline* getTimeline() const {
return &m_timeline;
}
const LatencyMarkers* getConstMarkers( uint64_t frameId ) const {
return &m_markers[frameId % m_numMarkers];
}
private:
LatencyMarkers* getMarkers( uint64_t frameId ) {
return &m_markers[frameId % m_numMarkers];
}
// A simple modulo mapping is used for frameIds, which are expected to increase monotonically by one.
// The size is chosen large enough that we never get into a situation where the reader cannot keep up with the producer.
static constexpr uint16_t m_numMarkers = 128;
std::array<LatencyMarkers, m_numMarkers> m_markers = { };
LatencyMarkersTimeline m_timeline;
};
inline LatencyMarkersReader::LatencyMarkersReader( const LatencyMarkersStorage* storage, uint32_t numEntries )
: m_storage(storage) {
m_index = 0;
if (m_storage->m_timeline.frameFinished.load() > numEntries + DXGI_MAX_SWAP_CHAIN_BUFFERS + 2)
m_index = m_storage->m_timeline.frameFinished.load() - numEntries;
}
inline bool LatencyMarkersReader::getNext( const LatencyMarkers*& result ) {
if (m_index == 0 || m_index > m_storage->m_timeline.frameFinished.load())
return false;
result = &m_storage->m_markers[m_index % m_storage->m_numMarkers];
m_index++;
return true;
}
}
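For illustration, the markers can be consumed through LatencyMarkersReader in the same way the HUD render-latency item added by this PR does. The helper below is a hypothetical sketch (not part of the change), assumed to live inside namespace dxvk:

// Hypothetical helper: average time from frame start to present completion,
// in microseconds, over (up to) the last 100 finished frames.
inline uint64_t averagePresentLatencyUs(const LatencyMarkersStorage& storage) {
  LatencyMarkersReader reader = storage.getReader(100);
  const LatencyMarkers* markers = nullptr;
  uint64_t total = 0;
  uint32_t count = 0;
  while (reader.getNext(markers)) {
    total += markers->presentFinished;
    count += 1;
  }
  return count ? total / count : 0;
}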

View File

@ -59,6 +59,11 @@ namespace dxvk::hud {
Rc<T> addItem(const char* name, int32_t at, Args... args) {
return m_hudItems.add<T>(name, at, std::forward<Args>(args)...);
}
template<typename T>
int32_t getItemPos() {
return m_hudItems.getItemPos<T>();
}
/**
* \brief Creates the HUD

View File

@ -1,4 +1,5 @@
#include "dxvk_hud_item.h"
#include "../framepacer/dxvk_framepacer.h"
#include <hud_chunk_frag_background.h>
#include <hud_chunk_frag_visualize.h>
@ -213,6 +214,63 @@ namespace dxvk::hud {
}
HudRenderLatencyItem::HudRenderLatencyItem() { }
HudRenderLatencyItem::~HudRenderLatencyItem() { }
void HudRenderLatencyItem::update(dxvk::high_resolution_clock::time_point time) {
// We cannot measure latency when fps-limiting is performed in Presenter::runFrameThread(),
// because it interferes with getting the right timestamp from vkWaitForPresent().
// If we truly wanted to measure it, we would need one additional thread.
if (FpsLimiter::m_isActive) {
m_latency = "N/A";
return;
}
const Rc<DxvkLatencyTracker> tracker = m_tracker;
const FramePacer* framePacer = dynamic_cast<FramePacer*>( tracker.ptr() );
if (!framePacer)
return;
auto elapsed = std::chrono::duration_cast<std::chrono::microseconds>(time - m_lastUpdate);
if (elapsed.count() >= UpdateInterval) {
m_lastUpdate = time;
LatencyMarkersReader reader = framePacer->m_latencyMarkersStorage.getReader(100);
const LatencyMarkers* markers;
uint32_t count = 0;
uint64_t totalLatency = 0;
while (reader.getNext(markers)) {
totalLatency += markers->presentFinished;
++count;
}
if (!count)
return;
uint64_t latency = totalLatency / count;
m_latency = str::format(latency / 1000, ".", (latency/100) % 10, " ms");
}
}
HudPos HudRenderLatencyItem::render(
const DxvkContextObjects& ctx,
const HudPipelineKey& key,
const HudOptions& options,
HudRenderer& renderer,
HudPos position) {
position.y += 12;
renderer.drawText(16, position, 0xff4040ffu, "Render latency:");
renderer.drawText(16, { position.x + 195, position.y },
0xffffffffu, m_latency);
position.y += 8;
return position;
}
HudFrameTimeItem::HudFrameTimeItem(const Rc<DxvkDevice>& device, HudRenderer* renderer)
: m_device (device),
m_gfxSetLayout (createDescriptorSetLayout()),

View File

@ -131,6 +131,15 @@ namespace dxvk::hud {
return value;
}
template<typename T>
int32_t getItemPos() {
for (int i=0; i<(int)m_items.size(); ++i) {
if (dynamic_cast<T*>(m_items[i].ptr()))
return i;
}
return -1;
}
private:
bool m_enableFull = false;
@ -244,6 +253,42 @@ namespace dxvk::hud {
};
/**
* \brief HUD item to display render latency
*/
class HudRenderLatencyItem : public HudItem {
constexpr static int64_t UpdateInterval = 500'000;
public:
HudRenderLatencyItem();
~HudRenderLatencyItem();
void updateLatencyTracker( const Rc<DxvkLatencyTracker>& tracker ) {
m_tracker = tracker;
}
void update(dxvk::high_resolution_clock::time_point time);
HudPos render(
const DxvkContextObjects& ctx,
const HudPipelineKey& key,
const HudOptions& options,
HudRenderer& renderer,
HudPos position);
private:
Rc<DxvkLatencyTracker> m_tracker;
dxvk::high_resolution_clock::time_point m_lastUpdate
= dxvk::high_resolution_clock::now();
std::string m_latency;
};
/**
* \brief HUD item to display the frame rate
*/

View File

@ -120,6 +120,9 @@ dxvk_src = [
'hud/dxvk_hud_font.cpp',
'hud/dxvk_hud_item.cpp',
'hud/dxvk_hud_renderer.cpp',
'framepacer/dxvk_framepacer.cpp',
'framepacer/dxvk_framepacer_mode_low_latency.cpp',
]
if platform == 'windows'

View File

@ -5,12 +5,15 @@
#include "util_fps_limiter.h"
#include "util_sleep.h"
#include "util_string.h"
#include "../dxvk/framepacer/dxvk_framepacer.h"
#include "./log/log.h"
using namespace std::chrono_literals;
namespace dxvk {
std::atomic<bool> FpsLimiter::m_isActive = { false };
FpsLimiter::FpsLimiter() {
auto override = getEnvironmentOverride();
@ -48,7 +51,12 @@ namespace dxvk {
}
void FpsLimiter::delay() {
void FpsLimiter::delay(const Rc<DxvkLatencyTracker>& tracker) {
FramePacer* framePacer = dynamic_cast<FramePacer*>(tracker.ptr());
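// The low-latency and min-latency pacing modes implement their own fps limiting,
// so the generic limiter is skipped for them; MAX_FRAME_LATENCY (mode 0) still
// goes through the regular limiter below.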
if (framePacer && framePacer->getMode()) {
return;
}
std::unique_lock<dxvk::mutex> lock(m_mutex);
auto interval = m_targetInterval;
auto latency = m_maxLatency;
@ -71,8 +79,11 @@ namespace dxvk {
// that can be written by setTargetFrameRate
lock.unlock();
if (t1 < m_nextFrame)
m_isActive.store(false);
if (t1 < m_nextFrame) {
m_isActive.store(true);
Sleep::sleepUntil(t1, m_nextFrame);
}
m_nextFrame = (t1 < m_nextFrame + interval)
? m_nextFrame + interval

View File

@ -7,6 +7,8 @@
#include "util_time.h"
namespace dxvk {
class DxvkLatencyTracker;
/**
* \brief Frame rate limiter
@ -38,7 +40,7 @@ namespace dxvk {
* and the time since the last call to \ref delay is
* shorter than the target interval.
*/
void delay();
void delay(const Rc<DxvkLatencyTracker>& tracker);
/**
* \brief Queries environment override
@ -46,6 +48,8 @@ namespace dxvk {
*/
static std::optional<double> getEnvironmentOverride();
static std::atomic<bool> m_isActive;
private:
using TimePoint = dxvk::high_resolution_clock::time_point;