mirror of
https://github.com/doitsujin/dxvk.git
synced 2025-03-14 04:29:15 +01:00
Merge 8e2a509eb6711afe20f2a5426ca5b111add82373 into c04410ca00f33162d0875bc8500d3f8185bc73df
This commit is contained in:
commit
46aeecc4e4
52
dxvk.conf
52
dxvk.conf
@ -18,6 +18,51 @@
|
||||
# dxgi.enableHDR = True
|
||||
|
||||
|
||||
# Frame pacing mode managing CPU-GPU synchronization.
|
||||
# Defaults to "low-latency" in the draft-PR for demonstration purposes.
|
||||
#
|
||||
# "max-frame-latency" provides stable latency in the GPU-limit as long as
|
||||
# GPU render times are stable. Latency generally is higher but offers great
|
||||
# visual smoothness.
|
||||
#
|
||||
# "low-latency" provides lower latency in the GPU-limit and can be fine-tuned
|
||||
# via dxvk.lowLatencyOffset and dxvk.lowLatencyAllowCpuFramesOverlap.
|
||||
#
|
||||
# "min-latency" possibly provides the lowest latency (low-latency can be
|
||||
# quicker in some situations), and offers less fps in the GPU-limit
|
||||
# due to stalling the GPU between frames. Generally not recommended,
|
||||
# but helpful to get insights to fine-tune the low-latency mode and
|
||||
# possibly is useful for running games in the CPU-limit.
|
||||
#
|
||||
# "low-latency" and "min-latency" also support their own fps-limiting enabled via common
|
||||
# variables.
|
||||
#
|
||||
# Supported values: "max-frame-latency", "low-latency", "min-latency"
|
||||
|
||||
# dxvk.framePace = ""
|
||||
|
||||
|
||||
# Allows fine-tuning the low-latency frame pacing mode.
|
||||
# Positive values make a frame begin later which might improve responsiveness,
|
||||
# although only very slightly, but may be relevant for edge cases.
|
||||
# Negative values make a frame begin earlier which might improve fps.
|
||||
# Values are given in microseconds. Defaults to 0.
|
||||
#
|
||||
# Supported values: -10000 to 10000
|
||||
|
||||
# dxvk.lowLatencyOffset = 0
|
||||
|
||||
|
||||
# Determines whether a frame is allowed to begin before finishing processing
|
||||
# the cpu-part of the previous one, when low-latency frame pacing is used.
|
||||
# Snappiness may be improved when disallowing overlap. On the other hand, this
|
||||
# might also decrease fps in certain cases. Defaults to True.
|
||||
#
|
||||
# Supported values: True, False
|
||||
|
||||
# dxvk.lowLatencyAllowCpuFramesOverlap = True
|
||||
|
||||
|
||||
# Expose support for dcomp swap chains with a dummy window.
|
||||
#
|
||||
# This is not a valid implementation of DirectComposition swapchains,
|
||||
@ -104,8 +149,13 @@
|
||||
# The implementation will either use VK_NV_low_latency2 if supported
|
||||
# by the driver, or a custom algorithm.
|
||||
# - False: Disable Reflex support as well as built-in latency reduction.
|
||||
# This build defaults to False to enable dxvk.framePace. You need to
|
||||
# enable Reflex manually (Auto) until we support switching back and
|
||||
# forth between Reflex and the low-latency frame pacing - for example
|
||||
# via the ingame options - and more critically we want to enable
|
||||
# low-latency frame pacing if the game doesn't support Reflex.
|
||||
|
||||
# dxvk.latencySleep = Auto
|
||||
# dxvk.latencySleep = False
|
||||
|
||||
|
||||
# Tolerance for the latency sleep heuristic, in microseconds. Higher values
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include "d3d11_swapchain.h"
|
||||
|
||||
#include "../dxvk/dxvk_latency_builtin.h"
|
||||
#include "../dxvk/framepacer/dxvk_framepacer.h"
|
||||
|
||||
#include "../util/util_win32_compat.h"
|
||||
|
||||
@ -294,6 +295,9 @@ namespace dxvk {
|
||||
if (m_latencyHud)
|
||||
m_latencyHud->accumulateStats(latencyStats);
|
||||
|
||||
if (m_renderLatencyHud)
|
||||
m_renderLatencyHud->updateLatencyTracker(m_latency);
|
||||
|
||||
return hr;
|
||||
}
|
||||
|
||||
@ -354,6 +358,10 @@ namespace dxvk {
|
||||
|
||||
if (m_presenter != nullptr)
|
||||
m_presenter->setFrameRateLimit(m_targetFrameRate, GetActualFrameLatency());
|
||||
|
||||
FramePacer* framePacer = dynamic_cast<FramePacer*>(m_latency.ptr());
|
||||
if (framePacer != nullptr)
|
||||
framePacer->setTargetFrameRate(FrameRate);
|
||||
}
|
||||
|
||||
|
||||
@ -599,8 +607,14 @@ namespace dxvk {
|
||||
if (hud) {
|
||||
hud->addItem<hud::HudClientApiItem>("api", 1, GetApiName());
|
||||
|
||||
if (m_latency)
|
||||
if (m_latency) {
|
||||
m_latencyHud = hud->addItem<hud::HudLatencyItem>("latency", 4);
|
||||
FramePacer* framePacer = dynamic_cast<FramePacer*>(m_latency.ptr());
|
||||
if (framePacer) {
|
||||
int32_t fpsItemPos = hud->getItemPos<hud::HudFpsItem>();
|
||||
m_renderLatencyHud = hud->addItem<hud::HudRenderLatencyItem>("renderlatency", fpsItemPos+1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
m_blitter = new DxvkSwapchainBlitter(m_device, std::move(hud));
|
||||
|
@ -125,7 +125,8 @@ namespace dxvk {
|
||||
dxvk::mutex m_frameStatisticsLock;
|
||||
DXGI_VK_FRAME_STATISTICS m_frameStatistics = { };
|
||||
|
||||
Rc<hud::HudLatencyItem> m_latencyHud;
|
||||
Rc<hud::HudLatencyItem> m_latencyHud;
|
||||
Rc<hud::HudRenderLatencyItem> m_renderLatencyHud;
|
||||
|
||||
Rc<DxvkImageView> GetBackBufferView();
|
||||
|
||||
|
@ -5,6 +5,8 @@
|
||||
#include "d3d9_hud.h"
|
||||
#include "d3d9_window.h"
|
||||
|
||||
#include "../dxvk/framepacer/dxvk_framepacer.h"
|
||||
|
||||
namespace dxvk {
|
||||
|
||||
static uint16_t MapGammaControlPoint(float x) {
|
||||
@ -923,6 +925,9 @@ namespace dxvk {
|
||||
if (m_latencyHud)
|
||||
m_latencyHud->accumulateStats(latencyStats);
|
||||
|
||||
if (m_renderLatencyHud)
|
||||
m_renderLatencyHud->updateLatencyTracker(m_latencyTracker);
|
||||
|
||||
// Rotate swap chain buffers so that the back
|
||||
// buffer at index 0 becomes the front buffer.
|
||||
for (uint32_t i = 1; i < m_backBuffers.size(); i++)
|
||||
@ -1060,8 +1065,14 @@ namespace dxvk {
|
||||
if (hud) {
|
||||
m_apiHud = hud->addItem<hud::HudClientApiItem>("api", 1, GetApiName());
|
||||
|
||||
if (m_latencyTracking)
|
||||
if (m_latencyTracking) {
|
||||
m_latencyHud = hud->addItem<hud::HudLatencyItem>("latency", 4);
|
||||
FramePacer* framePacer = dynamic_cast<FramePacer*>(m_latencyTracker.ptr());
|
||||
if (framePacer) {
|
||||
int32_t fpsItemPos = hud->getItemPos<hud::HudFpsItem>();
|
||||
m_renderLatencyHud = hud->addItem<hud::HudRenderLatencyItem>("renderlatency", fpsItemPos+1);
|
||||
}
|
||||
}
|
||||
|
||||
hud->addItem<hud::HudSamplerCount>("samplers", -1, m_parent);
|
||||
hud->addItem<hud::HudFixedFunctionShaders>("ffshaders", -1, m_parent);
|
||||
@ -1112,6 +1123,9 @@ namespace dxvk {
|
||||
}
|
||||
|
||||
m_wctx->presenter->setFrameRateLimit(frameRate, GetActualFrameLatency());
|
||||
FramePacer* framePacer = dynamic_cast<FramePacer*>(m_latencyTracker.ptr());
|
||||
if (framePacer != nullptr)
|
||||
framePacer->setTargetFrameRate(frameRate);
|
||||
m_targetFrameRate = frameRate;
|
||||
}
|
||||
|
||||
|
@ -183,8 +183,9 @@ namespace dxvk {
|
||||
bool m_latencyTracking = false;
|
||||
Rc<DxvkLatencyTracker> m_latencyTracker = nullptr;
|
||||
|
||||
Rc<hud::HudClientApiItem> m_apiHud;
|
||||
Rc<hud::HudLatencyItem> m_latencyHud;
|
||||
Rc<hud::HudClientApiItem> m_apiHud;
|
||||
Rc<hud::HudLatencyItem> m_latencyHud;
|
||||
Rc<hud::HudRenderLatencyItem> m_renderLatencyHud;
|
||||
|
||||
std::optional<VkHdrMetadataEXT> m_hdrMetadata;
|
||||
bool m_unlockAdditionalFormats = false;
|
||||
|
@ -110,7 +110,7 @@ namespace dxvk {
|
||||
void DxvkContext::beginLatencyTracking(
|
||||
const Rc<DxvkLatencyTracker>& tracker,
|
||||
uint64_t frameId) {
|
||||
if (tracker && (!m_latencyTracker || m_latencyTracker == tracker)) {
|
||||
if (tracker && m_latencyTracker != tracker) {
|
||||
tracker->notifyCsRenderBegin(frameId);
|
||||
|
||||
m_latencyTracker = tracker;
|
||||
|
@ -2,6 +2,7 @@
|
||||
#include "dxvk_instance.h"
|
||||
#include "dxvk_latency_builtin.h"
|
||||
#include "dxvk_latency_reflex.h"
|
||||
#include "framepacer/dxvk_framepacer.h"
|
||||
|
||||
namespace dxvk {
|
||||
|
||||
@ -310,13 +311,13 @@ namespace dxvk {
|
||||
Rc<DxvkLatencyTracker> DxvkDevice::createLatencyTracker(
|
||||
const Rc<Presenter>& presenter) {
|
||||
if (m_options.latencySleep == Tristate::False)
|
||||
return nullptr;
|
||||
return new FramePacer(m_options);
|
||||
|
||||
if (m_options.latencySleep == Tristate::Auto) {
|
||||
if (m_features.nvLowLatency2)
|
||||
return new DxvkReflexLatencyTrackerNv(presenter);
|
||||
else
|
||||
return nullptr;
|
||||
return new FramePacer(m_options);
|
||||
}
|
||||
|
||||
return new DxvkBuiltInLatencyTracker(presenter,
|
||||
|
@ -128,6 +128,10 @@ namespace dxvk {
|
||||
virtual void notifyCpuPresentEnd(
|
||||
uint64_t frameId) = 0;
|
||||
|
||||
virtual void notifySubmit() { }
|
||||
virtual void notifyPresent(
|
||||
uint64_t frameId) { }
|
||||
|
||||
/**
|
||||
* \brief Called when a command list is submitted to the GPU
|
||||
*
|
||||
@ -174,6 +178,9 @@ namespace dxvk {
|
||||
virtual void notifyGpuExecutionEnd(
|
||||
uint64_t frameId) = 0;
|
||||
|
||||
virtual void notifyGpuPresentBegin(
|
||||
uint64_t frameId) { }
|
||||
|
||||
/**
|
||||
* \brief Called when presentation of a given frame finishes on the GPU
|
||||
*
|
||||
|
@ -12,12 +12,16 @@ namespace dxvk {
|
||||
useRawSsbo = config.getOption<Tristate>("dxvk.useRawSsbo", Tristate::Auto);
|
||||
hud = config.getOption<std::string>("dxvk.hud", "");
|
||||
tearFree = config.getOption<Tristate>("dxvk.tearFree", Tristate::Auto);
|
||||
latencySleep = config.getOption<Tristate>("dxvk.latencySleep", Tristate::Auto);
|
||||
latencySleep = config.getOption<Tristate>("dxvk.latencySleep", Tristate::False);
|
||||
latencyTolerance = config.getOption<int32_t> ("dxvk.latencyTolerance", 1000);
|
||||
disableNvLowLatency2 = config.getOption<Tristate>("dxvk.disableNvLowLatency2", Tristate::Auto);
|
||||
hideIntegratedGraphics = config.getOption<bool> ("dxvk.hideIntegratedGraphics", false);
|
||||
zeroMappedMemory = config.getOption<bool> ("dxvk.zeroMappedMemory", false);
|
||||
allowFse = config.getOption<bool> ("dxvk.allowFse", false);
|
||||
framePace = config.getOption<std::string>("dxvk.framePace", "");
|
||||
lowLatencyOffset = config.getOption<int32_t> ("dxvk.lowLatencyOffset", 0);
|
||||
lowLatencyAllowCpuFramesOverlap
|
||||
= config.getOption<bool> ("dxvk.lowLatencyAllowCpuFramesOverlap", true);
|
||||
deviceFilter = config.getOption<std::string>("dxvk.deviceFilter", "");
|
||||
tilerMode = config.getOption<Tristate>("dxvk.tilerMode", Tristate::Auto);
|
||||
}
|
||||
|
@ -38,7 +38,9 @@ namespace dxvk {
|
||||
Tristate tearFree = Tristate::Auto;
|
||||
|
||||
/// Enables latency sleep
|
||||
Tristate latencySleep = Tristate::Auto;
|
||||
/// Defaults to false in this build to activate the FramePacer,
|
||||
/// especially for the case when the game doesn't support Reflex
|
||||
Tristate latencySleep = Tristate::False;
|
||||
|
||||
/// Latency tolerance, in microseconds
|
||||
int32_t latencyTolerance = 0u;
|
||||
@ -61,6 +63,18 @@ namespace dxvk {
|
||||
/// Whether to enable tiler optimizations
|
||||
Tristate tilerMode = Tristate::Auto;
|
||||
|
||||
/// Frame pacing
|
||||
std::string framePace;
|
||||
|
||||
/// A value in microseconds to fine-tune the low-latency frame pacing.
|
||||
/// Positive values make a frame begin later which might improve responsiveness.
|
||||
/// Negative values make a frame begin earlier which might improve fps.
|
||||
int32_t lowLatencyOffset;
|
||||
|
||||
/// Determines whether a frame is allowed to begin before finishing processing
|
||||
/// the cpu-part of the previous one, when low-latency frame pacing is used.
|
||||
bool lowLatencyAllowCpuFramesOverlap;
|
||||
|
||||
// Device name
|
||||
std::string deviceFilter;
|
||||
};
|
||||
|
@ -259,18 +259,11 @@ namespace dxvk {
|
||||
return;
|
||||
|
||||
if (m_device->features().khrPresentWait.presentWait) {
|
||||
bool canSignal = false;
|
||||
|
||||
{ std::unique_lock lock(m_frameMutex);
|
||||
|
||||
m_lastSignaled = frameId;
|
||||
canSignal = m_lastCompleted >= frameId;
|
||||
}
|
||||
|
||||
if (canSignal)
|
||||
m_signal->signal(frameId);
|
||||
std::lock_guard lock(m_frameMutex);
|
||||
m_lastSignaled = frameId;
|
||||
m_frameCond.notify_one();
|
||||
} else {
|
||||
m_fpsLimiter.delay();
|
||||
m_fpsLimiter.delay(tracker);
|
||||
m_signal->signal(frameId);
|
||||
|
||||
if (tracker)
|
||||
@ -1210,26 +1203,25 @@ namespace dxvk {
|
||||
void Presenter::runFrameThread() {
|
||||
env::setThreadName("dxvk-frame");
|
||||
|
||||
while (true) {
|
||||
PresenterFrame frame = { };
|
||||
std::unique_lock lock(m_frameMutex);
|
||||
|
||||
while (true) {
|
||||
// Wait for all GPU work for this frame to complete in order to maintain
|
||||
// ordering guarantees of the frame signal w.r.t. objects being released
|
||||
{ std::unique_lock lock(m_frameMutex);
|
||||
m_frameCond.wait(lock, [this] {
|
||||
return !m_frameQueue.empty() && m_frameQueue.front().frameId <= m_lastSignaled;
|
||||
});
|
||||
|
||||
m_frameCond.wait(lock, [this] {
|
||||
return !m_frameQueue.empty();
|
||||
});
|
||||
// Use a frame ID of 0 as an exit condition
|
||||
PresenterFrame frame = m_frameQueue.front();
|
||||
|
||||
// Use a frame ID of 0 as an exit condition
|
||||
frame = m_frameQueue.front();
|
||||
|
||||
if (!frame.frameId) {
|
||||
m_frameQueue.pop();
|
||||
return;
|
||||
}
|
||||
if (!frame.frameId) {
|
||||
m_frameQueue.pop();
|
||||
return;
|
||||
}
|
||||
|
||||
lock.unlock();
|
||||
|
||||
// If the present operation has succeeded, actually wait for it to complete.
|
||||
// Don't bother with it on MAILBOX / IMMEDIATE modes since doing so would
|
||||
// restrict us to the display refresh rate on some platforms (XWayland).
|
||||
@ -1243,32 +1235,24 @@ namespace dxvk {
|
||||
|
||||
// Signal latency tracker right away to get more accurate
|
||||
// measurements if the frame rate limiter is enabled.
|
||||
if (frame.tracker) {
|
||||
if (frame.tracker)
|
||||
frame.tracker->notifyGpuPresentEnd(frame.frameId);
|
||||
frame.tracker = nullptr;
|
||||
}
|
||||
|
||||
// Apply FPS limiter here to align it as closely with scanout as we can,
|
||||
// Apply FPS limiter here to align it as closely with scanout as we can,
|
||||
// and delay signaling the frame latency event to emulate behaviour of a
|
||||
// low refresh rate display as closely as we can.
|
||||
m_fpsLimiter.delay();
|
||||
|
||||
// Wake up any thread that may be waiting for the queue to become empty
|
||||
bool canSignal = false;
|
||||
|
||||
{ std::unique_lock lock(m_frameMutex);
|
||||
|
||||
m_frameQueue.pop();
|
||||
m_frameDrain.notify_one();
|
||||
|
||||
m_lastCompleted = frame.frameId;
|
||||
canSignal = m_lastSignaled >= frame.frameId;
|
||||
}
|
||||
m_fpsLimiter.delay(frame.tracker);
|
||||
frame.tracker = nullptr;
|
||||
|
||||
// Always signal even on error, since failures here
|
||||
// are transparent to the front-end.
|
||||
if (canSignal)
|
||||
m_signal->signal(frame.frameId);
|
||||
m_signal->signal(frame.frameId);
|
||||
|
||||
// Wake up any thread that may be waiting for the queue to become empty
|
||||
lock.lock();
|
||||
|
||||
m_frameQueue.pop();
|
||||
m_frameDrain.notify_one();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -315,7 +315,6 @@ namespace dxvk {
|
||||
std::queue<PresenterFrame> m_frameQueue;
|
||||
|
||||
uint64_t m_lastSignaled = 0u;
|
||||
uint64_t m_lastCompleted = 0u;
|
||||
|
||||
alignas(CACHE_LINE_SIZE)
|
||||
FpsLimiter m_fpsLimiter;
|
||||
|
@ -1,5 +1,6 @@
|
||||
#include "dxvk_device.h"
|
||||
#include "dxvk_queue.h"
|
||||
#include "framepacer/dxvk_framepacer.h"
|
||||
|
||||
namespace dxvk {
|
||||
|
||||
@ -46,6 +47,8 @@ namespace dxvk {
|
||||
DxvkSubmitInfo submitInfo,
|
||||
DxvkLatencyInfo latencyInfo,
|
||||
DxvkSubmitStatus* status) {
|
||||
if (latencyInfo.tracker)
|
||||
latencyInfo.tracker->notifySubmit();
|
||||
std::unique_lock<dxvk::mutex> lock(m_mutex);
|
||||
|
||||
m_finishCond.wait(lock, [this] {
|
||||
@ -66,6 +69,8 @@ namespace dxvk {
|
||||
DxvkPresentInfo presentInfo,
|
||||
DxvkLatencyInfo latencyInfo,
|
||||
DxvkSubmitStatus* status) {
|
||||
if (latencyInfo.tracker)
|
||||
latencyInfo.tracker->notifyPresent(presentInfo.frameId);
|
||||
std::unique_lock<dxvk::mutex> lock(m_mutex);
|
||||
|
||||
DxvkSubmitEntry entry = { };
|
||||
@ -274,7 +279,9 @@ namespace dxvk {
|
||||
} else if (entry.present.presenter != nullptr) {
|
||||
// Signal the frame and then immediately destroy the reference.
|
||||
// This is necessary since the front-end may want to explicitly
|
||||
// destroy the presenter object.
|
||||
// destroy the presenter object.
|
||||
if (entry.latency.tracker)
|
||||
entry.latency.tracker->notifyGpuPresentBegin(entry.present.frameId);
|
||||
entry.present.presenter->signalFrame(entry.present.frameId, entry.latency.tracker);
|
||||
entry.present.presenter = nullptr;
|
||||
}
|
||||
|
64
src/dxvk/framepacer/dxvk_framepacer.cpp
Normal file
64
src/dxvk/framepacer/dxvk_framepacer.cpp
Normal file
@ -0,0 +1,64 @@
|
||||
#include "dxvk_framepacer.h"
|
||||
#include "dxvk_framepacer_mode_low_latency.h"
|
||||
#include "dxvk_framepacer_mode_min_latency.h"
|
||||
#include "dxvk_options.h"
|
||||
#include "../../util/util_env.h"
|
||||
#include "../../util/log/log.h"
|
||||
|
||||
namespace dxvk {
|
||||
|
||||
|
||||
/**
 * \brief Creates the frame pacer and selects its pacing mode
 *
 * The mode is taken from the DXVK_FRAME_PACE environment variable if it
 * names a known mode, otherwise from the framePace option. If neither
 * names a known mode, low-latency is used.
 * \param [in] options Device options
 */
FramePacer::FramePacer( const DxvkOptions& options ) {
  // Maps a configuration string to a pacing mode. Returns false and
  // leaves the output untouched if the string names no known mode.
  auto parseMode = [] (const std::string& str, FramePacerMode::Mode& parsed) {
    if (str.find("max-frame-latency") != std::string::npos) {
      parsed = FramePacerMode::MAX_FRAME_LATENCY;
      return true;
    }

    if (str.find("low-latency") != std::string::npos) {
      parsed = FramePacerMode::LOW_LATENCY;
      return true;
    }

    if (str.find("min-latency") != std::string::npos) {
      parsed = FramePacerMode::MIN_LATENCY;
      return true;
    }

    return false;
  };

  // we'll default to LOW_LATENCY in the draft-PR for now, for demonstration purposes,
  // highlighting the generally much better input lag and medium-term time consistency.
  // although MAX_FRAME_LATENCY has advantages in many games and is likely the better default,
  // for its higher fps throughput and less susceptibility to short-term time inconsistencies.
  // which mode being smoother depends on the game.
  FramePacerMode::Mode mode = FramePacerMode::LOW_LATENCY;

  // The environment variable takes precedence over the config option
  const std::string envStr = env::getEnvVar("DXVK_FRAME_PACE");

  if (!parseMode(envStr, mode))
    parseMode(options.framePace, mode);

  if (mode == FramePacerMode::MAX_FRAME_LATENCY) {
    Logger::info( "Frame pace: max-frame-latency" );
    m_mode = std::make_unique<FramePacerMode>(FramePacerMode::MAX_FRAME_LATENCY, &m_latencyMarkersStorage);
  } else if (mode == FramePacerMode::LOW_LATENCY) {
    Logger::info( "Frame pace: low-latency" );
    m_mode = std::make_unique<LowLatencyMode>(mode, &m_latencyMarkersStorage, options);
  } else if (mode == FramePacerMode::MIN_LATENCY) {
    Logger::info( "Frame pace: min-latency" );
    m_mode = std::make_unique<MinLatencyMode>(mode, &m_latencyMarkersStorage);
  }

  // Clear the per-frame gpu start flags
  for (size_t i = 0; i < m_gpuStarts.size(); i++)
    m_gpuStarts[i].store(0);

  // be consistent that every frame has a gpuReady event from the previous frame
  LatencyMarkers* firstMarkers = m_latencyMarkersStorage.getMarkers(DXGI_MAX_SWAP_CHAIN_BUFFERS+1);
  firstMarkers->gpuReady.push_back(high_resolution_clock::now());
}
|
||||
|
||||
|
||||
// Nothing to release manually; members clean up after themselves.
FramePacer::~FramePacer() = default;
|
||||
|
||||
}
|
191
src/dxvk/framepacer/dxvk_framepacer.h
Normal file
191
src/dxvk/framepacer/dxvk_framepacer.h
Normal file
@ -0,0 +1,191 @@
|
||||
#pragma once
|
||||
|
||||
#include "dxvk_framepacer_mode.h"
|
||||
#include "dxvk_latency_markers.h"
|
||||
#include "../dxvk_latency.h"
|
||||
#include "../../util/util_time.h"
|
||||
#include <dxgi.h>
|
||||
|
||||
|
||||
namespace dxvk {
|
||||
|
||||
struct DxvkOptions;
|
||||
|
||||
/* \brief Frame pacer interface managing the CPU - GPU synchronization.
|
||||
*
|
||||
* GPUs render frames asynchronously to the game's and dxvk's CPU-side work
|
||||
* in order to improve fps-throughput. Aligning the cpu work to chosen time-
|
||||
* points allows tuning certain characteristics of the video presentation,
|
||||
* like smoothness and latency.
|
||||
*/
|
||||
|
||||
class FramePacer : public DxvkLatencyTracker {
|
||||
using microseconds = std::chrono::microseconds;
|
||||
public:
|
||||
|
||||
FramePacer( const DxvkOptions& options );
|
||||
~FramePacer();
|
||||
|
||||
void sleepAndBeginFrame(
|
||||
uint64_t frameId,
|
||||
double maxFrameRate) override {
|
||||
// wait for finished rendering of a previous frame, typically the one before last
|
||||
m_mode->waitRenderFinished(frameId);
|
||||
// potentially wait some more if the cpu gets too much ahead
|
||||
m_mode->startFrame(frameId);
|
||||
m_latencyMarkersStorage.registerFrameStart(frameId);
|
||||
m_gpuStarts[ frameId % m_gpuStarts.size() ].store(0);
|
||||
}
|
||||
|
||||
void notifyGpuPresentEnd( uint64_t frameId ) override {
|
||||
// the frame has been displayed to the screen
|
||||
m_latencyMarkersStorage.registerFrameEnd(frameId);
|
||||
m_mode->endFrame(frameId);
|
||||
}
|
||||
|
||||
void notifyCsRenderBegin( uint64_t frameId ) override {
|
||||
auto now = high_resolution_clock::now();
|
||||
LatencyMarkers* m = m_latencyMarkersStorage.getMarkers(frameId);
|
||||
m->csStart = std::chrono::duration_cast<microseconds>(now - m->start).count();
|
||||
}
|
||||
|
||||
void notifyCsRenderEnd( uint64_t frameId ) override {
|
||||
auto now = high_resolution_clock::now();
|
||||
LatencyMarkers* m = m_latencyMarkersStorage.getMarkers(frameId);
|
||||
m->csFinished = std::chrono::duration_cast<microseconds>(now - m->start).count();
|
||||
m_mode->signalCsFinished( frameId );
|
||||
}
|
||||
|
||||
void notifySubmit() override {
|
||||
LatencyMarkers* m = m_latencyMarkersStorage.getMarkers(m_lastSubmitFrameId+1);
|
||||
m->gpuSubmit.push_back(high_resolution_clock::now());
|
||||
}
|
||||
|
||||
void notifyPresent( uint64_t frameId ) override {
|
||||
// dx to vk translation is finished
|
||||
if (frameId != 0) {
|
||||
auto now = high_resolution_clock::now();
|
||||
m_lastSubmitFrameId = frameId;
|
||||
LatencyMarkers* m = m_latencyMarkersStorage.getMarkers(frameId);
|
||||
LatencyMarkers* next = m_latencyMarkersStorage.getMarkers(frameId+1);
|
||||
m->gpuSubmit.push_back(now);
|
||||
m->cpuFinished = std::chrono::duration_cast<microseconds>(now - m->start).count();
|
||||
next->gpuSubmit.clear();
|
||||
|
||||
m_latencyMarkersStorage.m_timeline.cpuFinished.store(frameId);
|
||||
}
|
||||
}
|
||||
|
||||
void notifyQueueSubmit( uint64_t frameId ) override {
|
||||
assert( frameId == m_lastQueueSubmitFrameId + 1 );
|
||||
auto now = high_resolution_clock::now();
|
||||
LatencyMarkers* m = m_latencyMarkersStorage.getMarkers(frameId);
|
||||
m->gpuQueueSubmit.push_back(now);
|
||||
queueSubmitCheckGpuStart(frameId, m, now);
|
||||
}
|
||||
|
||||
void notifyQueuePresentBegin( uint64_t frameId ) override {
|
||||
if (frameId != 0) {
|
||||
auto now = high_resolution_clock::now();
|
||||
m_lastQueueSubmitFrameId = frameId;
|
||||
LatencyMarkers* m = m_latencyMarkersStorage.getMarkers(frameId);
|
||||
LatencyMarkers* next = m_latencyMarkersStorage.getMarkers(frameId+1);
|
||||
m->gpuQueueSubmit.push_back(now);
|
||||
next->gpuQueueSubmit.clear();
|
||||
queueSubmitCheckGpuStart(frameId, m, now);
|
||||
}
|
||||
}
|
||||
|
||||
void notifyGpuExecutionBegin( uint64_t frameId ) override {
|
||||
assert( frameId == m_lastFinishedFrameId+1 );
|
||||
LatencyMarkers* m = m_latencyMarkersStorage.getMarkers(m_lastFinishedFrameId+1);
|
||||
gpuExecutionCheckGpuStart(frameId, m, high_resolution_clock::now());
|
||||
}
|
||||
|
||||
void notifyGpuExecutionEnd( uint64_t frameId ) override {
|
||||
auto now = high_resolution_clock::now();
|
||||
LatencyMarkers* m = m_latencyMarkersStorage.getMarkers(m_lastFinishedFrameId+1);
|
||||
m->gpuReady.push_back(now);
|
||||
}
|
||||
|
||||
virtual void notifyGpuPresentBegin( uint64_t frameId ) override {
|
||||
// we get frameId == 0 for repeated presents (SyncInterval)
|
||||
if (frameId != 0) {
|
||||
m_lastFinishedFrameId = frameId;
|
||||
auto now = high_resolution_clock::now();
|
||||
|
||||
LatencyMarkers* m = m_latencyMarkersStorage.getMarkers(frameId);
|
||||
LatencyMarkers* next = m_latencyMarkersStorage.getMarkers(frameId+1);
|
||||
m->gpuReady.push_back(now);
|
||||
m->gpuFinished = std::chrono::duration_cast<microseconds>(now - m->start).count();
|
||||
next->gpuReady.clear();
|
||||
next->gpuReady.push_back(now);
|
||||
|
||||
gpuExecutionCheckGpuStart(frameId, m, now);
|
||||
|
||||
m_latencyMarkersStorage.m_timeline.gpuFinished.store(frameId);
|
||||
m_mode->finishRender(frameId);
|
||||
m_mode->signalRenderFinished(frameId);
|
||||
}
|
||||
}
|
||||
|
||||
FramePacerMode::Mode getMode() const {
|
||||
return m_mode->m_mode;
|
||||
}
|
||||
|
||||
void setTargetFrameRate( double frameRate ) {
|
||||
m_mode->setTargetFrameRate(frameRate);
|
||||
}
|
||||
|
||||
bool needsAutoMarkers() override {
|
||||
return true;
|
||||
}
|
||||
|
||||
LatencyMarkersStorage m_latencyMarkersStorage;
|
||||
|
||||
|
||||
// not implemented methods
|
||||
|
||||
|
||||
void notifyCpuPresentBegin( uint64_t frameId) override { }
|
||||
void notifyCpuPresentEnd( uint64_t frameId ) override { }
|
||||
void notifyQueuePresentEnd( uint64_t frameId, VkResult status) override { }
|
||||
void discardTimings() override { }
|
||||
DxvkLatencyStats getStatistics( uint64_t frameId ) override
|
||||
{ return DxvkLatencyStats(); }
|
||||
|
||||
private:
|
||||
|
||||
void signalGpuStart( uint64_t frameId, LatencyMarkers* m, const high_resolution_clock::time_point& t ) {
|
||||
m->gpuStart = std::chrono::duration_cast<microseconds>(t - m->start).count();
|
||||
m_latencyMarkersStorage.m_timeline.gpuStart.store(frameId);
|
||||
m_mode->signalGpuStart(frameId);
|
||||
}
|
||||
|
||||
void queueSubmitCheckGpuStart( uint64_t frameId, LatencyMarkers* m, const high_resolution_clock::time_point& t ) {
|
||||
auto& gpuStart = m_gpuStarts[ frameId % m_gpuStarts.size() ];
|
||||
uint16_t val = gpuStart.fetch_or(queueSubmitBit);
|
||||
if (val == gpuReadyBit)
|
||||
signalGpuStart( frameId, m, t );
|
||||
}
|
||||
|
||||
void gpuExecutionCheckGpuStart( uint64_t frameId, LatencyMarkers* m, const high_resolution_clock::time_point& t ) {
|
||||
auto& gpuStart = m_gpuStarts[ frameId % m_gpuStarts.size() ];
|
||||
uint16_t val = gpuStart.fetch_or(gpuReadyBit);
|
||||
if (val == queueSubmitBit)
|
||||
signalGpuStart( frameId, m, t );
|
||||
}
|
||||
|
||||
std::unique_ptr<FramePacerMode> m_mode;
|
||||
|
||||
uint64_t m_lastSubmitFrameId = { DXGI_MAX_SWAP_CHAIN_BUFFERS };
|
||||
uint64_t m_lastQueueSubmitFrameId = { DXGI_MAX_SWAP_CHAIN_BUFFERS };
|
||||
uint64_t m_lastFinishedFrameId = { DXGI_MAX_SWAP_CHAIN_BUFFERS };
|
||||
|
||||
std::array< std::atomic< uint16_t >, 16 > m_gpuStarts = { };
|
||||
static constexpr uint16_t queueSubmitBit = 1;
|
||||
static constexpr uint16_t gpuReadyBit = 2;
|
||||
|
||||
};
|
||||
|
||||
}
|
117
src/dxvk/framepacer/dxvk_framepacer_mode.h
Normal file
117
src/dxvk/framepacer/dxvk_framepacer_mode.h
Normal file
@ -0,0 +1,117 @@
|
||||
#pragma once
|
||||
|
||||
#include "dxvk_latency_markers.h"
|
||||
#include "../../util/sync/sync_signal.h"
|
||||
#include "../../util/util_env.h"
|
||||
#include <dxgi.h>
|
||||
|
||||
namespace dxvk {
|
||||
|
||||
/*
|
||||
* \brief Base frame pacer mode in order to support different strategies of synchronization.
|
||||
*/
|
||||
|
||||
class FramePacerMode {
|
||||
|
||||
public:
|
||||
|
||||
enum Mode {
|
||||
MAX_FRAME_LATENCY = 0,
|
||||
LOW_LATENCY,
|
||||
MIN_LATENCY
|
||||
};
|
||||
|
||||
FramePacerMode( Mode mode, LatencyMarkersStorage* markerStorage, uint32_t maxFrameLatency=1 )
|
||||
: m_mode( mode ),
|
||||
m_waitLatency( maxFrameLatency+1 ),
|
||||
m_latencyMarkersStorage( markerStorage ) {
|
||||
setFpsLimitFrametimeFromEnv();
|
||||
}
|
||||
|
||||
virtual ~FramePacerMode() { }
|
||||
|
||||
virtual void startFrame( uint64_t frameId ) { }
|
||||
virtual void endFrame( uint64_t frameId ) { }
|
||||
|
||||
virtual void finishRender( uint64_t frameId ) { }
|
||||
|
||||
void waitRenderFinished( uint64_t frameId ) {
|
||||
if (m_mode) m_fenceGpuFinished.wait(frameId-m_waitLatency); }
|
||||
|
||||
void signalRenderFinished( uint64_t frameId ) {
|
||||
if (m_mode) m_fenceGpuFinished.signal(frameId); }
|
||||
|
||||
void signalGpuStart( uint64_t frameId ) {
|
||||
if (m_mode) m_fenceGpuStart.signal(frameId); }
|
||||
|
||||
void signalCsFinished( uint64_t frameId ) {
|
||||
if (m_mode) m_fenceCsFinished.signal(frameId); }
|
||||
|
||||
void setTargetFrameRate( double frameRate ) {
|
||||
if (!m_fpsLimitEnvOverride && frameRate > 1.0)
|
||||
m_fpsLimitFrametime.store( 1'000'000/frameRate );
|
||||
}
|
||||
|
||||
const Mode m_mode;
|
||||
|
||||
static bool getDoubleFromEnv( const char* name, double* result );
|
||||
static bool getIntFromEnv( const char* name, int* result );
|
||||
|
||||
protected:
|
||||
|
||||
void setFpsLimitFrametimeFromEnv();
|
||||
|
||||
const uint32_t m_waitLatency;
|
||||
LatencyMarkersStorage* m_latencyMarkersStorage;
|
||||
std::atomic<int32_t> m_fpsLimitFrametime = { 0 };
|
||||
bool m_fpsLimitEnvOverride = { false };
|
||||
|
||||
sync::Fence m_fenceGpuStart = { sync::Fence(DXGI_MAX_SWAP_CHAIN_BUFFERS) };
|
||||
sync::Fence m_fenceGpuFinished = { sync::Fence(DXGI_MAX_SWAP_CHAIN_BUFFERS) };
|
||||
sync::Fence m_fenceCsFinished = { sync::Fence(DXGI_MAX_SWAP_CHAIN_BUFFERS+50) };
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
||||
inline bool FramePacerMode::getDoubleFromEnv( const char* name, double* result ) {
|
||||
std::string env = env::getEnvVar(name);
|
||||
if (env.empty())
|
||||
return false;
|
||||
|
||||
try {
|
||||
*result = std::stod(env);
|
||||
return true;
|
||||
} catch (const std::invalid_argument&) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
inline bool FramePacerMode::getIntFromEnv( const char* name, int* result ) {
|
||||
std::string env = env::getEnvVar(name);
|
||||
if (env.empty())
|
||||
return false;
|
||||
|
||||
try {
|
||||
*result = std::stoi(env);
|
||||
return true;
|
||||
} catch (const std::invalid_argument&) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
inline void FramePacerMode::setFpsLimitFrametimeFromEnv() {
|
||||
double fpsLimit;
|
||||
if (!getDoubleFromEnv("DXVK_FRAME_RATE", &fpsLimit))
|
||||
return;
|
||||
|
||||
m_fpsLimitEnvOverride = true;
|
||||
if (fpsLimit < 1.0)
|
||||
return;
|
||||
|
||||
m_fpsLimitFrametime = 1'000'000/fpsLimit;
|
||||
}
|
||||
|
||||
}
|
43
src/dxvk/framepacer/dxvk_framepacer_mode_low_latency.cpp
Normal file
43
src/dxvk/framepacer/dxvk_framepacer_mode_low_latency.cpp
Normal file
@ -0,0 +1,43 @@
|
||||
#include "dxvk_framepacer_mode_low_latency.h"
|
||||
|
||||
namespace dxvk {
|
||||
|
||||
|
||||
bool getLowLatencyOffsetFromEnv( int32_t& offset ) {
|
||||
if (!FramePacerMode::getIntFromEnv("DXVK_LOW_LATENCY_OFFSET", &offset))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool getLowLatencyAllowCpuFramesOverlapFromEnv( bool& allowOverlap ) {
|
||||
int32_t o;
|
||||
if (!FramePacerMode::getIntFromEnv("DXVK_LOW_LATENCY_ALLOW_CPU_FRAMES_OVERLAP", &o))
|
||||
return false;
|
||||
allowOverlap = (bool) o;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
int32_t LowLatencyMode::getLowLatencyOffset( const DxvkOptions& options ) {
|
||||
int32_t offset = options.lowLatencyOffset;
|
||||
int32_t o;
|
||||
if (getLowLatencyOffsetFromEnv(o))
|
||||
offset = o;
|
||||
|
||||
offset = std::max( -10000, offset );
|
||||
offset = std::min( 10000, offset );
|
||||
return offset;
|
||||
}
|
||||
|
||||
|
||||
bool LowLatencyMode::getLowLatencyAllowCpuFramesOverlap( const DxvkOptions& options ) {
|
||||
bool allowOverlap = options.lowLatencyAllowCpuFramesOverlap;
|
||||
bool o;
|
||||
if (getLowLatencyAllowCpuFramesOverlapFromEnv(o))
|
||||
allowOverlap = o;
|
||||
return allowOverlap;
|
||||
}
|
||||
|
||||
|
||||
}
|
255
src/dxvk/framepacer/dxvk_framepacer_mode_low_latency.h
Normal file
255
src/dxvk/framepacer/dxvk_framepacer_mode_low_latency.h
Normal file
@ -0,0 +1,255 @@
|
||||
#pragma once
|
||||
|
||||
#include "dxvk_framepacer_mode.h"
|
||||
#include "../dxvk_options.h"
|
||||
#include "../../util/log/log.h"
|
||||
#include "../../util/util_string.h"
|
||||
#include <assert.h>
|
||||
|
||||
namespace dxvk {
|
||||
|
||||
/*
|
||||
* This low-latency mode aims to reduce latency with minimal impact in fps.
|
||||
* Effective when operating in the GPU-limit. Efficient to be used in the CPU-limit as well.
|
||||
*
|
||||
* Greatly reduces input lag variations when switching between CPU- and GPU-limit, and
|
||||
* compared to the max-frame-latency approach, it has a much more stable input lag when
|
||||
* GPU running times change dramatically, which can happen for example when rotating within a scene.
|
||||
*
|
||||
* The current implementation rather generates fluctuations alternating frame-by-frame
|
||||
* depending on the game's and dxvk's CPU-time variations. This might be visible as a loss
|
||||
* in smoothness, which is an area this implementation can be further improved. Unsuitable
|
||||
* smoothing however might degrade input-lag feel, so it's not implemented for now, but
|
||||
* more advanced smoothing techniques will be investigated in the future.
|
||||
* In some situations however, this low-latency pacing actually improves smoothing though,
|
||||
* it will depend on the game.
|
||||
*
|
||||
* An interesting observation while playtesting was that not only the input lag was affected,
|
||||
* but the video generated did progress more cleanly in time as well with regards to
|
||||
* medium-term time consistency, in other words, the video playback speed remained more steady.
|
||||
*
|
||||
* Optimized for VRR and VK_PRESENT_MODE_IMMEDIATE_KHR. It also comes with its own fps-limiter
|
||||
* which is typically used to prevent the game's fps exceeding the monitor's refresh rate,
|
||||
* and which is tightly integrated into the pacing logic.
|
||||
*
|
||||
* Can be fine-tuned via the dxvk.lowLatencyOffset and dxvk.lowLatencyAllowCpuFramesOverlap
|
||||
* variables (or their respective environment variables).
|
||||
* Compared to maxFrameLatency = 3, render-latency reductions of up to 67% are achieved.
|
||||
*/
|
||||
|
||||
class LowLatencyMode : public FramePacerMode {
|
||||
using microseconds = std::chrono::microseconds;
|
||||
using time_point = high_resolution_clock::time_point;
|
||||
public:
|
||||
|
||||
LowLatencyMode(Mode mode, LatencyMarkersStorage* storage, const DxvkOptions& options)
|
||||
: FramePacerMode(mode, storage),
|
||||
m_lowLatencyOffset(getLowLatencyOffset(options)),
|
||||
m_allowCpuFramesOverlap(getLowLatencyAllowCpuFramesOverlap(options)) {
|
||||
Logger::info( str::format("Using lowLatencyOffset: ", m_lowLatencyOffset) );
|
||||
Logger::info( str::format("Using lowLatencyAllowCpuFramesOverlap: ", m_allowCpuFramesOverlap) );
|
||||
}
|
||||
|
||||
~LowLatencyMode() {}
|
||||
|
||||
|
||||
void startFrame( uint64_t frameId ) override {
|
||||
using std::chrono::duration_cast;
|
||||
|
||||
if (!m_allowCpuFramesOverlap)
|
||||
m_fenceCsFinished.wait( frameId-1 );
|
||||
|
||||
m_fenceGpuStart.wait( frameId-1 );
|
||||
|
||||
time_point now = high_resolution_clock::now();
|
||||
uint64_t finishedId = m_latencyMarkersStorage->getTimeline()->gpuFinished.load();
|
||||
if (finishedId <= DXGI_MAX_SWAP_CHAIN_BUFFERS+1ull)
|
||||
return;
|
||||
|
||||
if (finishedId == frameId-1) {
|
||||
// we are the only in-flight frame, nothing to do other then to apply fps-limiter if needed
|
||||
m_lastStart = sleepFor( now, 0 );
|
||||
return;
|
||||
}
|
||||
|
||||
if (finishedId != frameId-2) {
|
||||
Logger::err( str::format("internal error during low-latency frame pacing: expected finished frameId=",
|
||||
frameId-2, ", got: ", finishedId) );
|
||||
}
|
||||
|
||||
const LatencyMarkers* m = m_latencyMarkersStorage->getConstMarkers(frameId-1);
|
||||
|
||||
// estimate the target gpu sync point for this frame
|
||||
// and calculate backwards when we want to start this frame
|
||||
|
||||
const SyncProps props = getSyncPrediction();
|
||||
int32_t gpuReadyPrediction = duration_cast<microseconds>(
|
||||
m->start + microseconds(m->gpuStart+getGpuStartToFinishPrediction()) - now).count();
|
||||
|
||||
int32_t targetGpuSync = gpuReadyPrediction + props.gpuSync;
|
||||
int32_t delay = targetGpuSync - props.cpuUntilGpuSync + m_lowLatencyOffset;
|
||||
|
||||
m_lastStart = sleepFor( now, delay );
|
||||
|
||||
}
|
||||
|
||||
|
||||
void finishRender( uint64_t frameId ) override {
|
||||
|
||||
using std::chrono::duration_cast;
|
||||
const LatencyMarkers* m = m_latencyMarkersStorage->getConstMarkers(frameId);
|
||||
|
||||
int32_t numLoop = (int32_t)(m->gpuReady.size())-1;
|
||||
if (numLoop <= 1) {
|
||||
m_props[frameId % m_props.size()] = SyncProps();
|
||||
m_props[frameId % m_props.size()].isOutlier = true;
|
||||
m_propsFinished.store( frameId );
|
||||
return;
|
||||
}
|
||||
|
||||
// estimates the optimal overlap for cpu/gpu work by optimizing gpu scheduling first
|
||||
// such that the gpu doesn't go into idle for this frame, and then aligning cpu submits
|
||||
// where gpuSubmit[i] <= gpuRun[i] for all i
|
||||
|
||||
std::vector<int32_t>& gpuRun = m_tempGpuRun;
|
||||
std::vector<int32_t>& gpuRunDurations = m_tempGpuRunDurations;
|
||||
gpuRun.clear();
|
||||
gpuRunDurations.clear();
|
||||
int32_t optimizedGpuTime = 0;
|
||||
gpuRun.push_back(optimizedGpuTime);
|
||||
|
||||
for (int i=0; i<numLoop; ++i) {
|
||||
time_point _gpuRun = std::max( m->gpuReady[i], m->gpuQueueSubmit[i] );
|
||||
int32_t duration = duration_cast<microseconds>( m->gpuReady[i+1] - _gpuRun ).count();
|
||||
optimizedGpuTime += duration;
|
||||
gpuRun.push_back(optimizedGpuTime);
|
||||
gpuRunDurations.push_back(duration);
|
||||
}
|
||||
|
||||
int32_t alignment = duration_cast<microseconds>( m->gpuSubmit[numLoop-1] - m->gpuSubmit[0] ).count()
|
||||
- gpuRun[numLoop-1];
|
||||
|
||||
int32_t offset = 0;
|
||||
for (int i=numLoop-2; i>=0; --i) {
|
||||
int32_t curSubmit = duration_cast<microseconds>( m->gpuSubmit[i] - m->gpuSubmit[0] ).count();
|
||||
int32_t diff = curSubmit - gpuRun[i] - alignment;
|
||||
diff = std::max( 0, diff );
|
||||
offset += diff;
|
||||
alignment += diff;
|
||||
}
|
||||
|
||||
|
||||
SyncProps& props = m_props[frameId % m_props.size()];
|
||||
props.gpuSync = gpuRun[numLoop-1];
|
||||
props.cpuUntilGpuSync = offset + duration_cast<microseconds>( m->gpuSubmit[numLoop-1] - m->start ).count();
|
||||
props.optimizedGpuTime = optimizedGpuTime;
|
||||
props.isOutlier = isOutlier(frameId);
|
||||
|
||||
m_propsFinished.store( frameId );
|
||||
|
||||
}
|
||||
|
||||
|
||||
// Sleeps from `t` for `delay` microseconds, extended if needed so that the
// fps limit is respected, and capped to the range [0, 20000] microseconds.
// Returns the time point the sleep was scheduled to end at.
Sleep::TimePoint sleepFor( const Sleep::TimePoint t, int32_t delay ) {
  // time still missing until the fps limiter's frametime has elapsed
  const int32_t sinceLastStart = std::chrono::duration_cast<microseconds>( t - m_lastStart ).count();
  const int32_t limiterDelay = std::max( 0, m_fpsLimitFrametime.load() - sinceLastStart );

  // account for the fps limit and ensure we won't sleep too long, just in case
  int32_t sleepTime = std::max( delay, limiterDelay );
  sleepTime = std::min( sleepTime, 20000 );
  sleepTime = std::max( 0, sleepTime );

  const Sleep::TimePoint nextStart = t + microseconds(sleepTime);
  Sleep::sleepUntil( t, nextStart );
  return nextStart;
}
|
||||
|
||||
|
||||
private:
|
||||
|
||||
// Per-frame timing properties used to predict the next frame's start.
// All members carry default initializers so that default-constructed
// instances (such as the elements of a default-initialized array) never
// hold indeterminate values.
struct SyncProps {
  int32_t optimizedGpuTime = 0;   // gpu executing packed submits in one go
  int32_t gpuSync          = 0;   // us after gpuStart
  int32_t cpuUntilGpuSync  = 0;
  bool    isOutlier        = false;
};
|
||||
|
||||
|
||||
// Returns the properties of the most recent of the last seven finished
// frames that is not an outlier. If all seven are outliers, the latest
// frame's properties are returned. Returns zeroed properties while
// m_propsFinished is still below DXGI_MAX_SWAP_CHAIN_BUFFERS+7.
SyncProps getSyncPrediction() {
  // in the future we might use more samples to get a prediction
  // however, simple averaging gives a slightly artificial mouse input
  // more advanced methods will be investigated
  const uint64_t latestId = m_propsFinished;
  if (latestId < DXGI_MAX_SWAP_CHAIN_BUFFERS+7)
    return SyncProps{};

  for (size_t age=0; age<7; ++age) {
    const SyncProps& candidate = m_props[ (latestId-age) % m_props.size() ];
    if (!candidate.isOutlier)
      return candidate;
  }

  return m_props[ latestId % m_props.size() ];
}
|
||||
|
||||
|
||||
int32_t getGpuStartToFinishPrediction() {
|
||||
uint64_t id = m_propsFinished;
|
||||
if (id < DXGI_MAX_SWAP_CHAIN_BUFFERS+7)
|
||||
return 0;
|
||||
|
||||
for (size_t i=0; i<7; ++i) {
|
||||
const SyncProps& props = m_props[ (id-i) % m_props.size() ];
|
||||
if (!props.isOutlier) {
|
||||
const LatencyMarkers* m = m_latencyMarkersStorage->getConstMarkers(id-i);
|
||||
if (m->gpuReady.empty() || m->gpuSubmit.empty())
|
||||
return m->gpuFinished - m->gpuStart;
|
||||
|
||||
time_point t = std::max( m->gpuReady[0], m->gpuSubmit[0] );
|
||||
return std::chrono::duration_cast<microseconds>( t - m->start ).count()
|
||||
+ props.optimizedGpuTime
|
||||
- m->gpuStart;
|
||||
}
|
||||
}
|
||||
|
||||
const LatencyMarkers* m = m_latencyMarkersStorage->getConstMarkers(id);
|
||||
return m->gpuFinished - m->gpuStart;
|
||||
};
|
||||
|
||||
|
||||
bool isOutlier( uint64_t frameId ) {
|
||||
constexpr size_t numLoop = 7;
|
||||
int32_t totalCpuTime = 0;
|
||||
for (size_t i=0; i<numLoop; ++i) {
|
||||
const LatencyMarkers* m = m_latencyMarkersStorage->getConstMarkers(frameId-i);
|
||||
totalCpuTime += m->cpuFinished;
|
||||
}
|
||||
|
||||
int32_t avgCpuTime = totalCpuTime / numLoop;
|
||||
const LatencyMarkers* m = m_latencyMarkersStorage->getConstMarkers(frameId);
|
||||
if (m->cpuFinished > 1.7*avgCpuTime || m->gpuSubmit.empty() || m->gpuReady.size() != (m->gpuSubmit.size()+1) )
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
int32_t getLowLatencyOffset( const DxvkOptions& options );
|
||||
bool getLowLatencyAllowCpuFramesOverlap( const DxvkOptions& options );
|
||||
|
||||
const int32_t m_lowLatencyOffset;
|
||||
const bool m_allowCpuFramesOverlap;
|
||||
|
||||
Sleep::TimePoint m_lastStart = { high_resolution_clock::now() };
|
||||
std::array<SyncProps, 16> m_props;
|
||||
std::atomic<uint64_t> m_propsFinished = { 0 };
|
||||
|
||||
std::vector<int32_t> m_tempGpuRun;
|
||||
std::vector<int32_t> m_tempGpuRunDurations;
|
||||
|
||||
};
|
||||
|
||||
}
|
45
src/dxvk/framepacer/dxvk_framepacer_mode_min_latency.h
Normal file
45
src/dxvk/framepacer/dxvk_framepacer_mode_min_latency.h
Normal file
@ -0,0 +1,45 @@
|
||||
#pragma once
|
||||
|
||||
#include "dxvk_framepacer_mode.h"
|
||||
|
||||
namespace dxvk {
|
||||
|
||||
/*
|
||||
* Minimal latency is achieved here by waiting for the previous
|
||||
* frame to complete, which results in very much reduced fps.
|
||||
* Generally not recommended, but helpful to get insights to fine-tune
|
||||
* the low-latency mode, and possibly is useful for running games
|
||||
* in the cpu limit.
|
||||
*/
|
||||
|
||||
class MinLatencyMode : public FramePacerMode {
|
||||
|
||||
public:
|
||||
|
||||
MinLatencyMode(Mode mode, LatencyMarkersStorage* storage)
|
||||
: FramePacerMode(mode, storage, 0) {}
|
||||
|
||||
~MinLatencyMode() {}
|
||||
|
||||
void startFrame( uint64_t frameId ) override {
|
||||
|
||||
Sleep::TimePoint now = high_resolution_clock::now();
|
||||
int32_t frametime = std::chrono::duration_cast<std::chrono::microseconds>(
|
||||
now - m_lastStart ).count();
|
||||
int32_t frametimeDiff = std::max( 0, m_fpsLimitFrametime.load() - frametime );
|
||||
int32_t delay = std::max( 0, frametimeDiff );
|
||||
delay = std::min( delay, 20000 );
|
||||
|
||||
Sleep::TimePoint nextStart = now + std::chrono::microseconds(delay);
|
||||
Sleep::sleepUntil( now, nextStart );
|
||||
m_lastStart = nextStart;
|
||||
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
Sleep::TimePoint m_lastStart = { high_resolution_clock::now() };
|
||||
|
||||
};
|
||||
|
||||
}
|
148
src/dxvk/framepacer/dxvk_latency_markers.h
Normal file
148
src/dxvk/framepacer/dxvk_latency_markers.h
Normal file
@ -0,0 +1,148 @@
|
||||
#pragma once
|
||||
|
||||
#include <atomic>
|
||||
#include <dxgi.h>
|
||||
#include <vector>
|
||||
#include <array>
|
||||
#include <assert.h>
|
||||
#include "../../util/util_sleep.h"
|
||||
#include "../../util/log/log.h"
|
||||
#include "../../util/util_string.h"
|
||||
|
||||
|
||||
namespace dxvk {
|
||||
|
||||
class FramePacer;
|
||||
class LatencyMarkersStorage;
|
||||
|
||||
|
||||
struct LatencyMarkers {
|
||||
|
||||
using time_point = high_resolution_clock::time_point;
|
||||
|
||||
time_point start;
|
||||
time_point end;
|
||||
|
||||
int32_t csStart;
|
||||
int32_t csFinished;
|
||||
int32_t cpuFinished;
|
||||
int32_t gpuStart;
|
||||
int32_t gpuFinished;
|
||||
int32_t presentFinished;
|
||||
|
||||
std::vector<time_point> gpuReady;
|
||||
std::vector<time_point> gpuSubmit;
|
||||
std::vector<time_point> gpuQueueSubmit;
|
||||
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
* stores which information is accessible for which frame
|
||||
*/
|
||||
struct LatencyMarkersTimeline {
|
||||
|
||||
std::atomic<uint64_t> cpuFinished = { DXGI_MAX_SWAP_CHAIN_BUFFERS };
|
||||
std::atomic<uint64_t> gpuStart = { DXGI_MAX_SWAP_CHAIN_BUFFERS };
|
||||
std::atomic<uint64_t> gpuFinished = { DXGI_MAX_SWAP_CHAIN_BUFFERS };
|
||||
std::atomic<uint64_t> frameFinished = { DXGI_MAX_SWAP_CHAIN_BUFFERS };
|
||||
|
||||
};
|
||||
|
||||
|
||||
class LatencyMarkersReader {
|
||||
|
||||
public:
|
||||
|
||||
LatencyMarkersReader( const LatencyMarkersStorage* storage, uint32_t numEntries );
|
||||
bool getNext( const LatencyMarkers*& result );
|
||||
|
||||
private:
|
||||
|
||||
const LatencyMarkersStorage* m_storage;
|
||||
uint64_t m_index;
|
||||
|
||||
};
|
||||
|
||||
|
||||
class LatencyMarkersStorage {
|
||||
friend class LatencyMarkersReader;
|
||||
friend class FramePacer;
|
||||
public:
|
||||
|
||||
LatencyMarkersStorage() { }
|
||||
~LatencyMarkersStorage() { }
|
||||
|
||||
LatencyMarkersReader getReader( uint32_t numEntries ) const {
|
||||
return LatencyMarkersReader(this, numEntries);
|
||||
}
|
||||
|
||||
void registerFrameStart( uint64_t frameId ) {
|
||||
if (frameId <= m_timeline.frameFinished.load()) {
|
||||
Logger::warn( str::format("internal error during registerFrameStart: expected frameId=",
|
||||
m_timeline.frameFinished.load()+1, ", got: ", frameId) );
|
||||
}
|
||||
auto now = high_resolution_clock::now();
|
||||
|
||||
LatencyMarkers* markers = getMarkers(frameId);
|
||||
markers->start = now;
|
||||
}
|
||||
|
||||
void registerFrameEnd( uint64_t frameId ) {
|
||||
if (frameId <= m_timeline.frameFinished.load()) {
|
||||
Logger::warn( str::format("internal error during registerFrameEnd: expected frameId=",
|
||||
m_timeline.frameFinished.load()+1, ", got: ", frameId) );
|
||||
}
|
||||
auto now = high_resolution_clock::now();
|
||||
|
||||
LatencyMarkers* markers = getMarkers(frameId);
|
||||
markers->presentFinished = std::chrono::duration_cast<std::chrono::microseconds>(
|
||||
now - markers->start).count();
|
||||
markers->end = now;
|
||||
|
||||
m_timeline.frameFinished.store(frameId);
|
||||
}
|
||||
|
||||
const LatencyMarkersTimeline* getTimeline() const {
|
||||
return &m_timeline;
|
||||
}
|
||||
|
||||
const LatencyMarkers* getConstMarkers( uint64_t frameId ) const {
|
||||
return &m_markers[frameId % m_numMarkers];
|
||||
}
|
||||
|
||||
|
||||
private:
|
||||
|
||||
LatencyMarkers* getMarkers( uint64_t frameId ) {
|
||||
return &m_markers[frameId % m_numMarkers];
|
||||
}
|
||||
|
||||
// simple modulo hash mapping is used for frameIds. They are expected to monotonically increase by one.
|
||||
// select the size large enough, so we never come into a situation where the reader cannot keep up with the producer
|
||||
static constexpr uint16_t m_numMarkers = 128;
|
||||
std::array<LatencyMarkers, m_numMarkers> m_markers = { };
|
||||
LatencyMarkersTimeline m_timeline;
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
||||
// Starts `numEntries` frames before the last finished frame. While fewer
// than numEntries + DXGI_MAX_SWAP_CHAIN_BUFFERS + 3 frames have finished,
// the index stays 0 and the reader yields nothing.
inline LatencyMarkersReader::LatencyMarkersReader( const LatencyMarkersStorage* storage, uint32_t numEntries )
: m_storage(storage), m_index(0) {
  const std::atomic<uint64_t>& frameFinished = m_storage->m_timeline.frameFinished;

  if (frameFinished.load() > numEntries + DXGI_MAX_SWAP_CHAIN_BUFFERS + 2)
    m_index = frameFinished.load() - numEntries;
}
|
||||
|
||||
|
||||
// Writes the markers of the current frame to `result` and advances by one.
// Returns false, leaving `result` untouched, if the reader was created
// without entries or has moved past the last finished frame.
inline bool LatencyMarkersReader::getNext( const LatencyMarkers*& result ) {
  const bool exhausted = m_index == 0
    || m_index > m_storage->m_timeline.frameFinished.load();

  if (exhausted)
    return false;

  const uint64_t slot = m_index % m_storage->m_numMarkers;
  result = &m_storage->m_markers[slot];
  ++m_index;
  return true;
}
|
||||
|
||||
}
|
@ -59,6 +59,11 @@ namespace dxvk::hud {
|
||||
Rc<T> addItem(const char* name, int32_t at, Args... args) {
|
||||
return m_hudItems.add<T>(name, at, std::forward<Args>(args)...);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
int32_t getItemPos() {
|
||||
return m_hudItems.getItemPos<T>();
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Creates the HUD
|
||||
|
@ -1,4 +1,5 @@
|
||||
#include "dxvk_hud_item.h"
|
||||
#include "../framepacer/dxvk_framepacer.h"
|
||||
|
||||
#include <hud_chunk_frag_background.h>
|
||||
#include <hud_chunk_frag_visualize.h>
|
||||
@ -213,6 +214,63 @@ namespace dxvk::hud {
|
||||
}
|
||||
|
||||
|
||||
// Nothing to set up or tear down explicitly.
HudRenderLatencyItem::HudRenderLatencyItem() {
}

HudRenderLatencyItem::~HudRenderLatencyItem() {
}
|
||||
|
||||
// Refreshes the displayed render latency.
//
// Shows "N/A" while the fps limiter is active, does nothing if the tracker
// is not a FramePacer, and otherwise recomputes the text at most once per
// UpdateInterval as the average presentFinished of up to 100 recent frames.
void HudRenderLatencyItem::update(dxvk::high_resolution_clock::time_point time) {
  // we cannot measure latency when fps-limiting is performed in Presenter::runFrameThread()
  // because it's interfering with getting the right timestamp from vkWaitForPresent()
  // if we truly wanted to measure it, we would need one additional thread
  if (FpsLimiter::m_isActive) {
    m_latency = "N/A";
    return;
  }

  const Rc<DxvkLatencyTracker> tracker = m_tracker;
  const FramePacer* framePacer = dynamic_cast<FramePacer*>( tracker.ptr() );
  if (!framePacer)
    return;

  const auto sinceLastUpdate = std::chrono::duration_cast<std::chrono::microseconds>(time - m_lastUpdate);
  if (sinceLastUpdate.count() < UpdateInterval)
    return;

  m_lastUpdate = time;

  // accumulate presentFinished (microseconds) over the frames the reader yields
  LatencyMarkersReader reader = framePacer->m_latencyMarkersStorage.getReader(100);
  const LatencyMarkers* frame;
  uint64_t totalLatency = 0;
  uint32_t count = 0;

  while (reader.getNext(frame)) {
    totalLatency += frame->presentFinished;
    ++count;
  }

  if (!count)
    return;

  // print as milliseconds with a single decimal place
  const uint64_t latency = totalLatency / count;
  m_latency = str::format(latency / 1000, ".", (latency/100) % 10, " ms");
}
|
||||
|
||||
|
||||
// Draws the label and, to the right of it, the latency text.
// Returns the position advanced past this item.
HudPos HudRenderLatencyItem::render(
  const DxvkContextObjects& ctx,
  const HudPipelineKey&     key,
  const HudOptions&         options,
        HudRenderer&        renderer,
        HudPos              position) {
  position.y += 12;

  const HudPos valuePosition = { position.x + 195, position.y };

  renderer.drawText(16, position, 0xff4040ffu, "Render latency:");
  renderer.drawText(16, valuePosition, 0xffffffffu, m_latency);

  position.y += 8;
  return position;
}
|
||||
|
||||
|
||||
HudFrameTimeItem::HudFrameTimeItem(const Rc<DxvkDevice>& device, HudRenderer* renderer)
|
||||
: m_device (device),
|
||||
m_gfxSetLayout (createDescriptorSetLayout()),
|
||||
|
@ -131,6 +131,15 @@ namespace dxvk::hud {
|
||||
return value;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
int32_t getItemPos() {
|
||||
for (int i=0; i<(int)m_items.size(); ++i) {
|
||||
if (dynamic_cast<T*>(m_items[i].ptr()))
|
||||
return i;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
bool m_enableFull = false;
|
||||
@ -244,6 +253,42 @@ namespace dxvk::hud {
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* \brief HUD item to display render latency
|
||||
*/
|
||||
class HudRenderLatencyItem : public HudItem {
|
||||
constexpr static int64_t UpdateInterval = 500'000;
|
||||
public:
|
||||
|
||||
HudRenderLatencyItem();
|
||||
|
||||
~HudRenderLatencyItem();
|
||||
|
||||
void updateLatencyTracker( const Rc<DxvkLatencyTracker>& tracker ) {
|
||||
m_tracker = tracker;
|
||||
}
|
||||
|
||||
void update(dxvk::high_resolution_clock::time_point time);
|
||||
|
||||
HudPos render(
|
||||
const DxvkContextObjects& ctx,
|
||||
const HudPipelineKey& key,
|
||||
const HudOptions& options,
|
||||
HudRenderer& renderer,
|
||||
HudPos position);
|
||||
|
||||
private:
|
||||
|
||||
Rc<DxvkLatencyTracker> m_tracker;
|
||||
|
||||
dxvk::high_resolution_clock::time_point m_lastUpdate
|
||||
= dxvk::high_resolution_clock::now();
|
||||
|
||||
std::string m_latency;
|
||||
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* \brief HUD item to display the frame rate
|
||||
*/
|
||||
|
@ -120,6 +120,9 @@ dxvk_src = [
|
||||
'hud/dxvk_hud_font.cpp',
|
||||
'hud/dxvk_hud_item.cpp',
|
||||
'hud/dxvk_hud_renderer.cpp',
|
||||
|
||||
'framepacer/dxvk_framepacer.cpp',
|
||||
'framepacer/dxvk_framepacer_mode_low_latency.cpp',
|
||||
]
|
||||
|
||||
if platform == 'windows'
|
||||
|
@ -5,12 +5,15 @@
|
||||
#include "util_fps_limiter.h"
|
||||
#include "util_sleep.h"
|
||||
#include "util_string.h"
|
||||
#include "../dxvk/framepacer/dxvk_framepacer.h"
|
||||
|
||||
#include "./log/log.h"
|
||||
|
||||
using namespace std::chrono_literals;
|
||||
|
||||
namespace dxvk {
|
||||
|
||||
std::atomic<bool> FpsLimiter::m_isActive = { false };
|
||||
|
||||
FpsLimiter::FpsLimiter() {
|
||||
auto override = getEnvironmentOverride();
|
||||
@ -48,7 +51,12 @@ namespace dxvk {
|
||||
}
|
||||
|
||||
|
||||
void FpsLimiter::delay() {
|
||||
void FpsLimiter::delay(const Rc<DxvkLatencyTracker>& tracker) {
|
||||
FramePacer* framePacer = dynamic_cast<FramePacer*>(tracker.ptr());
|
||||
if (framePacer && framePacer->getMode()) {
|
||||
return;
|
||||
}
|
||||
|
||||
std::unique_lock<dxvk::mutex> lock(m_mutex);
|
||||
auto interval = m_targetInterval;
|
||||
auto latency = m_maxLatency;
|
||||
@ -71,8 +79,11 @@ namespace dxvk {
|
||||
// that can be written by setTargetFrameRate
|
||||
lock.unlock();
|
||||
|
||||
if (t1 < m_nextFrame)
|
||||
m_isActive.store(false);
|
||||
if (t1 < m_nextFrame) {
|
||||
m_isActive.store(true);
|
||||
Sleep::sleepUntil(t1, m_nextFrame);
|
||||
}
|
||||
|
||||
m_nextFrame = (t1 < m_nextFrame + interval)
|
||||
? m_nextFrame + interval
|
||||
|
@ -7,6 +7,8 @@
|
||||
#include "util_time.h"
|
||||
|
||||
namespace dxvk {
|
||||
|
||||
class DxvkLatencyTracker;
|
||||
|
||||
/**
|
||||
* \brief Frame rate limiter
|
||||
@ -38,7 +40,7 @@ namespace dxvk {
|
||||
* and the time since the last call to \ref delay is
|
||||
* shorter than the target interval.
|
||||
*/
|
||||
void delay();
|
||||
void delay(const Rc<DxvkLatencyTracker>& tracker);
|
||||
|
||||
/**
|
||||
* \brief Queries environment override
|
||||
@ -46,6 +48,8 @@ namespace dxvk {
|
||||
*/
|
||||
static std::optional<double> getEnvironmentOverride();
|
||||
|
||||
static std::atomic<bool> m_isActive;
|
||||
|
||||
private:
|
||||
|
||||
using TimePoint = dxvk::high_resolution_clock::time_point;
|
||||
|
Loading…
x
Reference in New Issue
Block a user