mirror of
https://github.com/doitsujin/dxvk.git
synced 2025-02-24 04:54:14 +01:00
[dxvk] Implement Reflex latency tracker
This commit is contained in:
parent
d1c33c3328
commit
95e2635397
@ -1,6 +1,7 @@
|
|||||||
#include "dxvk_device.h"
|
#include "dxvk_device.h"
|
||||||
#include "dxvk_instance.h"
|
#include "dxvk_instance.h"
|
||||||
#include "dxvk_latency_builtin.h"
|
#include "dxvk_latency_builtin.h"
|
||||||
|
#include "dxvk_latency_reflex.h"
|
||||||
|
|
||||||
namespace dxvk {
|
namespace dxvk {
|
||||||
|
|
||||||
@ -308,9 +309,16 @@ namespace dxvk {
|
|||||||
|
|
||||||
Rc<DxvkLatencyTracker> DxvkDevice::createLatencyTracker(
|
Rc<DxvkLatencyTracker> DxvkDevice::createLatencyTracker(
|
||||||
const Rc<Presenter>& presenter) {
|
const Rc<Presenter>& presenter) {
|
||||||
if (m_options.latencySleep != Tristate::True)
|
if (m_options.latencySleep == Tristate::False)
|
||||||
return nullptr;
|
return nullptr;
|
||||||
|
|
||||||
|
if (m_options.latencySleep == Tristate::Auto) {
|
||||||
|
if (m_features.nvLowLatency2)
|
||||||
|
return new DxvkReflexLatencyTrackerNv(presenter);
|
||||||
|
else
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
return new DxvkBuiltInLatencyTracker(presenter,
|
return new DxvkBuiltInLatencyTracker(presenter,
|
||||||
m_options.latencyTolerance, m_features.nvLowLatency2);
|
m_options.latencyTolerance, m_features.nvLowLatency2);
|
||||||
}
|
}
|
||||||
|
@ -22,6 +22,36 @@ namespace dxvk {
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Timings for a single tracked frame
|
||||||
|
*/
|
||||||
|
struct DxvkLatencyFrameData {
|
||||||
|
using time_point = dxvk::high_resolution_clock::time_point;
|
||||||
|
using duration = dxvk::high_resolution_clock::duration;
|
||||||
|
|
||||||
|
uint64_t frameId = 0u;
|
||||||
|
uint64_t appFrameId = 0u;
|
||||||
|
time_point frameStart = time_point();
|
||||||
|
time_point frameEnd = time_point();
|
||||||
|
time_point cpuInputSample = time_point();
|
||||||
|
time_point cpuSimBegin = time_point();
|
||||||
|
time_point cpuSimEnd = time_point();
|
||||||
|
time_point cpuRenderBegin = time_point();
|
||||||
|
time_point cpuRenderEnd = time_point();
|
||||||
|
time_point cpuPresentBegin = time_point();
|
||||||
|
time_point cpuPresentEnd = time_point();
|
||||||
|
time_point queueSubmit = time_point();
|
||||||
|
time_point queuePresent = time_point();
|
||||||
|
time_point gpuExecStart = time_point();
|
||||||
|
time_point gpuExecEnd = time_point();
|
||||||
|
time_point gpuIdleStart = time_point();
|
||||||
|
time_point gpuIdleEnd = time_point();
|
||||||
|
duration gpuIdleTime = duration(0u);
|
||||||
|
duration sleepDuration = duration(0u);
|
||||||
|
VkResult presentStatus = VK_NOT_READY;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* \brief Latency tracker
|
* \brief Latency tracker
|
||||||
*
|
*
|
||||||
|
@ -16,30 +16,6 @@
|
|||||||
|
|
||||||
namespace dxvk {
|
namespace dxvk {
|
||||||
|
|
||||||
/**
|
|
||||||
* \brief Timings for a single tracked frame
|
|
||||||
*/
|
|
||||||
struct DxvkLatencyFrameData {
|
|
||||||
using time_point = dxvk::high_resolution_clock::time_point;
|
|
||||||
using duration = dxvk::high_resolution_clock::duration;
|
|
||||||
|
|
||||||
uint64_t frameId = 0u;
|
|
||||||
time_point frameStart = time_point();
|
|
||||||
time_point frameEnd = time_point();
|
|
||||||
time_point cpuPresentBegin = time_point();
|
|
||||||
time_point cpuPresentEnd = time_point();
|
|
||||||
time_point queueSubmit = time_point();
|
|
||||||
time_point queuePresent = time_point();
|
|
||||||
time_point gpuExecStart = time_point();
|
|
||||||
time_point gpuExecEnd = time_point();
|
|
||||||
time_point gpuIdleStart = time_point();
|
|
||||||
time_point gpuIdleEnd = time_point();
|
|
||||||
duration gpuIdleTime = duration(0u);
|
|
||||||
duration sleepDuration = duration(0u);
|
|
||||||
VkResult presentStatus = VK_NOT_READY;
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* \brief Built-in latency tracker
|
* \brief Built-in latency tracker
|
||||||
*
|
*
|
||||||
|
484
src/dxvk/dxvk_latency_reflex.cpp
Normal file
484
src/dxvk/dxvk_latency_reflex.cpp
Normal file
@ -0,0 +1,484 @@
|
|||||||
|
#include "dxvk_latency_reflex.h"
|
||||||
|
|
||||||
|
namespace dxvk {
|
||||||
|
|
||||||
|
DxvkReflexLatencyTrackerNv::DxvkReflexLatencyTrackerNv(
|
||||||
|
const Rc<Presenter>& presenter)
|
||||||
|
: m_presenter(presenter) {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
DxvkReflexLatencyTrackerNv::~DxvkReflexLatencyTrackerNv() {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
bool DxvkReflexLatencyTrackerNv::needsAutoMarkers() {
|
||||||
|
// In markerless mode we want to avoid submitting
|
||||||
|
// any markers at all and ignore the context
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void DxvkReflexLatencyTrackerNv::notifyCpuPresentBegin(
|
||||||
|
uint64_t frameId) {
|
||||||
|
std::lock_guard lock(m_mutex);
|
||||||
|
|
||||||
|
if (m_lastPresentAppFrameId) {
|
||||||
|
uint64_t expectedFrameId = lookupFrameId(m_lastPresentAppFrameId);
|
||||||
|
|
||||||
|
if (frameId != expectedFrameId) {
|
||||||
|
// This is a normal occurence after a swapchain recreation, or if
|
||||||
|
// tracking got reset for any reason. Remap the current app frame
|
||||||
|
// to the current internal frame, and map any app frames with a
|
||||||
|
// higher frame ID to subsequent frame IDs in order to fix the
|
||||||
|
// mapping; we should catch up within a few frames.
|
||||||
|
Logger::warn(str::format("Reflex: Expected internal frame ID ",
|
||||||
|
expectedFrameId, " for ", m_lastPresentAppFrameId, ", got ", frameId));
|
||||||
|
|
||||||
|
uint64_t nextAppFrameId = m_lastPresentAppFrameId;
|
||||||
|
uint64_t nextDxvkFrameId = frameId;
|
||||||
|
|
||||||
|
auto entry = m_appToDxvkFrameIds.find(nextAppFrameId);
|
||||||
|
|
||||||
|
while (entry != m_appToDxvkFrameIds.end()) {
|
||||||
|
nextAppFrameId = entry->first;
|
||||||
|
|
||||||
|
mapFrameId(nextAppFrameId, nextDxvkFrameId++);
|
||||||
|
|
||||||
|
entry = m_appToDxvkFrameIds.upper_bound(nextAppFrameId);
|
||||||
|
}
|
||||||
|
|
||||||
|
m_nextAllocFrameId = nextDxvkFrameId;
|
||||||
|
m_nextValidFrameId = nextDxvkFrameId + 1u;
|
||||||
|
}
|
||||||
|
|
||||||
|
m_lowLatencyNoMarkers = false;
|
||||||
|
} else if (m_lowLatencyMode) {
|
||||||
|
// Game seemingly doesn't use markers?
|
||||||
|
if (!m_lowLatencyNoMarkers) {
|
||||||
|
Logger::warn("Reflex: No latency markers provided");
|
||||||
|
m_lowLatencyNoMarkers = true;
|
||||||
|
reset();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update sleep duration since we haven't had the chance yet
|
||||||
|
auto& frame = getFrameData(frameId);
|
||||||
|
frame.sleepDuration = m_lastSleepDuration;
|
||||||
|
|
||||||
|
m_lastSleepDuration = duration(0u);
|
||||||
|
}
|
||||||
|
|
||||||
|
m_lastPresentAppFrameId = 0u;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void DxvkReflexLatencyTrackerNv::notifyCpuPresentEnd(
|
||||||
|
uint64_t frameId) {
|
||||||
|
std::lock_guard lock(m_mutex);
|
||||||
|
m_lastPresentQueued = frameId;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void DxvkReflexLatencyTrackerNv::notifyCsRenderBegin(
|
||||||
|
uint64_t frameId) {
|
||||||
|
std::lock_guard lock(m_mutex);
|
||||||
|
auto& frame = getFrameData(frameId);
|
||||||
|
|
||||||
|
if (frame.appFrameId && frameId >= m_nextValidFrameId)
|
||||||
|
m_presenter->setLatencyMarkerNv(frameId, VK_LATENCY_MARKER_RENDERSUBMIT_START_NV);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void DxvkReflexLatencyTrackerNv::notifyCsRenderEnd(
|
||||||
|
uint64_t frameId) {
|
||||||
|
std::lock_guard lock(m_mutex);
|
||||||
|
auto& frame = getFrameData(frameId);
|
||||||
|
|
||||||
|
if (frame.appFrameId && frameId >= m_nextValidFrameId)
|
||||||
|
m_presenter->setLatencyMarkerNv(frameId, VK_LATENCY_MARKER_RENDERSUBMIT_END_NV);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void DxvkReflexLatencyTrackerNv::notifyQueueSubmit(
|
||||||
|
uint64_t frameId) {
|
||||||
|
std::lock_guard lock(m_mutex);
|
||||||
|
auto& frame = getFrameData(frameId);
|
||||||
|
|
||||||
|
if (frame.queueSubmit == time_point())
|
||||||
|
frame.queueSubmit = dxvk::high_resolution_clock::now();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void DxvkReflexLatencyTrackerNv::notifyQueuePresentBegin(
|
||||||
|
uint64_t frameId) {
|
||||||
|
std::lock_guard lock(m_mutex);
|
||||||
|
auto& frame = getFrameData(frameId);
|
||||||
|
|
||||||
|
if (frame.appFrameId && frameId >= m_nextValidFrameId)
|
||||||
|
m_presenter->setLatencyMarkerNv(frameId, VK_LATENCY_MARKER_PRESENT_START_NV);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void DxvkReflexLatencyTrackerNv::notifyQueuePresentEnd(
|
||||||
|
uint64_t frameId,
|
||||||
|
VkResult status) {
|
||||||
|
std::lock_guard lock(m_mutex);
|
||||||
|
auto& frame = getFrameData(frameId);
|
||||||
|
|
||||||
|
if (frame.appFrameId && frameId >= m_nextValidFrameId) {
|
||||||
|
frame.queuePresent = m_presenter->setLatencyMarkerNv(frameId, VK_LATENCY_MARKER_PRESENT_END_NV);
|
||||||
|
frame.presentStatus = status;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ignore errors or we might never wake up a waiting thread
|
||||||
|
m_lastPresentComplete = frameId;
|
||||||
|
|
||||||
|
m_cond.notify_all();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void DxvkReflexLatencyTrackerNv::notifyGpuExecutionBegin(
|
||||||
|
uint64_t frameId) {
|
||||||
|
std::lock_guard lock(m_mutex);
|
||||||
|
|
||||||
|
auto now = dxvk::high_resolution_clock::now();
|
||||||
|
|
||||||
|
auto& frame = getFrameData(frameId);
|
||||||
|
frame.gpuIdleEnd = now;
|
||||||
|
|
||||||
|
if (frame.gpuExecStart == time_point())
|
||||||
|
frame.gpuExecStart = now;
|
||||||
|
|
||||||
|
if (frame.gpuIdleStart != time_point())
|
||||||
|
frame.gpuIdleTime += frame.gpuIdleEnd - frame.gpuIdleStart;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void DxvkReflexLatencyTrackerNv::notifyGpuExecutionEnd(
|
||||||
|
uint64_t frameId) {
|
||||||
|
std::lock_guard lock(m_mutex);
|
||||||
|
|
||||||
|
auto now = dxvk::high_resolution_clock::now();
|
||||||
|
|
||||||
|
auto& frame = getFrameData(frameId);
|
||||||
|
frame.gpuExecEnd = now;
|
||||||
|
frame.gpuIdleStart = now;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void DxvkReflexLatencyTrackerNv::notifyGpuPresentEnd(
|
||||||
|
uint64_t frameId) {
|
||||||
|
std::lock_guard lock(m_mutex);
|
||||||
|
|
||||||
|
auto& frame = getFrameData(frameId);
|
||||||
|
frame.frameEnd = dxvk::high_resolution_clock::now();
|
||||||
|
|
||||||
|
m_lastCompletedFrameId = frameId;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void DxvkReflexLatencyTrackerNv::sleepAndBeginFrame(
|
||||||
|
uint64_t frameId,
|
||||||
|
double maxFrameRate) {
|
||||||
|
std::lock_guard lock(m_mutex);
|
||||||
|
m_lastNoMarkerFrameId = frameId;
|
||||||
|
|
||||||
|
if (m_lowLatencyMode) {
|
||||||
|
auto& frame = getFrameData(frameId);
|
||||||
|
frame.frameStart = dxvk::high_resolution_clock::now();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void DxvkReflexLatencyTrackerNv::discardTimings() {
|
||||||
|
std::lock_guard lock(m_mutex);
|
||||||
|
reset();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
DxvkLatencyStats DxvkReflexLatencyTrackerNv::getStatistics(
|
||||||
|
uint64_t frameId) {
|
||||||
|
std::lock_guard lock(m_mutex);
|
||||||
|
|
||||||
|
if (!m_lastCompletedFrameId)
|
||||||
|
return DxvkLatencyStats();
|
||||||
|
|
||||||
|
auto& frame = getFrameData(m_lastCompletedFrameId);
|
||||||
|
|
||||||
|
if (frame.frameEnd == time_point())
|
||||||
|
return DxvkLatencyStats();
|
||||||
|
|
||||||
|
time_point frameStart = frame.cpuSimBegin;
|
||||||
|
|
||||||
|
if (frame.cpuInputSample != time_point())
|
||||||
|
frameStart = frame.cpuInputSample;
|
||||||
|
|
||||||
|
if (frameStart == time_point())
|
||||||
|
frameStart = frame.frameStart;
|
||||||
|
|
||||||
|
if (frameStart == time_point())
|
||||||
|
return DxvkLatencyStats();
|
||||||
|
|
||||||
|
DxvkLatencyStats stats = { };
|
||||||
|
stats.frameLatency = std::chrono::duration_cast<std::chrono::microseconds>(frame.frameEnd - frameStart);
|
||||||
|
stats.sleepDuration = std::chrono::duration_cast<std::chrono::microseconds>(frame.sleepDuration);
|
||||||
|
return stats;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void DxvkReflexLatencyTrackerNv::setLatencySleepMode(
|
||||||
|
bool enableLowLatency,
|
||||||
|
bool enableBoost,
|
||||||
|
uint64_t minIntervalUs) {
|
||||||
|
if (m_lowLatencyMode != enableLowLatency)
|
||||||
|
Logger::info(str::format("Reflex: Low latency mode ", enableLowLatency ? "enabled" : "disabled"));
|
||||||
|
|
||||||
|
VkLatencySleepModeInfoNV modeInfo = { VK_STRUCTURE_TYPE_LATENCY_SLEEP_MODE_INFO_NV };
|
||||||
|
modeInfo.lowLatencyMode = enableLowLatency;
|
||||||
|
modeInfo.lowLatencyBoost = enableBoost;
|
||||||
|
modeInfo.minimumIntervalUs = minIntervalUs;
|
||||||
|
|
||||||
|
m_presenter->setLatencySleepModeNv(modeInfo);
|
||||||
|
|
||||||
|
m_lowLatencyMode = enableLowLatency;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void DxvkReflexLatencyTrackerNv::setLatencyMarker(
|
||||||
|
uint64_t appFrameId,
|
||||||
|
VkLatencyMarkerNV marker) {
|
||||||
|
std::lock_guard lock(m_mutex);
|
||||||
|
|
||||||
|
// Find frame ID. If this is the first marker in a new frame,
|
||||||
|
// try to map it to a new internal frame ID.
|
||||||
|
uint64_t frameId = lookupFrameId(appFrameId);
|
||||||
|
|
||||||
|
if (!frameId && (marker == VK_LATENCY_MARKER_SIMULATION_START_NV
|
||||||
|
|| marker == VK_LATENCY_MARKER_INPUT_SAMPLE_NV))
|
||||||
|
frameId = allocateFrameId(appFrameId);
|
||||||
|
|
||||||
|
// This can hapen if we reset tracking state and receive
|
||||||
|
// a stray present or render submit marker. Ignore these
|
||||||
|
// so that the next presents can recalibrate properly.
|
||||||
|
if (!frameId)
|
||||||
|
return;
|
||||||
|
|
||||||
|
// We use present markers to correlate app frame IDs
|
||||||
|
// with internal frame IDs, so always write this back.
|
||||||
|
if (marker == VK_LATENCY_MARKER_PRESENT_START_NV)
|
||||||
|
m_lastPresentAppFrameId = appFrameId;
|
||||||
|
|
||||||
|
// Don't submit markers for invalid frames since
|
||||||
|
// that could potentially confuse the algorithm
|
||||||
|
if (frameId < m_nextValidFrameId)
|
||||||
|
return;
|
||||||
|
|
||||||
|
auto& frame = getFrameData(frameId);
|
||||||
|
|
||||||
|
switch (marker) {
|
||||||
|
case VK_LATENCY_MARKER_INPUT_SAMPLE_NV:
|
||||||
|
frame.cpuInputSample = m_presenter->setLatencyMarkerNv(frameId, marker);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case VK_LATENCY_MARKER_SIMULATION_START_NV:
|
||||||
|
frame.cpuSimBegin = m_presenter->setLatencyMarkerNv(frameId, marker);
|
||||||
|
|
||||||
|
if (m_lastSleepDuration != duration(0u))
|
||||||
|
frame.sleepDuration = std::exchange(m_lastSleepDuration, duration(0u));
|
||||||
|
break;
|
||||||
|
|
||||||
|
case VK_LATENCY_MARKER_SIMULATION_END_NV:
|
||||||
|
frame.cpuSimEnd = m_presenter->setLatencyMarkerNv(frameId, marker);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case VK_LATENCY_MARKER_RENDERSUBMIT_START_NV:
|
||||||
|
frame.cpuRenderBegin = dxvk::high_resolution_clock::now();
|
||||||
|
break;
|
||||||
|
|
||||||
|
case VK_LATENCY_MARKER_RENDERSUBMIT_END_NV:
|
||||||
|
frame.cpuRenderEnd = dxvk::high_resolution_clock::now();
|
||||||
|
break;
|
||||||
|
|
||||||
|
case VK_LATENCY_MARKER_PRESENT_START_NV:
|
||||||
|
frame.cpuPresentBegin = dxvk::high_resolution_clock::now();
|
||||||
|
break;
|
||||||
|
|
||||||
|
case VK_LATENCY_MARKER_PRESENT_END_NV:
|
||||||
|
frame.cpuPresentEnd = dxvk::high_resolution_clock::now();
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
Logger::warn(str::format("Reflex: Unknown marker ", marker));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void DxvkReflexLatencyTrackerNv::latencySleep() {
|
||||||
|
{ std::unique_lock lock(m_mutex);
|
||||||
|
// If the app doesn't use markers, wait for the previous present
|
||||||
|
// call to complete so that we don't confuse the algorithm by
|
||||||
|
// sleeping at random times relative to actual graphics work.
|
||||||
|
if (m_lowLatencyNoMarkers) {
|
||||||
|
m_cond.wait(lock, [this] {
|
||||||
|
return m_lastPresentComplete >= m_lastPresentQueued;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Actually sleep and write back sleep duration for the next frame
|
||||||
|
auto sleepDuration = m_presenter->latencySleepNv();
|
||||||
|
|
||||||
|
std::lock_guard lock(m_mutex);
|
||||||
|
m_lastSleepAppFrameId = m_lastBeginAppFrameId;
|
||||||
|
m_lastSleepDuration = sleepDuration;
|
||||||
|
|
||||||
|
if (m_lowLatencyNoMarkers && m_lastNoMarkerFrameId > m_lastPresentQueued) {
|
||||||
|
// In markerless mode, assume that this gets called before any
|
||||||
|
// work is done for the next frame and update the frame start
|
||||||
|
// time accordingly.
|
||||||
|
auto& frame = getFrameData(m_lastNoMarkerFrameId);
|
||||||
|
frame.frameStart = dxvk::high_resolution_clock::now();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
uint32_t DxvkReflexLatencyTrackerNv::getFrameReports(
|
||||||
|
uint32_t maxCount,
|
||||||
|
DxvkReflexFrameReport* reports) {
|
||||||
|
std::lock_guard lock(m_mutex);
|
||||||
|
|
||||||
|
small_vector<VkLatencyTimingsFrameReportNV, 64> nvReports(maxCount);
|
||||||
|
|
||||||
|
for (uint32_t i = 0; i < maxCount; i++)
|
||||||
|
nvReports[i] = { VK_STRUCTURE_TYPE_LATENCY_TIMINGS_FRAME_REPORT_NV };
|
||||||
|
|
||||||
|
// Adjust some statistics so that we actually return the
|
||||||
|
// correct timestamps for the application-defined markers
|
||||||
|
uint32_t count = m_presenter->getLatencyTimingsNv(maxCount, nvReports.data());
|
||||||
|
|
||||||
|
for (uint32_t i = 0; i < count; i++) {
|
||||||
|
auto& report = nvReports[i];
|
||||||
|
const auto& currFrame = m_frames[report.presentID % FrameCount];
|
||||||
|
|
||||||
|
if (report.presentID != currFrame.frameId || report.presentID < m_nextValidFrameId)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
report.presentID = currFrame.appFrameId;
|
||||||
|
|
||||||
|
// These represent when the CS thread starts processing the frame
|
||||||
|
report.driverStartTimeUs = report.renderSubmitStartTimeUs;
|
||||||
|
report.driverEndTimeUs = report.renderSubmitEndTimeUs;
|
||||||
|
|
||||||
|
// Return when the app set these markers rather than the time when
|
||||||
|
// we forward them to the driver
|
||||||
|
report.renderSubmitStartTimeUs = mapFrameTimestampToReportUs(currFrame, report, currFrame.cpuRenderBegin);
|
||||||
|
report.renderSubmitEndTimeUs = mapFrameTimestampToReportUs(currFrame, report, currFrame.cpuRenderEnd);
|
||||||
|
report.presentStartTimeUs = mapFrameTimestampToReportUs(currFrame, report, currFrame.cpuPresentBegin);
|
||||||
|
report.presentEndTimeUs = mapFrameTimestampToReportUs(currFrame, report, currFrame.cpuPresentEnd);
|
||||||
|
|
||||||
|
// Documentation for the OS timers seems nonsensical, but it seems to
|
||||||
|
// be the time from the the first submission to the end of the frame
|
||||||
|
report.osRenderQueueStartTimeUs = mapFrameTimestampToReportUs(currFrame, report, currFrame.queueSubmit);
|
||||||
|
report.osRenderQueueEndTimeUs = report.gpuRenderEndTimeUs;
|
||||||
|
|
||||||
|
// Apparently gpuRenderEndTime is when presentation completes rather
|
||||||
|
// than rendering, so we need to compute the active render time using
|
||||||
|
// our own timestamps
|
||||||
|
auto gpuActiveTime = currFrame.gpuExecEnd - currFrame.gpuExecStart - currFrame.gpuIdleTime;
|
||||||
|
|
||||||
|
reports[i].report = report;
|
||||||
|
reports[i].gpuActiveTimeUs = std::max<uint64_t>(0u,
|
||||||
|
std::chrono::duration_cast<std::chrono::microseconds>(gpuActiveTime).count());
|
||||||
|
}
|
||||||
|
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
uint64_t DxvkReflexLatencyTrackerNv::frameIdFromAppFrameId(
|
||||||
|
uint64_t appFrameId) {
|
||||||
|
std::lock_guard lock(m_mutex);
|
||||||
|
return lookupFrameId(appFrameId);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
DxvkReflexLatencyFrameData& DxvkReflexLatencyTrackerNv::getFrameData(
|
||||||
|
uint64_t dxvkFrameId) {
|
||||||
|
auto& frameData = m_frames[dxvkFrameId % FrameCount];
|
||||||
|
|
||||||
|
if (frameData.frameId != dxvkFrameId) {
|
||||||
|
m_appToDxvkFrameIds.erase(frameData.appFrameId);
|
||||||
|
|
||||||
|
frameData = DxvkReflexLatencyFrameData();
|
||||||
|
frameData.frameId = dxvkFrameId;
|
||||||
|
}
|
||||||
|
|
||||||
|
return frameData;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
uint64_t DxvkReflexLatencyTrackerNv::lookupFrameId(
|
||||||
|
uint64_t appFrameId) {
|
||||||
|
auto entry = m_appToDxvkFrameIds.find(appFrameId);
|
||||||
|
|
||||||
|
if (entry == m_appToDxvkFrameIds.end())
|
||||||
|
return 0u;
|
||||||
|
|
||||||
|
return entry->second;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
uint64_t DxvkReflexLatencyTrackerNv::allocateFrameId(
|
||||||
|
uint64_t appFrameId) {
|
||||||
|
if (appFrameId <= m_lastBeginAppFrameId) {
|
||||||
|
Logger::warn(str::format("Reflex: Frame ID ", appFrameId, " not monotonic, last was ", m_lastBeginAppFrameId));
|
||||||
|
reset();
|
||||||
|
}
|
||||||
|
|
||||||
|
uint64_t frameId = m_nextAllocFrameId++;
|
||||||
|
mapFrameId(appFrameId, frameId);
|
||||||
|
|
||||||
|
m_lastBeginAppFrameId = appFrameId;
|
||||||
|
return frameId;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void DxvkReflexLatencyTrackerNv::mapFrameId(
|
||||||
|
uint64_t appFrameId,
|
||||||
|
uint64_t dxvkFrameId) {
|
||||||
|
while (m_appToDxvkFrameIds.size() > FrameCount)
|
||||||
|
m_appToDxvkFrameIds.erase(m_appToDxvkFrameIds.begin());
|
||||||
|
|
||||||
|
m_appToDxvkFrameIds.insert_or_assign(appFrameId, dxvkFrameId);
|
||||||
|
getFrameData(dxvkFrameId).appFrameId = appFrameId;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void DxvkReflexLatencyTrackerNv::reset() {
|
||||||
|
m_nextValidFrameId = uint64_t(-1);
|
||||||
|
|
||||||
|
m_lastSleepDuration = duration(0u);
|
||||||
|
|
||||||
|
m_lastBeginAppFrameId = 0u;
|
||||||
|
m_lastPresentAppFrameId = 0u;
|
||||||
|
|
||||||
|
for (size_t i = 0; i < FrameCount; i++)
|
||||||
|
m_frames[i].appFrameId = 0u;
|
||||||
|
|
||||||
|
m_appToDxvkFrameIds.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
uint64_t DxvkReflexLatencyTrackerNv::mapFrameTimestampToReportUs(
|
||||||
|
const DxvkReflexLatencyFrameData& frame,
|
||||||
|
const VkLatencyTimingsFrameReportNV& report,
|
||||||
|
time_point timestamp) {
|
||||||
|
if (frame.cpuSimBegin == time_point() || !report.simStartTimeUs)
|
||||||
|
return 0u;
|
||||||
|
|
||||||
|
int64_t diffUs = std::chrono::duration_cast<std::chrono::microseconds>(timestamp - frame.cpuSimBegin).count();
|
||||||
|
return report.simStartTimeUs + diffUs;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
199
src/dxvk/dxvk_latency_reflex.h
Normal file
199
src/dxvk/dxvk_latency_reflex.h
Normal file
@ -0,0 +1,199 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <array>
|
||||||
|
#include <map>
|
||||||
|
|
||||||
|
#include "dxvk_latency.h"
|
||||||
|
#include "dxvk_presenter.h"
|
||||||
|
|
||||||
|
#include "../util/thread.h"
|
||||||
|
|
||||||
|
#include "../util/util_sleep.h"
|
||||||
|
#include "../util/util_time.h"
|
||||||
|
|
||||||
|
#include "../util/config/config.h"
|
||||||
|
|
||||||
|
#include "../util/sync/sync_spinlock.h"
|
||||||
|
|
||||||
|
namespace dxvk {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Reflex frame info
|
||||||
|
*
|
||||||
|
* Stores frame ID mapping and all sorts of time stamps
|
||||||
|
* that are used for latency sleep or frame reports.
|
||||||
|
*/
|
||||||
|
using DxvkReflexLatencyFrameData = DxvkLatencyFrameData;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Additional frame report info
|
||||||
|
*/
|
||||||
|
struct DxvkReflexFrameReport {
|
||||||
|
VkLatencyTimingsFrameReportNV report;
|
||||||
|
uint64_t gpuActiveTimeUs;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Built-in latency tracker based on VK_NV_low_latency2
|
||||||
|
*
|
||||||
|
* Implements a simple latency reduction algorithm
|
||||||
|
* based on CPU timestamps received from the backend.
|
||||||
|
*/
|
||||||
|
class DxvkReflexLatencyTrackerNv : public DxvkLatencyTracker {
|
||||||
|
using time_point = typename DxvkReflexLatencyFrameData::time_point;
|
||||||
|
using duration = typename DxvkReflexLatencyFrameData::duration;
|
||||||
|
|
||||||
|
// Keep data for a large number of frames around to support
|
||||||
|
// retrieving statistics from the driver properly.
|
||||||
|
constexpr static size_t FrameCount = 256u;
|
||||||
|
public:
|
||||||
|
|
||||||
|
DxvkReflexLatencyTrackerNv(
|
||||||
|
const Rc<Presenter>& presenter);
|
||||||
|
|
||||||
|
~DxvkReflexLatencyTrackerNv();
|
||||||
|
|
||||||
|
bool needsAutoMarkers();
|
||||||
|
|
||||||
|
void notifyCpuPresentBegin(
|
||||||
|
uint64_t frameId);
|
||||||
|
|
||||||
|
void notifyCpuPresentEnd(
|
||||||
|
uint64_t frameId);
|
||||||
|
|
||||||
|
void notifyCsRenderBegin(
|
||||||
|
uint64_t frameId);
|
||||||
|
|
||||||
|
void notifyCsRenderEnd(
|
||||||
|
uint64_t frameId);
|
||||||
|
|
||||||
|
void notifyQueueSubmit(
|
||||||
|
uint64_t frameId);
|
||||||
|
|
||||||
|
void notifyQueuePresentBegin(
|
||||||
|
uint64_t frameId);
|
||||||
|
|
||||||
|
void notifyQueuePresentEnd(
|
||||||
|
uint64_t frameId,
|
||||||
|
VkResult status);
|
||||||
|
|
||||||
|
void notifyGpuExecutionBegin(
|
||||||
|
uint64_t frameId);
|
||||||
|
|
||||||
|
void notifyGpuExecutionEnd(
|
||||||
|
uint64_t frameId);
|
||||||
|
|
||||||
|
void notifyGpuPresentEnd(
|
||||||
|
uint64_t frameId);
|
||||||
|
|
||||||
|
void sleepAndBeginFrame(
|
||||||
|
uint64_t frameId,
|
||||||
|
double maxFrameRate);
|
||||||
|
|
||||||
|
void discardTimings();
|
||||||
|
|
||||||
|
DxvkLatencyStats getStatistics(
|
||||||
|
uint64_t frameId);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Sets Reflex state
|
||||||
|
*
|
||||||
|
* \param [in] enableLowLatency Whether to enable latency control
|
||||||
|
* \param [in] enableBoost Whether to enable boost
|
||||||
|
* \param [in] minIntervalUs Minimum frame interval
|
||||||
|
*/
|
||||||
|
void setLatencySleepMode(
|
||||||
|
bool enableLowLatency,
|
||||||
|
bool enableBoost,
|
||||||
|
uint64_t minIntervalUs);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Sets latency marker from application
|
||||||
|
*
|
||||||
|
* \param [in] appFrameId Application-provided frame ID
|
||||||
|
* \param [in] marker Marker to set
|
||||||
|
*/
|
||||||
|
void setLatencyMarker(
|
||||||
|
uint64_t appFrameId,
|
||||||
|
VkLatencyMarkerNV marker);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Performs latency sleep
|
||||||
|
*/
|
||||||
|
void latencySleep();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Retrieves frame reports
|
||||||
|
*
|
||||||
|
* \param [in] maxCount Maximum number of reports
|
||||||
|
* \param [out] reports Frame reports
|
||||||
|
* \returns Number of reports retrieved
|
||||||
|
*/
|
||||||
|
uint32_t getFrameReports(
|
||||||
|
uint32_t maxCount,
|
||||||
|
DxvkReflexFrameReport* reports);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Looks up frame ID from application frame ID
|
||||||
|
*
|
||||||
|
* \param [in] appFrameId Application-provided frame ID
|
||||||
|
* \returns Internal frame ID, or 0 if none was found
|
||||||
|
*/
|
||||||
|
uint64_t frameIdFromAppFrameId(
|
||||||
|
uint64_t appFrameId);
|
||||||
|
|
||||||
|
private:
|
||||||
|
|
||||||
|
Rc<Presenter> m_presenter;
|
||||||
|
|
||||||
|
dxvk::mutex m_mutex;
|
||||||
|
dxvk::condition_variable m_cond;
|
||||||
|
|
||||||
|
uint64_t m_lastBeginAppFrameId = 0u;
|
||||||
|
uint64_t m_lastSleepAppFrameId = 0u;
|
||||||
|
uint64_t m_lastPresentAppFrameId = 0u;
|
||||||
|
|
||||||
|
uint64_t m_nextAllocFrameId = 1u;
|
||||||
|
uint64_t m_nextValidFrameId = uint64_t(-1);
|
||||||
|
|
||||||
|
uint64_t m_lastCompletedFrameId = 0u;
|
||||||
|
|
||||||
|
uint64_t m_lastPresentQueued = 0u;
|
||||||
|
uint64_t m_lastPresentComplete = 0u;
|
||||||
|
|
||||||
|
uint64_t m_lastNoMarkerFrameId = 0u;
|
||||||
|
|
||||||
|
duration m_lastSleepDuration = duration(0u);
|
||||||
|
|
||||||
|
bool m_lowLatencyMode = false;
|
||||||
|
bool m_lowLatencyNoMarkers = false;
|
||||||
|
|
||||||
|
std::array<DxvkReflexLatencyFrameData, FrameCount> m_frames = { };
|
||||||
|
|
||||||
|
std::map<uint64_t, uint64_t> m_appToDxvkFrameIds;
|
||||||
|
|
||||||
|
DxvkReflexLatencyFrameData& getFrameData(
|
||||||
|
uint64_t dxvkFrameId);
|
||||||
|
|
||||||
|
uint64_t lookupFrameId(
|
||||||
|
uint64_t appFrameId);
|
||||||
|
|
||||||
|
uint64_t allocateFrameId(
|
||||||
|
uint64_t appFrameId);
|
||||||
|
|
||||||
|
void mapFrameId(
|
||||||
|
uint64_t appFrameId,
|
||||||
|
uint64_t dxvkFrameId);
|
||||||
|
|
||||||
|
void reset();
|
||||||
|
|
||||||
|
static uint64_t mapFrameTimestampToReportUs(
|
||||||
|
const DxvkReflexLatencyFrameData& frame,
|
||||||
|
const VkLatencyTimingsFrameReportNV& report,
|
||||||
|
time_point timestamp);
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
@ -91,6 +91,7 @@ dxvk_src = [
|
|||||||
'dxvk_image.cpp',
|
'dxvk_image.cpp',
|
||||||
'dxvk_instance.cpp',
|
'dxvk_instance.cpp',
|
||||||
'dxvk_latency_builtin.cpp',
|
'dxvk_latency_builtin.cpp',
|
||||||
|
'dxvk_latency_reflex.cpp',
|
||||||
'dxvk_memory.cpp',
|
'dxvk_memory.cpp',
|
||||||
'dxvk_meta_blit.cpp',
|
'dxvk_meta_blit.cpp',
|
||||||
'dxvk_meta_clear.cpp',
|
'dxvk_meta_clear.cpp',
|
||||||
|
Loading…
x
Reference in New Issue
Block a user