diff --git a/src/d3d11/d3d11_context_imm.cpp b/src/d3d11/d3d11_context_imm.cpp index 6dc8a832f..6011e290f 100644 --- a/src/d3d11/d3d11_context_imm.cpp +++ b/src/d3d11/d3d11_context_imm.cpp @@ -819,9 +819,28 @@ namespace dxvk { } - void D3D11ImmediateContext::EndFrame() { + void D3D11ImmediateContext::BeginFrame( + Rc LatencyControl, + uint64_t FrameId) { + if (LatencyControl) + m_latencyFrames.push_back(std::make_pair(std::move(LatencyControl), FrameId)); + } + + + void D3D11ImmediateContext::EndFrame( + Rc LatencyControl, + uint64_t FrameId) { D3D10DeviceLock lock = LockContext(); + if (LatencyControl) { + m_submissionFence->setCallback(m_submissionId + 1u, [ + cLatencyControl = std::move(LatencyControl), + cFrameId = FrameId + ] { + cLatencyControl->setMarker(cFrameId, DxvkLatencyMarker::GpuFrameEnd); + }); + } + EmitCs([] (DxvkContext* ctx) { ctx->endFrame(); }); @@ -944,6 +963,12 @@ namespace dxvk { if (!GetPendingCsChunks() && !hEvent) return; + // Notify latency control objects about the submission + for (size_t i = 0; i < m_latencyFrames.size(); i++) { + const auto& entry = m_latencyFrames[i]; + entry.first->setMarker(entry.second, DxvkLatencyMarker::CpuFirstSubmit); + } + // Signal the submission fence and flush the command list uint64_t submissionId = ++m_submissionId; @@ -958,11 +983,23 @@ namespace dxvk { cSubmissionId = submissionId, cSubmissionStatus = synchronizeSubmission ? &m_submitStatus : nullptr, cStagingFence = m_stagingBufferFence, - cStagingMemory = m_staging.getStatistics().allocatedTotal - ] (DxvkContext* ctx) { + cStagingMemory = m_staging.getStatistics().allocatedTotal, + cBegunFrames = std::move(m_latencyFrames) + ] (DxvkContext* ctx) mutable { ctx->signal(cSubmissionFence, cSubmissionId); ctx->signal(cStagingFence, cStagingMemory); ctx->flushCommandList(cSubmissionStatus); + + // Use the previous submission ID for the GPU-side frame start markers. + // This way we will signal no sooner than the CS thread submitting to the + // queue worker when CPU-bound, or the previous submission completing on + // the GPU when GPU-bound. + for (size_t i = 0; i < cBegunFrames.size(); i++) { + cSubmissionFence->setCallback(cSubmissionId - 1u, + [cEntry = std::move(cBegunFrames[i])] { + cEntry.first->setMarker(cEntry.second, DxvkLatencyMarker::GpuFrameStart); + }); + } }); FlushCsChunk(); diff --git a/src/d3d11/d3d11_context_imm.h b/src/d3d11/d3d11_context_imm.h index b57d579c2..9ef93e970 100644 --- a/src/d3d11/d3d11_context_imm.h +++ b/src/d3d11/d3d11_context_imm.h @@ -1,5 +1,7 @@ #pragma once +#include "../util/util_latency.h" +#include "../util/util_small_vector.h" #include "../util/util_time.h" #include "../util/sync/sync_signal.h" @@ -133,6 +135,8 @@ namespace dxvk { Com m_stateObject; + small_vector, uint64_t>, 1> m_latencyFrames; + HRESULT MapBuffer( D3D11Buffer* pResource, D3D11_MAP MapType, @@ -168,7 +172,13 @@ namespace dxvk { void SynchronizeDevice(); - void EndFrame(); + void BeginFrame( + Rc LatencyControl, + uint64_t FrameId); + + void EndFrame( + Rc LatencyControl, + uint64_t FrameId); bool WaitForResource( const DxvkPagedResource& Resource, diff --git a/src/d3d11/d3d11_swapchain.cpp b/src/d3d11/d3d11_swapchain.cpp index d1c66090e..74cec1449 100644 --- a/src/d3d11/d3d11_swapchain.cpp +++ b/src/d3d11/d3d11_swapchain.cpp @@ -64,7 +64,9 @@ namespace dxvk { m_desc(*pDesc), m_device(pDevice->GetDXVKDevice()), m_frameLatencyCap(pDevice->GetOptions()->maxFrameLatency) { + CreateFrameLatencyEvent(); + CreateFrameLatencyControl(); CreatePresenter(); CreateBackBuffers(); CreateBlitter(); @@ -273,6 +275,8 @@ namespace dxvk { if (PresentFlags & DXGI_PRESENT_TEST) return hr; + NotifyLatencyControlCpuPresent(); + if (hr != S_OK) { SyncFrameLatency(); return hr; @@ -293,6 +297,7 @@ namespace dxvk { // applications using the semaphore may deadlock. This works because // we do not increment the frame ID in those situations. SyncFrameLatency(); + SyncLatencyControl(); return hr; } @@ -378,7 +383,7 @@ namespace dxvk { auto immediateContext = m_parent->GetContext(); auto immediateContextLock = immediateContext->LockContext(); - immediateContext->EndFrame(); + immediateContext->EndFrame(m_latencyControl, m_frameId + 1u); immediateContext->Flush(); SynchronizePresent(); @@ -458,6 +463,7 @@ namespace dxvk { RotateBackBuffers(immediateContext); immediateContext->FlushCsChunk(); + immediateContext->BeginFrame(m_latencyControl, m_frameId + 1u); return S_OK; } @@ -532,6 +538,12 @@ namespace dxvk { } + void D3D11SwapChain::CreateFrameLatencyControl() { + if (m_frameLatencyCap < 0) + m_latencyControl = new DxvkLatencyControl(); + } + + void D3D11SwapChain::CreatePresenter() { PresenterDesc presenterDesc; presenterDesc.imageExtent = { m_desc.Width, m_desc.Height }; @@ -686,8 +698,12 @@ namespace dxvk { m_frameLatencySignal->setCallback(m_frameId, [this, cFrameId = m_frameId, - cFrameLatencyEvent = m_frameLatencyEvent + cFrameLatencyEvent = m_frameLatencyEvent, + cLatencyControl = m_latencyControl ] () { + if (cLatencyControl) + cLatencyControl->setMarker(cFrameId, DxvkLatencyMarker::GpuPresentEnd); + if (cFrameLatencyEvent) ReleaseSemaphore(cFrameLatencyEvent, 1, nullptr); @@ -698,6 +714,20 @@ namespace dxvk { } + void D3D11SwapChain::SyncLatencyControl() { + if (m_latencyControl) { + m_latencyControl->sleep(m_frameId, m_targetFrameRate); + m_latencyControl->setMarker(m_frameId + 1u, DxvkLatencyMarker::CpuFrameStart); + } + } + + + void D3D11SwapChain::NotifyLatencyControlCpuPresent() { + if (m_latencyControl) + m_latencyControl->setMarker(m_frameId + 1u, DxvkLatencyMarker::CpuPresent); + } + + uint32_t D3D11SwapChain::GetActualFrameLatency() { // DXGI does not seem to implicitly synchronize waitable swap chains, // so in that case we should just respect the user config. For regular @@ -707,8 +737,10 @@ namespace dxvk { if (!(m_desc.Flags & DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT)) m_dxgiDevice->GetMaximumFrameLatency(&maxFrameLatency); - if (m_frameLatencyCap) - maxFrameLatency = std::min(maxFrameLatency, m_frameLatencyCap); + if (m_frameLatencyCap != 0) { + maxFrameLatency = m_frameLatencyCap < 0 + ? 1u : std::min(maxFrameLatency, uint32_t(m_frameLatencyCap)); + } maxFrameLatency = std::min(maxFrameLatency, m_desc.BufferCount); return maxFrameLatency; diff --git a/src/d3d11/d3d11_swapchain.h b/src/d3d11/d3d11_swapchain.h index fa3c64c2c..b8b2f3f91 100644 --- a/src/d3d11/d3d11_swapchain.h +++ b/src/d3d11/d3d11_swapchain.h @@ -8,6 +8,8 @@ #include "../util/sync/sync_signal.h" +#include "../util/util_latency.h" + namespace dxvk { class D3D11Device; @@ -117,10 +119,12 @@ namespace dxvk { uint64_t m_frameId = DXGI_MAX_SWAP_CHAIN_BUFFERS; uint32_t m_frameLatency = DefaultFrameLatency; - uint32_t m_frameLatencyCap = 0; + int32_t m_frameLatencyCap = 0; HANDLE m_frameLatencyEvent = nullptr; Rc m_frameLatencySignal; + Rc m_latencyControl; + bool m_dirty = true; VkColorSpaceKHR m_colorspace = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR; @@ -145,6 +149,8 @@ namespace dxvk { void CreateFrameLatencyEvent(); + void CreateFrameLatencyControl(); + void CreatePresenter(); VkResult CreateSurface(VkSurfaceKHR* pSurface); @@ -161,6 +167,10 @@ namespace dxvk { void SyncFrameLatency(); + void SyncLatencyControl(); + + void NotifyLatencyControlCpuPresent(); + uint32_t GetActualFrameLatency(); uint32_t PickFormats(