1
0
mirror of https://github.com/doitsujin/dxvk.git synced 2025-01-07 16:46:17 +01:00

[d3d11] Use new flush heuristic

This commit is contained in:
Philip Rebohle 2023-01-14 14:31:26 +01:00 committed by Philip Rebohle
parent f952418958
commit 2a3d7ee7dc
3 changed files with 89 additions and 87 deletions

View File

@ -2194,9 +2194,6 @@ namespace dxvk {
ID3D11DepthStencilView* pDepthStencilView) { ID3D11DepthStencilView* pDepthStencilView) {
D3D10DeviceLock lock = LockContext(); D3D10DeviceLock lock = LockContext();
if constexpr (!IsDeferred)
GetTypedContext()->FlushImplicit(true);
SetRenderTargetsAndUnorderedAccessViews( SetRenderTargetsAndUnorderedAccessViews(
NumViews, ppRenderTargetViews, pDepthStencilView, NumViews, ppRenderTargetViews, pDepthStencilView,
NumViews, 0, nullptr, nullptr); NumViews, 0, nullptr, nullptr);
@ -2214,9 +2211,6 @@ namespace dxvk {
const UINT* pUAVInitialCounts) { const UINT* pUAVInitialCounts) {
D3D10DeviceLock lock = LockContext(); D3D10DeviceLock lock = LockContext();
if constexpr (!IsDeferred)
GetTypedContext()->FlushImplicit(true);
SetRenderTargetsAndUnorderedAccessViews( SetRenderTargetsAndUnorderedAccessViews(
NumRTVs, ppRenderTargetViews, pDepthStencilView, NumRTVs, ppRenderTargetViews, pDepthStencilView,
UAVStartSlot, NumUAVs, ppUnorderedAccessViews, pUAVInitialCounts); UAVStartSlot, NumUAVs, ppUnorderedAccessViews, pUAVInitialCounts);
@ -2671,7 +2665,7 @@ namespace dxvk {
return E_INVALIDARG; return E_INVALIDARG;
if constexpr (!IsDeferred) if constexpr (!IsDeferred)
GetTypedContext()->FlushImplicit(false); GetTypedContext()->ConsiderFlush(GpuFlushType::ImplicitWeakHint);
DxvkSparseBindInfo bindInfo; DxvkSparseBindInfo bindInfo;
bindInfo.dstResource = GetPagedResource(pDestTiledResource); bindInfo.dstResource = GetPagedResource(pDestTiledResource);
@ -2808,7 +2802,7 @@ namespace dxvk {
return E_INVALIDARG; return E_INVALIDARG;
if constexpr (!IsDeferred) if constexpr (!IsDeferred)
GetTypedContext()->FlushImplicit(false); GetTypedContext()->ConsiderFlush(GpuFlushType::ImplicitWeakHint);
// Find sparse allocator if the tile pool is defined // Find sparse allocator if the tile pool is defined
DxvkSparseBindInfo bindInfo; DxvkSparseBindInfo bindInfo;
@ -4921,8 +4915,12 @@ namespace dxvk {
} }
} }
if (needsUpdate) if (needsUpdate) {
BindFramebuffer(); BindFramebuffer();
if constexpr (!IsDeferred)
GetTypedContext()->ConsiderFlush(GpuFlushType::ImplicitWeakHint);
}
} }

View File

@ -18,6 +18,7 @@ namespace dxvk {
: D3D11CommonContext<D3D11ImmediateContext>(pParent, Device, 0, DxvkCsChunkFlag::SingleUse), : D3D11CommonContext<D3D11ImmediateContext>(pParent, Device, 0, DxvkCsChunkFlag::SingleUse),
m_csThread(Device, Device->createContext(DxvkContextType::Primary)), m_csThread(Device, Device->createContext(DxvkContextType::Primary)),
m_maxImplicitDiscardSize(pParent->GetOptions()->maxImplicitDiscardSize), m_maxImplicitDiscardSize(pParent->GetOptions()->maxImplicitDiscardSize),
m_submissionFence(new sync::CallbackFence()),
m_multithread(this, false, pParent->GetOptions()->enableContextLock), m_multithread(this, false, pParent->GetOptions()->enableContextLock),
m_videoContext(this, Device) { m_videoContext(this, Device) {
EmitCs([ EmitCs([
@ -48,7 +49,7 @@ namespace dxvk {
if (this_thread::isInModuleDetachment()) if (this_thread::isInModuleDetachment())
return; return;
Flush(); ExecuteFlush(GpuFlushType::ExplicitFlush, nullptr);
SynchronizeCsThread(DxvkCsThread::SynchronizeAll); SynchronizeCsThread(DxvkCsThread::SynchronizeAll);
SynchronizeDevice(); SynchronizeDevice();
} }
@ -99,7 +100,8 @@ namespace dxvk {
// Ignore the DONOTFLUSH flag here as some games will spin // Ignore the DONOTFLUSH flag here as some games will spin
// on queries without ever flushing the context otherwise. // on queries without ever flushing the context otherwise.
FlushImplicit(FALSE); D3D10DeviceLock lock = LockContext();
ConsiderFlush(GpuFlushType::ImplicitSynchronization);
} }
return hr; return hr;
@ -148,47 +150,33 @@ namespace dxvk {
query->NotifyEnd(); query->NotifyEnd();
if (query->IsStalling()) if (query->IsStalling())
Flush(); ExecuteFlush(GpuFlushType::ImplicitSynchronization, nullptr);
else if (query->IsEvent()) else if (query->IsEvent())
FlushImplicit(TRUE); ConsiderFlush(GpuFlushType::ImplicitStrongHint);
} }
} }
void STDMETHODCALLTYPE D3D11ImmediateContext::Flush() { void STDMETHODCALLTYPE D3D11ImmediateContext::Flush() {
Flush1(D3D11_CONTEXT_TYPE_ALL, nullptr); D3D10DeviceLock lock = LockContext();
ExecuteFlush(GpuFlushType::ExplicitFlush, nullptr);
} }
void STDMETHODCALLTYPE D3D11ImmediateContext::Flush1( void STDMETHODCALLTYPE D3D11ImmediateContext::Flush1(
D3D11_CONTEXT_TYPE ContextType, D3D11_CONTEXT_TYPE ContextType,
HANDLE hEvent) { HANDLE hEvent) {
m_parent->FlushInitContext();
if (hEvent)
SignalEvent(hEvent);
D3D10DeviceLock lock = LockContext(); D3D10DeviceLock lock = LockContext();
if (GetPendingCsChunks()) { ExecuteFlush(GpuFlushType::ExplicitFlush, hEvent);
// Add commands to flush the threaded
// context, then flush the command list
EmitCs([] (DxvkContext* ctx) {
ctx->flushCommandList();
});
FlushCsChunk();
// Reset flush timer used for implicit flushes
m_lastFlush = dxvk::high_resolution_clock::now();
m_flushSeqNum = m_csSeqNum;
}
} }
HRESULT STDMETHODCALLTYPE D3D11ImmediateContext::Signal( HRESULT STDMETHODCALLTYPE D3D11ImmediateContext::Signal(
ID3D11Fence* pFence, ID3D11Fence* pFence,
UINT64 Value) { UINT64 Value) {
D3D10DeviceLock lock = LockContext();
auto fence = static_cast<D3D11Fence*>(pFence); auto fence = static_cast<D3D11Fence*>(pFence);
if (!fence) if (!fence)
@ -201,7 +189,7 @@ namespace dxvk {
ctx->signalFence(cFence, cValue); ctx->signalFence(cFence, cValue);
}); });
Flush(); ExecuteFlush(GpuFlushType::ExplicitFlush, nullptr);
return S_OK; return S_OK;
} }
@ -209,12 +197,13 @@ namespace dxvk {
HRESULT STDMETHODCALLTYPE D3D11ImmediateContext::Wait( HRESULT STDMETHODCALLTYPE D3D11ImmediateContext::Wait(
ID3D11Fence* pFence, ID3D11Fence* pFence,
UINT64 Value) { UINT64 Value) {
D3D10DeviceLock lock = LockContext();
auto fence = static_cast<D3D11Fence*>(pFence); auto fence = static_cast<D3D11Fence*>(pFence);
if (!fence) if (!fence)
return E_INVALIDARG; return E_INVALIDARG;
Flush(); ExecuteFlush(GpuFlushType::ExplicitFlush, nullptr);
EmitCs([ EmitCs([
cFence = fence->GetFence(), cFence = fence->GetFence(),
@ -246,7 +235,7 @@ namespace dxvk {
// As an optimization, flush everything if the // As an optimization, flush everything if the
// number of pending draw calls is high enough. // number of pending draw calls is high enough.
FlushImplicit(FALSE); ConsiderFlush(GpuFlushType::ImplicitWeakHint);
// Dispatch command list to the CS thread and // Dispatch command list to the CS thread and
// restore the immediate context's state // restore the immediate context's state
@ -257,6 +246,9 @@ namespace dxvk {
RestoreCommandListState(); RestoreCommandListState();
else else
ResetContextState(); ResetContextState();
// Flush after if the command list was sufficiently long
ConsiderFlush(GpuFlushType::ImplicitWeakHint);
} }
@ -386,7 +378,7 @@ namespace dxvk {
} }
if (doInvalidatePreserve) { if (doInvalidatePreserve) {
FlushImplicit(TRUE); ConsiderFlush(GpuFlushType::ImplicitWeakHint);
auto prevSlice = pResource->GetMappedSlice(); auto prevSlice = pResource->GetMappedSlice();
auto physSlice = pResource->DiscardSlice(); auto physSlice = pResource->DiscardSlice();
@ -533,7 +525,7 @@ namespace dxvk {
} }
if (doFlags & DoInvalidate) { if (doFlags & DoInvalidate) {
FlushImplicit(TRUE); ConsiderFlush(GpuFlushType::ImplicitWeakHint);
DxvkBufferSliceHandle prevSlice = pResource->GetMappedSlice(Subresource); DxvkBufferSliceHandle prevSlice = pResource->GetMappedSlice(Subresource);
DxvkBufferSliceHandle physSlice = pResource->DiscardSlice(Subresource); DxvkBufferSliceHandle physSlice = pResource->DiscardSlice(Subresource);
@ -809,14 +801,14 @@ namespace dxvk {
// We don't have to wait, but misbehaving games may // We don't have to wait, but misbehaving games may
// still try to spin on `Map` until the resource is // still try to spin on `Map` until the resource is
// idle, so we should flush pending commands // idle, so we should flush pending commands
FlushImplicit(FALSE); ConsiderFlush(GpuFlushType::ImplicitSynchronization);
return false; return false;
} }
} else { } else {
if (isInUse) { if (isInUse) {
// Make sure pending commands using the resource get // Make sure pending commands using the resource get
// executed on the GPU if we have to wait for it // executed on the GPU if we have to wait for it
Flush(); ExecuteFlush(GpuFlushType::ImplicitSynchronization, nullptr);
SynchronizeCsThread(SequenceNumber); SynchronizeCsThread(SequenceNumber);
m_device->waitForResource(Resource, access); m_device->waitForResource(Resource, access);
@ -838,7 +830,7 @@ namespace dxvk {
uint64_t sequenceNumber = GetCurrentSequenceNumber(); uint64_t sequenceNumber = GetCurrentSequenceNumber();
pResource->TrackSequenceNumber(Subresource, sequenceNumber); pResource->TrackSequenceNumber(Subresource, sequenceNumber);
FlushImplicit(TRUE); ConsiderFlush(GpuFlushType::ImplicitStrongHint);
} }
@ -847,7 +839,7 @@ namespace dxvk {
uint64_t sequenceNumber = GetCurrentSequenceNumber(); uint64_t sequenceNumber = GetCurrentSequenceNumber();
pResource->TrackSequenceNumber(sequenceNumber); pResource->TrackSequenceNumber(sequenceNumber);
FlushImplicit(TRUE); ConsiderFlush(GpuFlushType::ImplicitStrongHint);
} }
@ -864,40 +856,50 @@ namespace dxvk {
} }
void D3D11ImmediateContext::FlushImplicit(BOOL StrongHint) { void D3D11ImmediateContext::ConsiderFlush(
// Flush only if the GPU is about to go idle, in GpuFlushType FlushType) {
// order to keep the number of submissions low. uint64_t chunkId = GetCurrentSequenceNumber();
uint32_t pending = m_device->pendingSubmissions(); uint64_t submissionId = m_submissionFence->value();
if (StrongHint || pending <= MaxPendingSubmits) { if (m_flushTracker.considerFlush(FlushType, chunkId, submissionId))
auto now = dxvk::high_resolution_clock::now(); ExecuteFlush(FlushType, nullptr);
uint32_t delay = MinFlushIntervalUs
+ IncFlushIntervalUs * pending;
// Prevent flushing too often in short intervals.
if (now - m_lastFlush >= std::chrono::microseconds(delay))
Flush();
}
} }
void D3D11ImmediateContext::SignalEvent(HANDLE hEvent) { void D3D11ImmediateContext::ExecuteFlush(
uint64_t value = ++m_eventCount; GpuFlushType FlushType,
HANDLE hEvent) {
// Flush init context so that new resources are fully initialized
// before the app can access them in any way. This has to happen
// unconditionally since we may otherwise deadlock on Map.
m_parent->FlushInitContext();
if (m_eventSignal == nullptr) // Exit early if there's nothing to do
m_eventSignal = new sync::CallbackFence(); if (!GetPendingCsChunks() && !hEvent)
return;
m_eventSignal->setCallback(value, [hEvent] { // Signal the submission fence and flush the command list
SetEvent(hEvent); uint64_t submissionId = ++m_submissionId;
});
if (hEvent) {
m_submissionFence->setCallback(submissionId, [hEvent] {
SetEvent(hEvent);
});
}
EmitCs([ EmitCs([
cSignal = m_eventSignal, cSubmissionFence = m_submissionFence,
cValue = value cSubmissionId = submissionId
] (DxvkContext* ctx) { ] (DxvkContext* ctx) {
ctx->signal(cSignal, cValue); ctx->signal(cSubmissionFence, cSubmissionId);
ctx->flushCommandList();
}); });
FlushCsChunk();
// Notify flush tracker about the flush
m_flushSeqNum = m_csSeqNum;
m_flushTracker.notifyFlush(m_flushSeqNum, submissionId);
} }
} }

View File

@ -1,5 +1,6 @@
#pragma once #pragma once
#include "../util/util_flush.h"
#include "../util/util_time.h" #include "../util/util_time.h"
#include "../util/sync/sync_signal.h" #include "../util/sync/sync_signal.h"
@ -89,20 +90,18 @@ namespace dxvk {
DxvkCsThread m_csThread; DxvkCsThread m_csThread;
uint64_t m_csSeqNum = 0ull; uint64_t m_csSeqNum = 0ull;
Rc<sync::CallbackFence> m_eventSignal;
uint64_t m_eventCount = 0ull;
uint32_t m_mappedImageCount = 0u; uint32_t m_mappedImageCount = 0u;
VkDeviceSize m_maxImplicitDiscardSize = 0ull; VkDeviceSize m_maxImplicitDiscardSize = 0ull;
Rc<sync::CallbackFence> m_submissionFence;
uint64_t m_submissionId = 0ull;
uint64_t m_flushSeqNum = 0ull; uint64_t m_flushSeqNum = 0ull;
GpuFlushTracker m_flushTracker;
D3D10Multithread m_multithread;
dxvk::high_resolution_clock::time_point m_lastFlush D3D11VideoContext m_videoContext;
= dxvk::high_resolution_clock::now();
D3D10Multithread m_multithread;
D3D11VideoContext m_videoContext;
Com<D3D11DeviceContextState, false> m_stateObject; Com<D3D11DeviceContextState, false> m_stateObject;
@ -133,21 +132,21 @@ namespace dxvk {
const D3D11_COMMON_TEXTURE_REGION* pRegion); const D3D11_COMMON_TEXTURE_REGION* pRegion);
void UpdateMappedBuffer( void UpdateMappedBuffer(
D3D11Buffer* pDstBuffer, D3D11Buffer* pDstBuffer,
UINT Offset, UINT Offset,
UINT Length, UINT Length,
const void* pSrcData, const void* pSrcData,
UINT CopyFlags); UINT CopyFlags);
void SynchronizeDevice(); void SynchronizeDevice();
void EndFrame(); void EndFrame();
bool WaitForResource( bool WaitForResource(
const Rc<DxvkResource>& Resource, const Rc<DxvkResource>& Resource,
uint64_t SequenceNumber, uint64_t SequenceNumber,
D3D11_MAP MapType, D3D11_MAP MapType,
UINT MapFlags); UINT MapFlags);
void EmitCsChunk(DxvkCsChunkRef&& chunk); void EmitCsChunk(DxvkCsChunkRef&& chunk);
@ -162,9 +161,12 @@ namespace dxvk {
uint64_t GetPendingCsChunks(); uint64_t GetPendingCsChunks();
void FlushImplicit(BOOL StrongHint); void ConsiderFlush(
GpuFlushType FlushType);
void SignalEvent(HANDLE hEvent); void ExecuteFlush(
GpuFlushType FlushType,
HANDLE hEvent);
}; };