From e6f89062f5a5af96c4ae28d2ac441e54fbe94068 Mon Sep 17 00:00:00 2001
From: Philip Rebohle
Date: Fri, 27 Sep 2024 22:53:56 +0200
Subject: [PATCH] [d3d9] Ensure that we stay below the maximum sampler count

---
Reviewer notes (placed after the three-dash separator so they are not
part of the commit message):

* BindSampler() now increments m_samplerBindCount before emitting its CS
  lambda and captures the new value as cBindId. After binding the
  sampler, the lambda packs getSamplerStats().liveCount into the low
  SamplerCountBits (12) bits and cBindId into the bits above, and
  publishes the pair through the atomic m_lastSamplerStats with a
  relaxed store.

* EnsureSamplerLimit() runs at the top of UndirtySamplers(). It bounds
  the number of live samplers by
      m_lastSamplerLiveCount + m_samplerBindCount - m_lastSamplerBindCount
  and compares that against DxvkSamplerPool::MaxSamplerCount minus
  SamplerCount. It escalates in four steps, returning as soon as the
  bound holds:
    1. use the cached snapshot,
    2. reload the snapshot from m_lastSamplerStats,
    3. flush and synchronize with the CS thread one sequence number at
       a time, reloading the snapshot after each step,
    4. log a warning, flush, synchronize the whole CS thread and wait on
       the submission fence one submission at a time, re-reading the
       live count from the device after each wait.
  If all of that fails it logs a second warning and returns anyway.

* NOTE(review): liveCount is OR-ed into the packed value without being
  masked by SamplerCountMask, so the encoding assumes it always fits in
  12 bits. Presumably DxvkSamplerPool::MaxSamplerCount guarantees this;
  confirm against the pool definition.

* NOTE(review): this copy of the patch was recovered from a version
  with collapsed whitespace. Indentation and intra-line alignment of
  context lines are a best-effort reconstruction, so applying it may
  require whitespace-tolerant options. The From: header carries no
  e-mail address here, which git am may reject.

 src/d3d9/d3d9_device.cpp | 74 +++++++++++++++++++++++++++++++++++++++-
 src/d3d9/d3d9_device.h   | 15 ++++++++
 2 files changed, 88 insertions(+), 1 deletion(-)

diff --git a/src/d3d9/d3d9_device.cpp b/src/d3d9/d3d9_device.cpp
index fb7af1bc..49a44743 100644
--- a/src/d3d9/d3d9_device.cpp
+++ b/src/d3d9/d3d9_device.cpp
@@ -6642,11 +6642,14 @@ namespace dxvk {
       samplerInfo.first, DxsoBindingType::Image,
       samplerInfo.second);
 
+    m_samplerBindCount++;
+
     EmitCs([this,
       cSlot = slot,
       cState = D3D9SamplerInfo(m_state.samplerStates[Sampler]),
       cIsCube = bool(m_cubeTextures & (1u << Sampler)),
-      cIsDepth = bool(m_depthTextures & (1u << Sampler))
+      cIsDepth = bool(m_depthTextures & (1u << Sampler)),
+      cBindId = m_samplerBindCount
     ] (DxvkContext* ctx) {
       DxvkSamplerKey key = { };
 
@@ -6697,6 +6700,10 @@ namespace dxvk {
 
       VkShaderStageFlags stage = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT;
       ctx->bindResourceSampler(stage, cSlot, m_dxvkDevice->createSampler(key));
+
+      // Let the main thread know about current sampler stats
+      uint64_t liveCount = m_dxvkDevice->getSamplerStats().liveCount;
+      m_lastSamplerStats.store(liveCount | (cBindId << SamplerCountBits), std::memory_order_relaxed);
     });
   }
 
@@ -6741,6 +6748,8 @@ namespace dxvk {
 
 
   void D3D9DeviceEx::UndirtySamplers(uint32_t mask) {
+    EnsureSamplerLimit();
+
     for (uint32_t i : bit::BitMask(mask))
       BindSampler(i);
 
@@ -6972,6 +6981,69 @@ namespace dxvk {
   }
 
 
+  void D3D9DeviceEx::EnsureSamplerLimit() {
+    constexpr uint32_t MaxSamplerCount = DxvkSamplerPool::MaxSamplerCount - SamplerCount;
+
+    // Maximum possible number of live samplers we can have
+    // since last reading back from the CS thread.
+    if (likely(m_lastSamplerLiveCount + m_samplerBindCount - m_lastSamplerBindCount <= MaxSamplerCount))
+      return;
+
+    // Update current stats from CS thread and check again. We
+    // don't want to do this every time due to potential cache
+    // thrashing.
+    uint64_t lastStats = m_lastSamplerStats.load(std::memory_order_relaxed);
+    m_lastSamplerLiveCount = lastStats & SamplerCountMask;
+    m_lastSamplerBindCount = lastStats >> SamplerCountBits;
+
+    if (likely(m_lastSamplerLiveCount + m_samplerBindCount - m_lastSamplerBindCount <= MaxSamplerCount))
+      return;
+
+    // If we have a large number of sampler updates in flight, wait for
+    // the CS thread to complete some and re-evaluate. We should not hit
+    // this path under normal gameplay conditions.
+    ConsiderFlush(GpuFlushType::ImplicitSynchronization);
+
+    uint64_t sequenceNumber = m_csThread.lastSequenceNumber();
+
+    while (++sequenceNumber <= GetCurrentSequenceNumber()) {
+      SynchronizeCsThread(sequenceNumber);
+
+      uint64_t lastStats = m_lastSamplerStats.load(std::memory_order_relaxed);
+      m_lastSamplerLiveCount = lastStats & SamplerCountMask;
+      m_lastSamplerBindCount = lastStats >> SamplerCountBits;
+
+      if (m_lastSamplerLiveCount + m_samplerBindCount - m_lastSamplerBindCount <= MaxSamplerCount)
+        return;
+    }
+
+    // If we end up here, the game somehow managed to queue up so
+    // many samplers that we need to wait for the GPU to free some.
+    // We should absolutely never hit this path in the real world.
+    Logger::warn("Sampler pool exhausted, synchronizing with GPU.");
+
+    Flush();
+    SynchronizeCsThread(DxvkCsThread::SynchronizeAll);
+
+    uint64_t submissionId = m_submissionFence->value();
+
+    while (++submissionId <= m_submissionId) {
+      m_submissionFence->wait(submissionId);
+
+      // Need to manually update sampler stats here since we
+      // might otherwise hit this path again the next time
+      auto samplerStats = m_dxvkDevice->getSamplerStats();
+      m_lastSamplerStats = samplerStats.liveCount | (m_samplerBindCount << SamplerCountBits);
+
+      if (samplerStats.liveCount <= MaxSamplerCount)
+        return;
+    }
+
+    // If we end up *here*, good luck.
+    Logger::warn("Sampler pool exhausted, cannot create any new samplers.");
+  }
+
+
   template <DxsoProgramType ShaderStage>
   void D3D9DeviceEx::BindShader(
   const D3D9CommonShader* pShaderModule) {
diff --git a/src/d3d9/d3d9_device.h b/src/d3d9/d3d9_device.h
index 5b935c7a..480f29d7 100644
--- a/src/d3d9/d3d9_device.h
+++ b/src/d3d9/d3d9_device.h
@@ -917,6 +917,8 @@ namespace dxvk {
 
     void PrepareDraw(D3DPRIMITIVETYPE PrimitiveType, bool UploadVBOs, bool UploadIBOs);
 
+    void EnsureSamplerLimit();
+
     template <DxsoProgramType ShaderStage>
     void BindShader(
     const D3D9CommonShader* pShaderModule);
@@ -1458,6 +1460,19 @@ namespace dxvk {
     D3D9VkInteropDevice m_d3d9Interop;
     D3D9On12 m_d3d9On12;
     DxvkD3D8Bridge m_d3d8Bridge;
+
+    // Sampler statistics
+    constexpr static uint32_t SamplerCountBits = 12u;
+    constexpr static uint64_t SamplerCountMask = (1u << SamplerCountBits) - 1u;
+
+    uint64_t m_samplerBindCount = 0u;
+
+    uint64_t m_lastSamplerLiveCount = 0u;
+    uint64_t m_lastSamplerBindCount = 0u;
+
+    // Written by CS thread
+    alignas(CACHE_LINE_SIZE)
+    std::atomic<uint64_t> m_lastSamplerStats = { 0u };
   };
 
 }