diff --git a/src/d3d9/d3d9_device.cpp b/src/d3d9/d3d9_device.cpp
index 561a6436..fe8d16f0 100644
--- a/src/d3d9/d3d9_device.cpp
+++ b/src/d3d9/d3d9_device.cpp
@@ -4018,12 +4018,91 @@ namespace dxvk {
 
 
   D3D9BufferSlice D3D9DeviceEx::AllocStagingBuffer(VkDeviceSize size) {
+    m_stagingBufferAllocated += size;
+
     D3D9BufferSlice result;
     result.slice = m_stagingBuffer.alloc(256, size);
     result.mapPtr = result.slice.mapPtr(0);
     return result;
   }
 
+
+  void D3D9DeviceEx::EmitStagingBufferMarker() {
+    if (m_stagingBufferLastAllocated == m_stagingBufferAllocated)
+      return;
+
+    D3D9StagingBufferMarkerPayload payload;
+    payload.sequenceNumber = GetCurrentSequenceNumber();
+    payload.allocated = m_stagingBufferAllocated;
+    m_stagingBufferLastAllocated = m_stagingBufferAllocated;
+
+    Rc<D3D9StagingBufferMarker> marker = new D3D9StagingBufferMarker(payload);
+    m_stagingBufferMarkers.push(marker);
+
+    EmitCs([
+      cMarker = std::move(marker)
+    ] (DxvkContext* ctx) {
+      ctx->insertMarker(cMarker);
+    });
+  }
+
+
+  void D3D9DeviceEx::WaitStagingBuffer() {
+    // The number below is not a hard limit, however we can be reasonably
+    // sure that there will never be more than two additional staging buffers
+    // in flight in addition to the number of staging buffers specified here.
+    constexpr VkDeviceSize maxStagingMemoryInFlight = env::is32BitHostPlatform()
+      ? StagingBufferSize * 4
+      : StagingBufferSize * 16;
+
+    // If the game uploads a significant amount of data at once, it's
+    // possible that we exceed the limit while the queue is empty. In
+    // that case, enforce a flush early to populate the marker queue.
+    bool didFlush = false;
+
+    if (m_stagingBufferLastSignaled + maxStagingMemoryInFlight < m_stagingBufferAllocated
+     && m_stagingBufferMarkers.empty()) {
+      Flush();
+      didFlush = true;
+    }
+
+    // Process the marker queue. We'll remove as many markers as we
+    // can without stalling, and will stall until we're below the
+    // allocation limit again.
+    uint64_t lastSequenceNumber = m_csThread.lastSequenceNumber();
+
+    while (!m_stagingBufferMarkers.empty()) {
+      const auto& marker = m_stagingBufferMarkers.front();
+      const auto& payload = marker->payload();
+
+      bool needsStall = m_stagingBufferLastSignaled + maxStagingMemoryInFlight < m_stagingBufferAllocated;
+
+      if (payload.sequenceNumber > lastSequenceNumber) {
+        if (!needsStall)
+          break;
+
+        m_csThread.synchronize(payload.sequenceNumber);
+        lastSequenceNumber = payload.sequenceNumber;
+      }
+
+      if (marker->isInUse(DxvkAccess::Read)) {
+        if (!needsStall)
+          break;
+
+        if (!didFlush) {
+          Flush();
+          didFlush = true;
+        }
+
+        m_dxvkDevice->waitForResource(marker, DxvkAccess::Read);
+      }
+
+      m_stagingBufferLastSignaled = marker->payload().allocated;
+      m_stagingBufferMarkers.pop();
+    }
+  }
+
+
   bool D3D9DeviceEx::ShouldRecord() {
     return m_recorder != nullptr && !m_recorder->IsApplying();
   }
@@ -4419,8 +4498,9 @@ namespace dxvk {
           UINT SrcSubresource,
           VkOffset3D SrcOffset,
           VkExtent3D SrcExtent,
-          VkOffset3D DestOffset
-  ) {
+          VkOffset3D DestOffset) {
+    WaitStagingBuffer();
+
     const Rc<DxvkImage> image = pDestTexture->GetImage();
 
     // Now that data has been written into the buffer,
@@ -4662,6 +4742,8 @@ namespace dxvk {
 
   HRESULT D3D9DeviceEx::FlushBuffer(
           D3D9CommonBuffer* pResource) {
+    WaitStagingBuffer();
+
     auto dstBuffer = pResource->GetBufferSlice();
     auto srcSlice = pResource->GetMappedSlice();
 
@@ -5096,6 +5178,8 @@ namespace dxvk {
     m_initializer->Flush();
     m_converter->Flush();
 
+    EmitStagingBufferMarker();
+
     if (m_csIsBusy || !m_csChunk->empty()) {
       // Add commands to flush the threaded
       // context, then flush the command list
diff --git a/src/d3d9/d3d9_device.h b/src/d3d9/d3d9_device.h
index 0df0496f..6486f367 100644
--- a/src/d3d9/d3d9_device.h
+++ b/src/d3d9/d3d9_device.h
@@ -96,6 +96,13 @@ namespace dxvk {
     void* mapPtr = nullptr;
   };
 
+  struct D3D9StagingBufferMarkerPayload {
+    uint64_t sequenceNumber;
+    VkDeviceSize allocated;
+  };
+
+  using D3D9StagingBufferMarker = DxvkMarker<D3D9StagingBufferMarkerPayload>;
+
   class D3D9DeviceEx final : public ComObjectClamp<IDirect3DDevice9Ex> {
     constexpr static uint32_t DefaultFrameLatency = 3;
     constexpr static uint32_t MaxFrameLatency = 20;
@@ -977,6 +984,10 @@ namespace dxvk {
 
     D3D9BufferSlice AllocStagingBuffer(VkDeviceSize size);
 
+    void EmitStagingBufferMarker();
+
+    void WaitStagingBuffer();
+
     bool ShouldRecord();
 
     HRESULT CreateShaderModule(
@@ -1188,6 +1199,10 @@ namespace dxvk {
     void* m_upBufferMapPtr = nullptr;
 
     DxvkStagingBuffer m_stagingBuffer;
+    VkDeviceSize m_stagingBufferAllocated = 0ull;
+    VkDeviceSize m_stagingBufferLastAllocated = 0ull;
+    VkDeviceSize m_stagingBufferLastSignaled = 0ull;
+    std::queue<Rc<D3D9StagingBufferMarker>> m_stagingBufferMarkers;
 
     D3D9Cursor m_cursor;
 