From 8eeff90e0af5f8d0838a5028d30e74bdeea643a6 Mon Sep 17 00:00:00 2001 From: Robin Kertels Date: Sat, 19 Jun 2021 02:49:24 +0200 Subject: [PATCH] [d3d9] Only use staging buffers for uploads once we've stalled on the resource --- src/d3d9/d3d9_common_buffer.h | 10 ++++++ src/d3d9/d3d9_common_texture.h | 7 ++++ src/d3d9/d3d9_device.cpp | 58 ++++++++++++++++++++++++---------- 3 files changed, 59 insertions(+), 16 deletions(-) diff --git a/src/d3d9/d3d9_common_buffer.h b/src/d3d9/d3d9_common_buffer.h index 94d61d2fb..e9b58793b 100644 --- a/src/d3d9/d3d9_common_buffer.h +++ b/src/d3d9/d3d9_common_buffer.h @@ -173,6 +173,15 @@ namespace dxvk { */ bool NeedsUpload() { return m_desc.Pool != D3DPOOL_DEFAULT && !m_dirtyRange.IsDegenerate(); } + bool DoesStagingBufferUploads() const { return m_uploadUsingStaging; } + + void EnableStagingBufferUploads() { + if (GetMapMode() != D3D9_COMMON_BUFFER_MAP_MODE_BUFFER) + return; + + m_uploadUsingStaging = true; + } + void PreLoad(); private: @@ -196,6 +205,7 @@ namespace dxvk { const D3D9_BUFFER_DESC m_desc; DWORD m_mapFlags; bool m_wasWrittenByGPU = false; + bool m_uploadUsingStaging = false; Rc m_buffer; Rc m_stagingBuffer; diff --git a/src/d3d9/d3d9_common_texture.h b/src/d3d9/d3d9_common_texture.h index 3767184bc..f40ecf9fe 100644 --- a/src/d3d9/d3d9_common_texture.h +++ b/src/d3d9/d3d9_common_texture.h @@ -358,6 +358,11 @@ namespace dxvk { bool NeedsUpload(UINT Subresource) const { return m_needsUpload.get(Subresource); } bool NeedsAnyUpload() { return m_needsUpload.any(); } void ClearNeedsUpload() { return m_needsUpload.clearAll(); } + bool DoesStagingBufferUploads(UINT Subresource) const { return m_uploadUsingStaging.get(Subresource); } + + void EnableStagingBufferUploads(UINT Subresource) { + m_uploadUsingStaging.set(Subresource, true); + } void SetNeedsMipGen(bool value) { m_needsMipGen = value; } bool NeedsMipGen() const { return m_needsMipGen; } @@ -442,6 +447,8 @@ namespace dxvk { D3D9SubresourceBitset m_needsUpload = { }; + D3D9SubresourceBitset m_uploadUsingStaging = { }; + DWORD m_exposedMipLevels = 0; bool m_needsMipGen = false; diff --git a/src/d3d9/d3d9_device.cpp b/src/d3d9/d3d9_device.cpp index 025ecb499..07671f54f 100644 --- a/src/d3d9/d3d9_device.cpp +++ b/src/d3d9/d3d9_device.cpp @@ -4114,12 +4114,17 @@ namespace dxvk { // calling app promises not to overwrite data that is in use // or is reading. Remember! This will only trigger for MANAGED resources // that cannot get affected by GPU, therefore readonly is A-OK for NOT waiting. - const bool skipWait = scratch || managed || (systemmem && !wasWrittenByGPU); + const bool usesStagingBuffer = pResource->DoesStagingBufferUploads(Subresource); + const bool skipWait = (scratch || managed || (systemmem && !wasWrittenByGPU)) + && (usesStagingBuffer || readOnly); if (alloced) { std::memset(physSlice.mapPtr, 0, physSlice.length); } else if (!skipWait) { + if (!(Flags & D3DLOCK_DONOTWAIT) && !WaitForResource(mappedBuffer, D3DLOCK_DONOTWAIT)) + pResource->EnableStagingBufferUploads(Subresource); + if (!WaitForResource(mappedBuffer, Flags)) return D3DERR_WASSTILLDRAWING; } @@ -4354,31 +4359,41 @@ namespace dxvk { scaledAlignedBoxExtent.height = std::min(texLevelExtent.height - scaledBoxOffset.y, scaledAlignedBoxExtent.height); scaledAlignedBoxExtent.depth = std::min(texLevelExtent.depth - scaledBoxOffset.z, scaledAlignedBoxExtent.depth); - VkDeviceSize dirtySize = scaledBoxExtentBlockCount.width * scaledBoxExtentBlockCount.height * scaledBoxExtentBlockCount.depth * formatInfo->elementSize; - D3D9BufferSlice slice = AllocTempBuffer(dirtySize); VkOffset3D boxOffsetBlockCount = util::computeBlockOffset(scaledBoxOffset, formatInfo->blockSize); VkDeviceSize copySrcOffset = (boxOffsetBlockCount.z * texLevelExtentBlockCount.height * texLevelExtentBlockCount.width + boxOffsetBlockCount.y * texLevelExtentBlockCount.width + boxOffsetBlockCount.x) * formatInfo->elementSize; - VkDeviceSize pitch = align(texLevelExtentBlockCount.width * formatInfo->elementSize, 4); - void* srcData = reinterpret_cast(srcSlice.mapPtr) + copySrcOffset; - util::packImageData( - slice.mapPtr, srcData, scaledBoxExtentBlockCount, formatInfo->elementSize, - pitch, pitch * texLevelExtentBlockCount.height); + VkDeviceSize rowAlignment = 0; + DxvkBufferSlice copySrcSlice; + if (pResource->DoesStagingBufferUploads(Subresource)) { + VkDeviceSize dirtySize = scaledBoxExtentBlockCount.width * scaledBoxExtentBlockCount.height * scaledBoxExtentBlockCount.depth * formatInfo->elementSize; + VkDeviceSize pitch = align(texLevelExtentBlockCount.width * formatInfo->elementSize, 4); + D3D9BufferSlice slice = AllocTempBuffer(dirtySize); + copySrcSlice = slice.slice; + void* srcData = reinterpret_cast(srcSlice.mapPtr) + copySrcOffset; + util::packImageData( + slice.mapPtr, srcData, scaledBoxExtentBlockCount, formatInfo->elementSize, + pitch, pitch * texLevelExtentBlockCount.height); + } else { + copySrcSlice = DxvkBufferSlice(pResource->GetBuffer(Subresource), copySrcOffset, srcSlice.length); + rowAlignment = 4; + } EmitCs([ - cSrcSlice = slice.slice, + cSrcSlice = std::move(copySrcSlice), cDstImage = image, cDstLayers = dstLayers, cDstLevelExtent = scaledAlignedBoxExtent, - cOffset = scaledBoxOffset + cOffset = scaledBoxOffset, + cRowAlignment = rowAlignment ] (DxvkContext* ctx) { ctx->copyBufferToImage( cDstImage, cDstLayers, cOffset, cDstLevelExtent, - cSrcSlice.buffer(), cSrcSlice.offset(), 0, 0); + cSrcSlice.buffer(), cSrcSlice.offset(), + cRowAlignment, 0); }); } else { @@ -4389,6 +4404,7 @@ namespace dxvk { // TODO: PLEASE CLEAN ME texLevelExtentBlockCount.height *= std::min(convertFormat.PlaneCount, 2u); + // the converter can not handle the 4 aligned pitch so we always repack into a staging buffer D3D9BufferSlice slice = AllocTempBuffer(srcSlice.length); VkDeviceSize pitch = align(texLevelExtentBlockCount.width * formatInfo->elementSize, 4); @@ -4508,8 +4524,12 @@ namespace dxvk { const bool readOnly = Flags & D3DLOCK_READONLY; const bool noOverlap = !pResource->GPUReadingRange().Overlaps(lockRange); const bool noOverwrite = Flags & D3DLOCK_NOOVERWRITE; - const bool skipWait = (!wasWrittenByGPU && (readOnly || noOverlap)) || noOverwrite; + const bool usesStagingBuffer = pResource->DoesStagingBufferUploads(); + const bool skipWait = (!wasWrittenByGPU && (usesStagingBuffer || readOnly || noOverlap)) || noOverwrite; if (!skipWait) { + if (!(Flags & D3DLOCK_DONOTWAIT) && !WaitForResource(mappingBuffer, D3DLOCK_DONOTWAIT)) + pResource->EnableStagingBufferUploads(); + if (!WaitForResource(mappingBuffer, Flags)) return D3DERR_WASSTILLDRAWING; @@ -4545,13 +4565,19 @@ namespace dxvk { D3D9Range& range = pResource->DirtyRange(); - D3D9BufferSlice slice = AllocTempBuffer(range.max - range.min); - void* srcData = reinterpret_cast(srcSlice.mapPtr) + range.min; - memcpy(slice.mapPtr, srcData, range.max - range.min); + DxvkBufferSlice copySrcSlice; + if (pResource->DoesStagingBufferUploads()) { + D3D9BufferSlice slice = AllocTempBuffer(range.max - range.min); + copySrcSlice = slice.slice; + void* srcData = reinterpret_cast(srcSlice.mapPtr) + range.min; + memcpy(slice.mapPtr, srcData, range.max - range.min); + } else { + copySrcSlice = DxvkBufferSlice(pResource->GetBuffer(), range.min, range.max - range.min); + } EmitCs([ cDstSlice = dstBuffer, - cSrcSlice = slice.slice, + cSrcSlice = copySrcSlice, cDstOffset = range.min, cLength = range.max - range.min ] (DxvkContext* ctx) {