From 4261ff6ec1aa7efdc6d7bf6a5b9434c2eb6cc184 Mon Sep 17 00:00:00 2001 From: Robin Kertels Date: Fri, 2 Apr 2021 01:37:33 +0200 Subject: [PATCH] [d3d9] Use staging buffer for managed copies --- src/d3d9/d3d9_device.cpp | 127 ++++++++++++++++++------------- src/d3d9/d3d9_format_helpers.cpp | 18 +++-- src/d3d9/d3d9_format_helpers.h | 4 +- 3 files changed, 89 insertions(+), 60 deletions(-) diff --git a/src/d3d9/d3d9_device.cpp b/src/d3d9/d3d9_device.cpp index 6602417a8..4d64516f8 100644 --- a/src/d3d9/d3d9_device.cpp +++ b/src/d3d9/d3d9_device.cpp @@ -51,7 +51,7 @@ namespace dxvk { , m_d3d9Options ( dxvkDevice, pParent->GetInstance()->config() ) , m_isSWVP ( (BehaviorFlags & D3DCREATE_SOFTWARE_VERTEXPROCESSING) ? TRUE : FALSE ) { // If we can SWVP, then we use an extended constant set - // as SWVP has many more slots available than HWVP. + // as SWVP has many more slots available than HWVP. bool canSWVP = CanSWVP(); DetermineConstantLayouts(canSWVP); @@ -656,6 +656,8 @@ namespace dxvk { VkOffset3D srcBlockOffset = { 0u, 0u, 0u }; VkOffset3D dstOffset = { 0u, 0u, 0u }; + VkExtent3D texLevelExtent = srcTextureInfo->GetExtentMip(src->GetSubresource()); + VkExtent3D texLevelBlockCount = util::computeBlockCount(texLevelExtent, formatInfo->blockSize); VkExtent3D copyExtent = srcTextureInfo->GetExtentMip(src->GetSubresource()); @@ -678,7 +680,11 @@ namespace dxvk { const auto dstSubresource = vk::makeSubresourceLayers( dstTextureInfo->GetSubresourceFromIndex(VK_IMAGE_ASPECT_COLOR_BIT, dst->GetSubresource())); - Rc srcBuffer = srcTextureInfo->GetBuffer(src->GetSubresource()); + DxvkBufferSliceHandle srcSlice = srcTextureInfo->GetMappedSlice(src->GetSubresource()); + D3D9BufferSlice slice = AllocTempBuffer(srcSlice.length); + util::packImageData( + slice.mapPtr, srcSlice.mapPtr, texLevelBlockCount, formatInfo->elementSize, + texLevelBlockCount.width * formatInfo->elementSize, texLevelBlockCount.width * texLevelBlockCount.height * formatInfo->elementSize); Rc dstImage = dstTextureInfo->GetImage(); VkExtent3D levelExtent = srcTextureInfo->GetExtentMip(src->GetSubresource()); @@ -692,7 +698,7 @@ namespace dxvk { EmitCs([ cDstImage = std::move(dstImage), - cSrcBuffer = std::move(srcBuffer), + cSrcSlice = slice.slice, cDstLayers = dstSubresource, cDstOffset = dstOffset, cSrcOffset = srcByteOffset, @@ -701,7 +707,7 @@ namespace dxvk { ] (DxvkContext* ctx) { ctx->copyBufferToImage( cDstImage, cDstLayers, cDstOffset, cCopyExtent, - cSrcBuffer, cSrcOffset, + cSrcSlice.buffer(), cSrcSlice.offset() + cSrcOffset, cSrcExtent); }); @@ -745,7 +751,6 @@ namespace dxvk { continue; for (uint32_t m = 0; m < mipLevels; m++) { - Rc srcBuffer = srcTexInfo->GetBuffer(srcTexInfo->CalcSubresource(a, m)); VkImageSubresourceLayers dstLayers = { VK_IMAGE_ASPECT_COLOR_BIT, m, a, 1 }; VkOffset3D scaledBoxOffset = { @@ -770,9 +775,19 @@ namespace dxvk { VkExtent2D srcExtent = VkExtent2D{ texLevelExtentBlockCount.width * formatInfo->blockSize.width, texLevelExtentBlockCount.height * formatInfo->blockSize.height }; + scaledAlignedBoxExtent.width = std::min(texLevelExtent.width, scaledAlignedBoxExtent.width); + scaledAlignedBoxExtent.height = std::min(texLevelExtent.height, scaledAlignedBoxExtent.height); + scaledAlignedBoxExtent.depth = std::min(texLevelExtent.depth, scaledAlignedBoxExtent.depth); + + DxvkBufferSliceHandle srcSlice = srcTexInfo->GetMappedSlice(srcTexInfo->CalcSubresource(a, m)); + D3D9BufferSlice slice = AllocTempBuffer(srcSlice.length); + util::packImageData( + slice.mapPtr, srcSlice.mapPtr, texLevelExtentBlockCount, formatInfo->elementSize, + texLevelExtentBlockCount.width * formatInfo->elementSize, texLevelExtentBlockCount.width * texLevelExtentBlockCount.height * formatInfo->elementSize); + EmitCs([ cDstImage = dstImage, - cSrcBuffer = srcBuffer, + cSrcSlice = slice.slice, cDstLayers = dstLayers, cExtent = scaledAlignedBoxExtent, cOffset = scaledBoxOffset, @@ -782,7 +797,7 @@ namespace dxvk { ctx->copyBufferToImage( cDstImage, cDstLayers, cOffset, cExtent, - cSrcBuffer, cSrcOffset, + cSrcSlice.buffer(), cSrcSlice.offset() + cSrcOffset, cSrcExtent); }); @@ -964,7 +979,7 @@ namespace dxvk { if (unlikely(IsBlitRegionInvalid(blitInfo.dstOffsets, dstExtent))) return D3DERR_INVALIDCALL; - + VkExtent3D srcCopyExtent = { uint32_t(blitInfo.srcOffsets[1].x - blitInfo.srcOffsets[0].x), uint32_t(blitInfo.srcOffsets[1].y - blitInfo.srcOffsets[0].y), @@ -1676,9 +1691,9 @@ namespace dxvk { if (unlikely(ShouldRecord())) return m_recorder->SetClipPlane(Index, pPlane); - + bool dirty = false; - + for (uint32_t i = 0; i < 4; i++) { dirty |= m_state.clipPlanes[Index].coeff[i] != pPlane[i]; m_state.clipPlanes[Index].coeff[i] = pPlane[i]; @@ -1686,10 +1701,10 @@ namespace dxvk { bool enabled = m_state.renderStates[D3DRS_CLIPPLANEENABLE] & (1u << Index); dirty &= enabled; - + if (dirty) m_flags.set(D3D9DeviceFlag::DirtyClipPlanes); - + return D3D_OK; } @@ -1699,10 +1714,10 @@ namespace dxvk { if (unlikely(Index >= caps::MaxClipPlanes || !pPlane)) return D3DERR_INVALIDCALL; - + for (uint32_t i = 0; i < 4; i++) pPlane[i] = m_state.clipPlanes[Index].coeff[i]; - + return D3D_OK; } @@ -1819,7 +1834,7 @@ namespace dxvk { UpdateActiveRTs(3); m_flags.set(D3D9DeviceFlag::DirtyBlendState); break; - + case D3DRS_ALPHATESTENABLE: { bool newATOC = IsAlphaToCoverageEnabled(); bool newAlphaTest = IsAlphaTestEnabled(); @@ -2546,7 +2561,7 @@ namespace dxvk { // We unbound the pixel shader before, // let's make sure that gets rebound. m_flags.set(D3D9DeviceFlag::DirtyFFPixelShader); - + if (m_state.pixelShader != nullptr) { BindShader( GetCommonShader(m_state.pixelShader), @@ -2911,7 +2926,7 @@ namespace dxvk { if (likely(pStride != nullptr)) *pStride = 0; - + if (unlikely(ppStreamData == nullptr || pOffsetInBytes == nullptr || pStride == nullptr)) return D3DERR_INVALIDCALL; @@ -3619,7 +3634,7 @@ namespace dxvk { DWORD newUsage = newTexture != nullptr ? newTexture->Desc()->Usage : 0; DWORD combinedUsage = oldUsage | newUsage; - + TextureChangePrivate(m_state.textures[StateSampler], pTexture); BindTexture(StateSampler); @@ -3991,7 +4006,7 @@ namespace dxvk { VkExtent3D levelExtent = pResource->GetExtentMip(MipLevel); VkExtent3D blockCount = util::computeBlockCount(levelExtent, formatInfo->blockSize); - + const bool systemmem = desc.Pool == D3DPOOL_SYSTEMMEM; const bool managed = IsPoolManaged(desc.Pool); const bool scratch = desc.Pool == D3DPOOL_SCRATCH; @@ -4276,11 +4291,11 @@ namespace dxvk { HRESULT D3D9DeviceEx::FlushImage( D3D9CommonTexture* pResource, UINT Subresource) { - const Rc image = pResource->GetImage(); + const Rc image = pResource->GetImage(); // Now that data has been written into the buffer, // we need to copy its contents into the image - const Rc copyBuffer = pResource->GetBuffer(Subresource); + const DxvkBufferSliceHandle srcSlice = pResource->GetMappedSlice(Subresource); auto formatInfo = imageFormatInfo(image->info().format); auto subresource = pResource->GetSubresourceFromIndex( @@ -4297,25 +4312,35 @@ namespace dxvk { auto convertFormat = pResource->GetFormatMapping().ConversionFormatInfo; if (likely(convertFormat.FormatType == D3D9ConversionFormat_None)) { + VkExtent3D texLevelExtentBlockCount = util::computeBlockCount(levelExtent, formatInfo->blockSize); + D3D9BufferSlice slice = AllocTempBuffer(srcSlice.length); + util::packImageData( + slice.mapPtr, srcSlice.mapPtr, texLevelExtentBlockCount, formatInfo->elementSize, + texLevelExtentBlockCount.width * formatInfo->elementSize, texLevelExtentBlockCount.width * texLevelExtentBlockCount.height * formatInfo->elementSize); EmitCs([ - cSrcBuffer = copyBuffer, + cSrcSlice = slice.slice, cDstImage = image, cDstLayers = subresourceLayers, cDstLevelExtent = levelExtent ] (DxvkContext* ctx) { ctx->copyBufferToImage(cDstImage, cDstLayers, VkOffset3D{ 0, 0, 0 }, cDstLevelExtent, - cSrcBuffer, 0, { 0u, 0u }); + cSrcSlice.buffer(), cSrcSlice.offset(), + { 0u, 0u }); }); - } + } else { + D3D9BufferSlice slice = AllocTempBuffer(srcSlice.length); + memcpy(slice.mapPtr, srcSlice.mapPtr, srcSlice.length); + Flush(); SynchronizeCsThread(); m_converter->ConvertFormat( convertFormat, image, subresourceLayers, - copyBuffer); + slice.slice.buffer(), + slice.slice.offset()); } if (pResource->IsAutomaticMip()) @@ -4681,7 +4706,7 @@ namespace dxvk { DxsoProgramType::PixelShader, DxsoConstantBuffers::PSConstantBuffer); - m_vsClipPlanes = + m_vsClipPlanes = CreateConstantBuffer(false, caps::MaxClipPlanes * sizeof(D3D9ClipPlane), DxsoProgramType::VertexShader, @@ -4790,16 +4815,16 @@ namespace dxvk { void D3D9DeviceEx::UpdateClipPlanes() { m_flags.clr(D3D9DeviceFlag::DirtyClipPlanes); - + auto slice = m_vsClipPlanes->allocSlice(); auto dst = reinterpret_cast(slice.mapPtr); - + for (uint32_t i = 0; i < caps::MaxClipPlanes; i++) { dst[i] = (m_state.renderStates[D3DRS_CLIPPLANEENABLE] & (1 << i)) ? m_state.clipPlanes[i] : D3D9ClipPlane(); } - + EmitCs([ cBuffer = m_vsClipPlanes, cSlice = slice @@ -4881,7 +4906,7 @@ namespace dxvk { else Logger::warn("D3D9: Invalid push constant set to update."); } - + void D3D9DeviceEx::Flush() { @@ -5050,7 +5075,7 @@ namespace dxvk { m_activeTexturesToGen &= ~mask; } - + void D3D9DeviceEx::MarkTextureMipsDirty(D3D9CommonTexture* pResource) { pResource->SetNeedsMipGen(true); pResource->MarkAllWrittenByGPU(); @@ -5526,13 +5551,13 @@ namespace dxvk { void D3D9DeviceEx::BindAlphaTestState() { m_flags.clr(D3D9DeviceFlag::DirtyAlphaTestState); - + auto& rs = m_state.renderStates; - + VkCompareOp alphaOp = IsAlphaTestEnabled() ? DecodeCompareOp(D3DCMPFUNC(rs[D3DRS_ALPHAFUNC])) : VK_COMPARE_OP_ALWAYS; - + EmitCs([cAlphaOp = alphaOp] (DxvkContext* ctx) { ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::AlphaTestEnable, cAlphaOp != VK_COMPARE_OP_ALWAYS); ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, D3D9SpecConstantId::AlphaCompareOp, cAlphaOp); @@ -5715,7 +5740,7 @@ namespace dxvk { void D3D9DeviceEx::UndirtySamplers() { for (uint32_t dirty = m_dirtySamplerStates; dirty; dirty &= dirty - 1) BindSampler(bit::tzcnt(dirty)); - + m_dirtySamplerStates = 0; } @@ -5793,7 +5818,7 @@ namespace dxvk { if (m_flags.test(D3D9DeviceFlag::DirtyBlendState)) BindBlendState(); - + if (m_flags.test(D3D9DeviceFlag::DirtyDepthStencilState)) BindDepthStencilState(); @@ -5802,13 +5827,13 @@ namespace dxvk { if (m_flags.test(D3D9DeviceFlag::DirtyDepthBias)) BindDepthBias(); - + if (m_flags.test(D3D9DeviceFlag::DirtyMultiSampleState)) BindMultiSampleState(); if (m_flags.test(D3D9DeviceFlag::DirtyAlphaTestState)) BindAlphaTestState(); - + if (m_flags.test(D3D9DeviceFlag::DirtyClipPlanes)) UpdateClipPlanes(); @@ -6039,7 +6064,7 @@ namespace dxvk { // out attributes and bindings not used by the shader uint32_t attrCount = CompactSparseList(attrList.data(), attrMask); uint32_t bindCount = CompactSparseList(bindList.data(), bindMask); - + ctx->setInputLayout( attrCount, attrList.data(), bindCount, bindList.data()); @@ -6055,8 +6080,8 @@ namespace dxvk { UINT Stride) { EmitCs([ cSlotId = Slot, - cBufferSlice = pBuffer != nullptr ? - pBuffer->GetCommonBuffer()->GetBufferSlice(Offset) + cBufferSlice = pBuffer != nullptr ? + pBuffer->GetCommonBuffer()->GetBufferSlice(Offset) : DxvkBufferSlice(), cStride = pBuffer != nullptr ? Stride : 0 ] (DxvkContext* ctx) { @@ -6358,7 +6383,7 @@ namespace dxvk { data->TexcoordMatrices[i] = m_state.transforms[GetTransformIndex(D3DTS_TEXTURE0) + i]; data->ViewportInfo = m_viewportInfo; - + DecodeD3DCOLOR(m_state.renderStates[D3DRS_AMBIENT], data->GlobalAmbient.data); uint32_t lightIdx = 0; @@ -6415,7 +6440,7 @@ namespace dxvk { return 0b100u; // Arg 2 case D3DTOP_MULTIPLYADD: case D3DTOP_LERP: - return 0b111u; // Arg 0, 1, 2 + return 0b111u; // Arg 0, 1, 2 default: return 0b110u; // Arg 1, 2 } @@ -6609,23 +6634,23 @@ namespace dxvk { const D3D9_VK_FORMAT_MAPPING srcFormatInfo = LookupFormat(srcDesc->Format); const D3D9_VK_FORMAT_MAPPING dstFormatInfo = LookupFormat(dstDesc->Format); - + auto srcVulkanFormatInfo = imageFormatInfo(srcFormatInfo.FormatColor); auto dstVulkanFormatInfo = imageFormatInfo(dstFormatInfo.FormatColor); - + const VkImageSubresource dstSubresource = dstTextureInfo->GetSubresourceFromIndex( dstVulkanFormatInfo->aspectMask, 0); - + const VkImageSubresource srcSubresource = srcTextureInfo->GetSubresourceFromIndex( srcVulkanFormatInfo->aspectMask, src->GetSubresource()); - + const VkImageSubresourceLayers dstSubresourceLayers = { dstSubresource.aspectMask, dstSubresource.mipLevel, dstSubresource.arrayLayer, 1 }; - + const VkImageSubresourceLayers srcSubresourceLayers = { srcSubresource.aspectMask, srcSubresource.mipLevel, @@ -6646,7 +6671,7 @@ namespace dxvk { cSrcImage, cSrcLayers, VkOffset3D { 0, 0, 0 }, cDstImage->mipLevelExtent(cDstLayers.mipLevel)); }); - } else { + } else { EmitCs([ cDstImage = dstTextureInfo->GetImage(), cSrcImage = srcTextureInfo->GetImage(), @@ -6701,7 +6726,7 @@ namespace dxvk { }); } - + HRESULT D3D9DeviceEx::ResetState(D3DPRESENT_PARAMETERS* pPresentationParameters) { if (!pPresentationParameters->EnableAutoDepthStencil) SetDepthStencilSurface(nullptr); @@ -6770,7 +6795,7 @@ namespace dxvk { rs[D3DRS_TEXTUREFACTOR] = 0xffffffff; m_flags.set(D3D9DeviceFlag::DirtyFFPixelData); - + rs[D3DRS_DIFFUSEMATERIALSOURCE] = D3DMCS_COLOR1; rs[D3DRS_SPECULARMATERIALSOURCE] = D3DMCS_COLOR2; rs[D3DRS_AMBIENTMATERIALSOURCE] = D3DMCS_MATERIAL; diff --git a/src/d3d9/d3d9_format_helpers.cpp b/src/d3d9/d3d9_format_helpers.cpp index 77a1eecb5..9c581e236 100644 --- a/src/d3d9/d3d9_format_helpers.cpp +++ b/src/d3d9/d3d9_format_helpers.cpp @@ -28,33 +28,34 @@ namespace dxvk { D3D9_CONVERSION_FORMAT_INFO conversionFormat, const Rc& dstImage, VkImageSubresourceLayers dstSubresource, - const Rc& srcBuffer) { + const Rc& srcBuffer, + uint32_t srcBufferOffset) { switch (conversionFormat.FormatType) { case D3D9ConversionFormat_YUY2: case D3D9ConversionFormat_UYVY: { uint32_t specConstant = conversionFormat.FormatType == D3D9ConversionFormat_UYVY ? 1 : 0; - ConvertGenericFormat(conversionFormat, dstImage, dstSubresource, srcBuffer, VK_FORMAT_R32_UINT, specConstant, { 2u, 1u }); + ConvertGenericFormat(conversionFormat, dstImage, dstSubresource, srcBuffer, srcBufferOffset, VK_FORMAT_R32_UINT, specConstant, { 2u, 1u }); break; } case D3D9ConversionFormat_NV12: - ConvertGenericFormat(conversionFormat, dstImage, dstSubresource, srcBuffer, VK_FORMAT_R16_UINT, 0, { 2u, 1u }); + ConvertGenericFormat(conversionFormat, dstImage, dstSubresource, srcBuffer, srcBufferOffset, VK_FORMAT_R16_UINT, 0, { 2u, 1u }); break; case D3D9ConversionFormat_YV12: - ConvertGenericFormat(conversionFormat, dstImage, dstSubresource, srcBuffer, VK_FORMAT_R8_UINT, 0, { 1u, 1u }); + ConvertGenericFormat(conversionFormat, dstImage, dstSubresource, srcBuffer, srcBufferOffset, VK_FORMAT_R8_UINT, 0, { 1u, 1u }); break; case D3D9ConversionFormat_L6V5U5: - ConvertGenericFormat(conversionFormat, dstImage, dstSubresource, srcBuffer, VK_FORMAT_R16_UINT, 0, { 1u, 1u }); + ConvertGenericFormat(conversionFormat, dstImage, dstSubresource, srcBuffer, srcBufferOffset, VK_FORMAT_R16_UINT, 0, { 1u, 1u }); break; case D3D9ConversionFormat_X8L8V8U8: - ConvertGenericFormat(conversionFormat, dstImage, dstSubresource, srcBuffer, VK_FORMAT_R32_UINT, 0, { 1u, 1u }); + ConvertGenericFormat(conversionFormat, dstImage, dstSubresource, srcBuffer, srcBufferOffset, VK_FORMAT_R32_UINT, 0, { 1u, 1u }); break; case D3D9ConversionFormat_A2W10V10U10: - ConvertGenericFormat(conversionFormat, dstImage, dstSubresource, srcBuffer, VK_FORMAT_R32_UINT, 0, { 1u, 1u }); + ConvertGenericFormat(conversionFormat, dstImage, dstSubresource, srcBuffer, srcBufferOffset, VK_FORMAT_R32_UINT, 0, { 1u, 1u }); break; default: @@ -68,6 +69,7 @@ namespace dxvk { const Rc& dstImage, VkImageSubresourceLayers dstSubresource, const Rc& srcBuffer, + uint32_t srcBufferOffset, VkFormat bufferFormat, uint32_t specConstantValue, VkExtent2D macroPixelRun) { @@ -89,7 +91,7 @@ namespace dxvk { DxvkBufferViewCreateInfo bufferViewInfo; bufferViewInfo.format = bufferFormat; - bufferViewInfo.rangeOffset = 0; + bufferViewInfo.rangeOffset = srcBufferOffset; bufferViewInfo.rangeLength = srcBuffer->info().size; auto tmpBufferView = m_device->createBufferView(srcBuffer, bufferViewInfo); diff --git a/src/d3d9/d3d9_format_helpers.h b/src/d3d9/d3d9_format_helpers.h index 08019842d..3563e9fdb 100644 --- a/src/d3d9/d3d9_format_helpers.h +++ b/src/d3d9/d3d9_format_helpers.h @@ -19,7 +19,8 @@ namespace dxvk { D3D9_CONVERSION_FORMAT_INFO conversionFormat, const Rc& dstImage, VkImageSubresourceLayers dstSubresource, - const Rc& srcBuffer); + const Rc& srcBuffer, + uint32_t srcBufferOffset); private: @@ -28,6 +29,7 @@ namespace dxvk { const Rc& dstImage, VkImageSubresourceLayers dstSubresource, const Rc& srcBuffer, + uint32_t srcBufferOffset, VkFormat bufferFormat, uint32_t specConstantValue, VkExtent2D macroPixelRun);