From ae68e3a5bc450512034694344360869eff4204fb Mon Sep 17 00:00:00 2001 From: Joshua Ashton Date: Sat, 11 Jan 2020 04:12:59 +0000 Subject: [PATCH] [d3d9] Defer managed texture uploads until PrepareDraw and when needed This also caches shader masks used for hazard tracking. --- src/d3d9/d3d9_common_texture.h | 12 +++++ src/d3d9/d3d9_device.cpp | 99 +++++++++++++++++++++++++--------- src/d3d9/d3d9_device.h | 11 ++-- src/d3d9/d3d9_shader.cpp | 7 +++ src/d3d9/d3d9_util.h | 3 ++ 5 files changed, 105 insertions(+), 27 deletions(-) diff --git a/src/d3d9/d3d9_common_texture.h b/src/d3d9/d3d9_common_texture.h index fe046844a..5f9aaae02 100644 --- a/src/d3d9/d3d9_common_texture.h +++ b/src/d3d9/d3d9_common_texture.h @@ -357,6 +357,15 @@ namespace dxvk { UINT Lod, VkImageUsageFlags UsageFlags, bool Srgb); + D3D9SubresourceBitset& GetUploadBitmask() { return m_needsUpload; } + + void SetUploading(UINT Subresource, bool uploading) { m_uploading.set(Subresource, uploading); } + void ClearUploading() { m_uploading.clearAll(); } + bool GetUploading(UINT Subresource) const { return m_uploading.get(Subresource); } + + void SetNeedsUpload(UINT Subresource, bool upload) { m_needsUpload.set(Subresource, upload); } + bool NeedsAnyUpload() { return m_needsUpload.any(); } + void ClearNeedsUpload() { return m_needsUpload.clearAll(); } private: @@ -392,6 +401,9 @@ namespace dxvk { D3D9SubresourceBitset m_dirty = { }; + D3D9SubresourceBitset m_uploading = { }; + D3D9SubresourceBitset m_needsUpload = { }; + /** * \brief Mip level * \returns Size of packed mip level in bytes diff --git a/src/d3d9/d3d9_device.cpp b/src/d3d9/d3d9_device.cpp index e506a6fec..39a15bbc4 100644 --- a/src/d3d9/d3d9_device.cpp +++ b/src/d3d9/d3d9_device.cpp @@ -2662,7 +2662,11 @@ namespace dxvk { BindShader( GetCommonShader(shader), GetVertexShaderPermutation()); + + m_vsShaderMasks = newShader->GetShaderMask(); } + else + m_vsShaderMasks = D3D9ShaderMasks(); m_flags.set(D3D9DeviceFlag::DirtyInputLayout); @@ -2987,6 +2991,15 @@ namespace dxvk { BindShader( GetCommonShader(shader), GetPixelShaderPermutation()); + + m_psShaderMasks = newShader->GetShaderMask(); + } + else { + // TODO: What fixed function textures are in use? + // Currently we are making all 8 of them as in use here. + + // The RT output is always 0 for fixed function. + m_psShaderMasks = FixedFunctionMask; } UpdateActiveHazards(); @@ -3498,9 +3511,7 @@ namespace dxvk { BindTexture(StateSampler); - // We only care about PS samplers - if (likely(StateSampler <= caps::MaxSamplers)) - UpdateActiveRTTextures(StateSampler); + UpdateActiveTextures(StateSampler); return D3D_OK; } @@ -3902,14 +3913,17 @@ namespace dxvk { // calling app promises not to overwrite data that is in use // or is reading. Remember! This will only trigger for MANAGED resources // that cannot get affected by GPU, therefore readonly is A-OK for NOT waiting. + const bool uploading = pResource->GetUploading(Subresource); const bool readOnly = Flags & D3DLOCK_READONLY; - const bool skipWait = (readOnly && managed) || scratch || (readOnly && systemmem && !dirty); + const bool skipWait = (managed && !uploading) || (readOnly && managed) || scratch || (readOnly && systemmem && !dirty); if (alloced) std::memset(physSlice.mapPtr, 0, physSlice.length); else if (!skipWait) { if (!WaitForResource(mappedBuffer, Flags)) return D3DERR_WASSTILLDRAWING; + + pResource->ClearUploading(); } } else { @@ -4036,7 +4050,22 @@ namespace dxvk { // Do we have a pending copy? if (!pResource->GetReadOnlyLocked(Subresource)) { // Only flush buffer -> image if we actually have an image - if (pResource->GetMapMode() == D3D9_COMMON_TEXTURE_MAP_MODE_BACKED) + if (pResource->IsManaged()) { + pResource->SetNeedsUpload(Subresource, true); + + for (uint32_t tex = m_activeTextures; tex; tex &= tex - 1) { + // Guaranteed to not be nullptr... + const uint32_t i = bit::tzcnt(tex); + auto texInfo = GetCommonTexture(m_state.textures[i]); + + if (texInfo == pResource) { + m_activeTexturesToUpload |= 1 << i; + // We can early out here, no need to add another index for this. + break; + } + } + } + else if (pResource->GetMapMode() == D3D9_COMMON_TEXTURE_MAP_MODE_BACKED) this->FlushImage(pResource, Subresource); } @@ -4075,6 +4104,8 @@ namespace dxvk { auto convertFormat = pResource->GetFormatMapping().ConversionFormatInfo; + pResource->SetUploading(Subresource, true); + if (likely(convertFormat.FormatType == D3D9ConversionFormat_None)) { EmitCs([ cSrcBuffer = copyBuffer, @@ -4647,20 +4678,6 @@ namespace dxvk { } - inline D3D9ShaderMasks D3D9DeviceEx::GetShaderMasks() { - const auto* shader = GetCommonShader(m_state.pixelShader); - - if (likely(shader != nullptr)) - return shader->GetShaderMask(); - - // TODO: What fixed function textures are in use? - // Currently we are making all 8 of them as in use here. - - // The RT output is always 0 for fixed function. - return D3D9ShaderMasks{ 0b1111111, 0b1 }; - } - - inline void D3D9DeviceEx::UpdateActiveRTs(uint32_t index) { const uint32_t bit = 1 << index; @@ -4675,21 +4692,30 @@ namespace dxvk { } - inline void D3D9DeviceEx::UpdateActiveRTTextures(uint32_t index) { + inline void D3D9DeviceEx::UpdateActiveTextures(uint32_t index) { const uint32_t bit = 1 << index; - m_activeRTTextures &= ~bit; + m_activeRTTextures &= ~bit; + m_activeTextures &= ~bit; + m_activeTexturesToUpload &= ~bit; auto tex = GetCommonTexture(m_state.textures[index]); - if (tex != nullptr && tex->IsRenderTarget()) - m_activeRTTextures |= bit; + if (tex != nullptr) { + m_activeTextures |= bit; + + if (unlikely(tex->IsRenderTarget())) + m_activeRTTextures |= bit; + + if (unlikely(tex->NeedsAnyUpload())) + m_activeTexturesToUpload |= bit; + } UpdateActiveHazards(); } inline void D3D9DeviceEx::UpdateActiveHazards() { - auto masks = GetShaderMasks(); + auto masks = m_psShaderMasks; masks.rtMask &= m_activeRTs; masks.samplerMask &= m_activeRTTextures; @@ -4727,6 +4753,26 @@ namespace dxvk { } + void D3D9DeviceEx::UploadManagedTextures(uint32_t mask) { + for (uint32_t tex = mask; tex; tex &= tex - 1) { + // Guaranteed to not be nullptr... + auto texInfo = GetCommonTexture(m_state.textures[bit::tzcnt(tex)]); + + for (uint32_t i = 0; i < texInfo->GetUploadBitmask().dwordCount(); i++) { + for (uint32_t subresources = texInfo->GetUploadBitmask().dword(i); subresources; subresources &= subresources - 1) { + uint32_t subresource = i * 32 + bit::tzcnt(subresources); + + this->FlushImage(texInfo, subresource); + } + } + + texInfo->ClearNeedsUpload(); + } + + m_activeTexturesToUpload = 0; + } + + template void D3D9DeviceEx::UpdatePointMode() { if constexpr (!Points) { @@ -5382,6 +5428,11 @@ namespace dxvk { FlushBuffer(vbo); } + uint32_t texturesToUpload = m_activeTexturesToUpload; + texturesToUpload &= m_psShaderMasks.samplerMask | m_vsShaderMasks.samplerMask; + if (unlikely(texturesToUpload != 0)) + UploadManagedTextures(texturesToUpload); + auto* ibo = GetCommonBuffer(m_state.indices); if (ibo != nullptr && ibo->NeedsUpload()) FlushBuffer(ibo); diff --git a/src/d3d9/d3d9_device.h b/src/d3d9/d3d9_device.h index 47544f080..fb8256005 100644 --- a/src/d3d9/d3d9_device.h +++ b/src/d3d9/d3d9_device.h @@ -734,16 +734,16 @@ namespace dxvk { void Flush(); - D3D9ShaderMasks GetShaderMasks(); - void UpdateActiveRTs(uint32_t index); - void UpdateActiveRTTextures(uint32_t index); + void UpdateActiveTextures(uint32_t index); void UpdateActiveHazards(); void MarkRenderHazards(); + void UploadManagedTextures(uint32_t mask); + template void UpdatePointMode(); @@ -1024,6 +1024,11 @@ namespace dxvk { uint32_t m_activeRTTextures = 0; uint32_t m_activeHazards = 0; uint32_t m_alphaSwizzleRTs = 0; + uint32_t m_activeTextures = 0; + uint32_t m_activeTexturesToUpload = 0; + + D3D9ShaderMasks m_vsShaderMasks = D3D9ShaderMasks(); + D3D9ShaderMasks m_psShaderMasks = FixedFunctionMask; D3D9ViewportInfo m_viewportInfo; diff --git a/src/d3d9/d3d9_shader.cpp b/src/d3d9/d3d9_shader.cpp index a3e1cf6bd..2e40737bc 100644 --- a/src/d3d9/d3d9_shader.cpp +++ b/src/d3d9/d3d9_shader.cpp @@ -61,6 +61,13 @@ namespace dxvk { m_shaders = pModule->compile(*pDxsoModuleInfo, name, AnalysisInfo, constantLayout); m_isgn = pModule->isgn(); m_usedSamplers = pModule->usedSamplers(); + + // Shift up these sampler bits so we can just + // do an or per-draw in the device. + // We shift by 17 because 16 ps samplers + 1 dmap (tess) + if (ShaderStage == VK_SHADER_STAGE_VERTEX_BIT) + m_usedSamplers <<= 17; + m_usedRTs = pModule->usedRTs(); m_info = pModule->info(); diff --git a/src/d3d9/d3d9_util.h b/src/d3d9/d3d9_util.h index 43d8e4562..4ce66e557 100644 --- a/src/d3d9/d3d9_util.h +++ b/src/d3d9/d3d9_util.h @@ -18,6 +18,9 @@ namespace dxvk { uint32_t rtMask; }; + static constexpr D3D9ShaderMasks FixedFunctionMask = + { 0b1111111, 0b1 }; + struct D3D9MipFilter { bool MipsEnabled; VkSamplerMipmapMode MipFilter;