From eb999a0194dad71ec79f3e1841370860612b586a Mon Sep 17 00:00:00 2001 From: Robin Kertels Date: Fri, 7 Mar 2025 03:51:12 +0100 Subject: [PATCH] [d3d9] Build shader constant UBOs on CS thread --- src/d3d9/d3d9_constant_buffer.cpp | 113 +++++++++- src/d3d9/d3d9_constant_buffer.h | 77 +++++++ src/d3d9/d3d9_constant_set.h | 25 ++- src/d3d9/d3d9_device.cpp | 359 +++++++++++++++++++----------- src/d3d9/d3d9_device.h | 46 ++-- src/d3d9/d3d9_shader.cpp | 1 - src/d3d9/d3d9_shader.h | 3 - src/d3d9/d3d9_state.h | 72 +++--- src/d3d9/d3d9_stateblock.h | 30 ++- src/dxso/dxso_compiler.cpp | 2 +- src/dxso/dxso_compiler.h | 1 - src/dxso/dxso_isgn.h | 1 + src/dxso/dxso_module.cpp | 1 - src/dxso/dxso_module.h | 3 - 14 files changed, 524 insertions(+), 210 deletions(-) diff --git a/src/d3d9/d3d9_constant_buffer.cpp b/src/d3d9/d3d9_constant_buffer.cpp index efb218c0b..3cb75ef4a 100644 --- a/src/d3d9/d3d9_constant_buffer.cpp +++ b/src/d3d9/d3d9_constant_buffer.cpp @@ -35,7 +35,6 @@ namespace dxvk { } - D3D9ConstantBuffer::~D3D9ConstantBuffer() { } @@ -136,4 +135,116 @@ namespace dxvk { device->properties().extRobustness2.robustUniformBufferAccessSizeAlignment); } + + + // Constant Buffer living on the CS thread + + D3D9CSConstantBuffer::D3D9CSConstantBuffer() { + + } + + D3D9CSConstantBuffer::D3D9CSConstantBuffer( + const Rc& Device, + DxsoProgramType ShaderStage, + DxsoConstantBuffers BufferType, + VkDeviceSize Size, + bool UseDeviceLocalBuffer) + : D3D9CSConstantBuffer(Device, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, GetShaderStage(ShaderStage), + computeResourceSlotId(ShaderStage, DxsoBindingType::ConstantBuffer, BufferType), + Size, UseDeviceLocalBuffer) { + + } + + D3D9CSConstantBuffer::D3D9CSConstantBuffer( + const Rc& Device, + VkBufferUsageFlags Usage, + VkShaderStageFlags Stages, + uint32_t ResourceSlot, + VkDeviceSize Size, + bool UseDeviceLocalBuffer) + : m_device (Device) + , m_binding (ResourceSlot) + , m_usage (Usage) + , m_stages (Stages) 
+ , m_size (Size) + , m_align (getAlignment(Device)) + , m_useDeviceLocalBuffer(UseDeviceLocalBuffer) { + + } + + D3D9CSConstantBuffer::~D3D9CSConstantBuffer() { + + } + + + void* D3D9CSConstantBuffer::Alloc(DxvkContext* ctx, VkDeviceSize size) { + if (unlikely(m_buffer == nullptr)) + m_slice = this->createBuffer(ctx); + + size = align(size, m_align); + + if (unlikely(m_offset + size > m_size)) { + Rc newSlice = m_buffer->allocateStorage(); + m_offset = 0; + m_slice = newSlice; + ctx->invalidateBuffer(m_buffer, std::move(newSlice)); + } + + ctx->bindUniformBufferRange(m_stages, m_binding, m_offset, size); + + void* mapPtr = reinterpret_cast(m_slice->mapPtr()) + m_offset; + m_offset += size; + return mapPtr; + } + + + void* D3D9CSConstantBuffer::AllocSlice(DxvkContext* ctx) { + if (unlikely(m_buffer == nullptr)) + m_slice = this->createBuffer(ctx); + else + m_slice = m_buffer->allocateStorage(); + // Hand the context its own reference; m_slice must stay valid for mapPtr() below. + auto slice = m_slice; + ctx->invalidateBuffer(m_buffer, std::move(slice)); + return m_slice->mapPtr(); + } + + + Rc D3D9CSConstantBuffer::createBuffer(DxvkContext* ctx) { + // Buffer usage and access flags don't make much of a difference + // in the backend, so set both STORAGE and UNIFORM usage/access. 
+ DxvkBufferCreateInfo bufferInfo; + bufferInfo.size = align(m_size, m_align); + bufferInfo.usage = m_usage; + bufferInfo.access = 0; + bufferInfo.stages = util::pipelineStages(m_stages); + bufferInfo.debugName = "Constant buffer"; + + if (m_usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT) + bufferInfo.access |= VK_ACCESS_UNIFORM_READ_BIT; + if (m_usage & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT) + bufferInfo.access |= VK_ACCESS_SHADER_READ_BIT; + + VkMemoryPropertyFlags memoryFlags + = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT + | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + + if (m_useDeviceLocalBuffer) + memoryFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + + m_buffer = m_device->createBuffer(bufferInfo, memoryFlags); + + ctx->bindUniformBuffer(m_stages, m_binding, DxvkBufferSlice(m_buffer)); + + return m_buffer->storage(); + } + + + VkDeviceSize D3D9CSConstantBuffer::getAlignment(const Rc& device) const { + return std::max(std::max( + device->properties().core.properties.limits.minUniformBufferOffsetAlignment, + device->properties().core.properties.limits.minStorageBufferOffsetAlignment), + device->properties().extRobustness2.robustUniformBufferAccessSizeAlignment); + } + } diff --git a/src/d3d9/d3d9_constant_buffer.h b/src/d3d9/d3d9_constant_buffer.h index 00f94cf04..90d42212f 100644 --- a/src/d3d9/d3d9_constant_buffer.h +++ b/src/d3d9/d3d9_constant_buffer.h @@ -1,6 +1,7 @@ #pragma once #include "../dxvk/dxvk_buffer.h" +#include "../dxvk/dxvk_context.h" #include "../dxso/dxso_util.h" @@ -82,4 +83,80 @@ namespace dxvk { }; + + + /** + * \brief Constant buffer living on the CS thread + */ + class D3D9CSConstantBuffer { + + public: + + D3D9CSConstantBuffer(); + + D3D9CSConstantBuffer( + const Rc& Device, + DxsoProgramType ShaderStage, + DxsoConstantBuffers BufferType, + VkDeviceSize Size, + bool UseDeviceLocalBuffer); + + D3D9CSConstantBuffer( + const Rc& Device, + VkBufferUsageFlags Usage, + VkShaderStageFlags Stages, + uint32_t ResourceSlot, + VkDeviceSize Size, + bool 
UseDeviceLocalBuffer); + + ~D3D9CSConstantBuffer(); + + /** + * \brief Queries alignment + * + * Useful to pad copies with initialized data. + * \returns Data alignment + */ + VkDeviceSize GetAlignment() const { + return m_align; + } + + /** + * \brief Allocates a given amount of memory + * + * \param [in] size Number of bytes to allocate + * \returns Map pointer of the allocated region + */ + void* Alloc(DxvkContext* ctx, VkDeviceSize size); + + /** + * \brief Allocates a full buffer slice + * + * This must not be called if \ref Alloc is used. + * \returns Map pointer of the allocated region + */ + void* AllocSlice(DxvkContext* ctx); + + private: + + Rc m_device; + + uint32_t m_binding = 0u; + VkBufferUsageFlags m_usage = 0u; + VkShaderStageFlags m_stages = 0u; + VkDeviceSize m_size = 0ull; + VkDeviceSize m_align = 0ull; + VkDeviceSize m_offset = 0ull; + + bool m_useDeviceLocalBuffer = false; + + Rc m_buffer = nullptr; + Rc m_slice = nullptr; + + Rc createBuffer(DxvkContext* ctx); + + VkDeviceSize getAlignment(const Rc& device) const; + + }; + } \ No newline at end of file diff --git a/src/d3d9/d3d9_constant_set.h b/src/d3d9/d3d9_constant_set.h index fb64a586b..d5f756aee 100644 --- a/src/d3d9/d3d9_constant_set.h +++ b/src/d3d9/d3d9_constant_set.h @@ -40,15 +40,30 @@ namespace dxvk { }; struct D3D9SwvpConstantBuffers { - D3D9ConstantBuffer intBuffer; - D3D9ConstantBuffer boolBuffer; + D3D9CSConstantBuffer intBuffer; + D3D9CSConstantBuffer boolBuffer; }; - struct D3D9ConstantSets { - D3D9SwvpConstantBuffers swvp; - D3D9ConstantBuffer buffer; + template + struct D3D9CSShaderConstants { + ShaderConstantsStorage constants; + + // Primary buffer (contains HWVP or pixel shaders: Ints + Floats, SWVP: Floats) + D3D9CSConstantBuffer buffer; + // Secondary buffers for SWVP (one for Ints, one for Bools) + D3D9SwvpConstantBuffers swvp; + + // Shader related DxsoShaderMetaInfo meta = {}; + DxsoDefinedConstants shaderDefinedConsts; + + // Tracking bool dirty = true; + uint32_t 
floatConstsCount = 0; + // The highest changed int and bool constants are only tracked for SWVP. + // For HWVP or pixel shaders, the maximum amount is only 16 anyway. + uint32_t intConstsCount = 0; + uint32_t boolConstsCount = 0; }; } \ No newline at end of file diff --git a/src/d3d9/d3d9_device.cpp b/src/d3d9/d3d9_device.cpp index 4b8a70710..43eb804d3 100644 --- a/src/d3d9/d3d9_device.cpp +++ b/src/d3d9/d3d9_device.cpp @@ -115,10 +115,10 @@ namespace dxvk { if (!useRobustConstantAccess) { // Disable optimized constant copies, we always have to copy all constants. - m_vsFloatConstsCount = m_vsLayout.floatCount; - m_vsIntConstsCount = m_vsLayout.intCount; - m_vsBoolConstsCount = m_vsLayout.boolCount; - m_psFloatConstsCount = m_psLayout.floatCount; + m_csVSConsts.floatConstsCount = m_vsLayout.floatCount; + m_csVSConsts.intConstsCount = m_vsLayout.intCount; + m_csVSConsts.boolConstsCount = m_vsLayout.boolCount; + m_csPSConsts.floatConstsCount = m_psLayout.floatCount; if (supportsRobustness2) { Logger::warn("Disabling robust constant buffer access because of alignment."); @@ -3379,15 +3379,23 @@ namespace dxvk { bool oldCopies = oldShader && oldShader->GetMeta().needsConstantCopies; bool newCopies = newShader && newShader->GetMeta().needsConstantCopies; - m_consts[DxsoProgramTypes::VertexShader].dirty |= oldCopies || newCopies || !oldShader; - m_consts[DxsoProgramTypes::VertexShader].meta = newShader ? 
newShader->GetMeta() : DxsoShaderMetaInfo(); - - if (newShader && oldShader) { - m_consts[DxsoProgramTypes::VertexShader].dirty - |= newShader->GetMeta().maxConstIndexF > oldShader->GetMeta().maxConstIndexF + bool dirty = oldCopies || newCopies || !oldShader; + dirty |= newShader && oldShader && ( + newShader->GetMeta().maxConstIndexF > oldShader->GetMeta().maxConstIndexF || newShader->GetMeta().maxConstIndexI > oldShader->GetMeta().maxConstIndexI - || newShader->GetMeta().maxConstIndexB > oldShader->GetMeta().maxConstIndexB; - } + || newShader->GetMeta().maxConstIndexB > oldShader->GetMeta().maxConstIndexB + ); + + EmitCs([ + &cShaderConsts = m_csVSConsts, + cDirty = dirty, + cMeta = newShader ? newShader->GetMeta() : DxsoShaderMetaInfo(), + cShaderDefinedConsts = newShader ? newShader->GetConstants() : DxsoDefinedConstants() + ](DxvkContext* ctx) { + cShaderConsts.meta = cMeta; + cShaderConsts.dirty |= cDirty; + cShaderConsts.shaderDefinedConsts = cShaderDefinedConsts; + }); m_state.vertexShader = shader; @@ -3754,15 +3762,23 @@ namespace dxvk { bool oldCopies = oldShader && oldShader->GetMeta().needsConstantCopies; bool newCopies = newShader && newShader->GetMeta().needsConstantCopies; - m_consts[DxsoProgramTypes::PixelShader].dirty |= oldCopies || newCopies || !oldShader; - m_consts[DxsoProgramTypes::PixelShader].meta = newShader ? 
newShader->GetMeta() : DxsoShaderMetaInfo(); - - if (newShader && oldShader) { - m_consts[DxsoProgramTypes::PixelShader].dirty - |= newShader->GetMeta().maxConstIndexF > oldShader->GetMeta().maxConstIndexF + bool dirty = oldCopies || newCopies || !oldShader; + dirty |= newShader && oldShader && ( + newShader->GetMeta().maxConstIndexF > oldShader->GetMeta().maxConstIndexF || newShader->GetMeta().maxConstIndexI > oldShader->GetMeta().maxConstIndexI - || newShader->GetMeta().maxConstIndexB > oldShader->GetMeta().maxConstIndexB; - } + || newShader->GetMeta().maxConstIndexB > oldShader->GetMeta().maxConstIndexB + ); + + EmitCs([ + &cShaderConsts = m_csPSConsts, + cDirty = dirty, + cMeta = newShader ? newShader->GetMeta() : DxsoShaderMetaInfo(), + cShaderDefinedConsts = newShader ? newShader->GetConstants() : DxsoDefinedConstants() + ](DxvkContext* ctx) { + cShaderConsts.meta = cMeta; + cShaderConsts.dirty |= cDirty; + cShaderConsts.shaderDefinedConsts = cShaderDefinedConsts; + }); m_state.pixelShader = shader; @@ -5783,25 +5799,37 @@ namespace dxvk { constexpr VkDeviceSize DefaultConstantBufferSize = 1024ull << 10; constexpr VkDeviceSize SmallConstantBufferSize = 64ull << 10; - m_consts[DxsoProgramTypes::VertexShader].buffer = D3D9ConstantBuffer(this, - DxsoProgramType::VertexShader, - DxsoConstantBuffers::VSConstantBuffer, - DefaultConstantBufferSize); + EmitCs([ + cDevice = m_dxvkDevice, + &cCSVSConsts = m_csVSConsts, + &cCSPSConsts = m_csPSConsts, + cUseDeviceLocalBuffers = m_d3d9Options.deviceLocalConstantBuffers + ] (DxvkContext* ctx) { + cCSVSConsts.buffer = D3D9CSConstantBuffer(cDevice, + DxsoProgramType::VertexShader, + DxsoConstantBuffers::VSConstantBuffer, + DefaultConstantBufferSize, + cUseDeviceLocalBuffers); - m_consts[DxsoProgramTypes::VertexShader].swvp.intBuffer = D3D9ConstantBuffer(this, - DxsoProgramType::VertexShader, - DxsoConstantBuffers::VSIntConstantBuffer, - SmallConstantBufferSize); + cCSVSConsts.swvp.intBuffer = D3D9CSConstantBuffer(cDevice, + 
DxsoProgramType::VertexShader, + DxsoConstantBuffers::VSIntConstantBuffer, + SmallConstantBufferSize, + cUseDeviceLocalBuffers); - m_consts[DxsoProgramTypes::VertexShader].swvp.boolBuffer = D3D9ConstantBuffer(this, - DxsoProgramType::VertexShader, - DxsoConstantBuffers::VSBoolConstantBuffer, - SmallConstantBufferSize); + cCSVSConsts.swvp.boolBuffer = D3D9CSConstantBuffer(cDevice, + DxsoProgramType::VertexShader, + DxsoConstantBuffers::VSBoolConstantBuffer, + SmallConstantBufferSize, + cUseDeviceLocalBuffers); + + cCSPSConsts.buffer = D3D9CSConstantBuffer(cDevice, + DxsoProgramType::PixelShader, + DxsoConstantBuffers::PSConstantBuffer, + DefaultConstantBufferSize, + cUseDeviceLocalBuffers); + }); - m_consts[DxsoProgramTypes::PixelShader].buffer = D3D9ConstantBuffer(this, - DxsoProgramType::PixelShader, - DxsoConstantBuffers::PSConstantBuffer, - DefaultConstantBufferSize); m_vsClipPlanes = D3D9ConstantBuffer(this, DxsoProgramType::VertexShader, @@ -5841,7 +5869,7 @@ namespace dxvk { } - inline void D3D9DeviceEx::UploadSoftwareConstantSet(const D3D9ShaderConstantsVSSoftware& Src, const D3D9ConstantLayout& Layout) { + inline void D3D9DeviceEx::UploadSoftwareConstantSet(DxvkContext* ctx, D3D9CSShaderConstants& ShaderConsts) { /* * SWVP raises the amount of constants by a lot. * To avoid copying huge amounts of data for every draw call, @@ -5849,42 +5877,37 @@ namespace dxvk { * to fit that. We rely on robustness to return 0 for OOB reads. */ - D3D9ConstantSets& constSet = m_consts[DxsoProgramType::VertexShader]; - - if (!constSet.dirty) + if (!ShaderConsts.dirty) return; - constSet.dirty = false; + ShaderConsts.dirty = false; - uint32_t floatCount = m_vsFloatConstsCount; - if (constSet.meta.needsConstantCopies) { + uint32_t floatCount = ShaderConsts.floatConstsCount; + if (ShaderConsts.meta.needsConstantCopies) { // If the shader requires us to preserve shader defined constants, // we copy those over. We need to adjust the amount of used floats accordingly. 
- auto shader = GetCommonShader(m_state.vertexShader); - floatCount = std::max(floatCount, shader->GetMaxDefinedConstant() + 1); + floatCount = std::max(floatCount, ShaderConsts.meta.maxShaderDefinedFloatConstant + 1); } // If we statically know which is the last float constant accessed by the shader, we don't need to copy the rest. - floatCount = std::min(floatCount, constSet.meta.maxConstIndexF); + floatCount = std::min(floatCount, ShaderConsts.meta.maxConstIndexF); // Calculate data sizes for each constant type. const uint32_t floatDataSize = floatCount * sizeof(Vector4); - const uint32_t intDataSize = std::min(constSet.meta.maxConstIndexI, m_vsIntConstsCount) * sizeof(Vector4i); - const uint32_t boolDataSize = divCeil(std::min(constSet.meta.maxConstIndexB, m_vsBoolConstsCount), 32u) * uint32_t(sizeof(uint32_t)); + const uint32_t intDataSize = std::min(ShaderConsts.meta.maxConstIndexI, ShaderConsts.intConstsCount) * sizeof(Vector4i); + const uint32_t boolDataSize = divCeil(std::min(ShaderConsts.meta.maxConstIndexB, ShaderConsts.boolConstsCount), 32u) * uint32_t(sizeof(uint32_t)); // Max copy source size is 8192 * 16 => always aligned to any plausible value // => we won't copy out of bounds - if (likely(constSet.meta.maxConstIndexF != 0)) { - auto mapPtr = CopySoftwareConstants(constSet.buffer, Src.fConsts, floatDataSize); + if (likely(ShaderConsts.meta.maxConstIndexF != 0)) { + auto mapPtr = CopySoftwareConstants(ctx, ShaderConsts.buffer, ShaderConsts.constants.fConsts, floatDataSize); - if (constSet.meta.needsConstantCopies) { + if (ShaderConsts.meta.needsConstantCopies) { // Copy shader defined constants over so they can be accessed // with relative addressing. 
Vector4* data = reinterpret_cast(mapPtr); - auto& shaderConsts = GetCommonShader(m_state.vertexShader)->GetConstants(); - - for (const auto& constant : shaderConsts) { - if (constant.uboIdx < constSet.meta.maxConstIndexF) + for (const auto& constant : ShaderConsts.shaderDefinedConsts) { + if (constant.uboIdx < ShaderConsts.meta.maxConstIndexF) data[constant.uboIdx] = *reinterpret_cast(constant.float32); } } @@ -5892,90 +5915,84 @@ namespace dxvk { // Max copy source size is 2048 * 16 => always aligned to any plausible value // => we won't copy out of bounds - if (likely(constSet.meta.maxConstIndexI != 0)) - CopySoftwareConstants(constSet.swvp.intBuffer, Src.iConsts, intDataSize); + if (likely(ShaderConsts.meta.maxConstIndexI != 0)) + CopySoftwareConstants(ctx, ShaderConsts.swvp.intBuffer, ShaderConsts.constants.iConsts, intDataSize); - if (likely(constSet.meta.maxConstIndexB != 0)) - CopySoftwareConstants(constSet.swvp.boolBuffer, Src.bConsts, boolDataSize); + if (likely(ShaderConsts.meta.maxConstIndexB != 0)) + CopySoftwareConstants(ctx, ShaderConsts.swvp.boolBuffer, ShaderConsts.constants.bConsts, boolDataSize); } - - inline void* D3D9DeviceEx::CopySoftwareConstants(D3D9ConstantBuffer& dstBuffer, const void* src, uint32_t size) { + inline void* D3D9DeviceEx::CopySoftwareConstants(DxvkContext* ctx, D3D9CSConstantBuffer& dstBuffer, const void* src, uint32_t size) { uint32_t alignment = dstBuffer.GetAlignment(); size = std::max(size, alignment); size = align(size, alignment); - auto mapPtr = dstBuffer.Alloc(size); + auto mapPtr = dstBuffer.Alloc(ctx, size); std::memcpy(mapPtr, src, size); return mapPtr; } - template - inline void D3D9DeviceEx::UploadConstantSet(const SoftwareLayoutType& Src, const D3D9ConstantLayout& Layout, const ShaderType& Shader) { + template + inline void D3D9DeviceEx::UploadConstantSet(DxvkContext* ctx, D3D9CSShaderConstants& ShaderConsts) { /* * We just copy the float constants that have been set by the application and rely on robustness * to 
return 0 on OOB reads. */ - D3D9ConstantSets& constSet = m_consts[ShaderStage]; - - if (!constSet.dirty) + if (!ShaderConsts.dirty) return; - constSet.dirty = false; + ShaderConsts.dirty = false; - uint32_t floatCount = ShaderStage == DxsoProgramType::VertexShader ? m_vsFloatConstsCount : m_psFloatConstsCount; - if (constSet.meta.needsConstantCopies) { + uint32_t floatCount = ShaderConsts.floatConstsCount; + if (ShaderConsts.meta.needsConstantCopies) { // If the shader requires us to preserve shader defined constants, // we copy those over. We need to adjust the amount of used floats accordingly. - auto shader = GetCommonShader(Shader); - floatCount = std::max(floatCount, shader->GetMaxDefinedConstant() + 1); + floatCount = std::max(floatCount, ShaderConsts.meta.maxShaderDefinedFloatConstant + 1); } // If we statically know which is the last float constant accessed by the shader, we don't need to copy the rest. - floatCount = std::min(constSet.meta.maxConstIndexF, floatCount); + floatCount = std::min(floatCount, ShaderConsts.meta.maxConstIndexF); // There are very few int constants, so we put those into the same buffer at the start. // We always allocate memory for all possible int constants to make sure alignment works out. 
const uint32_t intRange = caps::MaxOtherConstants * sizeof(Vector4i); uint32_t floatDataSize = floatCount * sizeof(Vector4); // Determine amount of floats and buffer size based on highest used float constant and alignment - const uint32_t alignment = constSet.buffer.GetAlignment(); + const uint32_t alignment = ShaderConsts.buffer.GetAlignment(); const uint32_t bufferSize = align(std::max(floatDataSize + intRange, alignment), alignment); floatDataSize = bufferSize - intRange; - void* mapPtr = constSet.buffer.Alloc(bufferSize); - auto* dst = reinterpret_cast(mapPtr); + void* mapPtr = ShaderConsts.buffer.Alloc(ctx, bufferSize); + auto* dst = reinterpret_cast(mapPtr); - const uint32_t intDataSize = constSet.meta.maxConstIndexI * sizeof(Vector4i); - if (constSet.meta.maxConstIndexI != 0) - std::memcpy(dst->iConsts, Src.iConsts, intDataSize); - if (constSet.meta.maxConstIndexF != 0) - std::memcpy(dst->fConsts, Src.fConsts, floatDataSize); + const uint32_t intDataSize = ShaderConsts.meta.maxConstIndexI * sizeof(Vector4i); + if (ShaderConsts.meta.maxConstIndexI != 0) + std::memcpy(dst->iConsts, ShaderConsts.constants.iConsts, intDataSize); + if (ShaderConsts.meta.maxConstIndexF != 0) + std::memcpy(dst->fConsts, ShaderConsts.constants.fConsts, floatDataSize); - if (constSet.meta.needsConstantCopies) { + if (ShaderConsts.meta.needsConstantCopies) { // Copy shader defined constants over so they can be accessed // with relative addressing. 
Vector4* data = reinterpret_cast(dst->fConsts); - auto& shaderConsts = GetCommonShader(Shader)->GetConstants(); - - for (const auto& constant : shaderConsts) { - if (constant.uboIdx < constSet.meta.maxConstIndexF) + for (const auto& constant : ShaderConsts.shaderDefinedConsts) { + if (constant.uboIdx < ShaderConsts.meta.maxConstIndexF) data[constant.uboIdx] = *reinterpret_cast(constant.float32); } } } - template - void D3D9DeviceEx::UploadConstants() { - if constexpr (ShaderStage == DxsoProgramTypes::VertexShader) { - if (CanSWVP()) - return UploadSoftwareConstantSet(m_state.vsConsts.get(), m_vsLayout); + template + void D3D9DeviceEx::UploadConstants(DxvkContext* ctx, D3D9CSShaderConstants& ShaderConsts, bool canSWVP) { + if constexpr (std::is_same::value) { + if (canSWVP) + return UploadSoftwareConstantSet(ctx, ShaderConsts); else - return UploadConstantSet(m_state.vsConsts.get(), m_vsLayout, m_state.vertexShader); + return UploadConstantSet(ctx, ShaderConsts); } else { - return UploadConstantSet (m_state.psConsts.get(), m_psLayout, m_state.pixelShader); + return UploadConstantSet (ctx, ShaderConsts); } } @@ -7313,12 +7330,18 @@ namespace dxvk { BindShader( GetCommonShader(m_state.vertexShader)); } - UploadConstants(); + EmitCs([ + &cShaderConsts = m_csVSConsts, + cCanSWVP = CanSWVP() + ](DxvkContext* ctx) { + UploadConstants(ctx, cShaderConsts, cCanSWVP); + }); if (likely(!CanSWVP())) { + const D3D9CommonShader* shader = GetCommonShader(m_state.vertexShader); UpdateVertexBoolSpec( m_state.vsConsts->bConsts[0] & - m_consts[DxsoProgramType::VertexShader].meta.boolConstantMask); + shader->GetMeta().boolConstantMask); } else UpdateVertexBoolSpec(0); } @@ -7331,13 +7354,18 @@ namespace dxvk { BindInputLayout(); if (likely(UseProgrammablePS())) { - UploadConstants(); + EmitCs([ + &cShaderConsts = m_csPSConsts + ](DxvkContext* ctx) { + UploadConstants(ctx, cShaderConsts, false); + }); const uint32_t psTextureMask = usedTextureMask & ((1u << caps::MaxTexturesPS) - 1u); 
const uint32_t fetch4 = m_fetch4 & psTextureMask; const uint32_t projected = m_projectionBitfield & psTextureMask; - const auto& programInfo = GetCommonShader(m_state.pixelShader)->GetInfo(); + const D3D9CommonShader* shader = GetCommonShader(m_state.pixelShader); + const auto& programInfo = shader->GetInfo(); if (programInfo.majorVersion() >= 2) UpdatePixelShaderSamplerSpec(m_d3d9Options.forceSamplerTypeSpecConstants ? m_textureTypes : 0u, 0u, fetch4); @@ -7346,7 +7374,7 @@ namespace dxvk { UpdatePixelBoolSpec( m_state.psConsts->bConsts[0] & - m_consts[DxsoProgramType::PixelShader].meta.boolConstantMask); + shader->GetMeta().boolConstantMask); } else { UpdatePixelBoolSpec(0); @@ -7665,7 +7693,9 @@ namespace dxvk { m_state.vsConsts->bConsts[idx] &= ~mask; m_state.vsConsts->bConsts[idx] |= bits & mask; - m_consts[DxsoProgramTypes::VertexShader].dirty = true; + EmitCs([&cConsts = m_csVSConsts](DxvkContext* ctx) { + cConsts.dirty = true; + }); } @@ -7673,7 +7703,9 @@ namespace dxvk { m_state.psConsts->bConsts[idx] &= ~mask; m_state.psConsts->bConsts[idx] |= bits & mask; - m_consts[DxsoProgramTypes::PixelShader].dirty = true; + EmitCs([&cConsts = m_csPSConsts](DxvkContext* ctx) { + cConsts.dirty = true; + }); } @@ -7731,39 +7763,108 @@ namespace dxvk { pConstantData, Count); + constexpr uint32_t vectorElementsCount = ConstantType != D3D9ConstantType::Bool ? 
4 : 1; + const size_t dataSize = Count * vectorElementsCount * sizeof(T); + + if (ProgramType == DxsoProgramType::VertexShader && (likely(ConstantType != D3D9ConstantType::Bool) || unlikely(CanSWVP()))) { + + DxvkCsDataBlock* csData = EmitCsWithData(Count * vectorElementsCount, [ + &cShaderConsts = m_csVSConsts, + cStartRegister = StartRegister, + cFloatEmulation = m_d3d9Options.d3d9FloatEmulation == D3D9FloatEmulation::Enabled + ] (DxvkContext* ctx, const T* data, size_t count) { + uint32_t vectorsCount; + if constexpr (ConstantType == D3D9ConstantType::Float) { + vectorsCount = count / 4; + cShaderConsts.floatConstsCount = std::max(cShaderConsts.floatConstsCount, cStartRegister + uint32_t(vectorsCount)); + } else if constexpr (ConstantType == D3D9ConstantType::Int) { + vectorsCount = count / 4; + cShaderConsts.intConstsCount = std::max(cShaderConsts.intConstsCount, cStartRegister + uint32_t(vectorsCount)); + } else /* if constexpr (ConstantType == D3D9ConstantType::Bool) */ { + vectorsCount = count; + cShaderConsts.boolConstsCount = std::max(cShaderConsts.boolConstsCount, cStartRegister + uint32_t(vectorsCount)); + } + + if constexpr (ConstantType != D3D9ConstantType::Bool) { + uint32_t maxCount = ConstantType == D3D9ConstantType::Float + ? 
cShaderConsts.meta.maxConstIndexF + : cShaderConsts.meta.maxConstIndexI; + + cShaderConsts.dirty |= cStartRegister < maxCount; + } else /* if (CanSWVP()) */ { + cShaderConsts.dirty |= cStartRegister < cShaderConsts.meta.maxConstIndexB; + } + + UpdateStateConstants< + D3D9ShaderConstantsVSSoftware*, + ConstantType, + T>( + &cShaderConsts.constants, + cStartRegister, + data, + vectorsCount, + cFloatEmulation); + }); + + auto dst = reinterpret_cast(csData->first()); + std::memcpy(dst, pConstantData, dataSize); + + } else if constexpr (ProgramType == DxsoProgramType::PixelShader && ConstantType != D3D9ConstantType::Bool) { + + DxvkCsDataBlock* csData = EmitCsWithData(Count * vectorElementsCount, [ + &cShaderConsts = m_csPSConsts, + cStartRegister = StartRegister, + cFloatEmulation = m_d3d9Options.d3d9FloatEmulation == D3D9FloatEmulation::Enabled + ] (DxvkContext* ctx, const T* data, size_t count) { + const uint32_t vectorsCount = count / 4; + if constexpr (ConstantType == D3D9ConstantType::Float) { + cShaderConsts.floatConstsCount = std::max(cShaderConsts.floatConstsCount, cStartRegister + uint32_t(vectorsCount)); + } + + uint32_t maxCount = ConstantType == D3D9ConstantType::Float + ? 
cShaderConsts.meta.maxConstIndexF + : cShaderConsts.meta.maxConstIndexI; + + cShaderConsts.dirty |= cStartRegister < maxCount; + + UpdateStateConstants< + D3D9ShaderConstantsPS*, + ConstantType, + T>( + &cShaderConsts.constants, + cStartRegister, + data, + vectorsCount, + cFloatEmulation); + }); + + auto dst = reinterpret_cast(csData->first()); + std::memcpy(dst, pConstantData, dataSize); + + } + if constexpr (ProgramType == DxsoProgramType::VertexShader) { - if constexpr (ConstantType == D3D9ConstantType::Float) { - m_vsFloatConstsCount = std::max(m_vsFloatConstsCount, StartRegister + Count); - } else if constexpr (ConstantType == D3D9ConstantType::Int) { - m_vsIntConstsCount = std::max(m_vsIntConstsCount, StartRegister + Count); - } else /* if constexpr (ConstantType == D3D9ConstantType::Bool) */ { - m_vsBoolConstsCount = std::max(m_vsBoolConstsCount, StartRegister + Count); - } + UpdateStateConstants< + static_item&, + ConstantType, + T>( + m_state.vsConsts, + StartRegister, + pConstantData, + Count, + false); } else { - if constexpr (ConstantType == D3D9ConstantType::Float) { - m_psFloatConstsCount = std::max(m_psFloatConstsCount, StartRegister + Count); - } + UpdateStateConstants< + static_item&, + ConstantType, + T>( + m_state.psConsts, + StartRegister, + pConstantData, + Count, + false); } - if constexpr (ConstantType != D3D9ConstantType::Bool) { - uint32_t maxCount = ConstantType == D3D9ConstantType::Float - ? 
m_consts[ProgramType].meta.maxConstIndexF - : m_consts[ProgramType].meta.maxConstIndexI; - - m_consts[ProgramType].dirty |= StartRegister < maxCount; - } else if constexpr (ProgramType == DxsoProgramType::VertexShader) { - if (unlikely(CanSWVP())) { - m_consts[DxsoProgramType::VertexShader].dirty |= StartRegister < m_consts[ProgramType].meta.maxConstIndexB; - } - } - - UpdateStateConstants( - &m_state, - StartRegister, - pConstantData, - Count, - m_d3d9Options.d3d9FloatEmulation == D3D9FloatEmulation::Enabled); - return D3D_OK; } diff --git a/src/d3d9/d3d9_device.h b/src/d3d9/d3d9_device.h index 3a2a4184d..c94fc19d7 100644 --- a/src/d3d9/d3d9_device.h +++ b/src/d3d9/d3d9_device.h @@ -941,16 +941,16 @@ namespace dxvk { void BindDepthBias(); - inline void UploadSoftwareConstantSet(const D3D9ShaderConstantsVSSoftware& Src, const D3D9ConstantLayout& Layout); + inline static void UploadSoftwareConstantSet(DxvkContext* ctx, D3D9CSShaderConstants& ShaderConsts); - inline void* CopySoftwareConstants(D3D9ConstantBuffer& dstBuffer, const void* src, uint32_t size); + inline static void* CopySoftwareConstants(DxvkContext* ctx, D3D9CSConstantBuffer& dstBuffer, const void* src, uint32_t size); + + template + inline static void UploadConstantSet(DxvkContext* ctx, D3D9CSShaderConstants& ShaderConsts); + + template + static void UploadConstants(DxvkContext* ctx, D3D9CSShaderConstants& ShaderConsts, bool canSWVP); - template - inline void UploadConstantSet(const SoftwareLayoutType& Src, const D3D9ConstantLayout& Layout, const ShaderType& Shader); - - template - void UploadConstants(); - void UpdateClipPlanes(); /** @@ -1192,6 +1192,24 @@ namespace dxvk { } } + template + DxvkCsDataBlock* EmitCsWithData(size_t count, Cmd&& command) { + DxvkCsDataBlock* data = m_csChunk->pushCmd(command, count); + + if (unlikely(!data)) { + EmitCsChunk(std::move(m_csChunk)); + m_csChunk = AllocCsChunk(); + + if constexpr (AllowFlush) + ConsiderFlush(GpuFlushType::ImplicitWeakHint); + + // We must 
record this command after the potential + // flush since the caller may still access the data + data = m_csChunk->pushCmd(command, count); + } + return data; + } + void EmitCsChunk(DxvkCsChunkRef&& chunk); void FlushCsChunk() { @@ -1582,16 +1600,8 @@ namespace dxvk { uint32_t m_robustSSBOAlignment = 1; uint32_t m_robustUBOAlignment = 1; - uint32_t m_vsFloatConstsCount = 0; - uint32_t m_vsIntConstsCount = 0; - uint32_t m_vsBoolConstsCount = 0; - uint32_t m_psFloatConstsCount = 0; - VkDeviceSize m_boundVSConstantsBufferSize = 0; - VkDeviceSize m_boundPSConstantsBufferSize = 0; - D3D9ConstantLayout m_vsLayout; D3D9ConstantLayout m_psLayout; - D3D9ConstantSets m_consts[DxsoProgramTypes::Count]; D3D9UserDefinedAnnotation* m_annotation = nullptr; @@ -1641,6 +1651,10 @@ namespace dxvk { // Written by CS thread alignas(CACHE_LINE_SIZE) std::atomic m_lastSamplerStats = { 0u }; + + D3D9CSShaderConstants m_csVSConsts; + D3D9CSShaderConstants m_csPSConsts; + }; } diff --git a/src/d3d9/d3d9_shader.cpp b/src/d3d9/d3d9_shader.cpp index f8807e0cb..580554ccd 100644 --- a/src/d3d9/d3d9_shader.cpp +++ b/src/d3d9/d3d9_shader.cpp @@ -70,7 +70,6 @@ namespace dxvk { m_info = pModule->info(); m_meta = pModule->meta(); m_constants = pModule->constants(); - m_maxDefinedConst = pModule->maxDefinedConstant(); m_shader->setShaderKey(Key); diff --git a/src/d3d9/d3d9_shader.h b/src/d3d9/d3d9_shader.h index a6c6af94a..6d2e34ecf 100644 --- a/src/d3d9/d3d9_shader.h +++ b/src/d3d9/d3d9_shader.h @@ -52,8 +52,6 @@ namespace dxvk { const DxsoProgramInfo& GetInfo() const { return m_info; } - uint32_t GetMaxDefinedConstant() const { return m_maxDefinedConst; } - VkImageViewType GetImageViewType(uint32_t samplerSlot) const { const uint32_t offset = samplerSlot * 2; const uint32_t mask = 0b11; @@ -70,7 +68,6 @@ namespace dxvk { DxsoProgramInfo m_info; DxsoShaderMetaInfo m_meta; DxsoDefinedConstants m_constants; - uint32_t m_maxDefinedConst; Rc m_shader; diff --git a/src/d3d9/d3d9_state.h 
b/src/d3d9/d3d9_state.h index 79aa0d9d7..d8b2fb061 100644 --- a/src/d3d9/d3d9_state.h +++ b/src/d3d9/d3d9_state.h @@ -318,54 +318,46 @@ namespace dxvk { using D3D9DeviceState = D3D9State; template < - DxsoProgramType ProgramType, + typename ShaderConstantsStorage, D3D9ConstantType ConstantType, - typename T, - typename StateType> + typename T> HRESULT UpdateStateConstants( - StateType* pState, - UINT StartRegister, - const T* pConstantData, - UINT Count, - bool FloatEmu) { - auto UpdateHelper = [&] (auto& set) { - if constexpr (ConstantType == D3D9ConstantType::Float) { + ShaderConstantsStorage ConstantSet, + UINT StartRegister, + const T* pConstantData, + UINT Count, + bool FloatEmu) { + if constexpr (ConstantType == D3D9ConstantType::Float) { + if (!FloatEmu) { + size_t size = Count * sizeof(Vector4); - if (!FloatEmu) { - size_t size = Count * sizeof(Vector4); - - std::memcpy(set->fConsts[StartRegister].data, pConstantData, size); - } - else { - for (UINT i = 0; i < Count; i++) - set->fConsts[StartRegister + i] = replaceNaN(pConstantData + (i * 4)); - } - } - else if constexpr (ConstantType == D3D9ConstantType::Int) { - size_t size = Count * sizeof(Vector4i); - - std::memcpy(set->iConsts[StartRegister].data, pConstantData, size); + std::memcpy(ConstantSet->fConsts[StartRegister].data, pConstantData, size); } else { - for (uint32_t i = 0; i < Count; i++) { - const uint32_t constantIdx = StartRegister + i; - const uint32_t arrayIdx = constantIdx / 32; - const uint32_t bitIdx = constantIdx % 32; - - const uint32_t bit = 1u << bitIdx; - - set->bConsts[arrayIdx] &= ~bit; - if (pConstantData[i]) - set->bConsts[arrayIdx] |= bit; - } + for (UINT i = 0; i < Count; i++) + ConstantSet->fConsts[StartRegister + i] = replaceNaN(pConstantData + (i * 4)); } + } + else if constexpr (ConstantType == D3D9ConstantType::Int) { + size_t size = Count * sizeof(Vector4i); - return D3D_OK; - }; + std::memcpy(ConstantSet->iConsts[StartRegister].data, pConstantData, size); + } + else { + 
for (uint32_t i = 0; i < Count; i++) { + const uint32_t constantIdx = StartRegister + i; + const uint32_t arrayIdx = constantIdx / 32; + const uint32_t bitIdx = constantIdx % 32; - return ProgramType == DxsoProgramTypes::VertexShader - ? UpdateHelper(pState->vsConsts) - : UpdateHelper(pState->psConsts); + const uint32_t bit = 1u << bitIdx; + + ConstantSet->bConsts[arrayIdx] &= ~bit; + if (pConstantData[i]) + ConstantSet->bConsts[arrayIdx] |= bit; + } + } + + return D3D_OK; } struct Direct3DState9 : public D3D9DeviceState { diff --git a/src/d3d9/d3d9_stateblock.h b/src/d3d9/d3d9_stateblock.h index 74284d4f7..d3ea22f4a 100644 --- a/src/d3d9/d3d9_stateblock.h +++ b/src/d3d9/d3d9_stateblock.h @@ -367,15 +367,27 @@ namespace dxvk { setCaptures.bConsts.set(reg, true); } - UpdateStateConstants< - ProgramType, - ConstantType, - T>( - &m_state, - StartRegister, - pConstantData, - Count, - false); + if constexpr (ProgramType == DxsoProgramType::VertexShader) { + UpdateStateConstants< + dynamic_item&, + ConstantType, + T>( + m_state.vsConsts, + StartRegister, + pConstantData, + Count, + false); + } else { + UpdateStateConstants< + dynamic_item&, + ConstantType, + T>( + m_state.psConsts, + StartRegister, + pConstantData, + Count, + false); + } return D3D_OK; }; diff --git a/src/dxso/dxso_compiler.cpp b/src/dxso/dxso_compiler.cpp index 1d7e1bf1d..38523b0c5 100644 --- a/src/dxso/dxso_compiler.cpp +++ b/src/dxso/dxso_compiler.cpp @@ -1797,7 +1797,7 @@ namespace dxvk { for (uint32_t i = 0; i < 4; i++) constant.float32[i] = data[i]; m_constants.push_back(constant); - m_maxDefinedConstant = std::max(constant.uboIdx, m_maxDefinedConstant); + m_meta.maxShaderDefinedFloatConstant = std::max(constant.uboIdx, m_meta.maxShaderDefinedFloatConstant); } void DxsoCompiler::emitDefI(const DxsoInstructionContext& ctx) { diff --git a/src/dxso/dxso_compiler.h b/src/dxso/dxso_compiler.h index 7a8d9e586..9398c01e5 100644 --- a/src/dxso/dxso_compiler.h +++ b/src/dxso/dxso_compiler.h @@ -247,7 +247,6 
@@ namespace dxvk { const DxsoDefinedConstants& constants() { return m_constants; } uint32_t usedSamplers() const { return m_usedSamplers; } uint32_t usedRTs() const { return m_usedRTs; } - uint32_t maxDefinedConstant() const { return m_maxDefinedConstant; } uint32_t textureTypes() const { return m_textureTypes; } private: diff --git a/src/dxso/dxso_isgn.h b/src/dxso/dxso_isgn.h index 2faa04ef9..935df3edf 100644 --- a/src/dxso/dxso_isgn.h +++ b/src/dxso/dxso_isgn.h @@ -31,6 +31,7 @@ namespace dxvk { struct DxsoShaderMetaInfo { bool needsConstantCopies = false; + uint32_t maxShaderDefinedFloatConstant = 0; uint32_t maxConstIndexF = 0; uint32_t maxConstIndexI = 0; uint32_t maxConstIndexB = 0; diff --git a/src/dxso/dxso_module.cpp b/src/dxso/dxso_module.cpp index 233f950cd..4986118d7 100644 --- a/src/dxso/dxso_module.cpp +++ b/src/dxso/dxso_module.cpp @@ -36,7 +36,6 @@ namespace dxvk { m_meta = compiler->meta(); m_constants = compiler->constants(); - m_maxDefinedConst = compiler->maxDefinedConstant(); m_usedSamplers = compiler->usedSamplers(); m_textureTypes = compiler->textureTypes(); diff --git a/src/dxso/dxso_module.h b/src/dxso/dxso_module.h index e95115c8d..9e18d9616 100644 --- a/src/dxso/dxso_module.h +++ b/src/dxso/dxso_module.h @@ -59,8 +59,6 @@ namespace dxvk { uint32_t usedRTs() { return m_usedRTs; } - uint32_t maxDefinedConstant() { return m_maxDefinedConst; } - uint32_t textureTypes() { return m_textureTypes; } private: @@ -82,7 +80,6 @@ namespace dxvk { uint32_t m_textureTypes; DxsoShaderMetaInfo m_meta; - uint32_t m_maxDefinedConst; DxsoDefinedConstants m_constants; };