From d6d13edb7a2cee8eb31ea273c2d55eabce929f6a Mon Sep 17 00:00:00 2001 From: Philip Rebohle Date: Wed, 26 Feb 2025 00:04:55 +0100 Subject: [PATCH] [dxbc] Use smallest aligned vector type for buffer-backed ubo --- src/dxbc/dxbc_compiler.cpp | 35 +++++++++++++++++++++++++---------- src/dxbc/dxbc_compiler.h | 3 ++- src/dxbc/dxbc_options.cpp | 3 ++- src/dxbc/dxbc_options.h | 6 +++++- 4 files changed, 34 insertions(+), 13 deletions(-) diff --git a/src/dxbc/dxbc_compiler.cpp b/src/dxbc/dxbc_compiler.cpp index c76753c56..d9d177109 100644 --- a/src/dxbc/dxbc_compiler.cpp +++ b/src/dxbc/dxbc_compiler.cpp @@ -241,8 +241,8 @@ namespace dxvk { info.bindings = m_bindings.data(); info.inputMask = m_inputMask; info.outputMask = m_outputMask; - info.uniformSize = m_icbData.size(); - info.uniformData = m_icbData.data(); + info.uniformSize = m_icbData.size() * sizeof(uint32_t); + info.uniformData = reinterpret_cast(m_icbData.data()); info.pushConstStages = VK_SHADER_STAGE_FRAGMENT_BIT; info.pushConstSize = sizeof(DxbcPushConstants); info.outputTopology = m_outputTopology; @@ -817,7 +817,7 @@ namespace dxvk { if (ins.controls.accessType() == DxbcConstantBufferAccessType::DynamicallyIndexed) elementCount = 4096; - this->emitDclConstantBufferVar(bufferId, elementCount, + this->emitDclConstantBufferVar(bufferId, elementCount, 4u, str::format("cb", bufferId).c_str()); } @@ -825,13 +825,14 @@ namespace dxvk { void DxbcCompiler::emitDclConstantBufferVar( uint32_t regIdx, uint32_t numConstants, + uint32_t numComponents, const char* name) { // Uniform buffer data is stored as a fixed-size array // of 4x32-bit vectors. SPIR-V requires explicit strides. const uint32_t arrayType = m_module.defArrayTypeUnique( - getVectorTypeId({ DxbcScalarType::Float32, 4 }), + getVectorTypeId({ DxbcScalarType::Float32, numComponents }), m_module.constu32(numConstants)); - m_module.decorateArrayStride(arrayType, 16); + m_module.decorateArrayStride(arrayType, sizeof(uint32_t) * numComponents); // SPIR-V requires us to put that array into a // struct and decorate that struct as a block. @@ -1586,13 +1587,27 @@ namespace dxvk { uint32_t dwordCount, const uint32_t* dwordArray, uint32_t componentCount) { - this->emitDclConstantBufferVar(Icb_BindingSlotId, dwordCount / 4, "icb"); + uint32_t vectorCount = dwordCount / 4u; - m_icbData.resize(dwordCount * sizeof(uint32_t)); - std::memcpy(m_icbData.data(), dwordArray, m_icbData.size()); + // Tightly pack vec2 or scalar arrays if possible. Don't bother with + // vec3 since we'd rather have properly vectorized loads in that case. + if (m_moduleInfo.options.supportsTightIcbPacking && componentCount <= 2u) + m_icbComponents = componentCount; + else + m_icbComponents = 4u; - m_icbComponents = 4u; - m_icbSize = dwordCount / 4u; + // Immediate constant buffer can be read out of bounds, declare + // it with the maximum possible size and rely on robustness. + this->emitDclConstantBufferVar(Icb_BindingSlotId, 4096u, m_icbComponents, "icb"); + + m_icbData.reserve(vectorCount * componentCount); + + for (uint32_t i = 0; i < dwordCount; i += 4u) { + for (uint32_t c = 0; c < m_icbComponents; c++) + m_icbData.push_back(dwordArray[i + c]); + } + + m_icbSize = vectorCount; } diff --git a/src/dxbc/dxbc_compiler.h b/src/dxbc/dxbc_compiler.h index ed63f4ba7..e47e31c5f 100644 --- a/src/dxbc/dxbc_compiler.h +++ b/src/dxbc/dxbc_compiler.h @@ -504,7 +504,7 @@ namespace dxvk { // Immediate constant buffer. If defined, this is // an array of four-component uint32 vectors. uint32_t m_icbArray = 0; - std::vector m_icbData; + std::vector m_icbData; uint32_t m_icbComponents = 0u; uint32_t m_icbSize = 0u; @@ -593,6 +593,7 @@ namespace dxvk { void emitDclConstantBufferVar( uint32_t regIdx, uint32_t numConstants, + uint32_t numComponents, const char* name); void emitDclSampler( diff --git a/src/dxbc/dxbc_options.cpp b/src/dxbc/dxbc_options.cpp index e55a0f945..7c269f792 100644 --- a/src/dxbc/dxbc_options.cpp +++ b/src/dxbc/dxbc_options.cpp @@ -38,6 +38,7 @@ namespace dxvk { disableMsaa = options.disableMsaa; forceSampleRateShading = options.forceSampleRateShading; enableSampleShadingInterlock = device->features().extFragmentShaderInterlock.fragmentShaderSampleInterlock; + supportsTightIcbPacking = device->features().vk12.uniformBufferStandardLayout; // Figure out float control flags to match D3D11 rules if (options.floatControls) { @@ -55,4 +56,4 @@ namespace dxvk { } } -} \ No newline at end of file +} diff --git a/src/dxbc/dxbc_options.h b/src/dxbc/dxbc_options.h index 27ecca1ff..986004924 100644 --- a/src/dxbc/dxbc_options.h +++ b/src/dxbc/dxbc_options.h @@ -49,6 +49,10 @@ namespace dxvk { // Enable per-sample interlock if supported bool enableSampleShadingInterlock = false; + /// Use tightly packed arrays for immediate + /// constant buffers if possible + bool supportsTightIcbPacking = false; + /// Float control flags DxbcFloatControlFlags floatControl; @@ -56,4 +60,4 @@ namespace dxvk { VkDeviceSize minSsboAlignment = 0; }; -} \ No newline at end of file +}