From 31192b6d3f254d2279f6fda9a125675107c6bc36 Mon Sep 17 00:00:00 2001 From: Philip Rebohle Date: Tue, 25 Feb 2025 23:25:37 +0100 Subject: [PATCH] [dxbc] Rework embedded immediate constant buffers Considerably reduces the size of immediate constant buffer arrays when not all vector components are used. Also adds bound-checking. --- src/dxbc/dxbc_compiler.cpp | 146 +++++++++++++++++++++++++------------ src/dxbc/dxbc_compiler.h | 13 +++- 2 files changed, 108 insertions(+), 51 deletions(-) diff --git a/src/dxbc/dxbc_compiler.cpp b/src/dxbc/dxbc_compiler.cpp index e4de077ef..c76753c56 100644 --- a/src/dxbc/dxbc_compiler.cpp +++ b/src/dxbc/dxbc_compiler.cpp @@ -241,8 +241,8 @@ namespace dxvk { info.bindings = m_bindings.data(); info.inputMask = m_inputMask; info.outputMask = m_outputMask; - info.uniformSize = m_immConstData.size(); - info.uniformData = m_immConstData.data(); + info.uniformSize = m_icbData.size(); + info.uniformData = m_icbData.data(); info.pushConstStages = VK_SHADER_STAGE_FRAGMENT_BIT; info.pushConstSize = sizeof(DxbcPushConstants); info.outputTopology = m_outputTopology; @@ -1491,77 +1491,108 @@ namespace dxvk { void DxbcCompiler::emitDclImmediateConstantBuffer(const DxbcShaderInstruction& ins) { - if (m_immConstBuf != 0) + if (m_icbArray) throw DxvkError("DxbcCompiler: Immediate constant buffer already declared"); if ((ins.customDataSize & 0x3) != 0) throw DxvkError("DxbcCompiler: Immediate constant buffer size not a multiple of four DWORDs"); - - if (ins.customDataSize <= Icb_MaxBakedDwords) { + + // A lot of the time we'll be dealing with a scalar or vec2 + // array here, there's no reason to emit all those zeroes. + uint32_t componentCount = 1u; + + for (uint32_t i = 0; i < ins.customDataSize; i += 4u) { + for (uint32_t c = componentCount; c < 4u; c++) { + if (ins.customData[i + c]) + componentCount = c + 1u; + } + + if (componentCount == 4u) + break; + } + + uint32_t vectorCount = (ins.customDataSize / 4u); + uint32_t dwordCount = vectorCount * componentCount; + + if (dwordCount <= Icb_MaxBakedDwords) { this->emitDclImmediateConstantBufferBaked( - ins.customDataSize, ins.customData); + ins.customDataSize, ins.customData, componentCount); } else { this->emitDclImmediateConstantBufferUbo( - ins.customDataSize, ins.customData); + ins.customDataSize, ins.customData, componentCount); } } void DxbcCompiler::emitDclImmediateConstantBufferBaked( uint32_t dwordCount, - const uint32_t* dwordArray) { + const uint32_t* dwordArray, + uint32_t componentCount) { // Declare individual vector constants as 4x32-bit vectors - std::array vectorIds; + small_vector vectorIds; DxbcVectorType vecType; vecType.ctype = DxbcScalarType::Uint32; - vecType.ccount = 4; + vecType.ccount = componentCount; - const uint32_t vectorTypeId = getVectorTypeId(vecType); - const uint32_t vectorCount = dwordCount / 4; + uint32_t vectorTypeId = getVectorTypeId(vecType); - for (uint32_t i = 0; i < vectorCount; i++) { - std::array scalarIds = { - m_module.constu32(dwordArray[4 * i + 0]), - m_module.constu32(dwordArray[4 * i + 1]), - m_module.constu32(dwordArray[4 * i + 2]), - m_module.constu32(dwordArray[4 * i + 3]), - }; - - vectorIds.at(i) = m_module.constComposite( - vectorTypeId, scalarIds.size(), scalarIds.data()); + for (uint32_t i = 0; i < dwordCount; i += 4u) { + std::array scalarIds = { }; + + for (uint32_t c = 0; c < componentCount; c++) + scalarIds[c] = m_module.constu32(dwordArray[i + c]); + + uint32_t id = scalarIds[0]; + + if (componentCount > 1u) + id = m_module.constComposite(vectorTypeId, componentCount, scalarIds.data()); + + vectorIds.push_back(id); } - + + // Pad array with one entry of zeroes so that we can + // handle out-of-bounds accesses more conveniently. + vectorIds.push_back(emitBuildZeroVector(vecType).id); + // Declare the array that contains all the vectors DxbcArrayType arrInfo; arrInfo.ctype = DxbcScalarType::Uint32; - arrInfo.ccount = 4; - arrInfo.alength = vectorCount; - - const uint32_t arrayTypeId = getArrayTypeId(arrInfo); - const uint32_t arrayId = m_module.constComposite( - arrayTypeId, vectorCount, vectorIds.data()); - + arrInfo.ccount = componentCount; + arrInfo.alength = vectorIds.size(); + + uint32_t arrayTypeId = getArrayTypeId(arrInfo); + uint32_t arrayId = m_module.constComposite( + arrayTypeId, vectorIds.size(), vectorIds.data()); + // Declare the variable that will hold the constant // data and initialize it with the constant array. - const uint32_t pointerTypeId = m_module.defPointerType( + uint32_t pointerTypeId = m_module.defPointerType( arrayTypeId, spv::StorageClassPrivate); - - m_immConstBuf = m_module.newVarInit( + + m_icbArray = m_module.newVarInit( pointerTypeId, spv::StorageClassPrivate, arrayId); - m_module.setDebugName(m_immConstBuf, "icb"); - m_module.decorate(m_immConstBuf, spv::DecorationNonWritable); + m_module.setDebugName(m_icbArray, "icb"); + m_module.decorate(m_icbArray, spv::DecorationNonWritable); + + m_icbComponents = componentCount; + m_icbSize = dwordCount / 4u; } void DxbcCompiler::emitDclImmediateConstantBufferUbo( uint32_t dwordCount, - const uint32_t* dwordArray) { + const uint32_t* dwordArray, + uint32_t componentCount) { this->emitDclConstantBufferVar(Icb_BindingSlotId, dwordCount / 4, "icb"); - m_immConstData.resize(dwordCount * sizeof(uint32_t)); - std::memcpy(m_immConstData.data(), dwordArray, m_immConstData.size()); + + m_icbData.resize(dwordCount * sizeof(uint32_t)); + std::memcpy(m_icbData.data(), dwordArray, m_icbData.size()); + + m_icbComponents = 4u; + m_icbSize = dwordCount / 4u; } @@ -5282,13 +5313,17 @@ namespace dxvk { DxbcRegisterPointer DxbcCompiler::emitGetImmConstBufPtr( const DxbcRegister& operand) { - const DxbcRegisterValue constId - = emitIndexLoad(operand.idx[0]); - - if (m_immConstBuf != 0) { + DxbcRegisterValue constId = emitIndexLoad(operand.idx[0]); + + if (m_icbArray) { + // We pad the icb array with an extra zero vector, so we can + // clamp the index and get correct robustness behaviour. + constId.id = m_module.opUMin(getVectorTypeId(constId.type), + constId.id, m_module.constu32(m_icbSize)); + DxbcRegisterInfo ptrInfo; ptrInfo.type.ctype = DxbcScalarType::Uint32; - ptrInfo.type.ccount = 4; + ptrInfo.type.ccount = m_icbComponents; ptrInfo.type.alength = 0; ptrInfo.sclass = spv::StorageClassPrivate; @@ -5297,7 +5332,7 @@ namespace dxvk { result.type.ccount = ptrInfo.type.ccount; result.id = m_module.opAccessChain( getPointerTypeId(ptrInfo), - m_immConstBuf, 1, &constId.id); + m_icbArray, 1, &constId.id); return result; } else if (m_constantBuffers.at(Icb_BindingSlotId).varId != 0) { const std::array indices = @@ -5305,7 +5340,7 @@ namespace dxvk { DxbcRegisterInfo ptrInfo; ptrInfo.type.ctype = DxbcScalarType::Float32; - ptrInfo.type.ccount = 4; + ptrInfo.type.ccount = m_icbComponents; ptrInfo.type.alength = 0; ptrInfo.sclass = spv::StorageClassUniform; @@ -5343,7 +5378,7 @@ namespace dxvk { case DxbcOperandType::ImmediateConstantBuffer: return emitGetImmConstBufPtr(operand); - + case DxbcOperandType::InputThreadId: return DxbcRegisterPointer { { DxbcScalarType::Uint32, 3 }, @@ -5812,7 +5847,24 @@ namespace dxvk { } } - return emitValueLoad(emitGetOperandPtr(reg)); + DxbcRegisterValue value = emitValueLoad(emitGetOperandPtr(reg)); + + // Pad icb values to a vec4 since the app may access components that are always 0 + if (reg.type == DxbcOperandType::ImmediateConstantBuffer && value.type.ccount < 4u) { + DxbcVectorType zeroType; + zeroType.ctype = value.type.ctype; + zeroType.ccount = 4u - value.type.ccount; + + uint32_t zeroVector = emitBuildZeroVector(zeroType).id; + + std::array constituents = { value.id, zeroVector }; + + value.type.ccount = 4u; + value.id = m_module.opCompositeConstruct(getVectorTypeId(value.type), + constituents.size(), constituents.data()); + } + + return value; } diff --git a/src/dxbc/dxbc_compiler.h b/src/dxbc/dxbc_compiler.h index 434571888..ed63f4ba7 100644 --- a/src/dxbc/dxbc_compiler.h +++ b/src/dxbc/dxbc_compiler.h @@ -503,8 +503,11 @@ namespace dxvk { ////////////////////////////////////////////////// // Immediate constant buffer. If defined, this is // an array of four-component uint32 vectors. - uint32_t m_immConstBuf = 0; - std::vector m_immConstData; + uint32_t m_icbArray = 0; + std::vector m_icbData; + + uint32_t m_icbComponents = 0u; + uint32_t m_icbSize = 0u; /////////////////////////////////////////////////// // Sample pos array. If defined, this iis an array @@ -650,11 +653,13 @@ namespace dxvk { void emitDclImmediateConstantBufferBaked( uint32_t dwordCount, - const uint32_t* dwordArray); + const uint32_t* dwordArray, + uint32_t componentCount); void emitDclImmediateConstantBufferUbo( uint32_t dwordCount, - const uint32_t* dwordArray); + const uint32_t* dwordArray, + uint32_t componentCount); void emitCustomData( const DxbcShaderInstruction& ins);