From ad095deaa899121be8eac7925b8d3e4fde4ca3d6 Mon Sep 17 00:00:00 2001
From: Philip Rebohle
Date: Thu, 27 Feb 2025 16:26:05 +0100
Subject: [PATCH 1/2] [dxbc] Refactor immediate constant buffer loads

---
Notes (text between the three-dash line and the diffstat is ignored by git am):
  * emitGetImmConstBufPtr() is replaced by emitImmediateConstantBufferLoadRaw(),
    which returns the loaded value instead of a pointer. emitRegisterLoadRaw()
    dispatches to it for ImmediateConstantBuffer operands before the index
    range lookup, and the matching case is removed from emitGetOperandPtr().
  * The zero-padding of icb values to four components moves from
    emitRegisterLoadRaw() into the new function.
  * This file was restored from a copy whose line breaks and intra-line
    whitespace had been collapsed. Line structure matches the hunk headers,
    but column alignment and trailing whitespace are reconstructed, so apply
    with --ignore-whitespace if context lines do not match exactly.

 src/dxbc/dxbc_compiler.cpp | 77 +++++++++++++++++++-------------------
 src/dxbc/dxbc_compiler.h   |  8 ++--
 2 files changed, 42 insertions(+), 43 deletions(-)

diff --git a/src/dxbc/dxbc_compiler.cpp b/src/dxbc/dxbc_compiler.cpp
index 79126c8ad..41144c803 100644
--- a/src/dxbc/dxbc_compiler.cpp
+++ b/src/dxbc/dxbc_compiler.cpp
@@ -5326,50 +5326,66 @@ namespace dxvk {
   }
 
 
-  DxbcRegisterPointer DxbcCompiler::emitGetImmConstBufPtr(
+  DxbcRegisterValue DxbcCompiler::emitImmediateConstantBufferLoadRaw(
     const DxbcRegister& operand) {
     DxbcRegisterValue constId = emitIndexLoad(operand.idx[0]);
 
+    DxbcRegisterValue value = { };
+
     if (m_icbArray) {
       // We pad the icb array with an extra zero vector, so we can
       // clamp the index and get correct robustness behaviour.
       constId.id = m_module.opUMin(getVectorTypeId(constId.type),
         constId.id, m_module.constu32(m_icbSize));
 
-      DxbcRegisterInfo ptrInfo;
+      DxbcRegisterInfo ptrInfo = { };
       ptrInfo.type.ctype = DxbcScalarType::Uint32;
       ptrInfo.type.ccount = m_icbComponents;
-      ptrInfo.type.alength = 0;
       ptrInfo.sclass = spv::StorageClassPrivate;
 
-      DxbcRegisterPointer result;
-      result.type.ctype = ptrInfo.type.ctype;
-      result.type.ccount = ptrInfo.type.ccount;
-      result.id = m_module.opAccessChain(
-        getPointerTypeId(ptrInfo),
-        m_icbArray, 1, &constId.id);
-      return result;
+      uint32_t ptrId = m_module.opAccessChain(
+        getPointerTypeId(ptrInfo), m_icbArray, 1, &constId.id);
+
+      value.type.ctype = ptrInfo.type.ctype;
+      value.type.ccount = ptrInfo.type.ccount;
+      value.id = m_module.opLoad(getVectorTypeId(value.type), ptrId);
     } else if (m_constantBuffers.at(Icb_BindingSlotId).varId != 0) {
       const std::array<uint32_t, 2> indices =
         {{ m_module.consti32(0), constId.id }};
 
-      DxbcRegisterInfo ptrInfo;
+      DxbcRegisterInfo ptrInfo = { };
       ptrInfo.type.ctype = DxbcScalarType::Float32;
       ptrInfo.type.ccount = m_icbComponents;
-      ptrInfo.type.alength = 0;
       ptrInfo.sclass = spv::StorageClassUniform;
 
-      DxbcRegisterPointer result;
-      result.type.ctype = ptrInfo.type.ctype;
-      result.type.ccount = ptrInfo.type.ccount;
-      result.id = m_module.opAccessChain(
-        getPointerTypeId(ptrInfo),
+      uint32_t ptrId = m_module.opAccessChain(getPointerTypeId(ptrInfo),
         m_constantBuffers.at(Icb_BindingSlotId).varId,
         indices.size(), indices.data());
-      return result;
+
+      value.type.ctype = ptrInfo.type.ctype;
+      value.type.ccount = ptrInfo.type.ccount;
+      value.id = m_module.opLoad(getVectorTypeId(value.type), ptrId);
     } else {
       throw DxvkError("DxbcCompiler: Immediate constant buffer not defined");
     }
+
+    // Pad to vec4 since apps may want to access
+    // components that we optimized away
+    if (value.type.ccount < 4u) {
+      DxbcVectorType zeroType;
+      zeroType.ctype = value.type.ctype;
+      zeroType.ccount = 4u - value.type.ccount;
+
+      uint32_t zeroVector = emitBuildZeroVector(zeroType).id;
+
+      std::array<uint32_t, 2> constituents = { value.id, zeroVector };
+
+      value.type.ccount = 4u;
+      value.id = m_module.opCompositeConstruct(getVectorTypeId(value.type),
+        constituents.size(), constituents.data());
+    }
+
+    return value;
   }
 
 
@@ -5391,9 +5407,6 @@ namespace dxvk {
       case DxbcOperandType::Output:
         return emitGetOutputPtr(operand);
 
-      case DxbcOperandType::ImmediateConstantBuffer:
-        return emitGetImmConstBufPtr(operand);
-
       case DxbcOperandType::InputThreadId:
         return DxbcRegisterPointer {
           { DxbcScalarType::Uint32, 3 },
@@ -5796,6 +5809,9 @@ namespace dxvk {
 
   DxbcRegisterValue DxbcCompiler::emitRegisterLoadRaw(
     const DxbcRegister& reg) {
+    if (reg.type == DxbcOperandType::ImmediateConstantBuffer)
+      return emitImmediateConstantBufferLoadRaw(reg);
+
     // Try to find index range for the given register
     const DxbcIndexRange* indexRange = nullptr;
 
@@ -5862,24 +5878,7 @@ namespace dxvk {
       }
     }
 
-    DxbcRegisterValue value = emitValueLoad(emitGetOperandPtr(reg));
-
-    // Pad icb values to a vec4 since the app may access components that are always 0
-    if (reg.type == DxbcOperandType::ImmediateConstantBuffer && value.type.ccount < 4u) {
-      DxbcVectorType zeroType;
-      zeroType.ctype = value.type.ctype;
-      zeroType.ccount = 4u - value.type.ccount;
-
-      uint32_t zeroVector = emitBuildZeroVector(zeroType).id;
-
-      std::array<uint32_t, 2> constituents = { value.id, zeroVector };
-
-      value.type.ccount = 4u;
-      value.id = m_module.opCompositeConstruct(getVectorTypeId(value.type),
-        constituents.size(), constituents.data());
-    }
-
-    return value;
+    return emitValueLoad(emitGetOperandPtr(reg));
   }
 
 
diff --git a/src/dxbc/dxbc_compiler.h b/src/dxbc/dxbc_compiler.h
index 7f4595a73..9ed553237 100644
--- a/src/dxbc/dxbc_compiler.h
+++ b/src/dxbc/dxbc_compiler.h
@@ -968,9 +968,6 @@ namespace dxvk {
     DxbcRegisterPointer emitGetConstBufPtr(
       const DxbcRegister& operand);
 
-    DxbcRegisterPointer emitGetImmConstBufPtr(
-      const DxbcRegister& operand);
-
     DxbcRegisterPointer emitGetOperandPtr(
       const DxbcRegister& operand);
 
@@ -1027,13 +1024,16 @@ namespace dxvk {
       DxbcRegisterValue value,
       DxbcRegMask writeMask);
 
+    DxbcRegisterValue emitImmediateConstantBufferLoadRaw(
+      const DxbcRegister& reg);
+
     DxbcRegisterValue emitRegisterLoadRaw(
       const DxbcRegister& reg);
 
     DxbcRegisterValue emitConstantBufferLoad(
       const DxbcRegister& reg,
       DxbcRegMask writeMask);
-    
+
     DxbcRegisterValue emitRegisterLoad(
       const DxbcRegister& reg,
       DxbcRegMask writeMask);
From 96c1e0cc3b53facbb14013e65368b6ad56d21305 Mon Sep 17 00:00:00 2001
From: Philip Rebohle
Date: Thu, 27 Feb 2025 16:26:33 +0100
Subject: [PATCH 2/2] [dxbc] Promote matrix-like icb to constant vector

---
Notes (text between the three-dash line and the diffstat is ignored by git am):
  * emitDclImmediateConstantBufferMatrix() returns false, leaving the regular
    icb path in charge, when the icb has more vectors than used components or
    when any component is non-zero in more than one vector. Otherwise it
    creates two private uvec4 variables, "icb_sel" (the vector index that
    owns each component) and "icb" (the component values), and sets
    m_icbComponents to 4.
  * Components that are zero in every vector keep a zero in both "icb_sel"
    and "icb", so they read back as zero for any index.
  * Loads compare the index (extended to m_icbComponents components via
    emitRegisterExtend) against "icb_sel" and select per component between
    "icb" and a zero vector.
  * Restored from a whitespace-collapsed copy; see the note on patch 1/2
    regarding --ignore-whitespace.

 src/dxbc/dxbc_compiler.cpp | 73 ++++++++++++++++++++++++++++++++++++++
 src/dxbc/dxbc_compiler.h   | 10 +++++-
 2 files changed, 82 insertions(+), 1 deletion(-)

diff --git a/src/dxbc/dxbc_compiler.cpp b/src/dxbc/dxbc_compiler.cpp
index 41144c803..1f8ba83f4 100644
--- a/src/dxbc/dxbc_compiler.cpp
+++ b/src/dxbc/dxbc_compiler.cpp
@@ -1512,6 +1512,11 @@ namespace dxvk {
         break;
     }
 
+    // Special case for the pattern where fxc emits a matrix
+    if (this->emitDclImmediateConstantBufferMatrix(
+        ins.customDataSize, ins.customData, componentCount))
+      return;
+
     uint32_t vectorCount = (ins.customDataSize / 4u);
     uint32_t dwordCount = vectorCount * componentCount;
 
@@ -1611,6 +1616,59 @@ namespace dxvk {
   }
 
 
+  bool DxbcCompiler::emitDclImmediateConstantBufferMatrix(
+    uint32_t dwordCount,
+    const uint32_t* dwordArray,
+    uint32_t componentCount) {
+    // A very common pattern is for fxc to emit a matrix as an icb where each
+    // component only has a non-zero value in a single vector. Detect this
+    // pattern and emit it as a single constant vector instead, and implement
+    // dynamic indexing by selecting either the vector component or zero by
+    // comparing against a component map.
+    uint32_t vectorCount = dwordCount / 4u;
+
+    if (vectorCount > componentCount)
+      return false;
+
+    std::array<uint32_t, 4> componentMap = { 0u, 0u, 0u, 0u };
+    std::array<uint32_t, 4> componentData = { 0u, 0u, 0u, 0u };
+
+    for (uint32_t v = 0; v < vectorCount; v++) {
+      for (uint32_t c = 0; c < componentCount; c++) {
+        uint32_t value = dwordArray[4u * v + c];
+
+        if (value && componentData[c])
+          return false;
+
+        if (value) {
+          componentData[c] = value;
+          componentMap[c] = v;
+        }
+      }
+    }
+
+    uint32_t mapId = m_module.constvec4u32(componentMap[0], componentMap[1], componentMap[2], componentMap[3]);
+    uint32_t dataId = m_module.constvec4u32(componentData[0], componentData[1], componentData[2], componentData[3]);
+
+    // Emit variables to make it more obvious what's going on
+    DxbcRegisterInfo varInfo = { };
+    varInfo.type.ctype = DxbcScalarType::Uint32;
+    varInfo.type.ccount = 4u;
+    varInfo.sclass = spv::StorageClassPrivate;
+
+    uint32_t ptrTypeId = this->getPointerTypeId(varInfo);
+
+    m_icbMatrixMap = m_module.newVarInit(ptrTypeId, spv::StorageClassPrivate, mapId);
+    m_icbMatrixData = m_module.newVarInit(ptrTypeId, spv::StorageClassPrivate, dataId);
+
+    m_module.setDebugName(m_icbMatrixMap, "icb_sel");
+    m_module.setDebugName(m_icbMatrixData, "icb");
+
+    m_icbComponents = 4u;
+    return true;
+  }
+
+
   void DxbcCompiler::emitCustomData(const DxbcShaderInstruction& ins) {
     switch (ins.customDataType) {
       case DxbcCustomDataClass::ImmConstBuf:
@@ -5349,6 +5407,21 @@ namespace dxvk {
       value.type.ctype = ptrInfo.type.ctype;
       value.type.ccount = ptrInfo.type.ccount;
       value.id = m_module.opLoad(getVectorTypeId(value.type), ptrId);
+    } else if (m_icbMatrixData) {
+      value.type.ctype = DxbcScalarType::Uint32;
+      value.type.ccount = m_icbComponents;
+
+      uint32_t uintTypeId = getVectorTypeId(value.type);
+      uint32_t boolTypeId = getVectorTypeId({ DxbcScalarType::Bool, m_icbComponents });
+
+      uint32_t indexId = emitRegisterExtend(constId, m_icbComponents).id;
+      uint32_t mapId = m_module.opLoad(uintTypeId, m_icbMatrixMap);
+
+      uint32_t selId = m_module.opIEqual(boolTypeId, indexId, mapId);
+      uint32_t icbId = m_module.opLoad(uintTypeId, m_icbMatrixData);
+      uint32_t zeroId = emitBuildZeroVector(value.type).id;
+
+      value.id = m_module.opSelect(uintTypeId, selId, icbId, zeroId);
     } else if (m_constantBuffers.at(Icb_BindingSlotId).varId != 0) {
       const std::array<uint32_t, 2> indices =
         {{ m_module.consti32(0), constId.id }};
diff --git a/src/dxbc/dxbc_compiler.h b/src/dxbc/dxbc_compiler.h
index 9ed553237..8f22a7012 100644
--- a/src/dxbc/dxbc_compiler.h
+++ b/src/dxbc/dxbc_compiler.h
@@ -519,6 +519,9 @@ namespace dxvk {
 
     uint32_t m_icbComponents = 0u;
     uint32_t m_icbSize = 0u;
+
+    uint32_t m_icbMatrixMap = 0u;
+    uint32_t m_icbMatrixData = 0u;
 
     ///////////////////////////////////////////////////
     // Sample pos array. If defined, this iis an array
@@ -673,7 +676,12 @@ namespace dxvk {
       uint32_t dwordCount,
       const uint32_t* dwordArray,
       uint32_t componentCount);
-    
+
+    bool emitDclImmediateConstantBufferMatrix(
+      uint32_t dwordCount,
+      const uint32_t* dwordArray,
+      uint32_t componentCount);
+
     void emitCustomData(
       const DxbcShaderInstruction& ins);
 