1
0
mirror of https://github.com/doitsujin/dxvk.git synced 2025-02-27 13:54:16 +01:00

[dxbc] Rework embedded immediate constant buffers

Considerably reduces the size of immediate constant buffer arrays when
not all vector components are used. Also adds bound-checking.
This commit is contained in:
Philip Rebohle 2025-02-25 23:25:37 +01:00 committed by Philip Rebohle
parent 31a4679960
commit 31192b6d3f
2 changed files with 108 additions and 51 deletions

View File

@ -241,8 +241,8 @@ namespace dxvk {
info.bindings = m_bindings.data();
info.inputMask = m_inputMask;
info.outputMask = m_outputMask;
info.uniformSize = m_immConstData.size();
info.uniformData = m_immConstData.data();
info.uniformSize = m_icbData.size();
info.uniformData = m_icbData.data();
info.pushConstStages = VK_SHADER_STAGE_FRAGMENT_BIT;
info.pushConstSize = sizeof(DxbcPushConstants);
info.outputTopology = m_outputTopology;
@ -1491,77 +1491,108 @@ namespace dxvk {
// Declares the shader's immediate constant buffer from the custom data
// attached to `ins`. Throws DxvkError if a buffer has already been
// declared, or if the data size is not a multiple of four dwords.
// The data is handed to either the baked-array path or the UBO path,
// depending on how many dwords remain once the components that are
// zero in every vector are left out.
void DxbcCompiler::emitDclImmediateConstantBuffer(const DxbcShaderInstruction& ins) {
if (m_immConstBuf != 0)
if (m_icbArray)
throw DxvkError("DxbcCompiler: Immediate constant buffer already declared");
if ((ins.customDataSize & 0x3) != 0)
throw DxvkError("DxbcCompiler: Immediate constant buffer size not a multiple of four DWORDs");
if (ins.customDataSize <= Icb_MaxBakedDwords) {
// A lot of the time we'll be dealing with a scalar or vec2
// array here, there's no reason to emit all those zeroes.
uint32_t componentCount = 1u;
// Scan every four-dword vector and track the highest component
// index that holds a non-zero value anywhere in the data.
for (uint32_t i = 0; i < ins.customDataSize; i += 4u) {
// Only components at or above the current count can raise it,
// so lower components do not need to be looked at again.
for (uint32_t c = componentCount; c < 4u; c++) {
if (ins.customData[i + c])
componentCount = c + 1u;
}
// All four components are in use, scanning further cannot change the result.
if (componentCount == 4u)
break;
}
uint32_t vectorCount = (ins.customDataSize / 4u);
// Number of dwords needed when each vector only stores componentCount components.
uint32_t dwordCount = vectorCount * componentCount;
if (dwordCount <= Icb_MaxBakedDwords) {
this->emitDclImmediateConstantBufferBaked(
ins.customDataSize, ins.customData);
ins.customDataSize, ins.customData, componentCount);
} else {
this->emitDclImmediateConstantBufferUbo(
ins.customDataSize, ins.customData);
ins.customDataSize, ins.customData, componentCount);
}
}
// Emits the immediate constant buffer as a constant array that
// initializes a Private-storage variable with the debug name "icb".
//   dwordCount:     number of dwords in dwordArray, four per source vector
//   dwordArray:     raw constant data
//   componentCount: number of leading components of each vector to emit
// On return, m_icbArray holds the variable ID, m_icbComponents holds the
// component count, and m_icbSize holds the number of source vectors.
void DxbcCompiler::emitDclImmediateConstantBufferBaked(
uint32_t dwordCount,
const uint32_t* dwordArray,
const uint32_t* dwordArray,
uint32_t componentCount) {
// Declare individual vector constants as 4x32-bit vectors
std::array<uint32_t, 4096> vectorIds;
small_vector<uint32_t, Icb_MaxBakedDwords> vectorIds;
DxbcVectorType vecType;
vecType.ctype = DxbcScalarType::Uint32;
vecType.ccount = 4;
vecType.ccount = componentCount;
const uint32_t vectorTypeId = getVectorTypeId(vecType);
const uint32_t vectorCount = dwordCount / 4;
uint32_t vectorTypeId = getVectorTypeId(vecType);
for (uint32_t i = 0; i < vectorCount; i++) {
std::array<uint32_t, 4> scalarIds = {
m_module.constu32(dwordArray[4 * i + 0]),
m_module.constu32(dwordArray[4 * i + 1]),
m_module.constu32(dwordArray[4 * i + 2]),
m_module.constu32(dwordArray[4 * i + 3]),
};
vectorIds.at(i) = m_module.constComposite(
vectorTypeId, scalarIds.size(), scalarIds.data());
// Source data is laid out as four dwords per vector, but only
// the first componentCount dwords of each vector are emitted.
for (uint32_t i = 0; i < dwordCount; i += 4u) {
std::array<uint32_t, 4> scalarIds = { };
for (uint32_t c = 0; c < componentCount; c++)
scalarIds[c] = m_module.constu32(dwordArray[i + c]);
// With a single component the scalar constant is used directly;
// a composite is only built for two or more components.
uint32_t id = scalarIds[0];
if (componentCount > 1u)
id = m_module.constComposite(vectorTypeId, componentCount, scalarIds.data());
vectorIds.push_back(id);
}
// Pad array with one entry of zeroes so that we can
// handle out-of-bounds accesses more conveniently.
vectorIds.push_back(emitBuildZeroVector(vecType).id);
// Declare the array that contains all the vectors
DxbcArrayType arrInfo;
arrInfo.ctype = DxbcScalarType::Uint32;
arrInfo.ccount = 4;
arrInfo.alength = vectorCount;
const uint32_t arrayTypeId = getArrayTypeId(arrInfo);
const uint32_t arrayId = m_module.constComposite(
arrayTypeId, vectorCount, vectorIds.data());
arrInfo.ccount = componentCount;
arrInfo.alength = vectorIds.size();
uint32_t arrayTypeId = getArrayTypeId(arrInfo);
uint32_t arrayId = m_module.constComposite(
arrayTypeId, vectorIds.size(), vectorIds.data());
// Declare the variable that will hold the constant
// data and initialize it with the constant array.
const uint32_t pointerTypeId = m_module.defPointerType(
uint32_t pointerTypeId = m_module.defPointerType(
arrayTypeId, spv::StorageClassPrivate);
m_immConstBuf = m_module.newVarInit(
m_icbArray = m_module.newVarInit(
pointerTypeId, spv::StorageClassPrivate,
arrayId);
m_module.setDebugName(m_immConstBuf, "icb");
m_module.decorate(m_immConstBuf, spv::DecorationNonWritable);
m_module.setDebugName(m_icbArray, "icb");
m_module.decorate(m_icbArray, spv::DecorationNonWritable);
// m_icbSize counts the source vectors only, not the zero
// padding entry that was appended to the array above.
m_icbComponents = componentCount;
m_icbSize = dwordCount / 4u;
}
// Emits the immediate constant buffer through a constant buffer
// variable declared at Icb_BindingSlotId with dwordCount / 4 elements,
// and keeps a byte copy of the raw data in the compiler's data buffer.
// componentCount is not referenced in this function: the data is
// copied unmodified and m_icbComponents is set to 4.
void DxbcCompiler::emitDclImmediateConstantBufferUbo(
uint32_t dwordCount,
const uint32_t* dwordArray) {
const uint32_t* dwordArray,
uint32_t componentCount) {
this->emitDclConstantBufferVar(Icb_BindingSlotId, dwordCount / 4, "icb");
// The storage is a byte vector, so the size is given in bytes.
m_immConstData.resize(dwordCount * sizeof(uint32_t));
std::memcpy(m_immConstData.data(), dwordArray, m_immConstData.size());
m_icbData.resize(dwordCount * sizeof(uint32_t));
std::memcpy(m_icbData.data(), dwordArray, m_icbData.size());
m_icbComponents = 4u;
m_icbSize = dwordCount / 4u;
}
@ -5282,13 +5313,17 @@ namespace dxvk {
DxbcRegisterPointer DxbcCompiler::emitGetImmConstBufPtr(
const DxbcRegister& operand) {
const DxbcRegisterValue constId
= emitIndexLoad(operand.idx[0]);
if (m_immConstBuf != 0) {
DxbcRegisterValue constId = emitIndexLoad(operand.idx[0]);
if (m_icbArray) {
// We pad the icb array with an extra zero vector, so we can
// clamp the index and get correct robustness behaviour.
constId.id = m_module.opUMin(getVectorTypeId(constId.type),
constId.id, m_module.constu32(m_icbSize));
DxbcRegisterInfo ptrInfo;
ptrInfo.type.ctype = DxbcScalarType::Uint32;
ptrInfo.type.ccount = 4;
ptrInfo.type.ccount = m_icbComponents;
ptrInfo.type.alength = 0;
ptrInfo.sclass = spv::StorageClassPrivate;
@ -5297,7 +5332,7 @@ namespace dxvk {
result.type.ccount = ptrInfo.type.ccount;
result.id = m_module.opAccessChain(
getPointerTypeId(ptrInfo),
m_immConstBuf, 1, &constId.id);
m_icbArray, 1, &constId.id);
return result;
} else if (m_constantBuffers.at(Icb_BindingSlotId).varId != 0) {
const std::array<uint32_t, 2> indices =
@ -5305,7 +5340,7 @@ namespace dxvk {
DxbcRegisterInfo ptrInfo;
ptrInfo.type.ctype = DxbcScalarType::Float32;
ptrInfo.type.ccount = 4;
ptrInfo.type.ccount = m_icbComponents;
ptrInfo.type.alength = 0;
ptrInfo.sclass = spv::StorageClassUniform;
@ -5343,7 +5378,7 @@ namespace dxvk {
case DxbcOperandType::ImmediateConstantBuffer:
return emitGetImmConstBufPtr(operand);
case DxbcOperandType::InputThreadId:
return DxbcRegisterPointer {
{ DxbcScalarType::Uint32, 3 },
@ -5812,7 +5847,24 @@ namespace dxvk {
}
}
return emitValueLoad(emitGetOperandPtr(reg));
DxbcRegisterValue value = emitValueLoad(emitGetOperandPtr(reg));
// Pad icb values to a vec4 since the app may access components that are always 0
if (reg.type == DxbcOperandType::ImmediateConstantBuffer && value.type.ccount < 4u) {
DxbcVectorType zeroType;
zeroType.ctype = value.type.ctype;
zeroType.ccount = 4u - value.type.ccount;
uint32_t zeroVector = emitBuildZeroVector(zeroType).id;
std::array<uint32_t, 2> constituents = { value.id, zeroVector };
value.type.ccount = 4u;
value.id = m_module.opCompositeConstruct(getVectorTypeId(value.type),
constituents.size(), constituents.data());
}
return value;
}

View File

@ -503,8 +503,11 @@ namespace dxvk {
//////////////////////////////////////////////////
// Immediate constant buffer. If defined, this is an array
// of uint32 vectors with m_icbComponents components each.
uint32_t m_immConstBuf = 0;
std::vector<char> m_immConstData;
uint32_t m_icbArray = 0;
std::vector<char> m_icbData;
uint32_t m_icbComponents = 0u;
uint32_t m_icbSize = 0u;
///////////////////////////////////////////////////
// Sample pos array. If defined, this is an array
@ -650,11 +653,13 @@ namespace dxvk {
void emitDclImmediateConstantBufferBaked(
uint32_t dwordCount,
const uint32_t* dwordArray);
const uint32_t* dwordArray,
uint32_t componentCount);
void emitDclImmediateConstantBufferUbo(
uint32_t dwordCount,
const uint32_t* dwordArray);
const uint32_t* dwordArray,
uint32_t componentCount);
void emitCustomData(
const DxbcShaderInstruction& ins);