mirror of
https://github.com/doitsujin/dxvk.git
synced 2025-02-27 04:54:15 +01:00
[dxbc] Rework embedded immediate constant buffers
Considerably reduces the size of immediate constant buffer arrays when not all vector components are used. Also adds bound-checking.
This commit is contained in:
parent
31a4679960
commit
31192b6d3f
@ -241,8 +241,8 @@ namespace dxvk {
|
||||
info.bindings = m_bindings.data();
|
||||
info.inputMask = m_inputMask;
|
||||
info.outputMask = m_outputMask;
|
||||
info.uniformSize = m_immConstData.size();
|
||||
info.uniformData = m_immConstData.data();
|
||||
info.uniformSize = m_icbData.size();
|
||||
info.uniformData = m_icbData.data();
|
||||
info.pushConstStages = VK_SHADER_STAGE_FRAGMENT_BIT;
|
||||
info.pushConstSize = sizeof(DxbcPushConstants);
|
||||
info.outputTopology = m_outputTopology;
|
||||
@ -1491,77 +1491,108 @@ namespace dxvk {
|
||||
|
||||
|
||||
void DxbcCompiler::emitDclImmediateConstantBuffer(const DxbcShaderInstruction& ins) {
|
||||
if (m_immConstBuf != 0)
|
||||
if (m_icbArray)
|
||||
throw DxvkError("DxbcCompiler: Immediate constant buffer already declared");
|
||||
|
||||
if ((ins.customDataSize & 0x3) != 0)
|
||||
throw DxvkError("DxbcCompiler: Immediate constant buffer size not a multiple of four DWORDs");
|
||||
|
||||
if (ins.customDataSize <= Icb_MaxBakedDwords) {
|
||||
|
||||
// A lot of the time we'll be dealing with a scalar or vec2
|
||||
// array here, there's no reason to emit all those zeroes.
|
||||
uint32_t componentCount = 1u;
|
||||
|
||||
for (uint32_t i = 0; i < ins.customDataSize; i += 4u) {
|
||||
for (uint32_t c = componentCount; c < 4u; c++) {
|
||||
if (ins.customData[i + c])
|
||||
componentCount = c + 1u;
|
||||
}
|
||||
|
||||
if (componentCount == 4u)
|
||||
break;
|
||||
}
|
||||
|
||||
uint32_t vectorCount = (ins.customDataSize / 4u);
|
||||
uint32_t dwordCount = vectorCount * componentCount;
|
||||
|
||||
if (dwordCount <= Icb_MaxBakedDwords) {
|
||||
this->emitDclImmediateConstantBufferBaked(
|
||||
ins.customDataSize, ins.customData);
|
||||
ins.customDataSize, ins.customData, componentCount);
|
||||
} else {
|
||||
this->emitDclImmediateConstantBufferUbo(
|
||||
ins.customDataSize, ins.customData);
|
||||
ins.customDataSize, ins.customData, componentCount);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void DxbcCompiler::emitDclImmediateConstantBufferBaked(
|
||||
uint32_t dwordCount,
|
||||
const uint32_t* dwordArray) {
|
||||
const uint32_t* dwordArray,
|
||||
uint32_t componentCount) {
|
||||
// Declare individual constants as vectors of componentCount 32-bit values
|
||||
std::array<uint32_t, 4096> vectorIds;
|
||||
small_vector<uint32_t, Icb_MaxBakedDwords> vectorIds;
|
||||
|
||||
DxbcVectorType vecType;
|
||||
vecType.ctype = DxbcScalarType::Uint32;
|
||||
vecType.ccount = 4;
|
||||
vecType.ccount = componentCount;
|
||||
|
||||
const uint32_t vectorTypeId = getVectorTypeId(vecType);
|
||||
const uint32_t vectorCount = dwordCount / 4;
|
||||
uint32_t vectorTypeId = getVectorTypeId(vecType);
|
||||
|
||||
for (uint32_t i = 0; i < vectorCount; i++) {
|
||||
std::array<uint32_t, 4> scalarIds = {
|
||||
m_module.constu32(dwordArray[4 * i + 0]),
|
||||
m_module.constu32(dwordArray[4 * i + 1]),
|
||||
m_module.constu32(dwordArray[4 * i + 2]),
|
||||
m_module.constu32(dwordArray[4 * i + 3]),
|
||||
};
|
||||
|
||||
vectorIds.at(i) = m_module.constComposite(
|
||||
vectorTypeId, scalarIds.size(), scalarIds.data());
|
||||
for (uint32_t i = 0; i < dwordCount; i += 4u) {
|
||||
std::array<uint32_t, 4> scalarIds = { };
|
||||
|
||||
for (uint32_t c = 0; c < componentCount; c++)
|
||||
scalarIds[c] = m_module.constu32(dwordArray[i + c]);
|
||||
|
||||
uint32_t id = scalarIds[0];
|
||||
|
||||
if (componentCount > 1u)
|
||||
id = m_module.constComposite(vectorTypeId, componentCount, scalarIds.data());
|
||||
|
||||
vectorIds.push_back(id);
|
||||
}
|
||||
|
||||
|
||||
// Pad array with one entry of zeroes so that we can
|
||||
// handle out-of-bounds accesses more conveniently.
|
||||
vectorIds.push_back(emitBuildZeroVector(vecType).id);
|
||||
|
||||
// Declare the array that contains all the vectors
|
||||
DxbcArrayType arrInfo;
|
||||
arrInfo.ctype = DxbcScalarType::Uint32;
|
||||
arrInfo.ccount = 4;
|
||||
arrInfo.alength = vectorCount;
|
||||
|
||||
const uint32_t arrayTypeId = getArrayTypeId(arrInfo);
|
||||
const uint32_t arrayId = m_module.constComposite(
|
||||
arrayTypeId, vectorCount, vectorIds.data());
|
||||
|
||||
arrInfo.ccount = componentCount;
|
||||
arrInfo.alength = vectorIds.size();
|
||||
|
||||
uint32_t arrayTypeId = getArrayTypeId(arrInfo);
|
||||
uint32_t arrayId = m_module.constComposite(
|
||||
arrayTypeId, vectorIds.size(), vectorIds.data());
|
||||
|
||||
// Declare the variable that will hold the constant
|
||||
// data and initialize it with the constant array.
|
||||
const uint32_t pointerTypeId = m_module.defPointerType(
|
||||
uint32_t pointerTypeId = m_module.defPointerType(
|
||||
arrayTypeId, spv::StorageClassPrivate);
|
||||
|
||||
m_immConstBuf = m_module.newVarInit(
|
||||
|
||||
m_icbArray = m_module.newVarInit(
|
||||
pointerTypeId, spv::StorageClassPrivate,
|
||||
arrayId);
|
||||
|
||||
m_module.setDebugName(m_immConstBuf, "icb");
|
||||
m_module.decorate(m_immConstBuf, spv::DecorationNonWritable);
|
||||
m_module.setDebugName(m_icbArray, "icb");
|
||||
m_module.decorate(m_icbArray, spv::DecorationNonWritable);
|
||||
|
||||
m_icbComponents = componentCount;
|
||||
m_icbSize = dwordCount / 4u;
|
||||
}
|
||||
|
||||
|
||||
void DxbcCompiler::emitDclImmediateConstantBufferUbo(
|
||||
uint32_t dwordCount,
|
||||
const uint32_t* dwordArray) {
|
||||
const uint32_t* dwordArray,
|
||||
uint32_t componentCount) {
|
||||
this->emitDclConstantBufferVar(Icb_BindingSlotId, dwordCount / 4, "icb");
|
||||
m_immConstData.resize(dwordCount * sizeof(uint32_t));
|
||||
std::memcpy(m_immConstData.data(), dwordArray, m_immConstData.size());
|
||||
|
||||
m_icbData.resize(dwordCount * sizeof(uint32_t));
|
||||
std::memcpy(m_icbData.data(), dwordArray, m_icbData.size());
|
||||
|
||||
m_icbComponents = 4u;
|
||||
m_icbSize = dwordCount / 4u;
|
||||
}
|
||||
|
||||
|
||||
@ -5282,13 +5313,17 @@ namespace dxvk {
|
||||
|
||||
DxbcRegisterPointer DxbcCompiler::emitGetImmConstBufPtr(
|
||||
const DxbcRegister& operand) {
|
||||
const DxbcRegisterValue constId
|
||||
= emitIndexLoad(operand.idx[0]);
|
||||
|
||||
if (m_immConstBuf != 0) {
|
||||
DxbcRegisterValue constId = emitIndexLoad(operand.idx[0]);
|
||||
|
||||
if (m_icbArray) {
|
||||
// We pad the icb array with an extra zero vector, so we can
|
||||
// clamp the index and get correct robustness behaviour.
|
||||
constId.id = m_module.opUMin(getVectorTypeId(constId.type),
|
||||
constId.id, m_module.constu32(m_icbSize));
|
||||
|
||||
DxbcRegisterInfo ptrInfo;
|
||||
ptrInfo.type.ctype = DxbcScalarType::Uint32;
|
||||
ptrInfo.type.ccount = 4;
|
||||
ptrInfo.type.ccount = m_icbComponents;
|
||||
ptrInfo.type.alength = 0;
|
||||
ptrInfo.sclass = spv::StorageClassPrivate;
|
||||
|
||||
@ -5297,7 +5332,7 @@ namespace dxvk {
|
||||
result.type.ccount = ptrInfo.type.ccount;
|
||||
result.id = m_module.opAccessChain(
|
||||
getPointerTypeId(ptrInfo),
|
||||
m_immConstBuf, 1, &constId.id);
|
||||
m_icbArray, 1, &constId.id);
|
||||
return result;
|
||||
} else if (m_constantBuffers.at(Icb_BindingSlotId).varId != 0) {
|
||||
const std::array<uint32_t, 2> indices =
|
||||
@ -5305,7 +5340,7 @@ namespace dxvk {
|
||||
|
||||
DxbcRegisterInfo ptrInfo;
|
||||
ptrInfo.type.ctype = DxbcScalarType::Float32;
|
||||
ptrInfo.type.ccount = 4;
|
||||
ptrInfo.type.ccount = m_icbComponents;
|
||||
ptrInfo.type.alength = 0;
|
||||
ptrInfo.sclass = spv::StorageClassUniform;
|
||||
|
||||
@ -5343,7 +5378,7 @@ namespace dxvk {
|
||||
|
||||
case DxbcOperandType::ImmediateConstantBuffer:
|
||||
return emitGetImmConstBufPtr(operand);
|
||||
|
||||
|
||||
case DxbcOperandType::InputThreadId:
|
||||
return DxbcRegisterPointer {
|
||||
{ DxbcScalarType::Uint32, 3 },
|
||||
@ -5812,7 +5847,24 @@ namespace dxvk {
|
||||
}
|
||||
}
|
||||
|
||||
return emitValueLoad(emitGetOperandPtr(reg));
|
||||
DxbcRegisterValue value = emitValueLoad(emitGetOperandPtr(reg));
|
||||
|
||||
// Pad icb values to a vec4 since the app may access components that are always 0
|
||||
if (reg.type == DxbcOperandType::ImmediateConstantBuffer && value.type.ccount < 4u) {
|
||||
DxbcVectorType zeroType;
|
||||
zeroType.ctype = value.type.ctype;
|
||||
zeroType.ccount = 4u - value.type.ccount;
|
||||
|
||||
uint32_t zeroVector = emitBuildZeroVector(zeroType).id;
|
||||
|
||||
std::array<uint32_t, 2> constituents = { value.id, zeroVector };
|
||||
|
||||
value.type.ccount = 4u;
|
||||
value.id = m_module.opCompositeConstruct(getVectorTypeId(value.type),
|
||||
constituents.size(), constituents.data());
|
||||
}
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
|
||||
|
@ -503,8 +503,11 @@ namespace dxvk {
|
||||
//////////////////////////////////////////////////
|
||||
// Immediate constant buffer. If defined, this is
|
||||
// an array of uint32 vectors with up to four components.
|
||||
uint32_t m_immConstBuf = 0;
|
||||
std::vector<char> m_immConstData;
|
||||
uint32_t m_icbArray = 0;
|
||||
std::vector<char> m_icbData;
|
||||
|
||||
uint32_t m_icbComponents = 0u;
|
||||
uint32_t m_icbSize = 0u;
|
||||
|
||||
///////////////////////////////////////////////////
|
||||
// Sample pos array. If defined, this is an array
|
||||
@ -650,11 +653,13 @@ namespace dxvk {
|
||||
|
||||
void emitDclImmediateConstantBufferBaked(
|
||||
uint32_t dwordCount,
|
||||
const uint32_t* dwordArray);
|
||||
const uint32_t* dwordArray,
|
||||
uint32_t componentCount);
|
||||
|
||||
void emitDclImmediateConstantBufferUbo(
|
||||
uint32_t dwordCount,
|
||||
const uint32_t* dwordArray);
|
||||
const uint32_t* dwordArray,
|
||||
uint32_t componentCount);
|
||||
|
||||
void emitCustomData(
|
||||
const DxbcShaderInstruction& ins);
|
||||
|
Loading…
x
Reference in New Issue
Block a user