1
0
mirror of https://github.com/doitsujin/dxvk.git synced 2025-01-19 05:52:11 +01:00

[dxbc] Use subgroup operations for atomic append/consume operations

Reduces the number of atomic operations performed per subgroup to 1.
This commit is contained in:
Philip Rebohle 2019-05-15 19:32:27 +02:00
parent dfa3caa946
commit 7d9a75c82c
No known key found for this signature in database
GPG Key ID: C8CC613427A31C99

View File

@ -2378,7 +2378,51 @@ namespace dxvk {
m_module.opBranchConditional(writeTest, cond.labelIf, cond.labelEnd);
m_module.opLabel(cond.labelIf);
// In case we have subgroup ops enabled, we need to
// count the number of active lanes, the lane index,
// and we need to perform the atomic op conditionally
uint32_t laneCount = 0;
uint32_t laneIndex = 0;
DxbcConditional elect;
if (m_moduleInfo.options.useSubgroupOpsForAtomicCounters) {
m_module.enableCapability(spv::CapabilityGroupNonUniform);
m_module.enableCapability(spv::CapabilityGroupNonUniformBallot);
uint32_t ballot = m_module.opGroupNonUniformBallot(
getVectorTypeId({ DxbcScalarType::Uint32, 4 }),
m_module.constu32(spv::ScopeSubgroup),
m_module.constBool(true));
laneCount = m_module.opGroupNonUniformBallotBitCount(
getScalarTypeId(DxbcScalarType::Uint32),
m_module.constu32(spv::ScopeSubgroup),
spv::GroupOperationReduce, ballot);
laneIndex = m_module.opGroupNonUniformBallotBitCount(
getScalarTypeId(DxbcScalarType::Uint32),
m_module.constu32(spv::ScopeSubgroup),
spv::GroupOperationExclusiveScan, ballot);
// Elect one lane to perform the atomic op
uint32_t election = m_module.opGroupNonUniformElect(
m_module.defBoolType(),
m_module.constu32(spv::ScopeSubgroup));
elect.labelIf = m_module.allocateId();
elect.labelEnd = m_module.allocateId();
m_module.opSelectionMerge(elect.labelEnd, spv::SelectionControlMaskNone);
m_module.opBranchConditional(election, elect.labelIf, elect.labelEnd);
m_module.opLabel(elect.labelIf);
} else {
// We're going to use this for the increment
laneCount = m_module.constu32(1);
}
// Get a pointer to the atomic counter in question
DxbcRegisterInfo ptrType;
ptrType.type.ctype = DxbcScalarType::Uint32;
@ -2386,8 +2430,8 @@ namespace dxvk {
ptrType.type.alength = 0;
ptrType.sclass = spv::StorageClassUniform;
const uint32_t zeroId = m_module.consti32(0);
const uint32_t ptrId = m_module.opAccessChain(
uint32_t zeroId = m_module.consti32(0);
uint32_t ptrId = m_module.opAccessChain(
getPointerTypeId(ptrType),
m_uavs.at(registerId).ctrId,
1, &zeroId);
@ -2397,27 +2441,26 @@ namespace dxvk {
uint32_t semantics = spv::MemorySemanticsUniformMemoryMask
| spv::MemorySemanticsAcquireReleaseMask;
const uint32_t scopeId = m_module.constu32(scope);
const uint32_t semanticsId = m_module.constu32(semantics);
uint32_t scopeId = m_module.constu32(scope);
uint32_t semanticsId = m_module.constu32(semantics);
// Compute the result value
DxbcRegisterValue value;
value.type.ctype = DxbcScalarType::Uint32;
value.type.ccount = 1;
const uint32_t typeId = getVectorTypeId(value.type);
uint32_t typeId = getVectorTypeId(value.type);
switch (ins.op) {
case DxbcOpcode::ImmAtomicAlloc:
value.id = m_module.opAtomicIAdd(typeId, ptrId,
scopeId, semanticsId, m_module.constu32(1));
scopeId, semanticsId, laneCount);
break;
case DxbcOpcode::ImmAtomicConsume:
value.id = m_module.opAtomicISub(typeId, ptrId,
scopeId, semanticsId, m_module.constu32(1));
value.id = m_module.opISub(typeId, value.id,
m_module.constu32(1));
scopeId, semanticsId, laneCount);
value.id = m_module.opISub(typeId, value.id, laneCount);
break;
default:
@ -2426,6 +2469,26 @@ namespace dxvk {
ins.op));
return;
}
// If we're using subgroup ops, we have to broadcast
// the result of the atomic op and compute the index
if (m_moduleInfo.options.useSubgroupOpsForAtomicCounters) {
m_module.opBranch(elect.labelEnd);
m_module.opLabel (elect.labelEnd);
uint32_t undef = m_module.constUndef(typeId);
std::array<SpirvPhiLabel, 2> phiLabels = {{
{ value.id, elect.labelIf },
{ undef, cond.labelIf },
}};
value.id = m_module.opPhi(typeId,
phiLabels.size(), phiLabels.data());
value.id = m_module.opGroupNonUniformBroadcastFirst(typeId,
m_module.constu32(spv::ScopeSubgroup), value.id);
value.id = m_module.opIAdd(typeId, value.id, laneIndex);
}
// Store the result
emitRegisterStore(ins.dst[0], value);