1
0
mirror of https://github.com/doitsujin/dxvk.git synced 2025-01-19 05:52:11 +01:00

[dxvk] Re-implement early discard with quad granularity

May perform better on some hardware in situations where we cannot
discard a full subgroup. Closes #753.
This commit is contained in:
Philip Rebohle 2019-04-30 14:42:21 +02:00
parent 4dd68987d6
commit 3b1e753bb5
No known key found for this signature in database
GPG Key ID: C8CC613427A31C99
2 changed files with 32 additions and 19 deletions

View File

@@ -3983,15 +3983,33 @@ namespace dxvk {
m_module.constu32(spv::ScopeSubgroup), m_module.constu32(spv::ScopeSubgroup),
killState); killState);
uint32_t invocationMask = m_module.opLoad( uint32_t laneId = m_module.opLoad(
getVectorTypeId({ DxbcScalarType::Uint32, 4 }), getScalarTypeId(DxbcScalarType::Uint32),
m_ps.invocationMask); m_ps.builtinLaneId);
uint32_t killSubgroup = m_module.opAll( uint32_t laneIdPart = m_module.opShiftRightLogical(
getScalarTypeId(DxbcScalarType::Uint32),
laneId, m_module.constu32(5));
uint32_t laneMask = m_module.opVectorExtractDynamic(
getScalarTypeId(DxbcScalarType::Uint32),
ballot, laneIdPart);
uint32_t laneIdQuad = m_module.opBitwiseAnd(
getScalarTypeId(DxbcScalarType::Uint32),
laneId, m_module.constu32(0x1c));
laneMask = m_module.opShiftRightLogical(
getScalarTypeId(DxbcScalarType::Uint32),
laneMask, laneIdQuad);
laneMask = m_module.opBitwiseAnd(
getScalarTypeId(DxbcScalarType::Uint32),
laneMask, m_module.constu32(0xf));
uint32_t killSubgroup = m_module.opIEqual(
m_module.defBoolType(), m_module.defBoolType(),
m_module.opIEqual( laneMask, m_module.constu32(0xf));
m_module.defVectorType(m_module.defBoolType(), 4),
ballot, invocationMask));
DxbcConditional cond; DxbcConditional cond;
cond.labelIf = m_module.allocateId(); cond.labelIf = m_module.allocateId();
@@ -6544,18 +6562,13 @@ namespace dxvk {
m_module.enableCapability(spv::CapabilityGroupNonUniform); m_module.enableCapability(spv::CapabilityGroupNonUniform);
m_module.enableCapability(spv::CapabilityGroupNonUniformBallot); m_module.enableCapability(spv::CapabilityGroupNonUniformBallot);
DxbcRegisterInfo invocationMask; DxbcRegisterInfo laneId;
invocationMask.type = { DxbcScalarType::Uint32, 4, 0 }; laneId.type = { DxbcScalarType::Uint32, 1, 0 };
invocationMask.sclass = spv::StorageClassFunction; laneId.sclass = spv::StorageClassInput;
m_ps.invocationMask = emitNewVariable(invocationMask); m_ps.builtinLaneId = emitNewBuiltinVariable(
m_module.setDebugName(m_ps.invocationMask, "fInvocationMask"); laneId, spv::BuiltInSubgroupLocalInvocationId,
"fLaneId");
m_module.opStore(m_ps.invocationMask,
m_module.opGroupNonUniformBallot(
getVectorTypeId({ DxbcScalarType::Uint32, 4 }),
m_module.constu32(spv::ScopeSubgroup),
m_module.constBool(true)));
} }
} }
} }

View File

@@ -178,7 +178,7 @@ namespace dxvk {
uint32_t builtinLayer = 0; uint32_t builtinLayer = 0;
uint32_t builtinViewportId = 0; uint32_t builtinViewportId = 0;
uint32_t invocationMask = 0; uint32_t builtinLaneId = 0;
uint32_t killState = 0; uint32_t killState = 0;
}; };