mirror of
https://github.com/doitsujin/dxvk.git
synced 2025-01-19 05:52:11 +01:00
[dxvk] Re-implement early discard with quad granularity
May perform better on some hardware in situations where we cannot discard a full subgroup. Closes #753.
This commit is contained in:
parent
4dd68987d6
commit
3b1e753bb5
@@ -3983,15 +3983,33 @@ namespace dxvk {
|
|||||||
m_module.constu32(spv::ScopeSubgroup),
|
m_module.constu32(spv::ScopeSubgroup),
|
||||||
killState);
|
killState);
|
||||||
|
|
||||||
uint32_t invocationMask = m_module.opLoad(
|
uint32_t laneId = m_module.opLoad(
|
||||||
getVectorTypeId({ DxbcScalarType::Uint32, 4 }),
|
getScalarTypeId(DxbcScalarType::Uint32),
|
||||||
m_ps.invocationMask);
|
m_ps.builtinLaneId);
|
||||||
|
|
||||||
uint32_t killSubgroup = m_module.opAll(
|
uint32_t laneIdPart = m_module.opShiftRightLogical(
|
||||||
|
getScalarTypeId(DxbcScalarType::Uint32),
|
||||||
|
laneId, m_module.constu32(5));
|
||||||
|
|
||||||
|
uint32_t laneMask = m_module.opVectorExtractDynamic(
|
||||||
|
getScalarTypeId(DxbcScalarType::Uint32),
|
||||||
|
ballot, laneIdPart);
|
||||||
|
|
||||||
|
uint32_t laneIdQuad = m_module.opBitwiseAnd(
|
||||||
|
getScalarTypeId(DxbcScalarType::Uint32),
|
||||||
|
laneId, m_module.constu32(0x1c));
|
||||||
|
|
||||||
|
laneMask = m_module.opShiftRightLogical(
|
||||||
|
getScalarTypeId(DxbcScalarType::Uint32),
|
||||||
|
laneMask, laneIdQuad);
|
||||||
|
|
||||||
|
laneMask = m_module.opBitwiseAnd(
|
||||||
|
getScalarTypeId(DxbcScalarType::Uint32),
|
||||||
|
laneMask, m_module.constu32(0xf));
|
||||||
|
|
||||||
|
uint32_t killSubgroup = m_module.opIEqual(
|
||||||
m_module.defBoolType(),
|
m_module.defBoolType(),
|
||||||
m_module.opIEqual(
|
laneMask, m_module.constu32(0xf));
|
||||||
m_module.defVectorType(m_module.defBoolType(), 4),
|
|
||||||
ballot, invocationMask));
|
|
||||||
|
|
||||||
DxbcConditional cond;
|
DxbcConditional cond;
|
||||||
cond.labelIf = m_module.allocateId();
|
cond.labelIf = m_module.allocateId();
|
||||||
@@ -6544,18 +6562,13 @@ namespace dxvk {
|
|||||||
m_module.enableCapability(spv::CapabilityGroupNonUniform);
|
m_module.enableCapability(spv::CapabilityGroupNonUniform);
|
||||||
m_module.enableCapability(spv::CapabilityGroupNonUniformBallot);
|
m_module.enableCapability(spv::CapabilityGroupNonUniformBallot);
|
||||||
|
|
||||||
DxbcRegisterInfo invocationMask;
|
DxbcRegisterInfo laneId;
|
||||||
invocationMask.type = { DxbcScalarType::Uint32, 4, 0 };
|
laneId.type = { DxbcScalarType::Uint32, 1, 0 };
|
||||||
invocationMask.sclass = spv::StorageClassFunction;
|
laneId.sclass = spv::StorageClassInput;
|
||||||
|
|
||||||
m_ps.invocationMask = emitNewVariable(invocationMask);
|
m_ps.builtinLaneId = emitNewBuiltinVariable(
|
||||||
m_module.setDebugName(m_ps.invocationMask, "fInvocationMask");
|
laneId, spv::BuiltInSubgroupLocalInvocationId,
|
||||||
|
"fLaneId");
|
||||||
m_module.opStore(m_ps.invocationMask,
|
|
||||||
m_module.opGroupNonUniformBallot(
|
|
||||||
getVectorTypeId({ DxbcScalarType::Uint32, 4 }),
|
|
||||||
m_module.constu32(spv::ScopeSubgroup),
|
|
||||||
m_module.constBool(true)));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -178,7 +178,7 @@ namespace dxvk {
|
|||||||
uint32_t builtinLayer = 0;
|
uint32_t builtinLayer = 0;
|
||||||
uint32_t builtinViewportId = 0;
|
uint32_t builtinViewportId = 0;
|
||||||
|
|
||||||
uint32_t invocationMask = 0;
|
uint32_t builtinLaneId = 0;
|
||||||
uint32_t killState = 0;
|
uint32_t killState = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user