mirror of https://github.com/doitsujin/dxvk.git synced 2025-01-19 05:52:11 +01:00

[dxso] Remove fallback path for discards

Philip Rebohle, 2022-08-17 17:44:10 +02:00, committed by Joshie
parent 165648017e
commit 2d92679cd1
4 changed files with 9 additions and 130 deletions

View File

@@ -510,33 +510,11 @@ namespace dxvk {
         m_module.defVoidType(), 0, nullptr));
 
     this->emitFunctionLabel();
 
-    // We may have to defer kill operations to the end of
-    // the shader in order to keep derivatives correct.
-    if (m_analysis->usesKill && m_moduleInfo.options.useDemoteToHelperInvocation) {
+    if (m_analysis->usesKill) {
       // This extension basically implements D3D-style discard
       m_module.enableExtension("SPV_EXT_demote_to_helper_invocation");
       m_module.enableCapability(spv::CapabilityDemoteToHelperInvocationEXT);
     }
-    else if (m_analysis->usesKill && m_analysis->usesDerivatives) {
-      m_ps.killState = m_module.newVarInit(
-        m_module.defPointerType(m_module.defBoolType(), spv::StorageClassPrivate),
-        spv::StorageClassPrivate, m_module.constBool(false));
-
-      m_module.setDebugName(m_ps.killState, "ps_kill");
-
-      if (m_moduleInfo.options.useSubgroupOpsForEarlyDiscard) {
-        m_module.enableCapability(spv::CapabilityGroupNonUniform);
-        m_module.enableCapability(spv::CapabilityGroupNonUniformBallot);
-
-        DxsoRegisterInfo laneId;
-        laneId.type = { DxsoScalarType::Uint32, 1, 0 };
-        laneId.sclass = spv::StorageClassInput;
-
-        m_ps.builtinLaneId = emitNewBuiltinVariable(
-          laneId, spv::BuiltInSubgroupLocalInvocationId,
-          "fLaneId", 0);
-      }
-    }
   }
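For reference, the fallback removed here and in the hunks below did not discard immediately: each texkill only OR'ed its condition into the private ps_kill flag declared above, so that derivative calculations later in the shader stayed well defined, and a single real OpKill ran at the end of the entry point. A minimal sketch of that pattern, assuming dxvk's SpirvModule interface (the calls are the ones visible in this diff; the two wrapper functions and their parameters are illustrative only):

    // At each texkill site: ps_kill = ps_kill || condition.
    void emitDeferredKill(SpirvModule& spvModule, uint32_t killStateVar, uint32_t cond) {
      uint32_t typeId    = spvModule.defBoolType();
      uint32_t killState = spvModule.opLoad(typeId, killStateVar);
      killState = spvModule.opLogicalOr(typeId, killState, cond);
      spvModule.opStore(killStateVar, killState);
    }

    // At the end of the entry point: if (ps_kill) terminate the invocation.
    void emitFinalKill(SpirvModule& spvModule, uint32_t killStateVar) {
      uint32_t labelIf  = spvModule.allocateId();
      uint32_t labelEnd = spvModule.allocateId();
      uint32_t killTest = spvModule.opLoad(spvModule.defBoolType(), killStateVar);

      spvModule.opSelectionMerge(labelEnd, spv::SelectionControlMaskNone);
      spvModule.opBranchConditional(killTest, labelIf, labelEnd);

      spvModule.opLabel(labelIf);
      spvModule.opKill();          // OpKill terminates the block
      spvModule.opLabel(labelEnd);
    }

The demote path makes both steps unnecessary: OpDemoteToHelperInvocation leaves the invocation running as a helper, so derivatives remain valid after the discard.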
@@ -3105,79 +3083,17 @@ void DxsoCompiler::emitControlFlowGenericLoop(
     if (texReg.type.ccount != 1)
       result = m_module.opAny(m_module.defBoolType(), result);
 
-    if (m_ps.killState == 0) {
-      uint32_t labelIf = m_module.allocateId();
-      uint32_t labelEnd = m_module.allocateId();
+    uint32_t labelIf = m_module.allocateId();
+    uint32_t labelEnd = m_module.allocateId();
 
-      m_module.opSelectionMerge(labelEnd, spv::SelectionControlMaskNone);
-      m_module.opBranchConditional(result, labelIf, labelEnd);
+    m_module.opSelectionMerge(labelEnd, spv::SelectionControlMaskNone);
+    m_module.opBranchConditional(result, labelIf, labelEnd);
 
-      m_module.opLabel(labelIf);
+    m_module.opLabel(labelIf);
+    m_module.opDemoteToHelperInvocation();
+    m_module.opBranch(labelEnd);
 
-      if (m_moduleInfo.options.useDemoteToHelperInvocation) {
-        m_module.opDemoteToHelperInvocation();
-        m_module.opBranch(labelEnd);
-      } else {
-        // OpKill terminates the block
-        m_module.opKill();
-      }
-
-      m_module.opLabel(labelEnd);
-    }
-    else {
-      uint32_t typeId = m_module.defBoolType();
-
-      uint32_t killState = m_module.opLoad     (typeId, m_ps.killState);
-               killState = m_module.opLogicalOr(typeId, killState, result);
-
-      m_module.opStore(m_ps.killState, killState);
-
-      if (m_moduleInfo.options.useSubgroupOpsForEarlyDiscard) {
-        uint32_t ballot = m_module.opGroupNonUniformBallot(
-          getVectorTypeId({ DxsoScalarType::Uint32, 4 }),
-          m_module.constu32(spv::ScopeSubgroup),
-          killState);
-
-        uint32_t laneId = m_module.opLoad(
-          getScalarTypeId(DxsoScalarType::Uint32),
-          m_ps.builtinLaneId);
-
-        uint32_t laneIdPart = m_module.opShiftRightLogical(
-          getScalarTypeId(DxsoScalarType::Uint32),
-          laneId, m_module.constu32(5));
-
-        uint32_t laneMask = m_module.opVectorExtractDynamic(
-          getScalarTypeId(DxsoScalarType::Uint32),
-          ballot, laneIdPart);
-
-        uint32_t laneIdQuad = m_module.opBitwiseAnd(
-          getScalarTypeId(DxsoScalarType::Uint32),
-          laneId, m_module.constu32(0x1c));
-
-        laneMask = m_module.opShiftRightLogical(
-          getScalarTypeId(DxsoScalarType::Uint32),
-          laneMask, laneIdQuad);
-
-        laneMask = m_module.opBitwiseAnd(
-          getScalarTypeId(DxsoScalarType::Uint32),
-          laneMask, m_module.constu32(0xf));
-
-        uint32_t killSubgroup = m_module.opIEqual(
-          m_module.defBoolType(),
-          laneMask, m_module.constu32(0xf));
-
-        uint32_t labelIf = m_module.allocateId();
-        uint32_t labelEnd = m_module.allocateId();
-
-        m_module.opSelectionMerge(labelEnd, spv::SelectionControlMaskNone);
-        m_module.opBranchConditional(killSubgroup, labelIf, labelEnd);
-
-        // OpKill terminates the block
-        m_module.opLabel(labelIf);
-        m_module.opKill();
-
-        m_module.opLabel(labelEnd);
-      }
-    }
+    m_module.opLabel(labelEnd);
   }
 
   void DxsoCompiler::emitTextureDepth(const DxsoInstructionContext& ctx) {
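The removed subgroup branch is the densest part of the old fallback: it turned the per-lane kill flags into a ballot and issued a real OpKill only once every invocation in the lane's 2x2 quad had requested a discard, on the assumption (encoded by the 0x1c / 0xf arithmetic) that a quad occupies four consecutive subgroup lanes and that derivatives are only exchanged within a quad. The same bit arithmetic, restated as a plain C++ helper (a hypothetical standalone function, not dxvk code):

    #include <cstdint>

    // ballot is the uvec4 produced by OpGroupNonUniformBallot, one bit per lane.
    // laneId >> 5 selects the 32-bit ballot word, laneId & 0x1c is the bit
    // position of the first lane of this lane's quad, and a nibble of 0xF means
    // all four quad invocations want to be killed, so an early OpKill is safe.
    bool wholeQuadKilled(const uint32_t ballot[4], uint32_t laneId) {
      uint32_t word     = ballot[laneId >> 5];
      uint32_t quadBits = (word >> (laneId & 0x1c)) & 0xfu;
      return quadBits == 0xfu;
    }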
@@ -3800,21 +3716,6 @@ void DxsoCompiler::emitControlFlowGenericLoop(
       m_module.defVoidType(),
       m_ps.functionId, 0, nullptr);
 
-    if (m_ps.killState != 0) {
-      uint32_t labelIf = m_module.allocateId();
-      uint32_t labelEnd = m_module.allocateId();
-
-      uint32_t killTest = m_module.opLoad(m_module.defBoolType(), m_ps.killState);
-
-      m_module.opSelectionMerge(labelEnd, spv::SelectionControlMaskNone);
-      m_module.opBranchConditional(killTest, labelIf, labelEnd);
-
-      m_module.opLabel(labelIf);
-      m_module.opKill();
-
-      m_module.opLabel(labelEnd);
-    }
-
     // r0 in PS1 is the colour output register. Move r0 -> cO0 here.
     if (m_programInfo.majorVersion() == 1
      && m_programInfo.type() == DxsoProgramTypes::PixelShader) {

View File

@@ -173,9 +173,6 @@ namespace dxvk {
     // Shared State
     uint32_t sharedState = 0;
 
-    uint32_t killState = 0;
-    uint32_t builtinLaneId = 0;
-
     uint32_t flatShadingMask = 0;
   };

View File

@@ -14,18 +14,6 @@ namespace dxvk {
     const DxvkDeviceFeatures& devFeatures = device->features();
     const DxvkDeviceInfo& devInfo = adapter->devicePropertiesExt();
 
-    useDemoteToHelperInvocation
-      = (devFeatures.vk13.shaderDemoteToHelperInvocation);
-
-    useSubgroupOpsForEarlyDiscard
-      = (devInfo.vk11.subgroupSize >= 4)
-     && (devInfo.vk11.subgroupSupportedStages & VK_SHADER_STAGE_FRAGMENT_BIT)
-     && (devInfo.vk11.subgroupSupportedOperations & VK_SUBGROUP_FEATURE_BALLOT_BIT);
-
-    // Disable early discard on Nvidia because it may hurt performance
-    if (adapter->matchesDriver(VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR, 0, 0))
-      useSubgroupOpsForEarlyDiscard = false;
-
     // Apply shader-related options
     strictConstantCopies = options.strictConstantCopies;
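With the fallback gone, the compiler emits OpDemoteToHelperInvocation unconditionally for shaders that use kill. On the Vulkan 1.3 baseline this code targets, shaderDemoteToHelperInvocation (the very feature the removed line queried) is a required core feature, so no per-device option is needed. A hedged sketch of the corresponding device-level check, written as standalone Vulkan code rather than dxvk's own initialization:

    #include <vulkan/vulkan.h>
    #include <stdexcept>

    // Queries the core Vulkan 1.3 feature backing OpDemoteToHelperInvocation.
    // physicalDevice is assumed to be a valid VkPhysicalDevice handle.
    void checkDemoteSupport(VkPhysicalDevice physicalDevice) {
      VkPhysicalDeviceVulkan13Features vk13 = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES };
      VkPhysicalDeviceFeatures2 features = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, &vk13 };

      vkGetPhysicalDeviceFeatures2(physicalDevice, &features);

      if (!vk13.shaderDemoteToHelperInvocation)
        throw std::runtime_error("shaderDemoteToHelperInvocation not supported");
    }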

View File

@@ -12,13 +12,6 @@ namespace dxvk {
     DxsoOptions();
     DxsoOptions(D3D9DeviceEx* pDevice, const D3D9Options& options);
 
-    /// Use a SPIR-V extension to implement D3D-style discards
-    bool useDemoteToHelperInvocation = false;
-
-    /// Use subgroup operations to discard fragment
-    /// shader invocations if derivatives remain valid.
-    bool useSubgroupOpsForEarlyDiscard = false;
-
     /// True: Copy our constant set into UBO if we are relative indexing ever.
     /// False: Copy our constant set into UBO if we are relative indexing at the start of a defined constant
     /// Why?: In theory, FXC should never generate code where this would be an issue.