mirror of https://github.com/doitsujin/dxvk.git
synced 2025-01-19 05:52:11 +01:00

[dxso] Remove fallback path for discards

This commit is contained in:
  parent 165648017e
  commit 2d92679cd1
@@ -510,33 +510,11 @@ namespace dxvk {
          m_module.defVoidType(), 0, nullptr));

    this->emitFunctionLabel();

    // We may have to defer kill operations to the end of
    // the shader in order to keep derivatives correct.
    if (m_analysis->usesKill && m_moduleInfo.options.useDemoteToHelperInvocation) {
    if (m_analysis->usesKill) {
      // This extension basically implements D3D-style discard
      m_module.enableExtension("SPV_EXT_demote_to_helper_invocation");
      m_module.enableCapability(spv::CapabilityDemoteToHelperInvocationEXT);
    }
    else if (m_analysis->usesKill && m_analysis->usesDerivatives) {
      m_ps.killState = m_module.newVarInit(
        m_module.defPointerType(m_module.defBoolType(), spv::StorageClassPrivate),
        spv::StorageClassPrivate, m_module.constBool(false));

      m_module.setDebugName(m_ps.killState, "ps_kill");

      if (m_moduleInfo.options.useSubgroupOpsForEarlyDiscard) {
        m_module.enableCapability(spv::CapabilityGroupNonUniform);
        m_module.enableCapability(spv::CapabilityGroupNonUniformBallot);

        DxsoRegisterInfo laneId;
        laneId.type = { DxsoScalarType::Uint32, 1, 0 };
        laneId.sclass = spv::StorageClassInput;

        m_ps.builtinLaneId = emitNewBuiltinVariable(
          laneId, spv::BuiltInSubgroupLocalInvocationId,
          "fLaneId", 0);
      }
    }
  }

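For context on the comments in this hunk: OpKill terminates a fragment invocation outright, which leaves the other invocations in its 2x2 derivative quad with undefined derivative inputs, whereas OpDemoteToHelperInvocation (SPV_EXT_demote_to_helper_invocation) keeps the invocation running as a helper so derivatives stay valid, which matches D3D discard semantics. A minimal CPU-side sketch of that difference, with invented names and deliberately simplified bookkeeping (not part of the commit):

    #include <array>
    #include <cstdio>

    // Sketch only: model one row of a 2x2 fragment quad and compute dFdx as
    // the neighbour difference, the way GPUs do for derivative instructions.
    struct Lane {
      float value;
      bool  demoted; // helper invocation: keeps computing, never writes output
      bool  killed;  // terminated: its value can no longer be trusted
    };

    int main() {
      std::array<Lane, 2> row = { Lane{ 1.0f, false, false },
                                  Lane{ 3.0f, false, false } };

      // D3D-style discard on lane 1 implemented as a demote: the lane still
      // produces a value for derivative purposes, so dFdx stays 2.0.
      row[1].demoted = true;
      float dFdxDemote = row[1].value - row[0].value;

      // The same discard implemented as an immediate kill: the lane is gone,
      // so the derivative read by lane 0 is effectively garbage.
      row[1].killed = true;
      row[1].value  = 0.0f;
      float dFdxKill = row[1].value - row[0].value;

      std::printf("demote: dFdx = %.1f, kill: dFdx = %.1f\n", dFdxDemote, dFdxKill);
      return 0;
    }
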
@@ -3105,7 +3083,6 @@ void DxsoCompiler::emitControlFlowGenericLoop(
    if (texReg.type.ccount != 1)
      result = m_module.opAny(m_module.defBoolType(), result);

    if (m_ps.killState == 0) {
      uint32_t labelIf = m_module.allocateId();
      uint32_t labelEnd = m_module.allocateId();

@@ -3113,72 +3090,11 @@ void DxsoCompiler::emitControlFlowGenericLoop(
      m_module.opBranchConditional(result, labelIf, labelEnd);

      m_module.opLabel(labelIf);

      if (m_moduleInfo.options.useDemoteToHelperInvocation) {
        m_module.opDemoteToHelperInvocation();
        m_module.opBranch(labelEnd);
      } else {
        // OpKill terminates the block
        m_module.opKill();
      }

      m_module.opLabel(labelEnd);
    }
    else {
      uint32_t typeId = m_module.defBoolType();

      uint32_t killState = m_module.opLoad (typeId, m_ps.killState);
               killState = m_module.opLogicalOr(typeId, killState, result);
      m_module.opStore(m_ps.killState, killState);

      if (m_moduleInfo.options.useSubgroupOpsForEarlyDiscard) {
        uint32_t ballot = m_module.opGroupNonUniformBallot(
          getVectorTypeId({ DxsoScalarType::Uint32, 4 }),
          m_module.constu32(spv::ScopeSubgroup),
          killState);

        uint32_t laneId = m_module.opLoad(
          getScalarTypeId(DxsoScalarType::Uint32),
          m_ps.builtinLaneId);

        uint32_t laneIdPart = m_module.opShiftRightLogical(
          getScalarTypeId(DxsoScalarType::Uint32),
          laneId, m_module.constu32(5));

        uint32_t laneMask = m_module.opVectorExtractDynamic(
          getScalarTypeId(DxsoScalarType::Uint32),
          ballot, laneIdPart);

        uint32_t laneIdQuad = m_module.opBitwiseAnd(
          getScalarTypeId(DxsoScalarType::Uint32),
          laneId, m_module.constu32(0x1c));

        laneMask = m_module.opShiftRightLogical(
          getScalarTypeId(DxsoScalarType::Uint32),
          laneMask, laneIdQuad);

        laneMask = m_module.opBitwiseAnd(
          getScalarTypeId(DxsoScalarType::Uint32),
          laneMask, m_module.constu32(0xf));

        uint32_t killSubgroup = m_module.opIEqual(
          m_module.defBoolType(),
          laneMask, m_module.constu32(0xf));

        uint32_t labelIf = m_module.allocateId();
        uint32_t labelEnd = m_module.allocateId();

        m_module.opSelectionMerge(labelEnd, spv::SelectionControlMaskNone);
        m_module.opBranchConditional(killSubgroup, labelIf, labelEnd);

        // OpKill terminates the block
        m_module.opLabel(labelIf);
        m_module.opKill();

        m_module.opLabel(labelEnd);
      }
    }
  }


  void DxsoCompiler::emitTextureDepth(const DxsoInstructionContext& ctx) {
    const uint32_t fType = m_module.defFloatType(32);

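The shift-and-mask sequence in the removed branch above is what decides whether an early OpKill is safe: it extracts the four ballot bits belonging to this invocation's derivative quad and only kills when all four lanes have requested a discard, assuming (as the deleted code does) that a 2x2 quad occupies four consecutive subgroup lanes. A standalone sketch of the same arithmetic on the CPU, with invented names; the 128-bit ballot stands in for the uvec4 returned by OpGroupNonUniformBallot (not part of the commit):

    #include <array>
    #include <cstdint>
    #include <cassert>

    // Sketch only: reproduce the ballot arithmetic from the deleted subgroup
    // path. `ballot` packs one "wants to discard" bit per subgroup lane into
    // four 32-bit words; `laneId` is SubgroupLocalInvocationId.
    bool quadMayKill(const std::array<uint32_t, 4>& ballot, uint32_t laneId) {
      uint32_t laneIdPart = laneId >> 5;          // which ballot word holds this lane
      uint32_t word       = ballot[laneIdPart];   // OpVectorExtractDynamic in the shader
      uint32_t laneIdQuad = laneId & 0x1c;        // bit position of the quad's first lane in that word
      uint32_t quadMask   = (word >> laneIdQuad) & 0xf;
      return quadMask == 0xf;                     // kill only if the whole 2x2 quad discards
    }

    int main() {
      // Lanes 4..7 (one full quad) flagged the kill: every lane of that quad may OpKill.
      std::array<uint32_t, 4> ballot = { 0x000000f0u, 0u, 0u, 0u };
      assert(quadMayKill(ballot, 5));
      // Only lane 8 flagged the kill: its quad (lanes 8..11) must keep running.
      ballot = { 0x00000100u, 0u, 0u, 0u };
      assert(!quadMayKill(ballot, 8));
      return 0;
    }
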
@@ -3800,21 +3716,6 @@ void DxsoCompiler::emitControlFlowGenericLoop(
        m_module.defVoidType(),
        m_ps.functionId, 0, nullptr);

      if (m_ps.killState != 0) {
        uint32_t labelIf = m_module.allocateId();
        uint32_t labelEnd = m_module.allocateId();

        uint32_t killTest = m_module.opLoad(m_module.defBoolType(), m_ps.killState);

        m_module.opSelectionMerge(labelEnd, spv::SelectionControlMaskNone);
        m_module.opBranchConditional(killTest, labelIf, labelEnd);

        m_module.opLabel(labelIf);
        m_module.opKill();

        m_module.opLabel(labelEnd);
      }

      // r0 in PS1 is the colour output register. Move r0 -> cO0 here.
      if (m_programInfo.majorVersion() == 1
       && m_programInfo.type() == DxsoProgramTypes::PixelShader) {

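This hunk deletes the other half of the fallback: the wrapper entry point used to flush the deferred discard only after the shader body had finished, i.e. once every derivative-dependent instruction had already run and before the PS1 colour move. In rough CPU-side pseudocode, with all names invented and a return standing in for OpKill (not part of the commit):

    #include <cstdio>

    // Sketch only: the shape of the removed wrapper.
    static bool ps_kill = false;   // stands in for the private ps_kill variable

    static void shaderBody() {
      // ...texkill records the discard instead of executing it immediately:
      // ps_kill = ps_kill || condition;
      ps_kill = true;              // pretend some texkill fired
    }

    int main() {
      shaderBody();

      if (ps_kill) {
        // Equivalent of the OpKill emitted here: terminate before any output
        // (such as the r0 -> colour register move) is written.
        std::printf("invocation discarded\n");
        return 0;
      }

      std::printf("write colour output\n");
      return 0;
    }
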
@@ -173,9 +173,6 @@ namespace dxvk {
    // Shared State
    uint32_t sharedState = 0;

    uint32_t killState = 0;
    uint32_t builtinLaneId = 0;

    uint32_t flatShadingMask = 0;
  };

@@ -14,18 +14,6 @@ namespace dxvk {
    const DxvkDeviceFeatures& devFeatures = device->features();
    const DxvkDeviceInfo& devInfo = adapter->devicePropertiesExt();

    useDemoteToHelperInvocation
      = (devFeatures.vk13.shaderDemoteToHelperInvocation);

    useSubgroupOpsForEarlyDiscard
      = (devInfo.vk11.subgroupSize >= 4)
     && (devInfo.vk11.subgroupSupportedStages & VK_SHADER_STAGE_FRAGMENT_BIT)
     && (devInfo.vk11.subgroupSupportedOperations & VK_SUBGROUP_FEATURE_BALLOT_BIT);

    // Disable early discard on Nvidia because it may hurt performance
    if (adapter->matchesDriver(VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR, 0, 0))
      useSubgroupOpsForEarlyDiscard = false;

    // Apply shader-related options
    strictConstantCopies = options.strictConstantCopies;

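The option checks above read dxvk's cached device info. For reference, a standalone sketch of the same capability queries against the raw Vulkan API, assuming a valid VkPhysicalDevice and a Vulkan 1.3-capable instance; this is illustrative, not dxvk code:

    #include <vulkan/vulkan.h>

    // Sketch only: query the features/properties that gate the two options
    // above, given an already-selected physical device.
    struct DiscardCaps {
      bool demoteToHelperInvocation = false;
      bool subgroupBallotInFragment = false;
    };

    DiscardCaps queryDiscardCaps(VkPhysicalDevice physDev) {
      VkPhysicalDeviceVulkan13Features vk13 = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES };
      VkPhysicalDeviceFeatures2 features    = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, &vk13 };
      vkGetPhysicalDeviceFeatures2(physDev, &features);

      VkPhysicalDeviceVulkan11Properties vk11 = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES };
      VkPhysicalDeviceProperties2 props       = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2, &vk11 };
      vkGetPhysicalDeviceProperties2(physDev, &props);

      DiscardCaps caps;
      caps.demoteToHelperInvocation = vk13.shaderDemoteToHelperInvocation == VK_TRUE;
      caps.subgroupBallotInFragment =
           vk11.subgroupSize >= 4
        && (vk11.subgroupSupportedStages & VK_SHADER_STAGE_FRAGMENT_BIT) != 0
        && (vk11.subgroupSupportedOperations & VK_SUBGROUP_FEATURE_BALLOT_BIT) != 0;
      return caps;
    }

With this commit the subgroup fallback is gone, so only the shaderDemoteToHelperInvocation feature remains relevant to the D3D9 discard path.
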
@@ -12,13 +12,6 @@ namespace dxvk {
    DxsoOptions();
    DxsoOptions(D3D9DeviceEx* pDevice, const D3D9Options& options);

    /// Use a SPIR-V extension to implement D3D-style discards
    bool useDemoteToHelperInvocation = false;

    /// Use subgroup operations to discard fragment
    /// shader invocations if derivatives remain valid.
    bool useSubgroupOpsForEarlyDiscard = false;

    /// True: Copy our constant set into UBO if we are relative indexing ever.
    /// False: Copy our constant set into UBO if we are relative indexing at the start of a defined constant
    /// Why?: In theory, FXC should never generate code where this would be an issue.