diff --git a/src/dxbc/dxbc_compiler.cpp b/src/dxbc/dxbc_compiler.cpp index 3bfcdd571..051d20c1a 100644 --- a/src/dxbc/dxbc_compiler.cpp +++ b/src/dxbc/dxbc_compiler.cpp @@ -3898,16 +3898,20 @@ namespace dxvk { m_module.opStore(m_ps.killState, killState); if (m_moduleInfo.options.useSubgroupOpsForEarlyDiscard) { - uint32_t killSubgroup = m_module.opGroupNonUniformLogicalAnd( - m_module.defBoolType(), + uint32_t ballot = m_module.opGroupNonUniformBallot( + getVectorTypeId({ DxbcScalarType::Uint32, 4 }), m_module.constu32(spv::ScopeSubgroup), - m_moduleInfo.options.useSubgroupOpsClustered - ? spv::GroupOperationClusteredReduce - : spv::GroupOperationReduce, - killState, - m_moduleInfo.options.useSubgroupOpsClustered - ? m_module.constu32(4) - : 0); + killState); + + uint32_t invocationMask = m_module.opLoad( + getVectorTypeId({ DxbcScalarType::Uint32, 4 }), + m_ps.invocationMask); + + uint32_t killSubgroup = m_module.opAll( + m_module.defBoolType(), + m_module.opIEqual( + m_module.defVectorType(m_module.defBoolType(), 4), + ballot, invocationMask)); DxbcConditional cond; cond.labelIf = m_module.allocateId(); @@ -6297,6 +6301,17 @@ namespace dxvk { spv::BuiltInCullDistance, spv::StorageClassInput); + // Main function of the pixel shader + m_ps.functionId = m_module.allocateId(); + m_module.setDebugName(m_ps.functionId, "ps_main"); + + this->emitFunctionBegin( + m_ps.functionId, + m_module.defVoidType(), + m_module.defFunctionType( + m_module.defVoidType(), 0, nullptr)); + this->emitFunctionLabel(); + // We may have to defer kill operations to the end of // the shader in order to keep derivatives correct. if (m_analysis->usesKill && m_analysis->usesDerivatives) { @@ -6308,23 +6323,22 @@ namespace dxvk { if (m_moduleInfo.options.useSubgroupOpsForEarlyDiscard) { m_module.enableCapability(spv::CapabilityGroupNonUniform); - m_module.enableCapability(spv::CapabilityGroupNonUniformArithmetic); + m_module.enableCapability(spv::CapabilityGroupNonUniformBallot); - if (m_moduleInfo.options.useSubgroupOpsClustered) - m_module.enableCapability(spv::CapabilityGroupNonUniformClustered); + DxbcRegisterInfo invocationMask; + invocationMask.type = { DxbcScalarType::Uint32, 4, 0 }; + invocationMask.sclass = spv::StorageClassFunction; + + m_ps.invocationMask = emitNewVariable(invocationMask); + m_module.setDebugName(m_ps.invocationMask, "fInvocationMask"); + + m_module.opStore(m_ps.invocationMask, + m_module.opGroupNonUniformBallot( + getVectorTypeId({ DxbcScalarType::Uint32, 4 }), + m_module.constu32(spv::ScopeSubgroup), + m_module.constBool(true))); } } - - // Main function of the pixel shader - m_ps.functionId = m_module.allocateId(); - m_module.setDebugName(m_ps.functionId, "ps_main"); - - this->emitFunctionBegin( - m_ps.functionId, - m_module.defVoidType(), - m_module.defFunctionType( - m_module.defVoidType(), 0, nullptr)); - this->emitFunctionLabel(); } diff --git a/src/dxbc/dxbc_compiler.h b/src/dxbc/dxbc_compiler.h index bc791b526..edf587d42 100644 --- a/src/dxbc/dxbc_compiler.h +++ b/src/dxbc/dxbc_compiler.h @@ -178,6 +178,7 @@ namespace dxvk { uint32_t builtinLayer = 0; uint32_t builtinViewportId = 0; + uint32_t invocationMask = 0; uint32_t killState = 0; }; diff --git a/src/dxbc/dxbc_options.cpp b/src/dxbc/dxbc_options.cpp index 040d59ced..ebc6f4e4f 100644 --- a/src/dxbc/dxbc_options.cpp +++ b/src/dxbc/dxbc_options.cpp @@ -18,9 +18,7 @@ namespace dxvk { useSubgroupOpsForEarlyDiscard = (devInfo.coreSubgroup.subgroupSize >= 4) && (devInfo.coreSubgroup.supportedStages & VK_SHADER_STAGE_FRAGMENT_BIT) - && (devInfo.coreSubgroup.supportedOperations & VK_SUBGROUP_FEATURE_ARITHMETIC_BIT); - useSubgroupOpsClustered = useSubgroupOpsForEarlyDiscard - && (devInfo.coreSubgroup.supportedOperations & VK_SUBGROUP_FEATURE_CLUSTERED_BIT); + && (devInfo.coreSubgroup.supportedOperations & VK_SUBGROUP_FEATURE_BALLOT_BIT); zeroInitWorkgroupMemory = options.zeroInitWorkgroupMemory; @@ -29,10 +27,8 @@ namespace dxvk { auto vendor = DxvkGpuVendor(devInfo.core.properties.vendorID); if (vendor == DxvkGpuVendor::Amd - || vendor == DxvkGpuVendor::Nvidia) { + || vendor == DxvkGpuVendor::Nvidia) useSubgroupOpsForEarlyDiscard = false; - useSubgroupOpsClustered = false; - } } } \ No newline at end of file diff --git a/src/dxbc/dxbc_options.h b/src/dxbc/dxbc_options.h index 54b9c0a02..8d85d5ede 100644 --- a/src/dxbc/dxbc_options.h +++ b/src/dxbc/dxbc_options.h @@ -17,9 +17,6 @@ namespace dxvk { /// shader invocations if derivatives remain valid. bool useSubgroupOpsForEarlyDiscard = false; - /// Use clustered subgroup operations - bool useSubgroupOpsClustered = false; - /// Clear thread-group shared memory to zero bool zeroInitWorkgroupMemory = false; }; diff --git a/tests/dxbc/test_dxbc_compiler.cpp b/tests/dxbc/test_dxbc_compiler.cpp index b6f79b33d..16ab707dc 100644 --- a/tests/dxbc/test_dxbc_compiler.cpp +++ b/tests/dxbc/test_dxbc_compiler.cpp @@ -43,7 +43,6 @@ int WINAPI WinMain(HINSTANCE hInstance, DxbcModuleInfo moduleInfo; moduleInfo.options.useSubgroupOpsForEarlyDiscard = true; - moduleInfo.options.useSubgroupOpsClustered = true; moduleInfo.xfb = nullptr; Rc shader = module.compile(moduleInfo, ifileName);