[dxbc] Only use atomic append/consume optimization in compute shaders

- For fragment shaders, this isn't safe since ballots include helper invocations.
- For vertex shaders, if drivers don't support subgroup operations in those stages, we don't want it to affect the performance of compute shaders.
2024-12-02 10:24:12 +01:00 · 2019-07-26 14:07:39 +02:00 · 2019-07-26 14:07:39 +02:00 · 6ab074c95b
commit 6ab074c95b
parent dfe2922136
2 changed files with 8 additions and 5 deletions
--- a/src/dxbc/dxbc_compiler.cpp
+++ b/src/dxbc/dxbc_compiler.cpp
@ -2411,6 +2411,11 @@ namespace dxvk {
    m_module.opLabel(cond.labelIf);
    // Only use subgroup ops on compute to avoid having to
    // deal with helper invocations or hardware limitations
    bool useSubgroupOps = m_moduleInfo.options.useSubgroupOpsForAtomicCounters
      && m_programInfo.type() == DxbcProgramType::ComputeShader;
    // In case we have subgroup ops enabled, we need to
    // count the number of active lanes, the lane index,
    // and we need to perform the atomic op conditionally
@ -2419,7 +2424,7 @@ namespace dxvk {
    DxbcConditional elect;
-    if (m_moduleInfo.options.useSubgroupOpsForAtomicCounters) {
+    if (useSubgroupOps) {
      m_module.enableCapability(spv::CapabilityGroupNonUniform);
      m_module.enableCapability(spv::CapabilityGroupNonUniformBallot);
@ -2504,7 +2509,7 @@ namespace dxvk {
    // If we're using subgroup ops, we have to broadcast
    // the result of the atomic op and compute the index
-    if (m_moduleInfo.options.useSubgroupOpsForAtomicCounters) {
+    if (useSubgroupOps) {
      m_module.opBranch(elect.labelEnd);
      m_module.opLabel (elect.labelEnd);
--- a/src/dxbc/dxbc_options.cpp
+++ b/src/dxbc/dxbc_options.cpp
@ -15,14 +15,12 @@ namespace dxvk {
    const DxvkDeviceFeatures& devFeatures = device->features();
    const DxvkDeviceInfo& devInfo = adapter->devicePropertiesExt();
    const VkShaderStageFlags allShaderStages = device->getShaderPipelineStages();
    useDepthClipWorkaround
      = !devFeatures.extDepthClipEnable.depthClipEnable;
    useStorageImageReadWithoutFormat
      = devFeatures.core.features.shaderStorageImageReadWithoutFormat;
    useSubgroupOpsForAtomicCounters
-      = (devInfo.coreSubgroup.supportedStages     & allShaderStages) == allShaderStages
+      = (devInfo.coreSubgroup.supportedStages     & VK_SHADER_STAGE_COMPUTE_BIT)
     && (devInfo.coreSubgroup.supportedOperations & VK_SUBGROUP_FEATURE_BALLOT_BIT);
    useDemoteToHelperInvocation
      = (devFeatures.extShaderDemoteToHelperInvocation.shaderDemoteToHelperInvocation);