1
0
mirror of https://github.com/doitsujin/dxvk.git synced 2024-12-02 10:24:12 +01:00

[dxbc] Only use atomic append/consume optimization in compute shaders

- For fragment shaders, this isn't safe since ballots include helper invocations
- For vertex shaders, if drivers don't support subgroup operations in those
  stages, we don't want it to affect the performance of compute shaders.
This commit is contained in:
Philip Rebohle 2019-07-26 14:07:39 +02:00
parent dfe2922136
commit 6ab074c95b
No known key found for this signature in database
GPG Key ID: C8CC613427A31C99
2 changed files with 8 additions and 5 deletions

View File

@ -2411,6 +2411,11 @@ namespace dxvk {
m_module.opLabel(cond.labelIf); m_module.opLabel(cond.labelIf);
// Only use subgroup ops on compute to avoid having to
// deal with helper invocations or hardware limitations
bool useSubgroupOps = m_moduleInfo.options.useSubgroupOpsForAtomicCounters
&& m_programInfo.type() == DxbcProgramType::ComputeShader;
// In case we have subgroup ops enabled, we need to // In case we have subgroup ops enabled, we need to
// count the number of active lanes, the lane index, // count the number of active lanes, the lane index,
// and we need to perform the atomic op conditionally // and we need to perform the atomic op conditionally
@ -2419,7 +2424,7 @@ namespace dxvk {
DxbcConditional elect; DxbcConditional elect;
if (m_moduleInfo.options.useSubgroupOpsForAtomicCounters) { if (useSubgroupOps) {
m_module.enableCapability(spv::CapabilityGroupNonUniform); m_module.enableCapability(spv::CapabilityGroupNonUniform);
m_module.enableCapability(spv::CapabilityGroupNonUniformBallot); m_module.enableCapability(spv::CapabilityGroupNonUniformBallot);
@ -2504,7 +2509,7 @@ namespace dxvk {
// If we're using subgroup ops, we have to broadcast // If we're using subgroup ops, we have to broadcast
// the result of the atomic op and compute the index // the result of the atomic op and compute the index
if (m_moduleInfo.options.useSubgroupOpsForAtomicCounters) { if (useSubgroupOps) {
m_module.opBranch(elect.labelEnd); m_module.opBranch(elect.labelEnd);
m_module.opLabel (elect.labelEnd); m_module.opLabel (elect.labelEnd);

View File

@ -15,14 +15,12 @@ namespace dxvk {
const DxvkDeviceFeatures& devFeatures = device->features(); const DxvkDeviceFeatures& devFeatures = device->features();
const DxvkDeviceInfo& devInfo = adapter->devicePropertiesExt(); const DxvkDeviceInfo& devInfo = adapter->devicePropertiesExt();
const VkShaderStageFlags allShaderStages = device->getShaderPipelineStages();
useDepthClipWorkaround useDepthClipWorkaround
= !devFeatures.extDepthClipEnable.depthClipEnable; = !devFeatures.extDepthClipEnable.depthClipEnable;
useStorageImageReadWithoutFormat useStorageImageReadWithoutFormat
= devFeatures.core.features.shaderStorageImageReadWithoutFormat; = devFeatures.core.features.shaderStorageImageReadWithoutFormat;
useSubgroupOpsForAtomicCounters useSubgroupOpsForAtomicCounters
= (devInfo.coreSubgroup.supportedStages & allShaderStages) == allShaderStages = (devInfo.coreSubgroup.supportedStages & VK_SHADER_STAGE_COMPUTE_BIT)
&& (devInfo.coreSubgroup.supportedOperations & VK_SUBGROUP_FEATURE_BALLOT_BIT); && (devInfo.coreSubgroup.supportedOperations & VK_SUBGROUP_FEATURE_BALLOT_BIT);
useDemoteToHelperInvocation useDemoteToHelperInvocation
= (devFeatures.extShaderDemoteToHelperInvocation.shaderDemoteToHelperInvocation); = (devFeatures.extShaderDemoteToHelperInvocation.shaderDemoteToHelperInvocation);