diff --git a/dxvk.conf b/dxvk.conf index 453024feb..f97f8512f 100644 --- a/dxvk.conf +++ b/dxvk.conf @@ -529,6 +529,17 @@ # d3d11.longMad = False # d3d9.longMad = False + +# Long Dot +# +# Whether to emit dot products as a precise FMul+FFma chain (True) or as a plain SPIR-V dot product (False). +# +# Supported values: +# - True/False + +# d3d11.longDot = False + + # Device Local Constant Buffers # # Enables using device local, host accessible memory for constant buffers in D3D9. diff --git a/src/d3d11/d3d11_options.cpp b/src/d3d11/d3d11_options.cpp index 835ce985b..5d0f1bd2a 100644 --- a/src/d3d11/d3d11_options.cpp +++ b/src/d3d11/d3d11_options.cpp @@ -32,6 +32,7 @@ namespace dxvk { this->maxFrameLatency = config.getOption("dxgi.maxFrameLatency", 0); this->exposeDriverCommandLists = config.getOption("d3d11.exposeDriverCommandLists", true); this->longMad = config.getOption("d3d11.longMad", false); + this->longDot = config.getOption("d3d11.longDot", false); this->reproducibleCommandStream = config.getOption("d3d11.reproducibleCommandStream", false); // Clamp LOD bias so that people don't abuse this in unintended ways diff --git a/src/d3d11/d3d11_options.h b/src/d3d11/d3d11_options.h index e556a89d8..f5e756586 100644 --- a/src/d3d11/d3d11_options.h +++ b/src/d3d11/d3d11_options.h @@ -118,9 +118,12 @@ namespace dxvk { /// Shader dump path std::string shaderDumpPath; - /// Should we make our Mads a FFma or do it the long way with an FMul and an FAdd? + /// Translate Mad/Dfma to separate FMul+FAdd bool longMad; + /// Translate DpX to a precise FMul+FFma chain + bool longDot; + /// Ensure that for the same D3D commands the output VK commands /// don't change between runs. Useful for comparative benchmarking, /// can negatively affect performance. 
diff --git a/src/dxbc/dxbc_compiler.cpp b/src/dxbc/dxbc_compiler.cpp index 9975c87d5..81620680c 100644 --- a/src/dxbc/dxbc_compiler.cpp +++ b/src/dxbc/dxbc_compiler.cpp @@ -2044,15 +2044,41 @@ namespace dxvk { DxbcRegisterValue dst; dst.type.ctype = ins.dst[0].dataType; dst.type.ccount = 1; - - dst.id = m_module.opDot( - getVectorTypeId(dst.type), - src.at(0).id, - src.at(1).id); - - if (ins.controls.precise() || m_precise) - m_module.decorate(dst.id, spv::DecorationNoContraction); - + dst.id = 0; + + if (!m_moduleInfo.options.longDot) { + dst.id = m_module.opDot( + getVectorTypeId(dst.type), + src.at(0).id, + src.at(1).id); + + if (ins.controls.precise() || m_precise) + m_module.decorate(dst.id, spv::DecorationNoContraction); + } else { + uint32_t componentType = getVectorTypeId(dst.type); + uint32_t componentCount = srcMask.popCount(); + + for (uint32_t i = 1; i <= componentCount; i++) { + uint32_t idx = componentCount - i; + + if (dst.id) { + dst.id = m_module.opFFma(componentType, + m_module.opCompositeExtract(componentType, src.at(0).id, 1, &idx), + m_module.opCompositeExtract(componentType, src.at(1).id, 1, &idx), + dst.id); + } else { + dst.id = m_module.opFMul(componentType, + m_module.opCompositeExtract(componentType, src.at(0).id, 1, &idx), + m_module.opCompositeExtract(componentType, src.at(1).id, 1, &idx)); + } + + // Unconditionally mark as precise since the exact order of operation + // matters for some games, even if the instruction itself is not marked + // as precise. 
+ m_module.decorate(dst.id, spv::DecorationNoContraction); + } + } + dst = emitDstOperandModifiers(dst, ins.modifiers); emitRegisterStore(ins.dst[0], dst); } diff --git a/src/dxbc/dxbc_options.cpp b/src/dxbc/dxbc_options.cpp index 3d8d86c2c..9730366a0 100644 --- a/src/dxbc/dxbc_options.cpp +++ b/src/dxbc/dxbc_options.cpp @@ -39,6 +39,7 @@ namespace dxvk { forceSampleRateShading = options.forceSampleRateShading; enableSampleShadingInterlock = device->features().extFragmentShaderInterlock.fragmentShaderSampleInterlock; longMad = options.longMad; + longDot = options.longDot; // Figure out float control flags to match D3D11 rules if (options.floatControls) { diff --git a/src/dxbc/dxbc_options.h b/src/dxbc/dxbc_options.h index 4b21f2f88..12b3bb939 100644 --- a/src/dxbc/dxbc_options.h +++ b/src/dxbc/dxbc_options.h @@ -55,8 +55,11 @@ namespace dxvk { /// Minimum storage buffer alignment VkDeviceSize minSsboAlignment = 0; - /// Should we make our Mads a FFma or do it the long way with an FMul and an FAdd? + /// Translate Mad/Dfma to separate FMul+FAdd bool longMad; + + /// Translate DpX to a precise FMul+FFma chain + bool longDot; }; } \ No newline at end of file