[dxbc,d3d11] Add option to explicitly emit long dot products

2025-04-03 16:25:19 +02:00 · 2024-08-08 16:03:16 +02:00 · 2024-08-08 16:03:16 +02:00 · 4ee907a6df
commit 4ee907a6df
parent 5c987ea3d1
6 changed files with 56 additions and 11 deletions
--- a/dxvk.conf
+++ b/dxvk.conf
@@ -529,6 +529,17 @@
 # d3d11.longMad = False
 # d3d9.longMad = False

+
+# Long Dot
+#
+# Whether to emit dot products as an FMA chain (True) or as a plain SPIR-V dot product (False).
+#
+# Supported values:
+# - True/False
+
+# d3d11.longDot = False
+
+
 # Device Local Constant Buffers
 #
 # Enables using device local, host accessible memory for constant buffers in D3D9.
--- a/src/d3d11/d3d11_options.cpp
+++ b/src/d3d11/d3d11_options.cpp
@@ -32,6 +32,7 @@ namespace dxvk {
    this->maxFrameLatency       = config.getOption<int32_t>("dxgi.maxFrameLatency", 0);
    this->exposeDriverCommandLists = config.getOption<bool>("d3d11.exposeDriverCommandLists", true);
    this->longMad               = config.getOption<bool>("d3d11.longMad", false);
+    this->longDot               = config.getOption<bool>("d3d11.longDot", false);
    this->reproducibleCommandStream = config.getOption<bool>("d3d11.reproducibleCommandStream", false);

    // Clamp LOD bias so that people don't abuse this in unintended ways
--- a/src/d3d11/d3d11_options.h
+++ b/src/d3d11/d3d11_options.h
@@ -118,9 +118,12 @@ namespace dxvk {
    /// Shader dump path
    std::string shaderDumpPath;

-    /// Should we make our Mads a FFma or do it the long way with an FMul and an FAdd?
+    /// Translate Mad/Dfma to separate FMul+FAdd
    bool longMad;

+    /// Translate DpX to a precise FMul+FFma chain
+    bool longDot;
+
    /// Ensure that for the same D3D commands the output VK commands
    /// don't change between runs. Useful for comparative benchmarking,
    /// can negatively affect performance.
--- a/src/dxbc/dxbc_compiler.cpp
+++ b/src/dxbc/dxbc_compiler.cpp
@@ -2044,15 +2044,41 @@ namespace dxvk {
    DxbcRegisterValue dst;
    dst.type.ctype  = ins.dst[0].dataType;
    dst.type.ccount = 1;
-    
-    dst.id = m_module.opDot(
-      getVectorTypeId(dst.type),
-      src.at(0).id,
-      src.at(1).id);
-    
-    if (ins.controls.precise() || m_precise)
-      m_module.decorate(dst.id, spv::DecorationNoContraction);
-    
+    dst.id = 0;
+
+    if (!m_moduleInfo.options.longDot) {
+      dst.id = m_module.opDot(
+        getVectorTypeId(dst.type),
+        src.at(0).id,
+        src.at(1).id);
+
+      if (ins.controls.precise() || m_precise)
+        m_module.decorate(dst.id, spv::DecorationNoContraction);
+    } else {
+      uint32_t componentType = getVectorTypeId(dst.type);
+      uint32_t componentCount = srcMask.popCount();
+
+      for (uint32_t i = 1; i <= componentCount; i++) {
+        uint32_t idx = componentCount - i;
+
+        if (dst.id) {
+          dst.id = m_module.opFFma(componentType,
+            m_module.opCompositeExtract(componentType, src.at(0).id, 1, &idx),
+            m_module.opCompositeExtract(componentType, src.at(1).id, 1, &idx),
+            dst.id);
+        } else {
+          dst.id = m_module.opFMul(componentType,
+            m_module.opCompositeExtract(componentType, src.at(0).id, 1, &idx),
+            m_module.opCompositeExtract(componentType, src.at(1).id, 1, &idx));
+        }
+
+        // Unconditionally mark as precise since the exact order of operations
+        // matters for some games, even if the instruction itself is not marked
+        // as precise.
+        m_module.decorate(dst.id, spv::DecorationNoContraction);
+      }
+    }
+
    dst = emitDstOperandModifiers(dst, ins.modifiers);
    emitRegisterStore(ins.dst[0], dst);
  }
--- a/src/dxbc/dxbc_options.cpp
+++ b/src/dxbc/dxbc_options.cpp
@@ -39,6 +39,7 @@ namespace dxvk {
    forceSampleRateShading   = options.forceSampleRateShading;
    enableSampleShadingInterlock = device->features().extFragmentShaderInterlock.fragmentShaderSampleInterlock;
    longMad                  = options.longMad;
+    longDot                  = options.longDot;

    // Figure out float control flags to match D3D11 rules
    if (options.floatControls) {
--- a/src/dxbc/dxbc_options.h
+++ b/src/dxbc/dxbc_options.h
@@ -55,8 +55,11 @@ namespace dxvk {
    /// Minimum storage buffer alignment
    VkDeviceSize minSsboAlignment = 0;

-    /// Should we make our Mads a FFma or do it the long way with an FMul and an FAdd?
+    /// Translate Mad/Dfma to separate FMul+FAdd
    bool longMad;
+
+    /// Translate DpX to a precise FMul+FFma chain
+    bool longDot;
  };
  
 }