[dxbc] Implemented new workaround for depth-compare ops on Nvidia

2025-03-14 04:29:15 +01:00 · 2018-03-01 10:11:15 +01:00 · 2018-03-01 10:11:15 +01:00 · 0e9b7d7ccd
commit 0e9b7d7ccd
parent c21ebd72ef
4 changed files with 38 additions and 2 deletions
--- a/src/dxbc/dxbc_compiler.cpp
+++ b/src/dxbc/dxbc_compiler.cpp
@ -2683,6 +2683,9 @@ namespace dxvk {
          DxbcRegMask(true, false, false, false))
      : DxbcRegisterValue();
    
+    if (isDepthCompare && m_options.addExtraDrefCoordComponent && coord.type.ccount < 4)
+      coord = emitRegisterConcat(coord, referenceValue);
+    
    // Determine the sampled image type based on the opcode.
    const uint32_t sampledImageType = isDepthCompare
      ? m_module.defSampledImageType(m_textures.at(textureId).depthTypeId)
@ -2797,6 +2800,9 @@ namespace dxvk {
      ? emitRegisterLoad(ins.src[3], DxbcRegMask(true, false, false, false))
      : DxbcRegisterValue();
    
+    if (isDepthCompare && m_options.addExtraDrefCoordComponent && coord.type.ccount < 4)
+      coord = emitRegisterConcat(coord, referenceValue);
+    
    // Load explicit gradients for sample operations that require them
    const bool hasExplicitGradients = ins.op == DxbcOpcode::SampleD;
    
@ -3583,16 +3589,32 @@ namespace dxvk {
  }
  
  
+  DxbcRegisterValue DxbcCompiler::emitRegisterConcat(
+          DxbcRegisterValue       value1,
+          DxbcRegisterValue       value2) {
+    std::array<uint32_t, 2> ids =
+      {{ value1.id, value2.id }};
+    
+    DxbcRegisterValue result;
+    result.type.ctype  = value1.type.ctype;
+    result.type.ccount = value1.type.ccount + value2.type.ccount;
+    result.id = m_module.opCompositeConstruct(
+      getVectorTypeId(result.type),
+      ids.size(), ids.data());
+    return result;
+  }
+  
+  
  DxbcRegisterValue DxbcCompiler::emitRegisterExtend(
          DxbcRegisterValue       value,
          uint32_t                size) {
    if (size == 1)
      return value;
    
-    std::array<uint32_t, 4> ids = {
+    std::array<uint32_t, 4> ids = {{
      value.id, value.id,
      value.id, value.id, 
-    };
+    }};
    
    DxbcRegisterValue result;
    result.type.ctype  = value.type.ctype;
--- a/src/dxbc/dxbc_compiler.h
+++ b/src/dxbc/dxbc_compiler.h
@ -652,6 +652,10 @@ namespace dxvk {
            DxbcRegisterValue       srcValue,
            DxbcRegMask             srcMask);
    
+    DxbcRegisterValue emitRegisterConcat(
+            DxbcRegisterValue       value1,
+            DxbcRegisterValue       value2);
+    
    DxbcRegisterValue emitRegisterExtend(
            DxbcRegisterValue       value,
            uint32_t                size);
--- a/src/dxbc/dxbc_options.cpp
+++ b/src/dxbc/dxbc_options.cpp
@ -10,11 +10,18 @@ namespace dxvk {
      = static_cast<DxvkGpuVendor>(deviceProps.vendorID);
    
    if (vendor == DxvkGpuVendor::Nvidia) {
+      // The driver expects the coordinate vector of
+      // Dref operations to have an extra component
+      this->addExtraDrefCoordComponent = true;
+      
      // From vkd3d: NMin/NMax/NClamp crash the driver.
      this->useSimpleMinMaxClamp = true;
    }
    
    // Inform the user about which workarounds are enabled
+    if (this->addExtraDrefCoordComponent)
+      Logger::warn("DxbcOptions: Growing coordinate vector for Dref operations");
+    
    if (this->useSimpleMinMaxClamp)
      Logger::warn("DxbcOptions: Using FMin/FMax/FClamp instead of NMin/NMax/NClamp");
  }
--- a/src/dxbc/dxbc_options.h
+++ b/src/dxbc/dxbc_options.h
@ -15,6 +15,9 @@ namespace dxvk {
    DxbcOptions(
      const Rc<DxvkDevice>& device);
      
+    /// Append the depth reference value to the coordinate vector of depth-compare (Dref) operations
+    bool addExtraDrefCoordComponent = false;
+      
    /// Use Fmin/Fmax instead of Nmin/Nmax.
    bool useSimpleMinMaxClamp = false;
  };