[dxbc] Implement MSAD4 instruction

Apparently we're required to support this for Direct3D 11.2+. There are currently no known games that require this instruction.
2025-02-20 19:54:19 +01:00 · 2019-09-16 00:48:39 +02:00 · 2019-09-16 00:48:39 +02:00 · 26afaba410
commit 26afaba410
parent 72a356fe01
5 changed files with 46 additions and 2 deletions
--- a/src/d3d11/d3d11_device.cpp
+++ b/src/d3d11/d3d11_device.cpp
@ -1284,7 +1284,7 @@ namespace dxvk {
        info->MapNoOverwriteOnDynamicConstantBuffer   = TRUE;
        info->MapNoOverwriteOnDynamicBufferSRV        = TRUE;
        info->MultisampleRTVWithForcedSampleCountOne  = TRUE; /* not really */
-        info->SAD4ShaderInstructions                  = FALSE;
+        info->SAD4ShaderInstructions                  = TRUE;
        info->ExtendedDoublesShaderInstructions       = TRUE;
        info->ExtendedResourceSharing                 = TRUE; /* not really */
      } return S_OK;
--- a/src/dxbc/dxbc_compiler.cpp
+++ b/src/dxbc/dxbc_compiler.cpp
@ -175,6 +175,9 @@ namespace dxvk {
      case DxbcInstClass::VectorImul:
        return this->emitVectorImul(ins);
        
+      case DxbcInstClass::VectorMsad:
+        return this->emitVectorMsad(ins);
+        
      case DxbcInstClass::VectorShift:
        return this->emitVectorShift(ins);
        
@ -2110,6 +2113,38 @@ namespace dxvk {
  }
  
  
+  void DxbcCompiler::emitVectorMsad(const DxbcShaderInstruction& ins) {
+    // msad (masked sum of absolute differences) has four operands:
+    //    (dst0) Destination, receives the updated accumulator
+    //    (src0) Reference, four packed uint8 per component
+    //    (src1) Source, four packed uint8 per component
+    //    (src2) Accumulator, loaded as the initial result
+    DxbcRegisterValue refReg = emitRegisterLoad(ins.src[0], ins.dst[0].mask);
+    DxbcRegisterValue srcReg = emitRegisterLoad(ins.src[1], ins.dst[0].mask);
+    DxbcRegisterValue result = emitRegisterLoad(ins.src[2], ins.dst[0].mask);
+    
+    auto typeId = getVectorTypeId(result.type);
+    auto bvecId = getVectorTypeId({ DxbcScalarType::Bool, result.type.ccount }); // bool vector with the same component count as result
+
+    for (uint32_t i = 0; i < 4; i++) { // one iteration per packed byte
+      auto shift = m_module.constu32(8 * i); // bit offset of byte i
+      auto count = m_module.constu32(8); // width of the extracted field in bits
+
+      auto ref = m_module.opBitFieldUExtract(typeId, refReg.id, shift, count);
+      auto src = m_module.opBitFieldUExtract(typeId, srcReg.id, shift, count);
+
+      auto zero = emitBuildConstVecu32(0, 0, 0, 0, ins.dst[0].mask);
+      auto mask = m_module.opINotEqual(bvecId, ref, zero.id); // true where the reference byte is non-zero
+
+      auto diff = m_module.opSAbs(typeId, m_module.opISub(typeId, ref, src)); // |ref - src| for this byte
+      result.id = m_module.opSelect(typeId, mask, m_module.opIAdd(typeId, result.id, diff), result.id); // add diff only where mask is set, else keep result
+    }
+
+    result = emitDstOperandModifiers(result, ins.modifiers);
+    emitRegisterStore(ins.dst[0], result);
+  }
+
+
  void DxbcCompiler::emitVectorShift(const DxbcShaderInstruction& ins) {
    // Shift operations have three operands:
    //    (dst0) The destination register
--- a/src/dxbc/dxbc_compiler.h
+++ b/src/dxbc/dxbc_compiler.h
@ -658,6 +658,9 @@ namespace dxvk {
    void emitVectorImul(
      const DxbcShaderInstruction&  ins);
    
+    void emitVectorMsad(
+      const DxbcShaderInstruction&  ins);
+    
    void emitVectorShift(
      const DxbcShaderInstruction&  ins);
    
--- a/src/dxbc/dxbc_defs.cpp
+++ b/src/dxbc/dxbc_defs.cpp
@ -1075,7 +1075,12 @@ namespace dxvk {
      { DxbcOperandKind::SrcReg, DxbcScalarType::Float64 },
    } },
    /* Msad                                 */
-    { },
+    { 4, DxbcInstClass::VectorMsad, {
+      { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 },
+      { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 },
+      { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 },
+      { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 },
+    } },
    /* DtoI                                 */
    { 2, DxbcInstClass::ConvertFloat64, {
      { DxbcOperandKind::DstReg, DxbcScalarType::Sint32  },
--- a/src/dxbc/dxbc_defs.h
+++ b/src/dxbc/dxbc_defs.h
@ -63,6 +63,7 @@ namespace dxvk {
    VectorDot,          ///< Dot product instruction
    VectorIdiv,         ///< Component-wise integer division
    VectorImul,         ///< Component-wise integer multiplication
+    VectorMsad,         ///< Component-wise masked sum of absolute differences
    VectorShift,        ///< Bit shift operations on vectors
    VectorSinCos,       ///< Sine and Cosine instruction
    Undefined,          ///< Instruction code not defined