diff --git a/src/d3d11/d3d11_device.cpp b/src/d3d11/d3d11_device.cpp
index 48af0047d..18922b25d 100644
--- a/src/d3d11/d3d11_device.cpp
+++ b/src/d3d11/d3d11_device.cpp
@@ -1284,7 +1284,7 @@ namespace dxvk {
         info->MapNoOverwriteOnDynamicConstantBuffer = TRUE;
         info->MapNoOverwriteOnDynamicBufferSRV = TRUE;
         info->MultisampleRTVWithForcedSampleCountOne = TRUE; /* not really */
-        info->SAD4ShaderInstructions = FALSE;
+        info->SAD4ShaderInstructions = TRUE;
         info->ExtendedDoublesShaderInstructions = TRUE;
         info->ExtendedResourceSharing = TRUE; /* not really */
       } return S_OK;
diff --git a/src/dxbc/dxbc_compiler.cpp b/src/dxbc/dxbc_compiler.cpp
index 59eea0b92..ffd0c0ad3 100644
--- a/src/dxbc/dxbc_compiler.cpp
+++ b/src/dxbc/dxbc_compiler.cpp
@@ -175,6 +175,9 @@ namespace dxvk {
       case DxbcInstClass::VectorImul:
         return this->emitVectorImul(ins);
 
+      case DxbcInstClass::VectorMsad:
+        return this->emitVectorMsad(ins);
+
       case DxbcInstClass::VectorShift:
         return this->emitVectorShift(ins);
 
@@ -2110,6 +2113,48 @@ namespace dxvk {
   }
 
 
+  void DxbcCompiler::emitVectorMsad(const DxbcShaderInstruction& ins) {
+    // msad has four operands:
+    // (dst0) Destination
+    // (src0) Reference (packed uint8)
+    // (src1) Source (packed uint8)
+    // (src2) Accumulator
+    // For each of the four bytes packed into a component, the
+    // absolute difference between the reference and source byte
+    // is added to the accumulator. Bytes whose reference value
+    // is zero are masked out and contribute nothing.
+    DxbcRegisterValue refReg = emitRegisterLoad(ins.src[0], ins.dst[0].mask);
+    DxbcRegisterValue srcReg = emitRegisterLoad(ins.src[1], ins.dst[0].mask);
+    DxbcRegisterValue result = emitRegisterLoad(ins.src[2], ins.dst[0].mask);
+
+    auto typeId = getVectorTypeId(result.type);
+    auto bvecId = getVectorTypeId({ DxbcScalarType::Bool, result.type.ccount });
+
+    // Loop-invariant operands: the bit width of one packed
+    // field and the zero vector used for the mask comparison
+    auto count = m_module.constu32(8);
+    auto zero = emitBuildConstVecu32(0, 0, 0, 0, ins.dst[0].mask);
+
+    for (uint32_t i = 0; i < 4; i++) {
+      auto shift = m_module.constu32(8 * i);
+
+      auto ref = m_module.opBitFieldUExtract(typeId, refReg.id, shift, count);
+      auto src = m_module.opBitFieldUExtract(typeId, srcReg.id, shift, count);
+
+      // Only accumulate where the reference byte is non-zero
+      auto mask = m_module.opINotEqual(bvecId, ref, zero.id);
+
+      // Both values are 8-bit, so the signed absolute value
+      // of the wrapped difference is the true byte difference
+      auto diff = m_module.opSAbs(typeId, m_module.opISub(typeId, ref, src));
+      result.id = m_module.opSelect(typeId, mask, m_module.opIAdd(typeId, result.id, diff), result.id);
+    }
+
+    result = emitDstOperandModifiers(result, ins.modifiers);
+    emitRegisterStore(ins.dst[0], result);
+  }
+
+
   void DxbcCompiler::emitVectorShift(const DxbcShaderInstruction& ins) {
     // Shift operations have three operands:
     // (dst0) The destination register
diff --git a/src/dxbc/dxbc_compiler.h b/src/dxbc/dxbc_compiler.h
index 8dd1913ed..e7a6eaebc 100644
--- a/src/dxbc/dxbc_compiler.h
+++ b/src/dxbc/dxbc_compiler.h
@@ -658,6 +658,9 @@ namespace dxvk {
     void emitVectorImul(
       const DxbcShaderInstruction& ins);
 
+    void emitVectorMsad(
+      const DxbcShaderInstruction& ins);
+
     void emitVectorShift(
       const DxbcShaderInstruction& ins);
 
diff --git a/src/dxbc/dxbc_defs.cpp b/src/dxbc/dxbc_defs.cpp
index f560bf5dd..72c40e9f2 100644
--- a/src/dxbc/dxbc_defs.cpp
+++ b/src/dxbc/dxbc_defs.cpp
@@ -1075,7 +1075,12 @@ namespace dxvk {
       { DxbcOperandKind::SrcReg, DxbcScalarType::Float64 },
     } },
     /* Msad */
-    { },
+    { 4, DxbcInstClass::VectorMsad, {
+      { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 },
+      { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 },
+      { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 },
+      { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 },
+    } },
     /* DtoI */
     { 2, DxbcInstClass::ConvertFloat64, {
       { DxbcOperandKind::DstReg, DxbcScalarType::Sint32 },
diff --git a/src/dxbc/dxbc_defs.h b/src/dxbc/dxbc_defs.h
index 35c9118e2..decd76707 100644
--- a/src/dxbc/dxbc_defs.h
+++ b/src/dxbc/dxbc_defs.h
@@ -63,6 +63,7 @@ namespace dxvk {
     VectorDot, ///< Dot product instruction
     VectorIdiv, ///< Component-wise integer division
     VectorImul, ///< Component-wise integer multiplication
+    VectorMsad, ///< Component-wise masked sum of absolute differences
     VectorShift, ///< Bit shift operations on vectors
     VectorSinCos, ///< Sine and Cosine instruction
     Undefined, ///< Instruction code not defined