1
0
mirror of https://github.com/doitsujin/dxvk.git synced 2025-02-27 13:54:16 +01:00

[dxso] Explicitly emit long dot products

Potentially avoids vertex shader bugs if a game expects a dot product and
an equivalent chain of mad instructions to return the same result.
This commit is contained in:
Philip Rebohle 2024-11-10 21:54:12 +01:00
parent 35a82b2ff3
commit 24adbb5d52

View File

@ -1375,7 +1375,7 @@ namespace dxvk {
DxsoRegisterValue DxsoCompiler::emitMulOperand( DxsoRegisterValue DxsoCompiler::emitMulOperand(
DxsoRegisterValue operand, DxsoRegisterValue operand,
DxsoRegisterValue other) { DxsoRegisterValue other) {
if (m_moduleInfo.options.d3d9FloatEmulation != D3D9FloatEmulation::Strict) if (m_moduleInfo.options.d3d9FloatEmulation != D3D9FloatEmulation::Strict || operand.id == other.id)
return operand; return operand;
uint32_t boolId = getVectorTypeId({ DxsoScalarType::Bool, other.type.ccount }); uint32_t boolId = getVectorTypeId({ DxsoScalarType::Bool, other.type.ccount });
@ -1419,10 +1419,20 @@ namespace dxvk {
auto bz = emitMulOperand(b, a); auto bz = emitMulOperand(b, a);
DxsoRegisterValue dot; DxsoRegisterValue dot;
dot.type = a.type; dot.type.ctype = a.type.ctype;
dot.type.ccount = 1; dot.type.ccount = 1;
dot.id = 0;
uint32_t componentType = getVectorTypeId(dot.type);
for (uint32_t i = 0; i < a.type.ccount; i++) {
uint32_t product = m_module.opFMul(componentType,
m_module.opCompositeExtract(componentType, az.id, 1, &i),
m_module.opCompositeExtract(componentType, bz.id, 1, &i));
dot.id = dot.id ? m_module.opFAdd(componentType, dot.id, product) : product;
}
dot.id = m_module.opDot(getVectorTypeId(dot.type), az.id, bz.id);
return dot; return dot;
} }
@ -2084,13 +2094,11 @@ namespace dxvk {
// Nrm is 3D... // Nrm is 3D...
DxsoRegMask srcMask(true, true, true, false); DxsoRegMask srcMask(true, true, true, false);
auto vec3 = emitRegisterLoad(src[0], srcMask); auto vec3 = emitRegisterLoad(src[0], srcMask);
auto dot = emitDot(vec3, vec3);
// No need for emitDot, either both arguments or none are zero.
// mul_zero has the same result as ieee mul.
uint32_t dot = m_module.opDot(scalarTypeId, vec3.id, vec3.id);
DxsoRegisterValue rcpLength; DxsoRegisterValue rcpLength;
rcpLength.type = scalarType; rcpLength.type = scalarType;
rcpLength.id = m_module.opInverseSqrt(scalarTypeId, dot); rcpLength.id = m_module.opInverseSqrt(scalarTypeId, dot.id);
if (m_moduleInfo.options.d3d9FloatEmulation == D3D9FloatEmulation::Enabled) { if (m_moduleInfo.options.d3d9FloatEmulation == D3D9FloatEmulation::Enabled) {
rcpLength.id = m_module.opNMin(scalarTypeId, rcpLength.id, m_module.constf32(FLT_MAX)); rcpLength.id = m_module.opNMin(scalarTypeId, rcpLength.id, m_module.constf32(FLT_MAX));
} }