1
0
mirror of https://github.com/doitsujin/dxvk.git synced 2025-02-20 10:54:16 +01:00

[dxbc,d3d11] Add option to explicitly emit long dot products

This commit is contained in:
Philip Rebohle 2024-08-08 16:03:16 +02:00 committed by Philip Rebohle
parent 5c987ea3d1
commit 4ee907a6df
6 changed files with 56 additions and 11 deletions

View File

@ -529,6 +529,17 @@
# d3d11.longMad = False
# d3d9.longMad = False
# Long Dot
#
# Whether to emit dot products as an explicit FMul + FMA chain (True)
# or as a plain SPIR-V dot product (False).
#
# Supported values:
# - True/False
# d3d11.longDot = False
# Device Local Constant Buffers
#
# Enables using device local, host accessible memory for constant buffers in D3D9.

View File

@ -32,6 +32,7 @@ namespace dxvk {
this->maxFrameLatency = config.getOption<int32_t>("dxgi.maxFrameLatency", 0);
this->exposeDriverCommandLists = config.getOption<bool>("d3d11.exposeDriverCommandLists", true);
this->longMad = config.getOption<bool>("d3d11.longMad", false);
this->longDot = config.getOption<bool>("d3d11.longDot", false);
this->reproducibleCommandStream = config.getOption<bool>("d3d11.reproducibleCommandStream", false);
// Clamp LOD bias so that people don't abuse this in unintended ways

View File

@ -118,9 +118,12 @@ namespace dxvk {
/// Shader dump path
std::string shaderDumpPath;
/// Should we make our Mads a FFma or do it the long way with an FMul and an FAdd?
/// Translate Mad/Dfma to separate FMul+FAdd
bool longMad;
/// Translate DpX to a precise FMul+FFma chain
bool longDot;
/// Ensure that for the same D3D commands the output VK commands
/// don't change between runs. Useful for comparative benchmarking,
/// can negatively affect performance.

View File

@ -2044,15 +2044,41 @@ namespace dxvk {
DxbcRegisterValue dst;
dst.type.ctype = ins.dst[0].dataType;
dst.type.ccount = 1;
dst.id = m_module.opDot(
getVectorTypeId(dst.type),
src.at(0).id,
src.at(1).id);
if (ins.controls.precise() || m_precise)
m_module.decorate(dst.id, spv::DecorationNoContraction);
dst.id = 0;
if (!m_moduleInfo.options.longDot) {
dst.id = m_module.opDot(
getVectorTypeId(dst.type),
src.at(0).id,
src.at(1).id);
if (ins.controls.precise() || m_precise)
m_module.decorate(dst.id, spv::DecorationNoContraction);
} else {
uint32_t componentType = getVectorTypeId(dst.type);
uint32_t componentCount = srcMask.popCount();
for (uint32_t i = 1; i <= componentCount; i++) {
uint32_t idx = componentCount - i;
if (dst.id) {
dst.id = m_module.opFFma(componentType,
m_module.opCompositeExtract(componentType, src.at(0).id, 1, &idx),
m_module.opCompositeExtract(componentType, src.at(1).id, 1, &idx),
dst.id);
} else {
dst.id = m_module.opFMul(componentType,
m_module.opCompositeExtract(componentType, src.at(0).id, 1, &idx),
m_module.opCompositeExtract(componentType, src.at(1).id, 1, &idx));
}
// Unconditionally mark as precise since the exact order of operation
// matters for some games, even if the instruction itself is not marked
// as precise.
m_module.decorate(dst.id, spv::DecorationNoContraction);
}
}
dst = emitDstOperandModifiers(dst, ins.modifiers);
emitRegisterStore(ins.dst[0], dst);
}

View File

@ -39,6 +39,7 @@ namespace dxvk {
forceSampleRateShading = options.forceSampleRateShading;
enableSampleShadingInterlock = device->features().extFragmentShaderInterlock.fragmentShaderSampleInterlock;
longMad = options.longMad;
longDot = options.longDot;
// Figure out float control flags to match D3D11 rules
if (options.floatControls) {

View File

@ -55,8 +55,11 @@ namespace dxvk {
/// Minimum storage buffer alignment
VkDeviceSize minSsboAlignment = 0;
/// Should we make our Mads a FFma or do it the long way with an FMul and an FAdd?
/// Translate Mad/Dfma to separate FMul+FAdd
bool longMad;
/// Translate DpX to a precise FMul+FFma chain
bool longDot;
};
}