mirror of
https://github.com/doitsujin/dxvk.git
synced 2025-02-20 10:54:16 +01:00
[dxbc,d3d11] Add option to explicitly emit long dot products
This commit is contained in:
parent
5c987ea3d1
commit
4ee907a6df
11
dxvk.conf
11
dxvk.conf
@ -529,6 +529,17 @@
|
||||
# d3d11.longMad = False
|
||||
# d3d9.longMad = False
|
||||
|
||||
|
||||
# Long Dot
|
||||
#
|
||||
# Whether to emit dot products as an explicit FMul+FMA chain (True) or as a plain SPIR-V dot product (False).
|
||||
#
|
||||
# Supported values:
|
||||
# - True/False
|
||||
|
||||
# d3d11.longDot = False
|
||||
|
||||
|
||||
# Device Local Constant Buffers
|
||||
#
|
||||
# Enables using device local, host accessible memory for constant buffers in D3D9.
|
||||
|
@ -32,6 +32,7 @@ namespace dxvk {
|
||||
this->maxFrameLatency = config.getOption<int32_t>("dxgi.maxFrameLatency", 0);
|
||||
this->exposeDriverCommandLists = config.getOption<bool>("d3d11.exposeDriverCommandLists", true);
|
||||
this->longMad = config.getOption<bool>("d3d11.longMad", false);
|
||||
this->longDot = config.getOption<bool>("d3d11.longDot", false);
|
||||
this->reproducibleCommandStream = config.getOption<bool>("d3d11.reproducibleCommandStream", false);
|
||||
|
||||
// Clamp LOD bias so that people don't abuse this in unintended ways
|
||||
|
@ -118,9 +118,12 @@ namespace dxvk {
|
||||
/// Shader dump path
|
||||
std::string shaderDumpPath;
|
||||
|
||||
/// Should we make our Mads a FFma or do it the long way with an FMul and an FAdd?
|
||||
/// Translate Mad/Dfma to separate FMul+FAdd
|
||||
bool longMad;
|
||||
|
||||
/// Translate DpX to a precise FMul+FFma chain
|
||||
bool longDot;
|
||||
|
||||
/// Ensure that for the same D3D commands the output VK commands
|
||||
/// don't change between runs. Useful for comparative benchmarking,
|
||||
/// can negatively affect performance.
|
||||
|
@ -2044,15 +2044,41 @@ namespace dxvk {
|
||||
DxbcRegisterValue dst;
|
||||
dst.type.ctype = ins.dst[0].dataType;
|
||||
dst.type.ccount = 1;
|
||||
|
||||
dst.id = m_module.opDot(
|
||||
getVectorTypeId(dst.type),
|
||||
src.at(0).id,
|
||||
src.at(1).id);
|
||||
|
||||
if (ins.controls.precise() || m_precise)
|
||||
m_module.decorate(dst.id, spv::DecorationNoContraction);
|
||||
|
||||
dst.id = 0;
|
||||
|
||||
if (!m_moduleInfo.options.longDot) {
|
||||
dst.id = m_module.opDot(
|
||||
getVectorTypeId(dst.type),
|
||||
src.at(0).id,
|
||||
src.at(1).id);
|
||||
|
||||
if (ins.controls.precise() || m_precise)
|
||||
m_module.decorate(dst.id, spv::DecorationNoContraction);
|
||||
} else {
|
||||
uint32_t componentType = getVectorTypeId(dst.type);
|
||||
uint32_t componentCount = srcMask.popCount();
|
||||
|
||||
for (uint32_t i = 1; i <= componentCount; i++) {
|
||||
uint32_t idx = componentCount - i;
|
||||
|
||||
if (dst.id) {
|
||||
dst.id = m_module.opFFma(componentType,
|
||||
m_module.opCompositeExtract(componentType, src.at(0).id, 1, &idx),
|
||||
m_module.opCompositeExtract(componentType, src.at(1).id, 1, &idx),
|
||||
dst.id);
|
||||
} else {
|
||||
dst.id = m_module.opFMul(componentType,
|
||||
m_module.opCompositeExtract(componentType, src.at(0).id, 1, &idx),
|
||||
m_module.opCompositeExtract(componentType, src.at(1).id, 1, &idx));
|
||||
}
|
||||
|
||||
// Unconditionally mark as precise since the exact order of operations
|
||||
// matters for some games, even if the instruction itself is not marked
|
||||
// as precise.
|
||||
m_module.decorate(dst.id, spv::DecorationNoContraction);
|
||||
}
|
||||
}
|
||||
|
||||
dst = emitDstOperandModifiers(dst, ins.modifiers);
|
||||
emitRegisterStore(ins.dst[0], dst);
|
||||
}
|
||||
|
@ -39,6 +39,7 @@ namespace dxvk {
|
||||
forceSampleRateShading = options.forceSampleRateShading;
|
||||
enableSampleShadingInterlock = device->features().extFragmentShaderInterlock.fragmentShaderSampleInterlock;
|
||||
longMad = options.longMad;
|
||||
longDot = options.longDot;
|
||||
|
||||
// Figure out float control flags to match D3D11 rules
|
||||
if (options.floatControls) {
|
||||
|
@ -55,8 +55,11 @@ namespace dxvk {
|
||||
/// Minimum storage buffer alignment
|
||||
VkDeviceSize minSsboAlignment = 0;
|
||||
|
||||
/// Should we make our Mads a FFma or do it the long way with an FMul and an FAdd?
|
||||
/// Translate Mad/Dfma to separate FMul+FAdd
|
||||
bool longMad;
|
||||
|
||||
/// Translate DpX to a precise FMul+FFma chain
|
||||
bool longDot;
|
||||
};
|
||||
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user