mirror of
https://github.com/doitsujin/dxvk.git
synced 2024-11-30 04:24:11 +01:00
[dxbc] Implemented vendor-specific workarounds in an attempt to fix Nvidia
This commit is contained in:
parent
4e06f498dd
commit
f4cd90d6fa
@ -23,7 +23,8 @@ namespace dxvk {
|
||||
m_featureLevel (featureLevel),
|
||||
m_featureFlags (featureFlags),
|
||||
m_dxvkDevice (m_dxgiDevice->GetDXVKDevice()),
|
||||
m_dxvkAdapter (m_dxvkDevice->adapter()) {
|
||||
m_dxvkAdapter (m_dxvkDevice->adapter()),
|
||||
m_dxbcOptions (m_dxvkDevice) {
|
||||
Com<IDXGIAdapter> adapter;
|
||||
|
||||
if (FAILED(m_dxgiDevice->GetAdapter(&adapter))
|
||||
@ -1356,7 +1357,7 @@ namespace dxvk {
|
||||
|
||||
try {
|
||||
*pShaderModule = D3D11ShaderModule(
|
||||
this, pShaderBytecode, BytecodeLength);
|
||||
&m_dxbcOptions, this, pShaderBytecode, BytecodeLength);
|
||||
return S_OK;
|
||||
} catch (const DxvkError& e) {
|
||||
Logger::err(e.message());
|
||||
|
@ -1,13 +1,15 @@
|
||||
#pragma once
|
||||
|
||||
#include "../dxbc/dxbc_options.h"
|
||||
|
||||
#include "../dxgi/dxgi_object.h"
|
||||
|
||||
#include "../util/com/com_private_data.h"
|
||||
|
||||
#include "d3d11_interfaces.h"
|
||||
#include "d3d11_state.h"
|
||||
#include "d3d11_util.h"
|
||||
|
||||
#include "../util/com/com_private_data.h"
|
||||
|
||||
namespace dxvk {
|
||||
class DxgiAdapter;
|
||||
|
||||
@ -255,6 +257,8 @@ namespace dxvk {
|
||||
const Rc<DxvkDevice> m_dxvkDevice;
|
||||
const Rc<DxvkAdapter> m_dxvkAdapter;
|
||||
|
||||
const DxbcOptions m_dxbcOptions;
|
||||
|
||||
D3D11DeviceContext* m_context = nullptr;
|
||||
|
||||
std::mutex m_resourceInitMutex;
|
||||
|
@ -8,6 +8,7 @@ namespace dxvk {
|
||||
|
||||
|
||||
D3D11ShaderModule::D3D11ShaderModule(
|
||||
const DxbcOptions* pDxbcOptions,
|
||||
D3D11Device* pDevice,
|
||||
const void* pShaderBytecode,
|
||||
size_t BytecodeLength) {
|
||||
@ -33,7 +34,7 @@ namespace dxvk {
|
||||
}
|
||||
|
||||
|
||||
m_shader = module.compile();
|
||||
m_shader = module.compile(*pDxbcOptions);
|
||||
|
||||
if (dumpPath.size() != 0) {
|
||||
const std::string baseName = str::format(dumpPath, "/",
|
||||
|
@ -25,6 +25,7 @@ namespace dxvk {
|
||||
|
||||
D3D11ShaderModule();
|
||||
D3D11ShaderModule(
|
||||
const DxbcOptions* pDxbcOptions,
|
||||
D3D11Device* pDevice,
|
||||
const void* pShaderBytecode,
|
||||
size_t BytecodeLength);
|
||||
|
@ -7,10 +7,12 @@ namespace dxvk {
|
||||
constexpr uint32_t PerVertex_ClipDist = 2;
|
||||
|
||||
DxbcCompiler::DxbcCompiler(
|
||||
const DxbcOptions& options,
|
||||
const DxbcProgramVersion& version,
|
||||
const Rc<DxbcIsgn>& isgn,
|
||||
const Rc<DxbcIsgn>& osgn)
|
||||
: m_version (version),
|
||||
: m_options (options),
|
||||
m_version (version),
|
||||
m_isgn (isgn),
|
||||
m_osgn (osgn) {
|
||||
// Declare an entry point ID. We'll need it during the
|
||||
@ -1040,13 +1042,15 @@ namespace dxvk {
|
||||
break;
|
||||
|
||||
case DxbcOpcode::Max:
|
||||
dst.id = m_module.opNMax(typeId,
|
||||
src.at(0).id, src.at(1).id);
|
||||
dst.id = m_options.useSimpleMinMaxClamp
|
||||
? m_module.opFMax(typeId, src.at(0).id, src.at(1).id)
|
||||
: m_module.opNMax(typeId, src.at(0).id, src.at(1).id);
|
||||
break;
|
||||
|
||||
case DxbcOpcode::Min:
|
||||
dst.id = m_module.opNMin(typeId,
|
||||
src.at(0).id, src.at(1).id);
|
||||
dst.id = m_options.useSimpleMinMaxClamp
|
||||
? m_module.opFMin(typeId, src.at(0).id, src.at(1).id)
|
||||
: m_module.opNMin(typeId, src.at(0).id, src.at(1).id);
|
||||
break;
|
||||
|
||||
case DxbcOpcode::Mul:
|
||||
@ -2211,7 +2215,7 @@ namespace dxvk {
|
||||
|
||||
// Load the texture coordinates. SPIR-V allows these
|
||||
// to be float4 even if not all components are used.
|
||||
const DxbcRegisterValue coord = emitRegisterLoad(texCoordReg, coordArrayMask);
|
||||
DxbcRegisterValue coord = emitRegisterLoad(texCoordReg, coordArrayMask);
|
||||
|
||||
// Load reference value for depth-compare operations
|
||||
const bool isDepthCompare = ins.op == DxbcOpcode::SampleC
|
||||
@ -2221,6 +2225,17 @@ namespace dxvk {
|
||||
? emitRegisterLoad(ins.src[3], DxbcRegMask(true, false, false, false))
|
||||
: DxbcRegisterValue();
|
||||
|
||||
if (isDepthCompare && m_options.packDrefValueIntoCoordinates) {
|
||||
const std::array<uint32_t, 2> packedCoordIds
|
||||
= {{ coord.id, referenceValue.id }};
|
||||
|
||||
coord.type.ccount += 1;
|
||||
coord.id = m_module.opCompositeConstruct(
|
||||
getVectorTypeId(coord.type),
|
||||
packedCoordIds.size(),
|
||||
packedCoordIds.data());
|
||||
}
|
||||
|
||||
// Load explicit gradients for sample operations that require them
|
||||
const bool hasExplicitGradients = ins.op == DxbcOpcode::SampleD;
|
||||
|
||||
@ -3034,10 +3049,9 @@ namespace dxvk {
|
||||
if (value.type.ctype == DxbcScalarType::Float32) {
|
||||
// Saturating only makes sense on floats
|
||||
if (modifiers.saturate) {
|
||||
value.id = m_module.opNClamp(
|
||||
typeId, value.id,
|
||||
m_module.constf32(0.0f),
|
||||
m_module.constf32(1.0f));
|
||||
value.id = m_options.useSimpleMinMaxClamp
|
||||
? m_module.opFClamp(typeId, value.id, m_module.constf32(0.0f), m_module.constf32(1.0f))
|
||||
: m_module.opNClamp(typeId, value.id, m_module.constf32(0.0f), m_module.constf32(1.0f));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include "dxbc_decoder.h"
|
||||
#include "dxbc_defs.h"
|
||||
#include "dxbc_names.h"
|
||||
#include "dxbc_options.h"
|
||||
#include "dxbc_util.h"
|
||||
|
||||
namespace dxvk {
|
||||
@ -213,6 +214,7 @@ namespace dxvk {
|
||||
public:
|
||||
|
||||
DxbcCompiler(
|
||||
const DxbcOptions& options,
|
||||
const DxbcProgramVersion& version,
|
||||
const Rc<DxbcIsgn>& isgn,
|
||||
const Rc<DxbcIsgn>& osgn);
|
||||
@ -233,6 +235,7 @@ namespace dxvk {
|
||||
|
||||
private:
|
||||
|
||||
DxbcOptions m_options;
|
||||
DxbcProgramVersion m_version;
|
||||
SpirvModule m_module;
|
||||
|
||||
|
@ -40,13 +40,13 @@ namespace dxvk {
|
||||
}
|
||||
|
||||
|
||||
Rc<DxvkShader> DxbcModule::compile() const {
|
||||
Rc<DxvkShader> DxbcModule::compile(const DxbcOptions& options) const {
|
||||
if (m_shexChunk == nullptr)
|
||||
throw DxvkError("DxbcModule::compile: No SHDR/SHEX chunk");
|
||||
|
||||
DxbcCodeSlice slice = m_shexChunk->slice();
|
||||
|
||||
DxbcCompiler compiler(
|
||||
DxbcCompiler compiler(options,
|
||||
m_shexChunk->version(),
|
||||
m_isgnChunk, m_osgnChunk);
|
||||
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include "dxbc_chunk_isgn.h"
|
||||
#include "dxbc_chunk_shex.h"
|
||||
#include "dxbc_header.h"
|
||||
#include "dxbc_options.h"
|
||||
#include "dxbc_reader.h"
|
||||
|
||||
// References used for figuring out DXBC:
|
||||
@ -46,9 +47,11 @@ namespace dxvk {
|
||||
|
||||
/**
|
||||
* \brief Compiles DXBC shader to SPIR-V module
|
||||
*
|
||||
* \param [in] options DXBC compiler options
|
||||
* \returns The compiled shader object
|
||||
*/
|
||||
Rc<DxvkShader> compile() const;
|
||||
Rc<DxvkShader> compile(const DxbcOptions& options) const;
|
||||
|
||||
private:
|
||||
|
||||
|
29
src/dxbc/dxbc_options.cpp
Normal file
29
src/dxbc/dxbc_options.cpp
Normal file
@ -0,0 +1,29 @@
|
||||
#include "dxbc_options.h"
|
||||
|
||||
namespace dxvk {
|
||||
|
||||
/**
 * \brief Initializes compiler options for a device
 *
 * Reads the vendor ID from the physical device properties
 * of the device's adapter and enables the workarounds that
 * are listed for that vendor. Every workaround that ends
 * up enabled is reported through the logger.
 * \param [in] device The DXVK device to query
 */
DxbcOptions::DxbcOptions(const Rc<DxvkDevice>& device) {
  const VkPhysicalDeviceProperties deviceProps
    = device->adapter()->deviceProperties();

  // The Vulkan vendor ID is a 32-bit value, whereas
  // DxvkGpuVendor only has a 16-bit underlying type.
  // Compare at full width so that the upper bits are
  // not silently dropped by a narrowing enum cast.
  const bool isNvidia = deviceProps.vendorID
    == static_cast<uint32_t>(DxvkGpuVendor::Nvidia);

  if (isNvidia) {
    // From vkd3d: NMin/NMax/NClamp crash the driver.
    this->useSimpleMinMaxClamp = true;

    // From vkd3d: Nvidia expects the depth reference
    // value to be packed into the coordinate vector.
    this->packDrefValueIntoCoordinates = true;
  }

  // Inform the user about which workarounds are enabled
  if (this->useSimpleMinMaxClamp)
    Logger::warn("DxbcOptions: Using FMin/FMax/FClamp instead of NMin/NMax/NClamp");

  if (this->packDrefValueIntoCoordinates)
    Logger::warn("DxbcOptions: Packing depth reference value into coordinate vector");
}
|
||||
|
||||
}
|
26
src/dxbc/dxbc_options.h
Normal file
26
src/dxbc/dxbc_options.h
Normal file
@ -0,0 +1,26 @@
|
||||
#pragma once
|
||||
|
||||
#include "../dxvk/dxvk_device.h"
|
||||
|
||||
namespace dxvk {
|
||||
|
||||
/**
|
||||
* \brief DXBC compiler options
|
||||
*
|
||||
* Defines driver- or device-specific options,
|
||||
* which are mostly workarounds for driver bugs.
|
||||
*/
|
||||
struct DxbcOptions {
|
||||
DxbcOptions() { }
|
||||
DxbcOptions(
|
||||
const Rc<DxvkDevice>& device);
|
||||
|
||||
/// Use Fmin/Fmax instead of Nmin/Nmax.
|
||||
bool useSimpleMinMaxClamp = false;
|
||||
|
||||
/// Pack the depth reference value into the
|
||||
/// coordinate vector for depth-compare ops.
|
||||
bool packDrefValueIntoCoordinates = false;
|
||||
};
|
||||
|
||||
}
|
@ -8,6 +8,7 @@ dxbc_src = files([
|
||||
'dxbc_header.cpp',
|
||||
'dxbc_module.cpp',
|
||||
'dxbc_names.cpp',
|
||||
'dxbc_options.cpp',
|
||||
'dxbc_reader.cpp',
|
||||
'dxbc_util.cpp',
|
||||
])
|
||||
|
@ -10,6 +10,16 @@ namespace dxvk {
|
||||
class DxvkInstance;
|
||||
class DxvkSurface;
|
||||
|
||||
/**
 * \brief GPU vendors
 *
 * Vendor identifiers, based on PCIe IDs.
 */
enum class DxvkGpuVendor : uint16_t {
  Amd    = 0x1002,
  Nvidia = 0x10de,
  Intel  = 0x8086,
};
|
||||
|
||||
/**
|
||||
* \brief DXVK adapter
|
||||
*
|
||||
|
@ -70,7 +70,6 @@ namespace dxvk {
|
||||
|
||||
if (env::getEnvVar(L"DXVK_DEBUG_LAYERS") == "1")
|
||||
layers.push_back("VK_LAYER_LUNARG_standard_validation");
|
||||
|
||||
|
||||
const vk::NameSet layersAvailable
|
||||
= vk::NameSet::enumerateInstanceLayers(*m_vkl);
|
||||
|
@ -1507,7 +1507,7 @@ namespace dxvk {
|
||||
}
|
||||
|
||||
|
||||
uint32_t SpirvModule::opNMax(
|
||||
uint32_t SpirvModule::opFMax(
|
||||
uint32_t resultType,
|
||||
uint32_t a,
|
||||
uint32_t b) {
|
||||
@ -1524,7 +1524,7 @@ namespace dxvk {
|
||||
}
|
||||
|
||||
|
||||
uint32_t SpirvModule::opNMin(
|
||||
uint32_t SpirvModule::opFMin(
|
||||
uint32_t resultType,
|
||||
uint32_t a,
|
||||
uint32_t b) {
|
||||
@ -1539,6 +1539,40 @@ namespace dxvk {
|
||||
m_code.putWord(b);
|
||||
return resultId;
|
||||
}
|
||||
|
||||
|
||||
uint32_t SpirvModule::opNMax(
|
||||
uint32_t resultType,
|
||||
uint32_t a,
|
||||
uint32_t b) {
|
||||
uint32_t resultId = this->allocateId();
|
||||
|
||||
m_code.putIns (spv::OpExtInst, 7);
|
||||
m_code.putWord(resultType);
|
||||
m_code.putWord(resultId);
|
||||
m_code.putWord(m_instExtGlsl450);
|
||||
m_code.putWord(spv::GLSLstd450NMax);
|
||||
m_code.putWord(a);
|
||||
m_code.putWord(b);
|
||||
return resultId;
|
||||
}
|
||||
|
||||
|
||||
uint32_t SpirvModule::opNMin(
|
||||
uint32_t resultType,
|
||||
uint32_t a,
|
||||
uint32_t b) {
|
||||
uint32_t resultId = this->allocateId();
|
||||
|
||||
m_code.putIns (spv::OpExtInst, 7);
|
||||
m_code.putWord(resultType);
|
||||
m_code.putWord(resultId);
|
||||
m_code.putWord(m_instExtGlsl450);
|
||||
m_code.putWord(spv::GLSLstd450NMin);
|
||||
m_code.putWord(a);
|
||||
m_code.putWord(b);
|
||||
return resultId;
|
||||
}
|
||||
|
||||
|
||||
uint32_t SpirvModule::opSMax(
|
||||
@ -1609,7 +1643,7 @@ namespace dxvk {
|
||||
}
|
||||
|
||||
|
||||
uint32_t SpirvModule::opNClamp(
|
||||
uint32_t SpirvModule::opFClamp(
|
||||
uint32_t resultType,
|
||||
uint32_t x,
|
||||
uint32_t minVal,
|
||||
@ -1628,6 +1662,25 @@ namespace dxvk {
|
||||
}
|
||||
|
||||
|
||||
uint32_t SpirvModule::opNClamp(
|
||||
uint32_t resultType,
|
||||
uint32_t x,
|
||||
uint32_t minVal,
|
||||
uint32_t maxVal) {
|
||||
uint32_t resultId = this->allocateId();
|
||||
|
||||
m_code.putIns (spv::OpExtInst, 8);
|
||||
m_code.putWord(resultType);
|
||||
m_code.putWord(resultId);
|
||||
m_code.putWord(m_instExtGlsl450);
|
||||
m_code.putWord(spv::GLSLstd450NClamp);
|
||||
m_code.putWord(x);
|
||||
m_code.putWord(minVal);
|
||||
m_code.putWord(maxVal);
|
||||
return resultId;
|
||||
}
|
||||
|
||||
|
||||
uint32_t SpirvModule::opIEqual(
|
||||
uint32_t resultType,
|
||||
uint32_t vector1,
|
||||
|
@ -565,6 +565,16 @@ namespace dxvk {
|
||||
uint32_t b,
|
||||
uint32_t c);
|
||||
|
||||
uint32_t opFMax(
|
||||
uint32_t resultType,
|
||||
uint32_t a,
|
||||
uint32_t b);
|
||||
|
||||
uint32_t opFMin(
|
||||
uint32_t resultType,
|
||||
uint32_t a,
|
||||
uint32_t b);
|
||||
|
||||
uint32_t opNMax(
|
||||
uint32_t resultType,
|
||||
uint32_t a,
|
||||
@ -595,6 +605,12 @@ namespace dxvk {
|
||||
uint32_t a,
|
||||
uint32_t b);
|
||||
|
||||
uint32_t opFClamp(
|
||||
uint32_t resultType,
|
||||
uint32_t x,
|
||||
uint32_t minVal,
|
||||
uint32_t maxVal);
|
||||
|
||||
uint32_t opNClamp(
|
||||
uint32_t resultType,
|
||||
uint32_t x,
|
||||
|
@ -40,7 +40,7 @@ int WINAPI WinMain(HINSTANCE hInstance,
|
||||
DxbcReader reader(dxbcCode.data(), dxbcCode.size());
|
||||
DxbcModule module(reader);
|
||||
|
||||
Rc<DxvkShader> shader = module.compile();
|
||||
Rc<DxvkShader> shader = module.compile(DxbcOptions());
|
||||
shader->dump(std::ofstream(
|
||||
str::fromws(argv[2]), std::ios::binary));
|
||||
return 0;
|
||||
|
Loading…
Reference in New Issue
Block a user