diff --git a/src/d3d11/d3d11_device.cpp b/src/d3d11/d3d11_device.cpp index e814918b..d135345a 100644 --- a/src/d3d11/d3d11_device.cpp +++ b/src/d3d11/d3d11_device.cpp @@ -23,7 +23,8 @@ namespace dxvk { m_featureLevel (featureLevel), m_featureFlags (featureFlags), m_dxvkDevice (m_dxgiDevice->GetDXVKDevice()), - m_dxvkAdapter (m_dxvkDevice->adapter()) { + m_dxvkAdapter (m_dxvkDevice->adapter()), + m_dxbcOptions (m_dxvkDevice) { Com adapter; if (FAILED(m_dxgiDevice->GetAdapter(&adapter)) @@ -1356,7 +1357,7 @@ namespace dxvk { try { *pShaderModule = D3D11ShaderModule( - this, pShaderBytecode, BytecodeLength); + &m_dxbcOptions, this, pShaderBytecode, BytecodeLength); return S_OK; } catch (const DxvkError& e) { Logger::err(e.message()); diff --git a/src/d3d11/d3d11_device.h b/src/d3d11/d3d11_device.h index e243c50e..668dea5b 100644 --- a/src/d3d11/d3d11_device.h +++ b/src/d3d11/d3d11_device.h @@ -1,13 +1,15 @@ #pragma once +#include "../dxbc/dxbc_options.h" + #include "../dxgi/dxgi_object.h" +#include "../util/com/com_private_data.h" + #include "d3d11_interfaces.h" #include "d3d11_state.h" #include "d3d11_util.h" -#include "../util/com/com_private_data.h" - namespace dxvk { class DxgiAdapter; @@ -255,6 +257,8 @@ namespace dxvk { const Rc m_dxvkDevice; const Rc m_dxvkAdapter; + const DxbcOptions m_dxbcOptions; + D3D11DeviceContext* m_context = nullptr; std::mutex m_resourceInitMutex; diff --git a/src/d3d11/d3d11_shader.cpp b/src/d3d11/d3d11_shader.cpp index 7ca0edc2..1c7f2aee 100644 --- a/src/d3d11/d3d11_shader.cpp +++ b/src/d3d11/d3d11_shader.cpp @@ -8,6 +8,7 @@ namespace dxvk { D3D11ShaderModule::D3D11ShaderModule( + const DxbcOptions* pDxbcOptions, D3D11Device* pDevice, const void* pShaderBytecode, size_t BytecodeLength) { @@ -33,7 +34,7 @@ namespace dxvk { } - m_shader = module.compile(); + m_shader = module.compile(*pDxbcOptions); if (dumpPath.size() != 0) { const std::string baseName = str::format(dumpPath, "/", diff --git a/src/d3d11/d3d11_shader.h b/src/d3d11/d3d11_shader.h index 92a4a430..65ca3107 100644 --- a/src/d3d11/d3d11_shader.h +++ b/src/d3d11/d3d11_shader.h @@ -25,6 +25,7 @@ namespace dxvk { D3D11ShaderModule(); D3D11ShaderModule( + const DxbcOptions* pDxbcOptions, D3D11Device* pDevice, const void* pShaderBytecode, size_t BytecodeLength); diff --git a/src/dxbc/dxbc_compiler.cpp b/src/dxbc/dxbc_compiler.cpp index 34e69b20..9ff27745 100644 --- a/src/dxbc/dxbc_compiler.cpp +++ b/src/dxbc/dxbc_compiler.cpp @@ -7,10 +7,12 @@ namespace dxvk { constexpr uint32_t PerVertex_ClipDist = 2; DxbcCompiler::DxbcCompiler( + const DxbcOptions& options, const DxbcProgramVersion& version, const Rc& isgn, const Rc& osgn) - : m_version (version), + : m_options (options), + m_version (version), m_isgn (isgn), m_osgn (osgn) { // Declare an entry point ID. We'll need it during the @@ -1040,13 +1042,15 @@ namespace dxvk { break; case DxbcOpcode::Max: - dst.id = m_module.opNMax(typeId, - src.at(0).id, src.at(1).id); + dst.id = m_options.useSimpleMinMaxClamp + ? m_module.opFMax(typeId, src.at(0).id, src.at(1).id) + : m_module.opNMax(typeId, src.at(0).id, src.at(1).id); break; case DxbcOpcode::Min: - dst.id = m_module.opNMin(typeId, - src.at(0).id, src.at(1).id); + dst.id = m_options.useSimpleMinMaxClamp + ? m_module.opFMin(typeId, src.at(0).id, src.at(1).id) + : m_module.opNMin(typeId, src.at(0).id, src.at(1).id); break; case DxbcOpcode::Mul: @@ -2211,7 +2215,7 @@ namespace dxvk { // Load the texture coordinates. SPIR-V allows these // to be float4 even if not all components are used. - const DxbcRegisterValue coord = emitRegisterLoad(texCoordReg, coordArrayMask); + DxbcRegisterValue coord = emitRegisterLoad(texCoordReg, coordArrayMask); // Load reference value for depth-compare operations const bool isDepthCompare = ins.op == DxbcOpcode::SampleC @@ -2221,6 +2225,17 @@ namespace dxvk { ? emitRegisterLoad(ins.src[3], DxbcRegMask(true, false, false, false)) : DxbcRegisterValue(); + if (isDepthCompare && m_options.packDrefValueIntoCoordinates) { + const std::array packedCoordIds + = {{ coord.id, referenceValue.id }}; + + coord.type.ccount += 1; + coord.id = m_module.opCompositeConstruct( + getVectorTypeId(coord.type), + packedCoordIds.size(), + packedCoordIds.data()); + } + // Load explicit gradients for sample operations that require them const bool hasExplicitGradients = ins.op == DxbcOpcode::SampleD; @@ -3034,10 +3049,9 @@ namespace dxvk { if (value.type.ctype == DxbcScalarType::Float32) { // Saturating only makes sense on floats if (modifiers.saturate) { - value.id = m_module.opNClamp( - typeId, value.id, - m_module.constf32(0.0f), - m_module.constf32(1.0f)); + value.id = m_options.useSimpleMinMaxClamp + ? m_module.opFClamp(typeId, value.id, m_module.constf32(0.0f), m_module.constf32(1.0f)) + : m_module.opNClamp(typeId, value.id, m_module.constf32(0.0f), m_module.constf32(1.0f)); } } diff --git a/src/dxbc/dxbc_compiler.h b/src/dxbc/dxbc_compiler.h index b04fa337..16851b03 100644 --- a/src/dxbc/dxbc_compiler.h +++ b/src/dxbc/dxbc_compiler.h @@ -9,6 +9,7 @@ #include "dxbc_decoder.h" #include "dxbc_defs.h" #include "dxbc_names.h" +#include "dxbc_options.h" #include "dxbc_util.h" namespace dxvk { @@ -213,6 +214,7 @@ namespace dxvk { public: DxbcCompiler( + const DxbcOptions& options, const DxbcProgramVersion& version, const Rc& isgn, const Rc& osgn); @@ -233,6 +235,7 @@ namespace dxvk { private: + DxbcOptions m_options; DxbcProgramVersion m_version; SpirvModule m_module; diff --git a/src/dxbc/dxbc_module.cpp b/src/dxbc/dxbc_module.cpp index 8a617cd5..792022fa 100644 --- a/src/dxbc/dxbc_module.cpp +++ b/src/dxbc/dxbc_module.cpp @@ -40,13 +40,13 @@ namespace dxvk { } - Rc DxbcModule::compile() const { + Rc DxbcModule::compile(const DxbcOptions& options) const { if (m_shexChunk == nullptr) throw DxvkError("DxbcModule::compile: No SHDR/SHEX chunk"); DxbcCodeSlice slice = m_shexChunk->slice(); - DxbcCompiler compiler( + DxbcCompiler compiler(options, m_shexChunk->version(), m_isgnChunk, m_osgnChunk); diff --git a/src/dxbc/dxbc_module.h b/src/dxbc/dxbc_module.h index a33eed92..d4a8ef76 100644 --- a/src/dxbc/dxbc_module.h +++ b/src/dxbc/dxbc_module.h @@ -5,6 +5,7 @@ #include "dxbc_chunk_isgn.h" #include "dxbc_chunk_shex.h" #include "dxbc_header.h" +#include "dxbc_options.h" #include "dxbc_reader.h" // References used for figuring out DXBC: @@ -46,9 +47,11 @@ namespace dxvk { /** * \brief Compiles DXBC shader to SPIR-V module + * + * \param [in] options DXBC compiler options * \returns The compiled shader object */ - Rc compile() const; + Rc compile(const DxbcOptions& options) const; private: diff --git a/src/dxbc/dxbc_options.cpp b/src/dxbc/dxbc_options.cpp new file mode 100644 index 00000000..20b8b184 --- /dev/null +++ b/src/dxbc/dxbc_options.cpp @@ -0,0 +1,29 @@ +#include "dxbc_options.h" + +namespace dxvk { + + DxbcOptions::DxbcOptions(const Rc& device) { + const VkPhysicalDeviceProperties deviceProps + = device->adapter()->deviceProperties(); + + const DxvkGpuVendor vendor + = static_cast(deviceProps.vendorID); + + if (vendor == DxvkGpuVendor::Nvidia) { + // From vkd3d: NMin/NMax/NClamp crash the driver. + this->useSimpleMinMaxClamp = true; + + // From vkd3d: Nvidia expects the depth reference + // value to be packed into the coordinate vector. + this->packDrefValueIntoCoordinates = true; + } + + // Inform the user about which workarounds are enabled + if (this->useSimpleMinMaxClamp) + Logger::warn("DxbcOptions: Using FMin/FMax/FClamp instead of NMin/NMax/NClamp"); + + if (this->packDrefValueIntoCoordinates) + Logger::warn("DxbcOptions: Packing depth reference value into coordinate vector"); + } + +} \ No newline at end of file diff --git a/src/dxbc/dxbc_options.h b/src/dxbc/dxbc_options.h new file mode 100644 index 00000000..48e8f944 --- /dev/null +++ b/src/dxbc/dxbc_options.h @@ -0,0 +1,26 @@ +#pragma once + +#include "../dxvk/dxvk_device.h" + +namespace dxvk { + + /** + * \brief DXBC compiler options + * + * Defines driver- or device-specific options, + * which are mostly workarounds for driver bugs. + */ + struct DxbcOptions { + DxbcOptions() { } + DxbcOptions( + const Rc& device); + + /// Use Fmin/Fmax instead of Nmin/Nmax. + bool useSimpleMinMaxClamp = false; + + /// Pack the depth reference value into the + /// coordinate vector for depth-compare ops. + bool packDrefValueIntoCoordinates = false; + }; + +} \ No newline at end of file diff --git a/src/dxbc/meson.build b/src/dxbc/meson.build index c9948f7b..86700735 100644 --- a/src/dxbc/meson.build +++ b/src/dxbc/meson.build @@ -8,6 +8,7 @@ dxbc_src = files([ 'dxbc_header.cpp', 'dxbc_module.cpp', 'dxbc_names.cpp', + 'dxbc_options.cpp', 'dxbc_reader.cpp', 'dxbc_util.cpp', ]) diff --git a/src/dxvk/dxvk_adapter.h b/src/dxvk/dxvk_adapter.h index 4a7dfc5d..6907e295 100644 --- a/src/dxvk/dxvk_adapter.h +++ b/src/dxvk/dxvk_adapter.h @@ -10,6 +10,16 @@ namespace dxvk { class DxvkInstance; class DxvkSurface; + /** + * \brief GPU vendors + * Based on PCIe IDs. + */ + enum class DxvkGpuVendor : uint16_t { + Amd = 0x1002, + Nvidia = 0x10de, + Intel = 0x8086, + }; + /** * \brief DXVK adapter * diff --git a/src/dxvk/dxvk_instance.cpp b/src/dxvk/dxvk_instance.cpp index 98c73b4c..d53f5dde 100644 --- a/src/dxvk/dxvk_instance.cpp +++ b/src/dxvk/dxvk_instance.cpp @@ -70,7 +70,6 @@ namespace dxvk { if (env::getEnvVar(L"DXVK_DEBUG_LAYERS") == "1") layers.push_back("VK_LAYER_LUNARG_standard_validation"); - const vk::NameSet layersAvailable = vk::NameSet::enumerateInstanceLayers(*m_vkl); diff --git a/src/spirv/spirv_module.cpp b/src/spirv/spirv_module.cpp index 16eacd34..bfe601d9 100644 --- a/src/spirv/spirv_module.cpp +++ b/src/spirv/spirv_module.cpp @@ -1507,7 +1507,7 @@ namespace dxvk { } - uint32_t SpirvModule::opNMax( + uint32_t SpirvModule::opFMax( uint32_t resultType, uint32_t a, uint32_t b) { @@ -1524,7 +1524,7 @@ namespace dxvk { } - uint32_t SpirvModule::opNMin( + uint32_t SpirvModule::opFMin( uint32_t resultType, uint32_t a, uint32_t b) { @@ -1539,6 +1539,40 @@ namespace dxvk { m_code.putWord(b); return resultId; } + + + uint32_t SpirvModule::opNMax( + uint32_t resultType, + uint32_t a, + uint32_t b) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpExtInst, 7); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(m_instExtGlsl450); + m_code.putWord(spv::GLSLstd450NMax); + m_code.putWord(a); + m_code.putWord(b); + return resultId; + } + + + uint32_t SpirvModule::opNMin( + uint32_t resultType, + uint32_t a, + uint32_t b) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpExtInst, 7); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(m_instExtGlsl450); + m_code.putWord(spv::GLSLstd450NMin); + m_code.putWord(a); + m_code.putWord(b); + return resultId; + } uint32_t SpirvModule::opSMax( @@ -1609,7 +1643,7 @@ namespace dxvk { } - uint32_t SpirvModule::opNClamp( + uint32_t SpirvModule::opFClamp( uint32_t resultType, uint32_t x, uint32_t minVal, @@ -1628,6 +1662,25 @@ namespace dxvk { } + uint32_t SpirvModule::opNClamp( + uint32_t resultType, + uint32_t x, + uint32_t minVal, + uint32_t maxVal) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpExtInst, 8); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(m_instExtGlsl450); + m_code.putWord(spv::GLSLstd450NClamp); + m_code.putWord(x); + m_code.putWord(minVal); + m_code.putWord(maxVal); + return resultId; + } + + uint32_t SpirvModule::opIEqual( uint32_t resultType, uint32_t vector1, diff --git a/src/spirv/spirv_module.h b/src/spirv/spirv_module.h index 0df08c19..65f490d6 100644 --- a/src/spirv/spirv_module.h +++ b/src/spirv/spirv_module.h @@ -565,6 +565,16 @@ namespace dxvk { uint32_t b, uint32_t c); + uint32_t opFMax( + uint32_t resultType, + uint32_t a, + uint32_t b); + + uint32_t opFMin( + uint32_t resultType, + uint32_t a, + uint32_t b); + uint32_t opNMax( uint32_t resultType, uint32_t a, @@ -595,6 +605,12 @@ namespace dxvk { uint32_t a, uint32_t b); + uint32_t opFClamp( + uint32_t resultType, + uint32_t x, + uint32_t minVal, + uint32_t maxVal); + uint32_t opNClamp( uint32_t resultType, uint32_t x, diff --git a/tests/dxbc/test_dxbc_compiler.cpp b/tests/dxbc/test_dxbc_compiler.cpp index 88ee2424..cd7ae57d 100644 --- a/tests/dxbc/test_dxbc_compiler.cpp +++ b/tests/dxbc/test_dxbc_compiler.cpp @@ -40,7 +40,7 @@ int WINAPI WinMain(HINSTANCE hInstance, DxbcReader reader(dxbcCode.data(), dxbcCode.size()); DxbcModule module(reader); - Rc shader = module.compile(); + Rc shader = module.compile(DxbcOptions()); shader->dump(std::ofstream( str::fromws(argv[2]), std::ios::binary)); return 0;