1
0
mirror of https://github.com/doitsujin/dxvk.git synced 2025-03-15 07:29:17 +01:00

Merge f1f9ae979d970936b6c9484d136066f6b5685ffb into c04410ca00f33162d0875bc8500d3f8185bc73df

This commit is contained in:
Philip Rebohle 2025-02-28 13:29:43 +01:00 committed by GitHub
commit f721a8ae8b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 145 additions and 22 deletions

View File

@ -15,6 +15,7 @@ namespace dxvk {
D3D11Options::D3D11Options(const Config& config) { D3D11Options::D3D11Options(const Config& config) {
this->zeroInitWorkgroupMemory = config.getOption<bool>("d3d11.zeroInitWorkgroupMemory", false); this->zeroInitWorkgroupMemory = config.getOption<bool>("d3d11.zeroInitWorkgroupMemory", false);
this->forceVolatileTgsmAccess = config.getOption<bool>("d3d11.forceVolatileTgsmAccess", false); this->forceVolatileTgsmAccess = config.getOption<bool>("d3d11.forceVolatileTgsmAccess", false);
this->forceComputeUavBarriers = config.getOption<bool>("d3d11.forceComputeUavBarriers", false);
this->relaxedBarriers = config.getOption<bool>("d3d11.relaxedBarriers", false); this->relaxedBarriers = config.getOption<bool>("d3d11.relaxedBarriers", false);
this->relaxedGraphicsBarriers = config.getOption<bool>("d3d11.relaxedGraphicsBarriers", false); this->relaxedGraphicsBarriers = config.getOption<bool>("d3d11.relaxedGraphicsBarriers", false);
this->maxTessFactor = config.getOption<int32_t>("d3d11.maxTessFactor", 0); this->maxTessFactor = config.getOption<int32_t>("d3d11.maxTessFactor", 0);

View File

@ -26,6 +26,13 @@ namespace dxvk {
/// without explicit synchronization. /// without explicit synchronization.
bool forceVolatileTgsmAccess = false; bool forceVolatileTgsmAccess = false;
/// Force UAV synchronization inside compute shaders
///
/// Workaround for compute shaders that access overlapping
/// memory regions within a UAV without proper workgroup
/// synchronization. Will have a negative performance impact.
bool forceComputeUavBarriers = false;
/// Use relaxed memory barriers /// Use relaxed memory barriers
/// ///
/// May improve performance in some games, /// May improve performance in some games,

View File

@ -2831,6 +2831,9 @@ namespace dxvk {
const DxbcRegister& dstReg = ins.dst[0]; const DxbcRegister& dstReg = ins.dst[0];
const DxbcRegister& srcReg = isStructured ? ins.src[2] : ins.src[1]; const DxbcRegister& srcReg = isStructured ? ins.src[2] : ins.src[1];
if (dstReg.type == DxbcOperandType::UnorderedAccessView)
emitUavBarrier(uint64_t(1u) << srcReg.idx[0].offset, 0u);
// Retrieve common info about the buffer // Retrieve common info about the buffer
const DxbcBufferInfo bufferInfo = getBufferInfo(srcReg); const DxbcBufferInfo bufferInfo = getBufferInfo(srcReg);
@ -3049,6 +3052,9 @@ namespace dxvk {
const DxbcRegister& dstReg = ins.dst[0]; const DxbcRegister& dstReg = ins.dst[0];
const DxbcRegister& srcReg = isStructured ? ins.src[2] : ins.src[1]; const DxbcRegister& srcReg = isStructured ? ins.src[2] : ins.src[1];
if (dstReg.type == DxbcOperandType::UnorderedAccessView)
emitUavBarrier(0u, uint64_t(1u) << dstReg.idx[0].offset);
DxbcRegisterValue value = emitRegisterLoad(srcReg, dstReg.mask); DxbcRegisterValue value = emitRegisterLoad(srcReg, dstReg.mask);
value = emitRegisterBitcast(value, DxbcScalarType::Uint32); value = emitRegisterBitcast(value, DxbcScalarType::Uint32);
@ -4158,6 +4164,8 @@ namespace dxvk {
const uint32_t registerId = ins.src[1].idx[0].offset; const uint32_t registerId = ins.src[1].idx[0].offset;
const DxbcUav uavInfo = m_uavs.at(registerId); const DxbcUav uavInfo = m_uavs.at(registerId);
emitUavBarrier(uint64_t(1u) << registerId, 0u);
// Load texture coordinates // Load texture coordinates
DxbcRegisterValue texCoord = emitLoadTexCoord( DxbcRegisterValue texCoord = emitLoadTexCoord(
ins.src[0], uavInfo.imageInfo); ins.src[0], uavInfo.imageInfo);
@ -4210,6 +4218,7 @@ namespace dxvk {
// (src0) The texture or buffer coordinates // (src0) The texture or buffer coordinates
// (src1) The value to store // (src1) The value to store
const DxbcBufferInfo uavInfo = getBufferInfo(ins.dst[0]); const DxbcBufferInfo uavInfo = getBufferInfo(ins.dst[0]);
emitUavBarrier(0u, uint64_t(1u) << ins.dst[0].idx[0].offset);
// Set image operands for coherent access if necessary // Set image operands for coherent access if necessary
SpirvImageOperands imageOperands; SpirvImageOperands imageOperands;
@ -4551,6 +4560,8 @@ namespace dxvk {
// return can be used in place of break to terminate a case block // return can be used in place of break to terminate a case block
if (m_controlFlowBlocks.back().type == DxbcCfgBlockType::Switch) if (m_controlFlowBlocks.back().type == DxbcCfgBlockType::Switch)
m_controlFlowBlocks.back().b_switch.labelCase = labelId; m_controlFlowBlocks.back().b_switch.labelCase = labelId;
m_topLevelIsUniform = false;
} else { } else {
// Last instruction in the current function // Last instruction in the current function
this->emitFunctionEnd(); this->emitFunctionEnd();
@ -4580,6 +4591,9 @@ namespace dxvk {
m_module.opReturn(); m_module.opReturn();
m_module.opLabel(continueLabel); m_module.opLabel(continueLabel);
// The return condition may be non-uniform
m_topLevelIsUniform = false;
} }
@ -4606,6 +4620,9 @@ namespace dxvk {
m_module.opLabel(cond.labelEnd); m_module.opLabel(cond.labelEnd);
m_module.enableCapability(spv::CapabilityDemoteToHelperInvocation); m_module.enableCapability(spv::CapabilityDemoteToHelperInvocation);
// Discard is just retc in a trenchcoat
m_topLevelIsUniform = false;
} }
@ -4623,6 +4640,10 @@ namespace dxvk {
m_module.setDebugName(functionId, str::format("label", functionNr).c_str()); m_module.setDebugName(functionId, str::format("label", functionNr).c_str());
m_insideFunction = true; m_insideFunction = true;
// We have to assume that this function gets
// called from non-uniform control flow
m_topLevelIsUniform = false;
} }
@ -4670,57 +4691,85 @@ namespace dxvk {
void DxbcCompiler::emitControlFlow(const DxbcShaderInstruction& ins) { void DxbcCompiler::emitControlFlow(const DxbcShaderInstruction& ins) {
switch (ins.op) { switch (ins.op) {
case DxbcOpcode::If: case DxbcOpcode::If:
return this->emitControlFlowIf(ins); this->emitUavBarrier(0, 0);
this->emitControlFlowIf(ins);
break;
case DxbcOpcode::Else: case DxbcOpcode::Else:
return this->emitControlFlowElse(ins); this->emitControlFlowElse(ins);
break;
case DxbcOpcode::EndIf: case DxbcOpcode::EndIf:
return this->emitControlFlowEndIf(ins); this->emitControlFlowEndIf(ins);
this->emitUavBarrier(0, 0);
break;
case DxbcOpcode::Switch: case DxbcOpcode::Switch:
return this->emitControlFlowSwitch(ins); this->emitUavBarrier(0, 0);
this->emitControlFlowSwitch(ins);
break;
case DxbcOpcode::Case: case DxbcOpcode::Case:
return this->emitControlFlowCase(ins); this->emitControlFlowCase(ins);
break;
case DxbcOpcode::Default: case DxbcOpcode::Default:
return this->emitControlFlowDefault(ins); this->emitControlFlowDefault(ins);
break;
case DxbcOpcode::EndSwitch: case DxbcOpcode::EndSwitch:
return this->emitControlFlowEndSwitch(ins); this->emitControlFlowEndSwitch(ins);
this->emitUavBarrier(0, 0);
break;
case DxbcOpcode::Loop: case DxbcOpcode::Loop:
return this->emitControlFlowLoop(ins); this->emitUavBarrier(0, 0);
this->emitControlFlowLoop(ins);
break;
case DxbcOpcode::EndLoop: case DxbcOpcode::EndLoop:
return this->emitControlFlowEndLoop(ins); this->emitControlFlowEndLoop(ins);
this->emitUavBarrier(0, 0);
break;
case DxbcOpcode::Break: case DxbcOpcode::Break:
case DxbcOpcode::Continue: case DxbcOpcode::Continue:
return this->emitControlFlowBreak(ins); this->emitControlFlowBreak(ins);
break;
case DxbcOpcode::Breakc: case DxbcOpcode::Breakc:
case DxbcOpcode::Continuec: case DxbcOpcode::Continuec:
return this->emitControlFlowBreakc(ins); this->emitControlFlowBreakc(ins);
break;
case DxbcOpcode::Ret: case DxbcOpcode::Ret:
return this->emitControlFlowRet(ins); this->emitControlFlowRet(ins);
break;
case DxbcOpcode::Retc: case DxbcOpcode::Retc:
return this->emitControlFlowRetc(ins); this->emitUavBarrier(0, 0);
this->emitControlFlowRetc(ins);
break;
case DxbcOpcode::Discard: case DxbcOpcode::Discard:
return this->emitControlFlowDiscard(ins); this->emitControlFlowDiscard(ins);
break;
case DxbcOpcode::Label: case DxbcOpcode::Label:
return this->emitControlFlowLabel(ins); this->emitControlFlowLabel(ins);
break;
case DxbcOpcode::Call: case DxbcOpcode::Call:
return this->emitControlFlowCall(ins); this->emitUavBarrier(0, 0);
this->emitControlFlowCall(ins);
this->emitUavBarrier(-1, -1);
break;
case DxbcOpcode::Callc: case DxbcOpcode::Callc:
return this->emitControlFlowCallc(ins); this->emitUavBarrier(0, 0);
this->emitControlFlowCallc(ins);
this->emitUavBarrier(-1, -1);
break;
default: default:
Logger::warn(str::format( Logger::warn(str::format(
@ -7873,6 +7922,53 @@ namespace dxvk {
} }
// Tracks UAV accesses and emits a control barrier when a hazard is detected.
//
// Only active when the forceComputeUavBarriers option is enabled and the
// shader being compiled is a compute shader; otherwise this is a no-op.
//
// readMask:  bit mask of UAV register indices about to be read
// writeMask: bit mask of UAV register indices about to be written
//
// Passing 0 for both masks requests a barrier for any pending access to a
// UAV that the shader both reads and writes. This is how the control flow
// instructions call it. After a potential barrier, the given masks are
// recorded as pending accesses.
void DxbcCompiler::emitUavBarrier(uint64_t readMask, uint64_t writeMask) {
  if (!m_moduleInfo.options.forceComputeUavBarriers
   || m_programInfo.type() != DxbcProgramType::ComputeShader)
    return;

  // If both masks are 0, emit a barrier in case at least one read-write UAV
  // has a pending unsynchronized access. Only consider read-after-write and
  // write-after-read hazards, assume that back-to-back stores are safe and
  // do not overlap in memory. Atomics are also completely ignored here.
  uint64_t rdMask = m_uavRdMask;
  uint64_t wrMask = m_uavWrMask;

  // A UAV with both a pending read and a pending write always needs a barrier
  bool insertBarrier = bool(rdMask & wrMask);

  if (readMask || writeMask) {
    // Intersect pending accesses with the *incoming* accesses: pending
    // reads vs. new writes (write-after-read), and pending writes vs.
    // new reads (read-after-write). Intersecting the pending masks with
    // each other instead would ignore the new access entirely and only
    // detect the hazard one access too late.
    rdMask &= writeMask;
    wrMask &= readMask;
  }

  // Only UAVs that the shader both reads and writes can cause a hazard
  for (auto uav : bit::BitMask(rdMask | wrMask)) {
    constexpr VkAccessFlags rwAccess = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
    insertBarrier |= (m_analysis->uavInfos[uav].accessFlags & rwAccess) == rwAccess;
  }

  // Need to be in uniform top-level control flow, or otherwise
  // it is not safe to insert control barriers.
  if (insertBarrier && m_controlFlowBlocks.empty() && m_topLevelIsUniform) {
    // Execution scope is always the workgroup; the memory scope is
    // widened to the queue family if m_hasGloballyCoherentUav is set.
    m_module.opControlBarrier(
      m_module.constu32(spv::ScopeWorkgroup),
      m_module.constu32(m_hasGloballyCoherentUav ? spv::ScopeQueueFamily : spv::ScopeWorkgroup),
      m_module.constu32(spv::MemorySemanticsWorkgroupMemoryMask
                      | spv::MemorySemanticsImageMemoryMask
                      | spv::MemorySemanticsUniformMemoryMask
                      | spv::MemorySemanticsAcquireReleaseMask
                      | spv::MemorySemanticsMakeAvailableMask
                      | spv::MemorySemanticsMakeVisibleMask));

    // Everything before the barrier is synchronized now
    m_uavWrMask = 0u;
    m_uavRdMask = 0u;
  }

  // Mark pending accesses
  m_uavWrMask |= writeMask;
  m_uavRdMask |= readMask;
}
DxbcVectorType DxbcCompiler::getInputRegType(uint32_t regIdx) const { DxbcVectorType DxbcCompiler::getInputRegType(uint32_t regIdx) const {
switch (m_programInfo.type()) { switch (m_programInfo.type()) {
case DxbcProgramType::VertexShader: { case DxbcProgramType::VertexShader: {

View File

@ -484,6 +484,11 @@ namespace dxvk {
// currently active if-else blocks and loops. // currently active if-else blocks and loops.
std::vector<DxbcCfgBlock> m_controlFlowBlocks; std::vector<DxbcCfgBlock> m_controlFlowBlocks;
bool m_topLevelIsUniform = true;
uint64_t m_uavRdMask = 0u;
uint64_t m_uavWrMask = 0u;
////////////////////////////////////////////// //////////////////////////////////////////////
// Function state tracking. Required in order // Function state tracking. Required in order
// to properly end functions in some cases. // to properly end functions in some cases.
@ -1258,6 +1263,10 @@ namespace dxvk {
bool ignoreInputSystemValue( bool ignoreInputSystemValue(
DxbcSystemValue sv) const; DxbcSystemValue sv) const;
void emitUavBarrier(
uint64_t readMask,
uint64_t writeMask);
/////////////////////////// ///////////////////////////
// Type definition methods // Type definition methods
uint32_t getScalarTypeId( uint32_t getScalarTypeId(

View File

@ -35,6 +35,7 @@ namespace dxvk {
invariantPosition = options.invariantPosition; invariantPosition = options.invariantPosition;
zeroInitWorkgroupMemory = options.zeroInitWorkgroupMemory; zeroInitWorkgroupMemory = options.zeroInitWorkgroupMemory;
forceVolatileTgsmAccess = options.forceVolatileTgsmAccess; forceVolatileTgsmAccess = options.forceVolatileTgsmAccess;
forceComputeUavBarriers = options.forceComputeUavBarriers;
disableMsaa = options.disableMsaa; disableMsaa = options.disableMsaa;
forceSampleRateShading = options.forceSampleRateShading; forceSampleRateShading = options.forceSampleRateShading;
enableSampleShadingInterlock = device->features().extFragmentShaderInterlock.fragmentShaderSampleInterlock; enableSampleShadingInterlock = device->features().extFragmentShaderInterlock.fragmentShaderSampleInterlock;

View File

@ -39,6 +39,10 @@ namespace dxvk {
/// Insert memory barriers after TGSM stores /// Insert memory barriers after TGSM stores
bool forceVolatileTgsmAccess = false; bool forceVolatileTgsmAccess = false;
/// Try to detect hazards in UAV access and insert
/// barriers when we know control flow is uniform.
bool forceComputeUavBarriers = false;
/// Replace ld_ms with ld /// Replace ld_ms with ld
bool disableMsaa = false; bool disableMsaa = false;

View File

@ -458,6 +458,11 @@ namespace dxvk {
{ R"(\\FarCry(5|NewDawn)\.exe$)", {{ { R"(\\FarCry(5|NewDawn)\.exe$)", {{
{ "d3d11.zeroInitWorkgroupMemory", "True" }, { "d3d11.zeroInitWorkgroupMemory", "True" },
}} }, }} },
/* Watch Dogs 2 - ships broken compute shaders *
* with no barriers when they are needed */
{ R"(\\WatchDogs2\.exe$)", {{
{ "d3d11.forceComputeUavBarriers", "True" },
}} },
/**********************************************/ /**********************************************/
/* D3D9 GAMES */ /* D3D9 GAMES */