1
0
mirror of https://github.com/doitsujin/dxvk.git synced 2025-03-14 04:29:15 +01:00

Merge f1f9ae979d970936b6c9484d136066f6b5685ffb into c04410ca00f33162d0875bc8500d3f8185bc73df

This commit is contained in:
Philip Rebohle 2025-02-28 13:29:43 +01:00 committed by GitHub
commit f721a8ae8b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 145 additions and 22 deletions

View File

@ -15,6 +15,7 @@ namespace dxvk {
D3D11Options::D3D11Options(const Config& config) {
this->zeroInitWorkgroupMemory = config.getOption<bool>("d3d11.zeroInitWorkgroupMemory", false);
this->forceVolatileTgsmAccess = config.getOption<bool>("d3d11.forceVolatileTgsmAccess", false);
this->forceComputeUavBarriers = config.getOption<bool>("d3d11.forceComputeUavBarriers", false);
this->relaxedBarriers = config.getOption<bool>("d3d11.relaxedBarriers", false);
this->relaxedGraphicsBarriers = config.getOption<bool>("d3d11.relaxedGraphicsBarriers", false);
this->maxTessFactor = config.getOption<int32_t>("d3d11.maxTessFactor", 0);

View File

@ -26,6 +26,13 @@ namespace dxvk {
/// without explicit synchronization.
bool forceVolatileTgsmAccess = false;
/// Force UAV synchronization insided compute shaders
///
/// Workaround for compute shaders that access overlapping
/// memory regions within a UAV without proper workgroup
/// synchroniation. Will have a negative performance impact.
bool forceComputeUavBarriers = false;
/// Use relaxed memory barriers
///
/// May improve performance in some games,

View File

@ -2831,6 +2831,9 @@ namespace dxvk {
const DxbcRegister& dstReg = ins.dst[0];
const DxbcRegister& srcReg = isStructured ? ins.src[2] : ins.src[1];
if (dstReg.type == DxbcOperandType::UnorderedAccessView)
emitUavBarrier(uint64_t(1u) << srcReg.idx[0].offset, 0u);
// Retrieve common info about the buffer
const DxbcBufferInfo bufferInfo = getBufferInfo(srcReg);
@ -3049,6 +3052,9 @@ namespace dxvk {
const DxbcRegister& dstReg = ins.dst[0];
const DxbcRegister& srcReg = isStructured ? ins.src[2] : ins.src[1];
if (dstReg.type == DxbcOperandType::UnorderedAccessView)
emitUavBarrier(0u, uint64_t(1u) << dstReg.idx[0].offset);
DxbcRegisterValue value = emitRegisterLoad(srcReg, dstReg.mask);
value = emitRegisterBitcast(value, DxbcScalarType::Uint32);
@ -4157,7 +4163,9 @@ namespace dxvk {
// (src1) The UAV to load from
const uint32_t registerId = ins.src[1].idx[0].offset;
const DxbcUav uavInfo = m_uavs.at(registerId);
emitUavBarrier(uint64_t(1u) << registerId, 0u);
// Load texture coordinates
DxbcRegisterValue texCoord = emitLoadTexCoord(
ins.src[0], uavInfo.imageInfo);
@ -4210,6 +4218,7 @@ namespace dxvk {
// (src0) The texture or buffer coordinates
// (src1) The value to store
const DxbcBufferInfo uavInfo = getBufferInfo(ins.dst[0]);
emitUavBarrier(0u, uint64_t(1u) << ins.dst[0].idx[0].offset);
// Set image operands for coherent access if necessary
SpirvImageOperands imageOperands;
@ -4551,6 +4560,8 @@ namespace dxvk {
// return can be used in place of break to terminate a case block
if (m_controlFlowBlocks.back().type == DxbcCfgBlockType::Switch)
m_controlFlowBlocks.back().b_switch.labelCase = labelId;
m_topLevelIsUniform = false;
} else {
// Last instruction in the current function
this->emitFunctionEnd();
@ -4580,6 +4591,9 @@ namespace dxvk {
m_module.opReturn();
m_module.opLabel(continueLabel);
// The return condition may be non-uniform
m_topLevelIsUniform = false;
}
@ -4606,8 +4620,11 @@ namespace dxvk {
m_module.opLabel(cond.labelEnd);
m_module.enableCapability(spv::CapabilityDemoteToHelperInvocation);
// Discard is just retc in a trenchcoat
m_topLevelIsUniform = false;
}
void DxbcCompiler::emitControlFlowLabel(const DxbcShaderInstruction& ins) {
uint32_t functionNr = ins.dst[0].idx[0].offset;
@ -4623,6 +4640,10 @@ namespace dxvk {
m_module.setDebugName(functionId, str::format("label", functionNr).c_str());
m_insideFunction = true;
// We have to assume that this function gets
// called from non-uniform control flow
m_topLevelIsUniform = false;
}
@ -4670,57 +4691,85 @@ namespace dxvk {
void DxbcCompiler::emitControlFlow(const DxbcShaderInstruction& ins) {
switch (ins.op) {
case DxbcOpcode::If:
return this->emitControlFlowIf(ins);
this->emitUavBarrier(0, 0);
this->emitControlFlowIf(ins);
break;
case DxbcOpcode::Else:
return this->emitControlFlowElse(ins);
this->emitControlFlowElse(ins);
break;
case DxbcOpcode::EndIf:
return this->emitControlFlowEndIf(ins);
this->emitControlFlowEndIf(ins);
this->emitUavBarrier(0, 0);
break;
case DxbcOpcode::Switch:
return this->emitControlFlowSwitch(ins);
this->emitUavBarrier(0, 0);
this->emitControlFlowSwitch(ins);
break;
case DxbcOpcode::Case:
return this->emitControlFlowCase(ins);
this->emitControlFlowCase(ins);
break;
case DxbcOpcode::Default:
return this->emitControlFlowDefault(ins);
this->emitControlFlowDefault(ins);
break;
case DxbcOpcode::EndSwitch:
return this->emitControlFlowEndSwitch(ins);
this->emitControlFlowEndSwitch(ins);
this->emitUavBarrier(0, 0);
break;
case DxbcOpcode::Loop:
return this->emitControlFlowLoop(ins);
this->emitUavBarrier(0, 0);
this->emitControlFlowLoop(ins);
break;
case DxbcOpcode::EndLoop:
return this->emitControlFlowEndLoop(ins);
this->emitControlFlowEndLoop(ins);
this->emitUavBarrier(0, 0);
break;
case DxbcOpcode::Break:
case DxbcOpcode::Continue:
return this->emitControlFlowBreak(ins);
this->emitControlFlowBreak(ins);
break;
case DxbcOpcode::Breakc:
case DxbcOpcode::Continuec:
return this->emitControlFlowBreakc(ins);
this->emitControlFlowBreakc(ins);
break;
case DxbcOpcode::Ret:
return this->emitControlFlowRet(ins);
this->emitControlFlowRet(ins);
break;
case DxbcOpcode::Retc:
return this->emitControlFlowRetc(ins);
this->emitUavBarrier(0, 0);
this->emitControlFlowRetc(ins);
break;
case DxbcOpcode::Discard:
return this->emitControlFlowDiscard(ins);
this->emitControlFlowDiscard(ins);
break;
case DxbcOpcode::Label:
return this->emitControlFlowLabel(ins);
this->emitControlFlowLabel(ins);
break;
case DxbcOpcode::Call:
return this->emitControlFlowCall(ins);
this->emitUavBarrier(0, 0);
this->emitControlFlowCall(ins);
this->emitUavBarrier(-1, -1);
break;
case DxbcOpcode::Callc:
return this->emitControlFlowCallc(ins);
this->emitUavBarrier(0, 0);
this->emitControlFlowCallc(ins);
this->emitUavBarrier(-1, -1);
break;
default:
Logger::warn(str::format(
@ -7872,6 +7921,53 @@ namespace dxvk {
}
}
void DxbcCompiler::emitUavBarrier(uint64_t readMask, uint64_t writeMask) {
if (!m_moduleInfo.options.forceComputeUavBarriers
|| m_programInfo.type() != DxbcProgramType::ComputeShader)
return;
// If both masks are 0, emit a barrier in case at least one read-write UAV
// has a pending unsynchronized access. Only consider read-after-write and
// write-after-read hazards, assume that back-to-back stores are safe and
// do not overlap in memory. Atomics are also completely ignored here.
uint64_t rdMask = m_uavRdMask;
uint64_t wrMask = m_uavWrMask;
bool insertBarrier = bool(rdMask & wrMask);
if (readMask || writeMask) {
rdMask &= m_uavWrMask;
wrMask &= m_uavRdMask;
}
for (auto uav : bit::BitMask(rdMask | wrMask)) {
constexpr VkAccessFlags rwAccess = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
insertBarrier |= (m_analysis->uavInfos[uav].accessFlags & rwAccess) == rwAccess;
}
// Need to be in uniform top-level control flow, or otherwise
// it is not safe to insert control barriers.
if (insertBarrier && m_controlFlowBlocks.empty() && m_topLevelIsUniform) {
m_module.opControlBarrier(
m_module.constu32(spv::ScopeWorkgroup),
m_module.constu32(m_hasGloballyCoherentUav ? spv::ScopeQueueFamily : spv::ScopeWorkgroup),
m_module.constu32(spv::MemorySemanticsWorkgroupMemoryMask
| spv::MemorySemanticsImageMemoryMask
| spv::MemorySemanticsUniformMemoryMask
| spv::MemorySemanticsAcquireReleaseMask
| spv::MemorySemanticsMakeAvailableMask
| spv::MemorySemanticsMakeVisibleMask));
m_uavWrMask = 0u;
m_uavRdMask = 0u;
}
// Mark pending accesses
m_uavWrMask |= writeMask;
m_uavRdMask |= readMask;
}
DxbcVectorType DxbcCompiler::getInputRegType(uint32_t regIdx) const {
switch (m_programInfo.type()) {

View File

@ -483,7 +483,12 @@ namespace dxvk {
// Control flow information. Stores labels for
// currently active if-else blocks and loops.
std::vector<DxbcCfgBlock> m_controlFlowBlocks;
bool m_topLevelIsUniform = true;
uint64_t m_uavRdMask = 0u;
uint64_t m_uavWrMask = 0u;
//////////////////////////////////////////////
// Function state tracking. Required in order
// to properly end functions in some cases.
@ -1258,6 +1263,10 @@ namespace dxvk {
bool ignoreInputSystemValue(
DxbcSystemValue sv) const;
void emitUavBarrier(
uint64_t readMask,
uint64_t writeMask);
///////////////////////////
// Type definition methods
uint32_t getScalarTypeId(

View File

@ -35,6 +35,7 @@ namespace dxvk {
invariantPosition = options.invariantPosition;
zeroInitWorkgroupMemory = options.zeroInitWorkgroupMemory;
forceVolatileTgsmAccess = options.forceVolatileTgsmAccess;
forceComputeUavBarriers = options.forceComputeUavBarriers;
disableMsaa = options.disableMsaa;
forceSampleRateShading = options.forceSampleRateShading;
enableSampleShadingInterlock = device->features().extFragmentShaderInterlock.fragmentShaderSampleInterlock;

View File

@ -39,6 +39,10 @@ namespace dxvk {
/// Insert memory barriers after TGSM stoes
bool forceVolatileTgsmAccess = false;
/// Try to detect hazards in UAV access and insert
/// barriers when we know control flow is uniform.
bool forceComputeUavBarriers = false;
/// Replace ld_ms with ld
bool disableMsaa = false;

View File

@ -458,6 +458,11 @@ namespace dxvk {
{ R"(\\FarCry(5|NewDawn)\.exe$)", {{
{ "d3d11.zeroInitWorkgroupMemory", "True" },
}} },
/* Watch Dogs 2 - ships broken compute shaders *
* with no barriers when they are needed */
{ R"(\\WatchDogs2\.exe$)", {{
{ "d3d11.forceComputeUavBarriers", "True" },
}} },
/**********************************************/
/* D3D9 GAMES */