1
0
mirror of https://github.com/doitsujin/dxvk.git synced 2025-03-14 22:29:15 +01:00

[dxbc] Add option to implicitly synchronize UAV accesses

This commit is contained in:
Philip Rebohle 2025-02-26 22:26:57 +01:00 committed by Philip Rebohle
parent c04410ca00
commit 396a4e0235
3 changed files with 131 additions and 22 deletions

View File

@ -2831,6 +2831,9 @@ namespace dxvk {
const DxbcRegister& dstReg = ins.dst[0]; const DxbcRegister& dstReg = ins.dst[0];
const DxbcRegister& srcReg = isStructured ? ins.src[2] : ins.src[1]; const DxbcRegister& srcReg = isStructured ? ins.src[2] : ins.src[1];
// The read mask below is derived from srcReg's register index, and the
// buffer info for this access is also looked up via srcReg, so the UAV
// check has to test srcReg as well. Testing dstReg here would key the
// barrier to the wrong operand and leave UAV reads untracked.
if (srcReg.type == DxbcOperandType::UnorderedAccessView)
  emitUavBarrier(uint64_t(1u) << srcReg.idx[0].offset, 0u);
// Retrieve common info about the buffer // Retrieve common info about the buffer
const DxbcBufferInfo bufferInfo = getBufferInfo(srcReg); const DxbcBufferInfo bufferInfo = getBufferInfo(srcReg);
@ -3049,6 +3052,9 @@ namespace dxvk {
const DxbcRegister& dstReg = ins.dst[0]; const DxbcRegister& dstReg = ins.dst[0];
const DxbcRegister& srcReg = isStructured ? ins.src[2] : ins.src[1]; const DxbcRegister& srcReg = isStructured ? ins.src[2] : ins.src[1];
if (dstReg.type == DxbcOperandType::UnorderedAccessView)
emitUavBarrier(0u, uint64_t(1u) << dstReg.idx[0].offset);
DxbcRegisterValue value = emitRegisterLoad(srcReg, dstReg.mask); DxbcRegisterValue value = emitRegisterLoad(srcReg, dstReg.mask);
value = emitRegisterBitcast(value, DxbcScalarType::Uint32); value = emitRegisterBitcast(value, DxbcScalarType::Uint32);
@ -4157,7 +4163,9 @@ namespace dxvk {
// (src1) The UAV to load from // (src1) The UAV to load from
const uint32_t registerId = ins.src[1].idx[0].offset; const uint32_t registerId = ins.src[1].idx[0].offset;
const DxbcUav uavInfo = m_uavs.at(registerId); const DxbcUav uavInfo = m_uavs.at(registerId);
emitUavBarrier(uint64_t(1u) << registerId, 0u);
// Load texture coordinates // Load texture coordinates
DxbcRegisterValue texCoord = emitLoadTexCoord( DxbcRegisterValue texCoord = emitLoadTexCoord(
ins.src[0], uavInfo.imageInfo); ins.src[0], uavInfo.imageInfo);
@ -4210,6 +4218,7 @@ namespace dxvk {
// (src0) The texture or buffer coordinates // (src0) The texture or buffer coordinates
// (src1) The value to store // (src1) The value to store
const DxbcBufferInfo uavInfo = getBufferInfo(ins.dst[0]); const DxbcBufferInfo uavInfo = getBufferInfo(ins.dst[0]);
emitUavBarrier(0u, uint64_t(1u) << ins.dst[0].idx[0].offset);
// Set image operands for coherent access if necessary // Set image operands for coherent access if necessary
SpirvImageOperands imageOperands; SpirvImageOperands imageOperands;
@ -4551,6 +4560,8 @@ namespace dxvk {
// return can be used in place of break to terminate a case block // return can be used in place of break to terminate a case block
if (m_controlFlowBlocks.back().type == DxbcCfgBlockType::Switch) if (m_controlFlowBlocks.back().type == DxbcCfgBlockType::Switch)
m_controlFlowBlocks.back().b_switch.labelCase = labelId; m_controlFlowBlocks.back().b_switch.labelCase = labelId;
m_topLevelIsUniform = false;
} else { } else {
// Last instruction in the current function // Last instruction in the current function
this->emitFunctionEnd(); this->emitFunctionEnd();
@ -4580,6 +4591,9 @@ namespace dxvk {
m_module.opReturn(); m_module.opReturn();
m_module.opLabel(continueLabel); m_module.opLabel(continueLabel);
// The return condition may be non-uniform
m_topLevelIsUniform = false;
} }
@ -4606,8 +4620,11 @@ namespace dxvk {
m_module.opLabel(cond.labelEnd); m_module.opLabel(cond.labelEnd);
m_module.enableCapability(spv::CapabilityDemoteToHelperInvocation); m_module.enableCapability(spv::CapabilityDemoteToHelperInvocation);
// Discard is just retc in a trenchcoat
m_topLevelIsUniform = false;
} }
void DxbcCompiler::emitControlFlowLabel(const DxbcShaderInstruction& ins) { void DxbcCompiler::emitControlFlowLabel(const DxbcShaderInstruction& ins) {
uint32_t functionNr = ins.dst[0].idx[0].offset; uint32_t functionNr = ins.dst[0].idx[0].offset;
@ -4623,6 +4640,10 @@ namespace dxvk {
m_module.setDebugName(functionId, str::format("label", functionNr).c_str()); m_module.setDebugName(functionId, str::format("label", functionNr).c_str());
m_insideFunction = true; m_insideFunction = true;
// We have to assume that this function gets
// called from non-uniform control flow
m_topLevelIsUniform = false;
} }
@ -4670,57 +4691,85 @@ namespace dxvk {
void DxbcCompiler::emitControlFlow(const DxbcShaderInstruction& ins) { void DxbcCompiler::emitControlFlow(const DxbcShaderInstruction& ins) {
switch (ins.op) { switch (ins.op) {
case DxbcOpcode::If: case DxbcOpcode::If:
return this->emitControlFlowIf(ins); this->emitUavBarrier(0, 0);
this->emitControlFlowIf(ins);
break;
case DxbcOpcode::Else: case DxbcOpcode::Else:
return this->emitControlFlowElse(ins); this->emitControlFlowElse(ins);
break;
case DxbcOpcode::EndIf: case DxbcOpcode::EndIf:
return this->emitControlFlowEndIf(ins); this->emitControlFlowEndIf(ins);
this->emitUavBarrier(0, 0);
break;
case DxbcOpcode::Switch: case DxbcOpcode::Switch:
return this->emitControlFlowSwitch(ins); this->emitUavBarrier(0, 0);
this->emitControlFlowSwitch(ins);
break;
case DxbcOpcode::Case: case DxbcOpcode::Case:
return this->emitControlFlowCase(ins); this->emitControlFlowCase(ins);
break;
case DxbcOpcode::Default: case DxbcOpcode::Default:
return this->emitControlFlowDefault(ins); this->emitControlFlowDefault(ins);
break;
case DxbcOpcode::EndSwitch: case DxbcOpcode::EndSwitch:
return this->emitControlFlowEndSwitch(ins); this->emitControlFlowEndSwitch(ins);
this->emitUavBarrier(0, 0);
break;
case DxbcOpcode::Loop: case DxbcOpcode::Loop:
return this->emitControlFlowLoop(ins); this->emitUavBarrier(0, 0);
this->emitControlFlowLoop(ins);
break;
case DxbcOpcode::EndLoop: case DxbcOpcode::EndLoop:
return this->emitControlFlowEndLoop(ins); this->emitControlFlowEndLoop(ins);
this->emitUavBarrier(0, 0);
break;
case DxbcOpcode::Break: case DxbcOpcode::Break:
case DxbcOpcode::Continue: case DxbcOpcode::Continue:
return this->emitControlFlowBreak(ins); this->emitControlFlowBreak(ins);
break;
case DxbcOpcode::Breakc: case DxbcOpcode::Breakc:
case DxbcOpcode::Continuec: case DxbcOpcode::Continuec:
return this->emitControlFlowBreakc(ins); this->emitControlFlowBreakc(ins);
break;
case DxbcOpcode::Ret: case DxbcOpcode::Ret:
return this->emitControlFlowRet(ins); this->emitControlFlowRet(ins);
break;
case DxbcOpcode::Retc: case DxbcOpcode::Retc:
return this->emitControlFlowRetc(ins); this->emitUavBarrier(0, 0);
this->emitControlFlowRetc(ins);
break;
case DxbcOpcode::Discard: case DxbcOpcode::Discard:
return this->emitControlFlowDiscard(ins); this->emitControlFlowDiscard(ins);
break;
case DxbcOpcode::Label: case DxbcOpcode::Label:
return this->emitControlFlowLabel(ins); this->emitControlFlowLabel(ins);
break;
case DxbcOpcode::Call: case DxbcOpcode::Call:
return this->emitControlFlowCall(ins); this->emitUavBarrier(0, 0);
this->emitControlFlowCall(ins);
this->emitUavBarrier(-1, -1);
break;
case DxbcOpcode::Callc: case DxbcOpcode::Callc:
return this->emitControlFlowCallc(ins); this->emitUavBarrier(0, 0);
this->emitControlFlowCallc(ins);
this->emitUavBarrier(-1, -1);
break;
default: default:
Logger::warn(str::format( Logger::warn(str::format(
@ -7872,6 +7921,53 @@ namespace dxvk {
} }
} }
// Tracks pending UAV accesses and emits a control barrier when a hazard
// against a read-write UAV is detected.
//
// readMask:  bit mask of UAV register indices that are about to be read
// writeMask: bit mask of UAV register indices that are about to be written
//
// Callers pass (0, 0) around control flow constructs to flush any pending
// access, and a single-bit mask for an individual UAV load or store.
// Does nothing unless the forceComputeUavBarriers option is enabled and
// the program being compiled is a compute shader.
void DxbcCompiler::emitUavBarrier(uint64_t readMask, uint64_t writeMask) {
  if (!m_moduleInfo.options.forceComputeUavBarriers
   || m_programInfo.type() != DxbcProgramType::ComputeShader)
    return;

  // If both masks are 0, emit a barrier in case at least one read-write UAV
  // has a pending unsynchronized access. Only consider read-after-write and
  // write-after-read hazards, assume that back-to-back stores are safe and
  // do not overlap in memory. Atomics are also completely ignored here.
  uint64_t rdMask = m_uavRdMask;
  uint64_t wrMask = m_uavWrMask;

  // A UAV that already has both a pending read and a pending write
  // always requires synchronization before anything else touches it.
  bool insertBarrier = bool(rdMask & wrMask);

  if (readMask || writeMask) {
    // Narrow the pending sets down to the UAVs that the incoming access
    // actually conflicts with: pending reads that are about to be
    // overwritten (write-after-read), and pending writes that are about
    // to be read back (read-after-write). The incoming masks must be used
    // here; intersecting the two pending masks with each other would
    // ignore the new access entirely and miss both hazard types.
    rdMask &= writeMask;
    wrMask &= readMask;
  }

  // Only UAVs that the shader both reads and writes can produce a hazard
  for (auto uav : bit::BitMask(rdMask | wrMask)) {
    constexpr VkAccessFlags rwAccess = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
    insertBarrier |= (m_analysis->uavInfos[uav].accessFlags & rwAccess) == rwAccess;
  }

  // Need to be in uniform top-level control flow, or otherwise
  // it is not safe to insert control barriers.
  if (insertBarrier && m_controlFlowBlocks.empty() && m_topLevelIsUniform) {
    // Execution scope is always the workgroup; the memory scope is
    // widened to the queue family if m_hasGloballyCoherentUav is set.
    m_module.opControlBarrier(
      m_module.constu32(spv::ScopeWorkgroup),
      m_module.constu32(m_hasGloballyCoherentUav ? spv::ScopeQueueFamily : spv::ScopeWorkgroup),
      m_module.constu32(spv::MemorySemanticsWorkgroupMemoryMask
                      | spv::MemorySemanticsImageMemoryMask
                      | spv::MemorySemanticsUniformMemoryMask
                      | spv::MemorySemanticsAcquireReleaseMask
                      | spv::MemorySemanticsMakeAvailableMask
                      | spv::MemorySemanticsMakeVisibleMask));

    // Everything issued so far is synchronized now
    m_uavWrMask = 0u;
    m_uavRdMask = 0u;
  }

  // Mark pending accesses
  m_uavWrMask |= writeMask;
  m_uavRdMask |= readMask;
}
DxbcVectorType DxbcCompiler::getInputRegType(uint32_t regIdx) const { DxbcVectorType DxbcCompiler::getInputRegType(uint32_t regIdx) const {
switch (m_programInfo.type()) { switch (m_programInfo.type()) {

View File

@ -483,7 +483,12 @@ namespace dxvk {
// Control flow information. Stores labels for // Control flow information. Stores labels for
// currently active if-else blocks and loops. // currently active if-else blocks and loops.
std::vector<DxbcCfgBlock> m_controlFlowBlocks; std::vector<DxbcCfgBlock> m_controlFlowBlocks;
bool m_topLevelIsUniform = true;
uint64_t m_uavRdMask = 0u;
uint64_t m_uavWrMask = 0u;
////////////////////////////////////////////// //////////////////////////////////////////////
// Function state tracking. Required in order // Function state tracking. Required in order
// to properly end functions in some cases. // to properly end functions in some cases.
@ -1258,6 +1263,10 @@ namespace dxvk {
bool ignoreInputSystemValue( bool ignoreInputSystemValue(
DxbcSystemValue sv) const; DxbcSystemValue sv) const;
void emitUavBarrier(
uint64_t readMask,
uint64_t writeMask);
/////////////////////////// ///////////////////////////
// Type definition methods // Type definition methods
uint32_t getScalarTypeId( uint32_t getScalarTypeId(

View File

@ -39,6 +39,10 @@ namespace dxvk {
/// Insert memory barriers after TGSM stores /// Insert memory barriers after TGSM stores
bool forceVolatileTgsmAccess = false; bool forceVolatileTgsmAccess = false;
/// Try to detect hazards in UAV access and insert
/// barriers when we know control flow is uniform.
bool forceComputeUavBarriers = false;
/// Replace ld_ms with ld /// Replace ld_ms with ld
bool disableMsaa = false; bool disableMsaa = false;