mirror of
https://github.com/doitsujin/dxvk.git
synced 2025-03-15 07:29:17 +01:00
[dxbc] Add option to implicitly synchronize UAV accesses
This commit is contained in:
parent
c04410ca00
commit
396a4e0235
@ -2831,6 +2831,9 @@ namespace dxvk {
|
|||||||
const DxbcRegister& dstReg = ins.dst[0];
|
const DxbcRegister& dstReg = ins.dst[0];
|
||||||
const DxbcRegister& srcReg = isStructured ? ins.src[2] : ins.src[1];
|
const DxbcRegister& srcReg = isStructured ? ins.src[2] : ins.src[1];
|
||||||
|
|
||||||
|
if (dstReg.type == DxbcOperandType::UnorderedAccessView)
|
||||||
|
emitUavBarrier(uint64_t(1u) << srcReg.idx[0].offset, 0u);
|
||||||
|
|
||||||
// Retrieve common info about the buffer
|
// Retrieve common info about the buffer
|
||||||
const DxbcBufferInfo bufferInfo = getBufferInfo(srcReg);
|
const DxbcBufferInfo bufferInfo = getBufferInfo(srcReg);
|
||||||
|
|
||||||
@ -3049,6 +3052,9 @@ namespace dxvk {
|
|||||||
const DxbcRegister& dstReg = ins.dst[0];
|
const DxbcRegister& dstReg = ins.dst[0];
|
||||||
const DxbcRegister& srcReg = isStructured ? ins.src[2] : ins.src[1];
|
const DxbcRegister& srcReg = isStructured ? ins.src[2] : ins.src[1];
|
||||||
|
|
||||||
|
if (dstReg.type == DxbcOperandType::UnorderedAccessView)
|
||||||
|
emitUavBarrier(0u, uint64_t(1u) << dstReg.idx[0].offset);
|
||||||
|
|
||||||
DxbcRegisterValue value = emitRegisterLoad(srcReg, dstReg.mask);
|
DxbcRegisterValue value = emitRegisterLoad(srcReg, dstReg.mask);
|
||||||
value = emitRegisterBitcast(value, DxbcScalarType::Uint32);
|
value = emitRegisterBitcast(value, DxbcScalarType::Uint32);
|
||||||
|
|
||||||
@ -4158,6 +4164,8 @@ namespace dxvk {
|
|||||||
const uint32_t registerId = ins.src[1].idx[0].offset;
|
const uint32_t registerId = ins.src[1].idx[0].offset;
|
||||||
const DxbcUav uavInfo = m_uavs.at(registerId);
|
const DxbcUav uavInfo = m_uavs.at(registerId);
|
||||||
|
|
||||||
|
emitUavBarrier(uint64_t(1u) << registerId, 0u);
|
||||||
|
|
||||||
// Load texture coordinates
|
// Load texture coordinates
|
||||||
DxbcRegisterValue texCoord = emitLoadTexCoord(
|
DxbcRegisterValue texCoord = emitLoadTexCoord(
|
||||||
ins.src[0], uavInfo.imageInfo);
|
ins.src[0], uavInfo.imageInfo);
|
||||||
@ -4210,6 +4218,7 @@ namespace dxvk {
|
|||||||
// (src0) The texture or buffer coordinates
|
// (src0) The texture or buffer coordinates
|
||||||
// (src1) The value to store
|
// (src1) The value to store
|
||||||
const DxbcBufferInfo uavInfo = getBufferInfo(ins.dst[0]);
|
const DxbcBufferInfo uavInfo = getBufferInfo(ins.dst[0]);
|
||||||
|
emitUavBarrier(0u, uint64_t(1u) << ins.dst[0].idx[0].offset);
|
||||||
|
|
||||||
// Set image operands for coherent access if necessary
|
// Set image operands for coherent access if necessary
|
||||||
SpirvImageOperands imageOperands;
|
SpirvImageOperands imageOperands;
|
||||||
@ -4551,6 +4560,8 @@ namespace dxvk {
|
|||||||
// return can be used in place of break to terminate a case block
|
// return can be used in place of break to terminate a case block
|
||||||
if (m_controlFlowBlocks.back().type == DxbcCfgBlockType::Switch)
|
if (m_controlFlowBlocks.back().type == DxbcCfgBlockType::Switch)
|
||||||
m_controlFlowBlocks.back().b_switch.labelCase = labelId;
|
m_controlFlowBlocks.back().b_switch.labelCase = labelId;
|
||||||
|
|
||||||
|
m_topLevelIsUniform = false;
|
||||||
} else {
|
} else {
|
||||||
// Last instruction in the current function
|
// Last instruction in the current function
|
||||||
this->emitFunctionEnd();
|
this->emitFunctionEnd();
|
||||||
@ -4580,6 +4591,9 @@ namespace dxvk {
|
|||||||
m_module.opReturn();
|
m_module.opReturn();
|
||||||
|
|
||||||
m_module.opLabel(continueLabel);
|
m_module.opLabel(continueLabel);
|
||||||
|
|
||||||
|
// The return condition may be non-uniform
|
||||||
|
m_topLevelIsUniform = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -4606,6 +4620,9 @@ namespace dxvk {
|
|||||||
m_module.opLabel(cond.labelEnd);
|
m_module.opLabel(cond.labelEnd);
|
||||||
|
|
||||||
m_module.enableCapability(spv::CapabilityDemoteToHelperInvocation);
|
m_module.enableCapability(spv::CapabilityDemoteToHelperInvocation);
|
||||||
|
|
||||||
|
// Discard is just retc in a trenchcoat
|
||||||
|
m_topLevelIsUniform = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -4623,6 +4640,10 @@ namespace dxvk {
|
|||||||
m_module.setDebugName(functionId, str::format("label", functionNr).c_str());
|
m_module.setDebugName(functionId, str::format("label", functionNr).c_str());
|
||||||
|
|
||||||
m_insideFunction = true;
|
m_insideFunction = true;
|
||||||
|
|
||||||
|
// We have to assume that this function gets
|
||||||
|
// called from non-uniform control flow
|
||||||
|
m_topLevelIsUniform = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -4670,57 +4691,85 @@ namespace dxvk {
|
|||||||
void DxbcCompiler::emitControlFlow(const DxbcShaderInstruction& ins) {
|
void DxbcCompiler::emitControlFlow(const DxbcShaderInstruction& ins) {
|
||||||
switch (ins.op) {
|
switch (ins.op) {
|
||||||
case DxbcOpcode::If:
|
case DxbcOpcode::If:
|
||||||
return this->emitControlFlowIf(ins);
|
this->emitUavBarrier(0, 0);
|
||||||
|
this->emitControlFlowIf(ins);
|
||||||
|
break;
|
||||||
|
|
||||||
case DxbcOpcode::Else:
|
case DxbcOpcode::Else:
|
||||||
return this->emitControlFlowElse(ins);
|
this->emitControlFlowElse(ins);
|
||||||
|
break;
|
||||||
|
|
||||||
case DxbcOpcode::EndIf:
|
case DxbcOpcode::EndIf:
|
||||||
return this->emitControlFlowEndIf(ins);
|
this->emitControlFlowEndIf(ins);
|
||||||
|
this->emitUavBarrier(0, 0);
|
||||||
|
break;
|
||||||
|
|
||||||
case DxbcOpcode::Switch:
|
case DxbcOpcode::Switch:
|
||||||
return this->emitControlFlowSwitch(ins);
|
this->emitUavBarrier(0, 0);
|
||||||
|
this->emitControlFlowSwitch(ins);
|
||||||
|
break;
|
||||||
|
|
||||||
case DxbcOpcode::Case:
|
case DxbcOpcode::Case:
|
||||||
return this->emitControlFlowCase(ins);
|
this->emitControlFlowCase(ins);
|
||||||
|
break;
|
||||||
|
|
||||||
case DxbcOpcode::Default:
|
case DxbcOpcode::Default:
|
||||||
return this->emitControlFlowDefault(ins);
|
this->emitControlFlowDefault(ins);
|
||||||
|
break;
|
||||||
|
|
||||||
case DxbcOpcode::EndSwitch:
|
case DxbcOpcode::EndSwitch:
|
||||||
return this->emitControlFlowEndSwitch(ins);
|
this->emitControlFlowEndSwitch(ins);
|
||||||
|
this->emitUavBarrier(0, 0);
|
||||||
|
break;
|
||||||
|
|
||||||
case DxbcOpcode::Loop:
|
case DxbcOpcode::Loop:
|
||||||
return this->emitControlFlowLoop(ins);
|
this->emitUavBarrier(0, 0);
|
||||||
|
this->emitControlFlowLoop(ins);
|
||||||
|
break;
|
||||||
|
|
||||||
case DxbcOpcode::EndLoop:
|
case DxbcOpcode::EndLoop:
|
||||||
return this->emitControlFlowEndLoop(ins);
|
this->emitControlFlowEndLoop(ins);
|
||||||
|
this->emitUavBarrier(0, 0);
|
||||||
|
break;
|
||||||
|
|
||||||
case DxbcOpcode::Break:
|
case DxbcOpcode::Break:
|
||||||
case DxbcOpcode::Continue:
|
case DxbcOpcode::Continue:
|
||||||
return this->emitControlFlowBreak(ins);
|
this->emitControlFlowBreak(ins);
|
||||||
|
break;
|
||||||
|
|
||||||
case DxbcOpcode::Breakc:
|
case DxbcOpcode::Breakc:
|
||||||
case DxbcOpcode::Continuec:
|
case DxbcOpcode::Continuec:
|
||||||
return this->emitControlFlowBreakc(ins);
|
this->emitControlFlowBreakc(ins);
|
||||||
|
break;
|
||||||
|
|
||||||
case DxbcOpcode::Ret:
|
case DxbcOpcode::Ret:
|
||||||
return this->emitControlFlowRet(ins);
|
this->emitControlFlowRet(ins);
|
||||||
|
break;
|
||||||
|
|
||||||
case DxbcOpcode::Retc:
|
case DxbcOpcode::Retc:
|
||||||
return this->emitControlFlowRetc(ins);
|
this->emitUavBarrier(0, 0);
|
||||||
|
this->emitControlFlowRetc(ins);
|
||||||
|
break;
|
||||||
|
|
||||||
case DxbcOpcode::Discard:
|
case DxbcOpcode::Discard:
|
||||||
return this->emitControlFlowDiscard(ins);
|
this->emitControlFlowDiscard(ins);
|
||||||
|
break;
|
||||||
|
|
||||||
case DxbcOpcode::Label:
|
case DxbcOpcode::Label:
|
||||||
return this->emitControlFlowLabel(ins);
|
this->emitControlFlowLabel(ins);
|
||||||
|
break;
|
||||||
|
|
||||||
case DxbcOpcode::Call:
|
case DxbcOpcode::Call:
|
||||||
return this->emitControlFlowCall(ins);
|
this->emitUavBarrier(0, 0);
|
||||||
|
this->emitControlFlowCall(ins);
|
||||||
|
this->emitUavBarrier(-1, -1);
|
||||||
|
break;
|
||||||
|
|
||||||
case DxbcOpcode::Callc:
|
case DxbcOpcode::Callc:
|
||||||
return this->emitControlFlowCallc(ins);
|
this->emitUavBarrier(0, 0);
|
||||||
|
this->emitControlFlowCallc(ins);
|
||||||
|
this->emitUavBarrier(-1, -1);
|
||||||
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
Logger::warn(str::format(
|
Logger::warn(str::format(
|
||||||
@ -7873,6 +7922,53 @@ namespace dxvk {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void DxbcCompiler::emitUavBarrier(uint64_t readMask, uint64_t writeMask) {
|
||||||
|
if (!m_moduleInfo.options.forceComputeUavBarriers
|
||||||
|
|| m_programInfo.type() != DxbcProgramType::ComputeShader)
|
||||||
|
return;
|
||||||
|
|
||||||
|
// If both masks are 0, emit a barrier in case at least one read-write UAV
|
||||||
|
// has a pending unsynchronized access. Only consider read-after-write and
|
||||||
|
// write-after-read hazards, assume that back-to-back stores are safe and
|
||||||
|
// do not overlap in memory. Atomics are also completely ignored here.
|
||||||
|
uint64_t rdMask = m_uavRdMask;
|
||||||
|
uint64_t wrMask = m_uavWrMask;
|
||||||
|
|
||||||
|
bool insertBarrier = bool(rdMask & wrMask);
|
||||||
|
|
||||||
|
if (readMask || writeMask) {
|
||||||
|
rdMask &= m_uavWrMask;
|
||||||
|
wrMask &= m_uavRdMask;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (auto uav : bit::BitMask(rdMask | wrMask)) {
|
||||||
|
constexpr VkAccessFlags rwAccess = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
|
||||||
|
insertBarrier |= (m_analysis->uavInfos[uav].accessFlags & rwAccess) == rwAccess;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Need to be in uniform top-level control flow, or otherwise
|
||||||
|
// it is not safe to insert control barriers.
|
||||||
|
if (insertBarrier && m_controlFlowBlocks.empty() && m_topLevelIsUniform) {
|
||||||
|
m_module.opControlBarrier(
|
||||||
|
m_module.constu32(spv::ScopeWorkgroup),
|
||||||
|
m_module.constu32(m_hasGloballyCoherentUav ? spv::ScopeQueueFamily : spv::ScopeWorkgroup),
|
||||||
|
m_module.constu32(spv::MemorySemanticsWorkgroupMemoryMask
|
||||||
|
| spv::MemorySemanticsImageMemoryMask
|
||||||
|
| spv::MemorySemanticsUniformMemoryMask
|
||||||
|
| spv::MemorySemanticsAcquireReleaseMask
|
||||||
|
| spv::MemorySemanticsMakeAvailableMask
|
||||||
|
| spv::MemorySemanticsMakeVisibleMask));
|
||||||
|
|
||||||
|
m_uavWrMask = 0u;
|
||||||
|
m_uavRdMask = 0u;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Mark pending accesses
|
||||||
|
m_uavWrMask |= writeMask;
|
||||||
|
m_uavRdMask |= readMask;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
DxbcVectorType DxbcCompiler::getInputRegType(uint32_t regIdx) const {
|
DxbcVectorType DxbcCompiler::getInputRegType(uint32_t regIdx) const {
|
||||||
switch (m_programInfo.type()) {
|
switch (m_programInfo.type()) {
|
||||||
case DxbcProgramType::VertexShader: {
|
case DxbcProgramType::VertexShader: {
|
||||||
|
@ -484,6 +484,11 @@ namespace dxvk {
|
|||||||
// currently active if-else blocks and loops.
|
// currently active if-else blocks and loops.
|
||||||
std::vector<DxbcCfgBlock> m_controlFlowBlocks;
|
std::vector<DxbcCfgBlock> m_controlFlowBlocks;
|
||||||
|
|
||||||
|
bool m_topLevelIsUniform = true;
|
||||||
|
|
||||||
|
uint64_t m_uavRdMask = 0u;
|
||||||
|
uint64_t m_uavWrMask = 0u;
|
||||||
|
|
||||||
//////////////////////////////////////////////
|
//////////////////////////////////////////////
|
||||||
// Function state tracking. Required in order
|
// Function state tracking. Required in order
|
||||||
// to properly end functions in some cases.
|
// to properly end functions in some cases.
|
||||||
@ -1258,6 +1263,10 @@ namespace dxvk {
|
|||||||
bool ignoreInputSystemValue(
|
bool ignoreInputSystemValue(
|
||||||
DxbcSystemValue sv) const;
|
DxbcSystemValue sv) const;
|
||||||
|
|
||||||
|
void emitUavBarrier(
|
||||||
|
uint64_t readMask,
|
||||||
|
uint64_t writeMask);
|
||||||
|
|
||||||
///////////////////////////
|
///////////////////////////
|
||||||
// Type definition methods
|
// Type definition methods
|
||||||
uint32_t getScalarTypeId(
|
uint32_t getScalarTypeId(
|
||||||
|
@ -39,6 +39,10 @@ namespace dxvk {
|
|||||||
/// Insert memory barriers after TGSM stoes
|
/// Insert memory barriers after TGSM stoes
|
||||||
bool forceVolatileTgsmAccess = false;
|
bool forceVolatileTgsmAccess = false;
|
||||||
|
|
||||||
|
/// Try to detect hazards in UAV access and insert
|
||||||
|
/// barriers when we know control flow is uniform.
|
||||||
|
bool forceComputeUavBarriers = false;
|
||||||
|
|
||||||
/// Replace ld_ms with ld
|
/// Replace ld_ms with ld
|
||||||
bool disableMsaa = false;
|
bool disableMsaa = false;
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user