1
0
mirror of https://github.com/doitsujin/dxvk.git synced 2025-01-29 17:52:18 +01:00

[dxbc] Run HS fork/join phases in parallel

May reduce execution time of hull shaders on the GPU by running
the fork/join phases in parallel, as originally intended. Tested
on RADV 18.0.99 with LLVM 6.0.0.
This commit is contained in:
Philip Rebohle 2018-03-29 13:29:50 +02:00
parent 6e27f12e22
commit 2ab4710054
No known key found for this signature in database
GPG Key ID: C8CC613427A31C99

View File

@@ -5600,20 +5600,13 @@ namespace dxvk {
this->emitHsControlPointPhase(m_hs.cpPhase); this->emitHsControlPointPhase(m_hs.cpPhase);
this->emitHsPhaseBarrier(); this->emitHsPhaseBarrier();
// Fork/join phases. We cannot run this in parallel // Fork-join phases (will run in parallel)
// because synchronizing per-patch outputs does not
// work. We don't need to synchronize after this.
this->emitHsInvocationBlockBegin(1);
for (const auto& phase : m_hs.forkPhases) for (const auto& phase : m_hs.forkPhases)
this->emitHsForkJoinPhase(phase); this->emitHsForkJoinPhase(phase);
for (const auto& phase : m_hs.joinPhases) for (const auto& phase : m_hs.joinPhases)
this->emitHsForkJoinPhase(phase); this->emitHsForkJoinPhase(phase);
this->emitHsInvocationBlockEnd();
this->emitHsPhaseBarrier();
// Output setup phase // Output setup phase
this->emitHsInvocationBlockBegin(1); this->emitHsInvocationBlockBegin(1);
this->emitOutputSetup(); this->emitOutputSetup();
@@ -5679,13 +5672,15 @@ namespace dxvk {
void DxbcCompiler::emitHsForkJoinPhase( void DxbcCompiler::emitHsForkJoinPhase(
const DxbcCompilerHsForkJoinPhase& phase) { const DxbcCompilerHsForkJoinPhase& phase) {
for (uint32_t i = 0; i < phase.instanceCount; i++) { this->emitHsInvocationBlockBegin(phase.instanceCount);
const uint32_t instanceId = m_module.constu32(i);
m_module.opFunctionCall( m_module.opFunctionCall(
m_module.defVoidType(), m_module.defVoidType(),
phase.functionId, 1, &instanceId); phase.functionId, 1,
} &m_hs.builtinInvocationId);
this->emitHsInvocationBlockEnd();
this->emitHsPhaseBarrier();
} }