1
0
mirror of https://github.com/doitsujin/dxvk.git synced 2025-01-18 02:52:10 +01:00

[dxbc] Run HS fork/join phases in parallel

May reduce execution time of hull shaders on the GPU by running
the fork/join phases in parallel, as originally intended. Tested
on RADV 18.0.99 with LLVM 6.0.0.
This commit is contained in:
Philip Rebohle 2018-03-29 13:29:50 +02:00
parent 6e27f12e22
commit 2ab4710054
No known key found for this signature in database
GPG Key ID: C8CC613427A31C99

View File

@@ -5600,20 +5600,13 @@ namespace dxvk {
this->emitHsControlPointPhase(m_hs.cpPhase);
this->emitHsPhaseBarrier();
// Fork/join phases. We cannot run this in parallel
// because synchronizing per-patch outputs does not
// work. We don't need to synchronize after this.
this->emitHsInvocationBlockBegin(1);
// Fork-join phases (will run in parallel)
for (const auto& phase : m_hs.forkPhases)
this->emitHsForkJoinPhase(phase);
for (const auto& phase : m_hs.joinPhases)
this->emitHsForkJoinPhase(phase);
this->emitHsInvocationBlockEnd();
this->emitHsPhaseBarrier();
// Output setup phase
this->emitHsInvocationBlockBegin(1);
this->emitOutputSetup();
@@ -5679,13 +5672,15 @@ namespace dxvk {
// Emits the function call(s) for a single hull shader fork/join phase.
// NOTE(review): this text looks like a rendered diff whose +/- markers were
// lost, so the per-instance loop and the invocation-block section below are
// presumably the pre-change and post-change bodies rather than code meant to
// run back to back -- confirm against the real source file.
void DxbcCompiler::emitHsForkJoinPhase(
const DxbcCompilerHsForkJoinPhase& phase) {
// Serial form: one call to phase.functionId per instance, with
// constu32(i) (the instance index) as the single argument.
for (uint32_t i = 0; i < phase.instanceCount; i++) {
const uint32_t instanceId = m_module.constu32(i);
m_module.opFunctionCall(
m_module.defVoidType(),
phase.functionId, 1, &instanceId);
}
// Invocation-block form: a single call to phase.functionId placed between
// emitHsInvocationBlockBegin(phase.instanceCount) and
// emitHsInvocationBlockEnd(), with m_hs.builtinInvocationId as the single
// argument. Presumably this is what lets the instances run in parallel,
// one per invocation -- TODO confirm.
this->emitHsInvocationBlockBegin(phase.instanceCount);
m_module.opFunctionCall(
m_module.defVoidType(),
phase.functionId, 1,
&m_hs.builtinInvocationId);
this->emitHsInvocationBlockEnd();
// A phase barrier is emitted after the block is closed.
this->emitHsPhaseBarrier();
}