1
0
mirror of https://github.com/doitsujin/dxvk.git synced 2025-01-07 16:46:17 +01:00

[dxvk] Use normalized state to look up optimized graphics pipelines

We can't normalize all state at the time it is bound, e.g. disabling
unused blend state before render targets are known. By looking up
pipelines using normalized state we ensure that our VkPipelines are
actually unique.

Based on my testing this only affects a small number of pipelines in
most games (anywhere from 0 to a couple dozen), with some outliers
like The Witcher 1, where a third of the pipelines are redundant due
to stale render state.
This commit is contained in:
Philip Rebohle 2022-07-31 00:59:08 +02:00
parent db786cda6c
commit 00eaec1619
No known key found for this signature in database
GPG Key ID: C8CC613427A31C99
2 changed files with 116 additions and 37 deletions

View File

@ -868,8 +868,8 @@ namespace dxvk {
DxvkGraphicsPipeline::~DxvkGraphicsPipeline() { DxvkGraphicsPipeline::~DxvkGraphicsPipeline() {
for (const auto& instance : m_pipelines) for (const auto& instance : m_fastPipelines)
this->destroyPipeline(instance.fastHandle.load()); this->destroyPipeline(instance.second);
for (const auto& instance : m_basePipelines) for (const auto& instance : m_basePipelines)
this->destroyPipeline(instance.second); this->destroyPipeline(instance.second);
@ -962,7 +962,7 @@ namespace dxvk {
|| instance->isCompiling.exchange(VK_TRUE, std::memory_order_acquire)) || instance->isCompiling.exchange(VK_TRUE, std::memory_order_acquire))
return; return;
VkPipeline pipeline = this->createOptimizedPipeline(state, 0); VkPipeline pipeline = this->getOptimizedPipeline(state, 0);
instance->fastHandle.store(pipeline, std::memory_order_release); instance->fastHandle.store(pipeline, std::memory_order_release);
// Log pipeline state on error // Log pipeline state on error
@ -981,7 +981,7 @@ namespace dxvk {
// Try to create an optimized pipeline from the cache // Try to create an optimized pipeline from the cache
// first, since this is expected to be the fastest path. // first, since this is expected to be the fastest path.
if (m_device->canUsePipelineCacheControl()) { if (m_device->canUsePipelineCacheControl()) {
fastHandle = this->createOptimizedPipeline(state, fastHandle = this->getOptimizedPipeline(state,
VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT); VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT);
} }
@ -991,7 +991,7 @@ namespace dxvk {
baseHandle = this->getBasePipeline(state); baseHandle = this->getBasePipeline(state);
} else { } else {
// Create optimized variant right away, no choice // Create optimized variant right away, no choice
fastHandle = this->createOptimizedPipeline(state, 0); fastHandle = this->getOptimizedPipeline(state, 0);
} }
// Log pipeline state if requested, or on failure // Log pipeline state if requested, or on failure
@ -1109,58 +1109,73 @@ namespace dxvk {
} }
// Returns the optimized pipeline for the given state, creating it
// if no matching entry exists in m_fastPipelines yet. The lookup
// uses a DxvkGraphicsPipelineFastInstanceKey built from the state
// rather than the raw state vector itself.
//
// \param state Pipeline state vector to build the key from
// \param flags Creation flags, forwarded to createOptimizedPipeline
// \returns The cached or newly created pipeline handle. A null
//          handle from createOptimizedPipeline is returned as-is
//          and is not added to the lookup table.
VkPipeline DxvkGraphicsPipeline::createOptimizedPipeline( VkPipeline DxvkGraphicsPipeline::getOptimizedPipeline(
const DxvkGraphicsPipelineStateInfo& state, const DxvkGraphicsPipelineStateInfo& state,
VkPipelineCreateFlags flags) {
// Derive the lookup key from the device, shaders, state vector,
// pipeline flags and spec constant mask
DxvkGraphicsPipelineFastInstanceKey key(m_device,
m_shaders, state, m_flags, m_specConstantMask);
// m_fastMutex stays locked until the function returns, which
// covers both the lookup and the pipeline creation below
std::lock_guard lock(m_fastMutex);
auto entry = m_fastPipelines.find(key);
if (entry != m_fastPipelines.end())
return entry->second;
// Keep pipeline locked to prevent multiple threads from compiling
// identical Vulkan pipelines. This should be rare, but has been
// buggy on some drivers in the past, so just don't allow it.
VkPipeline handle = createOptimizedPipeline(key, flags);
// Only remember non-null handles, so that a later call with
// the same key will attempt creation again
if (handle)
m_fastPipelines.insert({ key, handle });
return handle;
}
VkPipeline DxvkGraphicsPipeline::createOptimizedPipeline(
const DxvkGraphicsPipelineFastInstanceKey& key,
VkPipelineCreateFlags flags) const { VkPipelineCreateFlags flags) const {
auto vk = m_device->vkd(); auto vk = m_device->vkd();
// Set up pipeline state
DxvkGraphicsPipelineShaderState shState(m_shaders, state);
DxvkGraphicsPipelineDynamicState dyState(m_device, state, m_flags);
DxvkGraphicsPipelineVertexInputState viState(m_device, state, m_shaders.vs.ptr());
DxvkGraphicsPipelinePreRasterizationState prState(m_device, state, m_shaders.gs.ptr());
DxvkGraphicsPipelineFragmentShaderState fsState(m_device, state);
DxvkGraphicsPipelineFragmentOutputState foState(m_device, state, m_shaders.fs.ptr());
DxvkPipelineSpecConstantState scState(m_specConstantMask, state.sc);
// Build stage infos for all provided shaders // Build stage infos for all provided shaders
DxvkShaderStageInfo stageInfo(m_device); DxvkShaderStageInfo stageInfo(m_device);
if (flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT) { if (flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT) {
stageInfo.addStage(VK_SHADER_STAGE_VERTEX_BIT, m_vsLibrary->getModuleIdentifier(), &scState.scInfo); stageInfo.addStage(VK_SHADER_STAGE_VERTEX_BIT, m_vsLibrary->getModuleIdentifier(), &key.scState.scInfo);
if (m_shaders.fs != nullptr) if (m_shaders.fs != nullptr)
stageInfo.addStage(VK_SHADER_STAGE_FRAGMENT_BIT, m_fsLibrary->getModuleIdentifier(), &scState.scInfo); stageInfo.addStage(VK_SHADER_STAGE_FRAGMENT_BIT, m_fsLibrary->getModuleIdentifier(), &key.scState.scInfo);
} else { } else {
stageInfo.addStage(VK_SHADER_STAGE_VERTEX_BIT, getShaderCode(m_shaders.vs, shState.vsInfo), &scState.scInfo); stageInfo.addStage(VK_SHADER_STAGE_VERTEX_BIT, getShaderCode(m_shaders.vs, key.shState.vsInfo), &key.scState.scInfo);
if (m_shaders.tcs != nullptr) if (m_shaders.tcs != nullptr)
stageInfo.addStage(VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT, getShaderCode(m_shaders.tcs, shState.tcsInfo), &scState.scInfo); stageInfo.addStage(VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT, getShaderCode(m_shaders.tcs, key.shState.tcsInfo), &key.scState.scInfo);
if (m_shaders.tes != nullptr) if (m_shaders.tes != nullptr)
stageInfo.addStage(VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT, getShaderCode(m_shaders.tes, shState.tesInfo), &scState.scInfo); stageInfo.addStage(VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT, getShaderCode(m_shaders.tes, key.shState.tesInfo), &key.scState.scInfo);
if (m_shaders.gs != nullptr) if (m_shaders.gs != nullptr)
stageInfo.addStage(VK_SHADER_STAGE_GEOMETRY_BIT, getShaderCode(m_shaders.gs, shState.gsInfo), &scState.scInfo); stageInfo.addStage(VK_SHADER_STAGE_GEOMETRY_BIT, getShaderCode(m_shaders.gs, key.shState.gsInfo), &key.scState.scInfo);
if (m_shaders.fs != nullptr) if (m_shaders.fs != nullptr)
stageInfo.addStage(VK_SHADER_STAGE_FRAGMENT_BIT, getShaderCode(m_shaders.fs, shState.fsInfo), &scState.scInfo); stageInfo.addStage(VK_SHADER_STAGE_FRAGMENT_BIT, getShaderCode(m_shaders.fs, key.shState.fsInfo), &key.scState.scInfo);
} }
VkGraphicsPipelineCreateInfo info = { VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, &foState.rtInfo }; VkGraphicsPipelineCreateInfo info = { VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, &key.foState.rtInfo };
info.flags = flags; info.flags = flags;
info.stageCount = stageInfo.getStageCount(); info.stageCount = stageInfo.getStageCount();
info.pStages = stageInfo.getStageInfos(); info.pStages = stageInfo.getStageInfos();
info.pVertexInputState = &viState.viInfo; info.pVertexInputState = &key.viState.viInfo;
info.pInputAssemblyState = &viState.iaInfo; info.pInputAssemblyState = &key.viState.iaInfo;
info.pTessellationState = &prState.tsInfo; info.pTessellationState = &key.prState.tsInfo;
info.pViewportState = &prState.vpInfo; info.pViewportState = &key.prState.vpInfo;
info.pRasterizationState = &prState.rsInfo; info.pRasterizationState = &key.prState.rsInfo;
info.pMultisampleState = &foState.msInfo; info.pMultisampleState = &key.foState.msInfo;
info.pDepthStencilState = &fsState.dsInfo; info.pDepthStencilState = &key.fsState.dsInfo;
info.pColorBlendState = &foState.cbInfo; info.pColorBlendState = &key.foState.cbInfo;
info.pDynamicState = &dyState.dyInfo; info.pDynamicState = &key.dyState.dyInfo;
info.layout = m_bindings->getPipelineLayout(false); info.layout = m_bindings->getPipelineLayout(false);
info.basePipelineIndex = -1; info.basePipelineIndex = -1;
if (!prState.tsInfo.patchControlPoints) if (!key.prState.tsInfo.patchControlPoints)
info.pTessellationState = nullptr; info.pTessellationState = nullptr;
VkPipeline pipeline = VK_NULL_HANDLE; VkPipeline pipeline = VK_NULL_HANDLE;

View File

@ -385,6 +385,61 @@ namespace dxvk {
}; };
/**
* \brief Fast instance key
*
* Stores pipeline state used to compile an
* optimized pipeline.
*/
struct DxvkGraphicsPipelineFastInstanceKey {
DxvkGraphicsPipelineFastInstanceKey() { }
DxvkGraphicsPipelineFastInstanceKey(
DxvkDevice* device,
const DxvkGraphicsPipelineShaders& shaders,
const DxvkGraphicsPipelineStateInfo& state,
DxvkGraphicsPipelineFlags flags,
uint32_t specConstantMask)
: shState(shaders, state),
dyState(device, state, flags),
viState(device, state, shaders.vs.ptr()),
prState(device, state, shaders.gs.ptr()),
fsState(device, state),
foState(device, state, shaders.fs.ptr()),
scState(specConstantMask, state.sc) { }
DxvkGraphicsPipelineShaderState shState;
DxvkGraphicsPipelineDynamicState dyState;
DxvkGraphicsPipelineVertexInputState viState;
DxvkGraphicsPipelinePreRasterizationState prState;
DxvkGraphicsPipelineFragmentShaderState fsState;
DxvkGraphicsPipelineFragmentOutputState foState;
DxvkPipelineSpecConstantState scState;
bool eq(const DxvkGraphicsPipelineFastInstanceKey& other) const {
return shState.eq(other.shState)
&& dyState.eq(other.dyState)
&& viState.eq(other.viState)
&& prState.eq(other.prState)
&& fsState.eq(other.fsState)
&& foState.eq(other.foState)
&& scState.eq(other.scState);
}
size_t hash() const {
DxvkHashState hash;
hash.add(shState.hash());
hash.add(dyState.hash());
hash.add(viState.hash());
hash.add(prState.hash());
hash.add(fsState.hash());
hash.add(foState.hash());
hash.add(scState.hash());
return hash;
}
};
/** /**
* \brief Graphics pipeline * \brief Graphics pipeline
* *
@ -500,7 +555,6 @@ namespace dxvk {
uint32_t m_specConstantMask = 0; uint32_t m_specConstantMask = 0;
// List of pipeline instances, shared between threads
alignas(CACHE_LINE_SIZE) alignas(CACHE_LINE_SIZE)
dxvk::mutex m_mutex; dxvk::mutex m_mutex;
sync::List<DxvkGraphicsPipelineInstance> m_pipelines; sync::List<DxvkGraphicsPipelineInstance> m_pipelines;
@ -509,6 +563,12 @@ namespace dxvk {
DxvkGraphicsPipelineBaseInstanceKey, DxvkGraphicsPipelineBaseInstanceKey,
VkPipeline, DxvkHash, DxvkEq> m_basePipelines; VkPipeline, DxvkHash, DxvkEq> m_basePipelines;
alignas(CACHE_LINE_SIZE)
dxvk::mutex m_fastMutex;
std::unordered_map<
DxvkGraphicsPipelineFastInstanceKey,
VkPipeline, DxvkHash, DxvkEq> m_fastPipelines;
DxvkGraphicsPipelineInstance* createInstance( DxvkGraphicsPipelineInstance* createInstance(
const DxvkGraphicsPipelineStateInfo& state, const DxvkGraphicsPipelineStateInfo& state,
bool doCreateBasePipeline); bool doCreateBasePipeline);
@ -525,8 +585,12 @@ namespace dxvk {
VkPipeline createBasePipeline( VkPipeline createBasePipeline(
const DxvkGraphicsPipelineBaseInstanceKey& key) const; const DxvkGraphicsPipelineBaseInstanceKey& key) const;
VkPipeline createOptimizedPipeline( VkPipeline getOptimizedPipeline(
const DxvkGraphicsPipelineStateInfo& state, const DxvkGraphicsPipelineStateInfo& state,
VkPipelineCreateFlags flags);
VkPipeline createOptimizedPipeline(
const DxvkGraphicsPipelineFastInstanceKey& key,
VkPipelineCreateFlags flags) const; VkPipelineCreateFlags flags) const;
void destroyPipeline( void destroyPipeline(