diff --git a/src/dxvk/dxvk_graphics.cpp b/src/dxvk/dxvk_graphics.cpp index fe653264..86633ba1 100644 --- a/src/dxvk/dxvk_graphics.cpp +++ b/src/dxvk/dxvk_graphics.cpp @@ -868,8 +868,8 @@ namespace dxvk { DxvkGraphicsPipeline::~DxvkGraphicsPipeline() { - for (const auto& instance : m_pipelines) - this->destroyPipeline(instance.fastHandle.load()); + for (const auto& instance : m_fastPipelines) + this->destroyPipeline(instance.second); for (const auto& instance : m_basePipelines) this->destroyPipeline(instance.second); @@ -962,7 +962,7 @@ namespace dxvk { || instance->isCompiling.exchange(VK_TRUE, std::memory_order_acquire)) return; - VkPipeline pipeline = this->createOptimizedPipeline(state, 0); + VkPipeline pipeline = this->getOptimizedPipeline(state, 0); instance->fastHandle.store(pipeline, std::memory_order_release); // Log pipeline state on error @@ -981,7 +981,7 @@ namespace dxvk { // Try to create an optimized pipeline from the cache // first, since this is expected to be the fastest path. if (m_device->canUsePipelineCacheControl()) { - fastHandle = this->createOptimizedPipeline(state, + fastHandle = this->getOptimizedPipeline(state, VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT); } @@ -991,7 +991,7 @@ namespace dxvk { baseHandle = this->getBasePipeline(state); } else { // Create optimized variant right away, no choice - fastHandle = this->createOptimizedPipeline(state, 0); + fastHandle = this->getOptimizedPipeline(state, 0); } // Log pipeline state if requested, or on failure @@ -1108,59 +1108,74 @@ namespace dxvk { return pipeline; } - - VkPipeline DxvkGraphicsPipeline::createOptimizedPipeline( + + VkPipeline DxvkGraphicsPipeline::getOptimizedPipeline( const DxvkGraphicsPipelineStateInfo& state, + VkPipelineCreateFlags flags) { + DxvkGraphicsPipelineFastInstanceKey key(m_device, + m_shaders, state, m_flags, m_specConstantMask); + + std::lock_guard lock(m_fastMutex); + + auto entry = m_fastPipelines.find(key); + if (entry != m_fastPipelines.end()) + return entry->second; + + // Keep pipeline locked to prevent multiple threads from compiling + // identical Vulkan pipelines. This should be rare, but has been + // buggy on some drivers in the past, so just don't allow it. + VkPipeline handle = createOptimizedPipeline(key, flags); + + if (handle) + m_fastPipelines.insert({ key, handle }); + + return handle; + } + + + VkPipeline DxvkGraphicsPipeline::createOptimizedPipeline( + const DxvkGraphicsPipelineFastInstanceKey& key, VkPipelineCreateFlags flags) const { auto vk = m_device->vkd(); - // Set up pipeline state - DxvkGraphicsPipelineShaderState shState(m_shaders, state); - DxvkGraphicsPipelineDynamicState dyState(m_device, state, m_flags); - DxvkGraphicsPipelineVertexInputState viState(m_device, state, m_shaders.vs.ptr()); - DxvkGraphicsPipelinePreRasterizationState prState(m_device, state, m_shaders.gs.ptr()); - DxvkGraphicsPipelineFragmentShaderState fsState(m_device, state); - DxvkGraphicsPipelineFragmentOutputState foState(m_device, state, m_shaders.fs.ptr()); - DxvkPipelineSpecConstantState scState(m_specConstantMask, state.sc); - // Build stage infos for all provided shaders DxvkShaderStageInfo stageInfo(m_device); if (flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT) { - stageInfo.addStage(VK_SHADER_STAGE_VERTEX_BIT, m_vsLibrary->getModuleIdentifier(), &scState.scInfo); + stageInfo.addStage(VK_SHADER_STAGE_VERTEX_BIT, m_vsLibrary->getModuleIdentifier(), &key.scState.scInfo); if (m_shaders.fs != nullptr) - stageInfo.addStage(VK_SHADER_STAGE_FRAGMENT_BIT, m_fsLibrary->getModuleIdentifier(), &scState.scInfo); + stageInfo.addStage(VK_SHADER_STAGE_FRAGMENT_BIT, m_fsLibrary->getModuleIdentifier(), &key.scState.scInfo); } else { - stageInfo.addStage(VK_SHADER_STAGE_VERTEX_BIT, getShaderCode(m_shaders.vs, shState.vsInfo), &scState.scInfo); + stageInfo.addStage(VK_SHADER_STAGE_VERTEX_BIT, getShaderCode(m_shaders.vs, key.shState.vsInfo), &key.scState.scInfo); if (m_shaders.tcs != nullptr) - stageInfo.addStage(VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT, getShaderCode(m_shaders.tcs, shState.tcsInfo), &scState.scInfo); + stageInfo.addStage(VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT, getShaderCode(m_shaders.tcs, key.shState.tcsInfo), &key.scState.scInfo); if (m_shaders.tes != nullptr) - stageInfo.addStage(VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT, getShaderCode(m_shaders.tes, shState.tesInfo), &scState.scInfo); + stageInfo.addStage(VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT, getShaderCode(m_shaders.tes, key.shState.tesInfo), &key.scState.scInfo); if (m_shaders.gs != nullptr) - stageInfo.addStage(VK_SHADER_STAGE_GEOMETRY_BIT, getShaderCode(m_shaders.gs, shState.gsInfo), &scState.scInfo); + stageInfo.addStage(VK_SHADER_STAGE_GEOMETRY_BIT, getShaderCode(m_shaders.gs, key.shState.gsInfo), &key.scState.scInfo); if (m_shaders.fs != nullptr) - stageInfo.addStage(VK_SHADER_STAGE_FRAGMENT_BIT, getShaderCode(m_shaders.fs, shState.fsInfo), &scState.scInfo); + stageInfo.addStage(VK_SHADER_STAGE_FRAGMENT_BIT, getShaderCode(m_shaders.fs, key.shState.fsInfo), &key.scState.scInfo); } - VkGraphicsPipelineCreateInfo info = { VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, &foState.rtInfo }; + VkGraphicsPipelineCreateInfo info = { VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, &key.foState.rtInfo }; info.flags = flags; info.stageCount = stageInfo.getStageCount(); info.pStages = stageInfo.getStageInfos(); - info.pVertexInputState = &viState.viInfo; - info.pInputAssemblyState = &viState.iaInfo; - info.pTessellationState = &prState.tsInfo; - info.pViewportState = &prState.vpInfo; - info.pRasterizationState = &prState.rsInfo; - info.pMultisampleState = &foState.msInfo; - info.pDepthStencilState = &fsState.dsInfo; - info.pColorBlendState = &foState.cbInfo; - info.pDynamicState = &dyState.dyInfo; + info.pVertexInputState = &key.viState.viInfo; + info.pInputAssemblyState = &key.viState.iaInfo; + info.pTessellationState = &key.prState.tsInfo; + info.pViewportState = &key.prState.vpInfo; + info.pRasterizationState = &key.prState.rsInfo; + info.pMultisampleState = &key.foState.msInfo; + info.pDepthStencilState = &key.fsState.dsInfo; + info.pColorBlendState = &key.foState.cbInfo; + info.pDynamicState = &key.dyState.dyInfo; info.layout = m_bindings->getPipelineLayout(false); info.basePipelineIndex = -1; - if (!prState.tsInfo.patchControlPoints) + if (!key.prState.tsInfo.patchControlPoints) info.pTessellationState = nullptr; VkPipeline pipeline = VK_NULL_HANDLE; diff --git a/src/dxvk/dxvk_graphics.h b/src/dxvk/dxvk_graphics.h index 31a13049..a0c0bfba 100644 --- a/src/dxvk/dxvk_graphics.h +++ b/src/dxvk/dxvk_graphics.h @@ -385,6 +385,61 @@ namespace dxvk { }; + /** + * \brief Fast instance key + * + * Stores pipeline state used to compile an + * optimized pipeline. + */ + struct DxvkGraphicsPipelineFastInstanceKey { + DxvkGraphicsPipelineFastInstanceKey() { } + + DxvkGraphicsPipelineFastInstanceKey( + DxvkDevice* device, + const DxvkGraphicsPipelineShaders& shaders, + const DxvkGraphicsPipelineStateInfo& state, + DxvkGraphicsPipelineFlags flags, + uint32_t specConstantMask) + : shState(shaders, state), + dyState(device, state, flags), + viState(device, state, shaders.vs.ptr()), + prState(device, state, shaders.gs.ptr()), + fsState(device, state), + foState(device, state, shaders.fs.ptr()), + scState(specConstantMask, state.sc) { } + + DxvkGraphicsPipelineShaderState shState; + DxvkGraphicsPipelineDynamicState dyState; + DxvkGraphicsPipelineVertexInputState viState; + DxvkGraphicsPipelinePreRasterizationState prState; + DxvkGraphicsPipelineFragmentShaderState fsState; + DxvkGraphicsPipelineFragmentOutputState foState; + DxvkPipelineSpecConstantState scState; + + bool eq(const DxvkGraphicsPipelineFastInstanceKey& other) const { + return shState.eq(other.shState) + && dyState.eq(other.dyState) + && viState.eq(other.viState) + && prState.eq(other.prState) + && fsState.eq(other.fsState) + && foState.eq(other.foState) + && scState.eq(other.scState); + } + + size_t hash() const { + DxvkHashState hash; + hash.add(shState.hash()); + hash.add(dyState.hash()); + hash.add(viState.hash()); + hash.add(prState.hash()); + hash.add(fsState.hash()); + hash.add(foState.hash()); + hash.add(scState.hash()); + return hash; + } + }; + + /** * \brief Graphics pipeline * @@ -500,7 +555,6 @@ namespace dxvk { uint32_t m_specConstantMask = 0; - // List of pipeline instances, shared between threads alignas(CACHE_LINE_SIZE) dxvk::mutex m_mutex; sync::List m_pipelines; @@ -509,6 +563,12 @@ namespace dxvk { DxvkGraphicsPipelineBaseInstanceKey, VkPipeline, DxvkHash, DxvkEq> m_basePipelines; + alignas(CACHE_LINE_SIZE) + dxvk::mutex m_fastMutex; + std::unordered_map< + DxvkGraphicsPipelineFastInstanceKey, + VkPipeline, DxvkHash, DxvkEq> m_fastPipelines; + DxvkGraphicsPipelineInstance* createInstance( const DxvkGraphicsPipelineStateInfo& state, bool doCreateBasePipeline); @@ -525,10 +585,14 @@ namespace dxvk { VkPipeline createBasePipeline( const DxvkGraphicsPipelineBaseInstanceKey& key) const; - VkPipeline createOptimizedPipeline( + VkPipeline getOptimizedPipeline( const DxvkGraphicsPipelineStateInfo& state, + VkPipelineCreateFlags flags); + + VkPipeline createOptimizedPipeline( + const DxvkGraphicsPipelineFastInstanceKey& key, VkPipelineCreateFlags flags) const; - + void destroyPipeline( VkPipeline pipeline) const;