From d3f84688cccfbc4b36fdbaea202af054a126ccaf Mon Sep 17 00:00:00 2001 From: Philip Rebohle Date: Tue, 16 Jan 2018 15:00:19 +0100 Subject: [PATCH] [dxvk] Make use of VK_AMD_rasterization_order May slightly improve GPU performance in some scenarios. --- src/dxvk/dxvk_compute.cpp | 10 +++--- src/dxvk/dxvk_compute.h | 8 +++-- src/dxvk/dxvk_device.cpp | 2 +- src/dxvk/dxvk_graphics.cpp | 62 ++++++++++++++++++++++++++--------- src/dxvk/dxvk_graphics.h | 13 ++++++-- src/dxvk/dxvk_options.cpp | 2 +- src/dxvk/dxvk_pipemanager.cpp | 8 ++--- src/dxvk/dxvk_pipemanager.h | 5 ++- 8 files changed, 77 insertions(+), 33 deletions(-) diff --git a/src/dxvk/dxvk_compute.cpp b/src/dxvk/dxvk_compute.cpp index 03d2703fe..a86c1dd63 100644 --- a/src/dxvk/dxvk_compute.cpp +++ b/src/dxvk/dxvk_compute.cpp @@ -1,20 +1,22 @@ #include "dxvk_compute.h" +#include "dxvk_device.h" namespace dxvk { DxvkComputePipeline::DxvkComputePipeline( - const Rc& vkd, + const DxvkDevice* device, const Rc& cache, const Rc& cs) - : m_vkd(vkd), m_cache(cache) { + : m_device(device), m_vkd(device->vkd()), + m_cache(cache) { DxvkDescriptorSlotMapping slotMapping; cs->defineResourceSlots(slotMapping); - m_layout = new DxvkBindingLayout(vkd, + m_layout = new DxvkBindingLayout(m_vkd, slotMapping.bindingCount(), slotMapping.bindingInfos()); - m_cs = cs->createShaderModule(vkd, slotMapping); + m_cs = cs->createShaderModule(m_vkd, slotMapping); this->compilePipeline(); } diff --git a/src/dxvk/dxvk_compute.h b/src/dxvk/dxvk_compute.h index 89b1dd465..0a7cccfd6 100644 --- a/src/dxvk/dxvk_compute.h +++ b/src/dxvk/dxvk_compute.h @@ -7,6 +7,8 @@ namespace dxvk { + class DxvkDevice; + /** * \brief Compute pipeline * @@ -20,7 +22,7 @@ namespace dxvk { public: DxvkComputePipeline( - const Rc& vkd, + const DxvkDevice* device, const Rc& cache, const Rc& cs); ~DxvkComputePipeline(); @@ -47,7 +49,9 @@ namespace dxvk { private: - Rc m_vkd; + const DxvkDevice* const m_device; + const Rc m_vkd; + Rc m_cache; Rc m_layout; Rc m_cs; diff --git a/src/dxvk/dxvk_device.cpp b/src/dxvk/dxvk_device.cpp index bd672d1a6..eafb38d68 100644 --- a/src/dxvk/dxvk_device.cpp +++ b/src/dxvk/dxvk_device.cpp @@ -15,7 +15,7 @@ namespace dxvk { m_memory (new DxvkMemoryAllocator(adapter, vkd)), m_renderPassPool (new DxvkRenderPassPool (vkd)), m_pipelineCache (new DxvkPipelineCache (vkd)), - m_pipelineManager (new DxvkPipelineManager(vkd)), + m_pipelineManager (new DxvkPipelineManager(this)), m_submissionQueue (this) { m_options.adjustAppOptions(env::getExeName()); m_options.adjustDeviceOptions(m_adapter); diff --git a/src/dxvk/dxvk_graphics.cpp b/src/dxvk/dxvk_graphics.cpp index 321d1c5a6..7a7b243b6 100644 --- a/src/dxvk/dxvk_graphics.cpp +++ b/src/dxvk/dxvk_graphics.cpp @@ -1,5 +1,6 @@ #include +#include "dxvk_device.h" #include "dxvk_graphics.h" namespace dxvk { @@ -33,14 +34,15 @@ namespace dxvk { DxvkGraphicsPipeline::DxvkGraphicsPipeline( - const Rc& vkd, - const Rc& cache, - const Rc& vs, - const Rc& tcs, - const Rc& tes, - const Rc& gs, - const Rc& fs) - : m_vkd(vkd), m_cache(cache) { + const DxvkDevice* device, + const Rc& cache, + const Rc& vs, + const Rc& tcs, + const Rc& tes, + const Rc& gs, + const Rc& fs) + : m_device(device), m_vkd(device->vkd()), + m_cache(cache) { DxvkDescriptorSlotMapping slotMapping; if (vs != nullptr) vs ->defineResourceSlots(slotMapping); if (tcs != nullptr) tcs->defineResourceSlots(slotMapping); @@ -48,15 +50,15 @@ namespace dxvk { if (gs != nullptr) gs ->defineResourceSlots(slotMapping); if (fs != nullptr) fs ->defineResourceSlots(slotMapping); - m_layout = new DxvkBindingLayout(vkd, + m_layout = new DxvkBindingLayout(m_vkd, slotMapping.bindingCount(), slotMapping.bindingInfos()); - if (vs != nullptr) m_vs = vs ->createShaderModule(vkd, slotMapping); - if (tcs != nullptr) m_tcs = tcs->createShaderModule(vkd, slotMapping); - if (tes != nullptr) m_tes = tes->createShaderModule(vkd, slotMapping); - if (gs != nullptr) m_gs = gs ->createShaderModule(vkd, slotMapping); - if (fs != nullptr) m_fs = fs ->createShaderModule(vkd, slotMapping); + if (vs != nullptr) m_vs = vs ->createShaderModule(m_vkd, slotMapping); + if (tcs != nullptr) m_tcs = tcs->createShaderModule(m_vkd, slotMapping); + if (tes != nullptr) m_tes = tes->createShaderModule(m_vkd, slotMapping); + if (gs != nullptr) m_gs = gs ->createShaderModule(m_vkd, slotMapping); + if (fs != nullptr) m_fs = fs ->createShaderModule(m_vkd, slotMapping); m_vsIn = vs != nullptr ? vs->interfaceSlots().inputSlots : 0; m_fsOut = fs != nullptr ? fs->interfaceSlots().outputSlots : 0; @@ -146,9 +148,14 @@ namespace dxvk { vpInfo.scissorCount = state.rsViewportCount; vpInfo.pScissors = nullptr; + VkPipelineRasterizationStateRasterizationOrderAMD rsOrder; + rsOrder.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_RASTERIZATION_ORDER_AMD; + rsOrder.pNext = nullptr; + rsOrder.rasterizationOrder = this->pickRasterizationOrder(state); + VkPipelineRasterizationStateCreateInfo rsInfo; rsInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; - rsInfo.pNext = nullptr; + rsInfo.pNext = m_device->extensions().amdRasterizationOrder.enabled() ? &rsOrder : rsOrder.pNext; rsInfo.flags = 0; rsInfo.depthClampEnable = state.rsEnableDepthClamp; rsInfo.rasterizerDiscardEnable= state.rsEnableDiscard; @@ -260,4 +267,29 @@ namespace dxvk { return true; } + + VkRasterizationOrderAMD DxvkGraphicsPipeline::pickRasterizationOrder( + const DxvkGraphicsPipelineStateInfo& state) const { + // If blending is not enabled, we can enable out-of-order + // rasterization for certain depth-compare modes. + bool blendingEnabled = false; + + for (uint32_t i = 0; i < MaxNumRenderTargets; i++) { + if (m_fsOut & (1u << i)) + blendingEnabled |= state.omBlendAttachments[i].blendEnable; + } + + if (!blendingEnabled) { + if (m_device->hasOption(DxvkOption::AssumeNoZfight)) + return VK_RASTERIZATION_ORDER_RELAXED_AMD; + + if (state.dsDepthCompareOp == VK_COMPARE_OP_NEVER + || state.dsDepthCompareOp == VK_COMPARE_OP_LESS + || state.dsDepthCompareOp == VK_COMPARE_OP_GREATER) + return VK_RASTERIZATION_ORDER_RELAXED_AMD; + } + + return VK_RASTERIZATION_ORDER_STRICT_AMD; + } + } \ No newline at end of file diff --git a/src/dxvk/dxvk_graphics.h b/src/dxvk/dxvk_graphics.h index a3247aafe..58b246c74 100644 --- a/src/dxvk/dxvk_graphics.h +++ b/src/dxvk/dxvk_graphics.h @@ -12,6 +12,8 @@ namespace dxvk { + class DxvkDevice; + /** * \brief Graphics pipeline state info * @@ -72,7 +74,7 @@ namespace dxvk { VkBool32 omEnableLogicOp; VkLogicOp omLogicOp; VkRenderPass omRenderPass; - VkPipelineColorBlendAttachmentState omBlendAttachments[DxvkLimits::MaxNumRenderTargets]; + VkPipelineColorBlendAttachmentState omBlendAttachments[MaxNumRenderTargets]; }; @@ -88,7 +90,7 @@ namespace dxvk { public: DxvkGraphicsPipeline( - const Rc& vkd, + const DxvkDevice* device, const Rc& cache, const Rc& vs, const Rc& tcs, @@ -127,7 +129,9 @@ namespace dxvk { VkPipeline pipeline; }; - Rc m_vkd; + const DxvkDevice* const m_device; + const Rc m_vkd; + Rc m_cache; Rc m_layout; @@ -154,6 +158,9 @@ namespace dxvk { bool validatePipelineState( const DxvkGraphicsPipelineStateInfo& state) const; + VkRasterizationOrderAMD pickRasterizationOrder( + const DxvkGraphicsPipelineStateInfo& state) const; + }; } \ No newline at end of file diff --git a/src/dxvk/dxvk_options.cpp b/src/dxvk/dxvk_options.cpp index b6538bfaa..609bae76b 100644 --- a/src/dxvk/dxvk_options.cpp +++ b/src/dxvk/dxvk_options.cpp @@ -5,7 +5,7 @@ namespace dxvk { const static std::unordered_map g_appOptions = {{ - + { "NieRAutomata.exe", DxvkOptionSet(DxvkOption::AssumeNoZfight) }, }}; diff --git a/src/dxvk/dxvk_pipemanager.cpp b/src/dxvk/dxvk_pipemanager.cpp index 88a68d327..76f106e9b 100644 --- a/src/dxvk/dxvk_pipemanager.cpp +++ b/src/dxvk/dxvk_pipemanager.cpp @@ -35,8 +35,8 @@ namespace dxvk { } - DxvkPipelineManager::DxvkPipelineManager(const Rc& vkd) - : m_vkd(vkd) { + DxvkPipelineManager::DxvkPipelineManager(const DxvkDevice* device) + : m_device(device) { } @@ -62,7 +62,7 @@ namespace dxvk { return pair->second; const Rc pipeline - = new DxvkComputePipeline(m_vkd, cache, cs); + = new DxvkComputePipeline(m_device, cache, cs); m_computePipelines.insert(std::make_pair(key, pipeline)); return pipeline; } @@ -92,7 +92,7 @@ namespace dxvk { return pair->second; const Rc pipeline - = new DxvkGraphicsPipeline(m_vkd, cache, vs, tcs, tes, gs, fs); + = new DxvkGraphicsPipeline(m_device, cache, vs, tcs, tes, gs, fs); m_graphicsPipelines.insert(std::make_pair(key, pipeline)); return pipeline; } diff --git a/src/dxvk/dxvk_pipemanager.h b/src/dxvk/dxvk_pipemanager.h index 9d438cd72..2400cdc9c 100644 --- a/src/dxvk/dxvk_pipemanager.h +++ b/src/dxvk/dxvk_pipemanager.h @@ -59,8 +59,7 @@ namespace dxvk { public: - DxvkPipelineManager( - const Rc& vkd); + DxvkPipelineManager(const DxvkDevice* device); ~DxvkPipelineManager(); /** @@ -99,7 +98,7 @@ namespace dxvk { private: - const Rc m_vkd; + const DxvkDevice* m_device; std::mutex m_mutex;