From 4635397bb15193f9d3c362f2ad8b4a8fc40b5d19 Mon Sep 17 00:00:00 2001 From: Philip Rebohle Date: Thu, 26 Sep 2024 23:33:28 +0200 Subject: [PATCH] [dxvk] Implement sampler pool Deduplicates redundant sampler objects and makes sampler creation as well as lifetime tracking a bit more efficient. --- src/d3d11/d3d11_sampler.cpp | 85 +++++----- src/d3d9/d3d9_device.cpp | 55 +++--- src/dxvk/dxvk_cmdlist.h | 4 + src/dxvk/dxvk_context.cpp | 4 +- src/dxvk/dxvk_device.cpp | 4 +- src/dxvk/dxvk_device.h | 2 +- src/dxvk/dxvk_lifetime.cpp | 1 + src/dxvk/dxvk_lifetime.h | 11 ++ src/dxvk/dxvk_objects.h | 7 + src/dxvk/dxvk_sampler.cpp | 220 +++++++++++++++++++----- src/dxvk/dxvk_sampler.h | 253 +++++++++++++++++++++++----- src/dxvk/dxvk_swapchain_blitter.cpp | 37 ++-- src/dxvk/dxvk_unbound.cpp | 28 +-- src/dxvk/hud/dxvk_hud_renderer.cpp | 26 +-- src/util/rc/util_rc_ptr.h | 37 ++++ 15 files changed, 546 insertions(+), 228 deletions(-) diff --git a/src/d3d11/d3d11_sampler.cpp b/src/d3d11/d3d11_sampler.cpp index 15ff92ab..4c43391a 100644 --- a/src/d3d11/d3d11_sampler.cpp +++ b/src/d3d11/d3d11_sampler.cpp @@ -9,63 +9,58 @@ namespace dxvk { const D3D11_SAMPLER_DESC& desc) : D3D11StateObject(device), m_desc(desc), m_d3d10(this) { - DxvkSamplerCreateInfo info; - + DxvkSamplerKey info = { }; + // While D3D11_FILTER is technically an enum, its value bits // can be used to decode the filter properties more efficiently. const uint32_t filterBits = uint32_t(desc.Filter); - info.magFilter = (filterBits & 0x04) ? VK_FILTER_LINEAR : VK_FILTER_NEAREST; - info.minFilter = (filterBits & 0x10) ? VK_FILTER_LINEAR : VK_FILTER_NEAREST; - + + VkFilter minFilter = (filterBits & 0x10) ? VK_FILTER_LINEAR : VK_FILTER_NEAREST; + VkFilter magFilter = (filterBits & 0x04) ? VK_FILTER_LINEAR : VK_FILTER_NEAREST; + + info.setFilter(minFilter, magFilter, + (filterBits & 0x01) ? 
VK_SAMPLER_MIPMAP_MODE_LINEAR : VK_SAMPLER_MIPMAP_MODE_NEAREST); + + // Enforce LOD bias specified in the device options + float lodBias = desc.MipLODBias; + + if (minFilter == VK_FILTER_LINEAR && magFilter == VK_FILTER_LINEAR) { + lodBias += device->GetOptions()->samplerLodBias; + + if (device->GetOptions()->clampNegativeLodBias) + lodBias = std::max(lodBias, 0.0f); + } + + info.setLodRange(desc.MinLOD, desc.MaxLOD, lodBias); + + // Enforce anisotropy specified in the device options + uint32_t anisotropy = (filterBits & 0x40) ? desc.MaxAnisotropy : 0u; + int32_t samplerAnisotropyOption = device->GetOptions()->samplerAnisotropy; + + if (samplerAnisotropyOption >= 0 && minFilter == VK_FILTER_LINEAR) + anisotropy = samplerAnisotropyOption > 0; + + info.setAniso(anisotropy); + // Set up the remaining properties, which are // stored directly in the sampler description - info.mipmapMode = (filterBits & 0x01) ? VK_SAMPLER_MIPMAP_MODE_LINEAR : VK_SAMPLER_MIPMAP_MODE_NEAREST; - info.mipmapLodBias = desc.MipLODBias; - info.mipmapLodMin = desc.MinLOD; - info.mipmapLodMax = desc.MaxLOD; - - info.useAnisotropy = (filterBits & 0x40) ? VK_TRUE : VK_FALSE; - info.maxAnisotropy = float(desc.MaxAnisotropy); - - info.addressModeU = DecodeAddressMode(desc.AddressU); - info.addressModeV = DecodeAddressMode(desc.AddressV); - info.addressModeW = DecodeAddressMode(desc.AddressW); - - info.compareToDepth = (filterBits & 0x180) == 0x80 ? 
VK_TRUE : VK_FALSE; - info.compareOp = DecodeCompareOp(desc.ComparisonFunc); + info.setAddressModes( + DecodeAddressMode(desc.AddressU), + DecodeAddressMode(desc.AddressV), + DecodeAddressMode(desc.AddressW)); - info.reductionMode = DecodeReductionMode(filterBits); + info.setDepthCompare((filterBits & 0x180) == 0x80, + DecodeCompareOp(desc.ComparisonFunc)); + + info.setReduction(DecodeReductionMode(filterBits)); for (uint32_t i = 0; i < 4; i++) info.borderColor.float32[i] = desc.BorderColor[i]; - - info.usePixelCoord = VK_FALSE; // Not supported in D3D11 - info.nonSeamless = VK_FALSE; - - // Make sure to use a valid anisotropy value - if (desc.MaxAnisotropy < 1) info.maxAnisotropy = 1.0f; - if (desc.MaxAnisotropy > 16) info.maxAnisotropy = 16.0f; - - // Enforce LOD bias specified in the device options - if (info.minFilter == VK_FILTER_LINEAR && info.magFilter == VK_FILTER_LINEAR) { - info.mipmapLodBias += device->GetOptions()->samplerLodBias; - - if (device->GetOptions()->clampNegativeLodBias) - info.mipmapLodBias = std::max(info.mipmapLodBias, 0.0f); - } - - // Enforce anisotropy specified in the device options - int32_t samplerAnisotropyOption = device->GetOptions()->samplerAnisotropy; - - if (samplerAnisotropyOption >= 0 && info.minFilter == VK_FILTER_LINEAR) { - info.useAnisotropy = samplerAnisotropyOption > 0; - info.maxAnisotropy = float(samplerAnisotropyOption); - } m_sampler = device->GetDXVKDevice()->createSampler(info); } - - + + D3D11SamplerState::~D3D11SamplerState() { } diff --git a/src/d3d9/d3d9_device.cpp b/src/d3d9/d3d9_device.cpp index 4fa7b8c9..69260393 100644 --- a/src/d3d9/d3d9_device.cpp +++ b/src/d3d9/d3d9_device.cpp @@ -6690,42 +6690,37 @@ namespace dxvk { auto mipFilter = DecodeMipFilter(cKey.MipFilter); - DxvkSamplerCreateInfo info; - info.addressModeU = DecodeAddressMode(cKey.AddressU); - info.addressModeV = DecodeAddressMode(cKey.AddressV); - info.addressModeW = DecodeAddressMode(cKey.AddressW); - info.compareToDepth = cKey.Depth; - 
info.compareOp = cKey.Depth ? VK_COMPARE_OP_LESS_OR_EQUAL : VK_COMPARE_OP_NEVER; - info.magFilter = DecodeFilter(cKey.MagFilter); - info.minFilter = DecodeFilter(cKey.MinFilter); - info.mipmapMode = mipFilter.MipFilter; - info.maxAnisotropy = float(cKey.MaxAnisotropy); - info.useAnisotropy = cKey.MaxAnisotropy > 1; + DxvkSamplerKey info = { }; + + info.setFilter( + DecodeFilter(cKey.MinFilter), + DecodeFilter(cKey.MagFilter), + mipFilter.MipFilter); + + info.setAddressModes( + DecodeAddressMode(cKey.AddressU), + DecodeAddressMode(cKey.AddressV), + DecodeAddressMode(cKey.AddressW)); + + info.setDepthCompare(cKey.Depth, + VK_COMPARE_OP_LESS_OR_EQUAL); + + info.setAniso(cKey.MaxAnisotropy); + + float lodBias = cKey.MipmapLodBias + m_d3d9Options.samplerLodBias; - info.mipmapLodBias = cKey.MipmapLodBias + m_d3d9Options.samplerLodBias; if (m_d3d9Options.clampNegativeLodBias) - info.mipmapLodBias = std::max(info.mipmapLodBias, 0.0f); + lodBias = std::max(lodBias, 0.0f); - info.mipmapLodMin = mipFilter.MipsEnabled ? float(cKey.MaxMipLevel) : 0; - info.mipmapLodMax = mipFilter.MipsEnabled ? FLT_MAX : 0; - info.reductionMode = VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE; - info.usePixelCoord = VK_FALSE; - info.nonSeamless = m_dxvkDevice->features().extNonSeamlessCubeMap.nonSeamlessCubeMap && !m_d3d9Options.seamlessCubes; + info.setLodRange( + mipFilter.MipsEnabled ? float(cKey.MaxMipLevel) : 0.0f, + mipFilter.MipsEnabled ? FLT_MAX : 0.0f, + lodBias); + + info.setLegacyCubeFilter(!m_d3d9Options.seamlessCubes); DecodeD3DCOLOR(cKey.BorderColor, info.borderColor.float32); - if (!m_dxvkDevice->features().extCustomBorderColor.customBorderColorWithoutFormat) { - // HACK: Let's get OPAQUE_WHITE border color over - // TRANSPARENT_BLACK if the border RGB is white. 
- if (info.borderColor.float32[0] == 1.0f - && info.borderColor.float32[1] == 1.0f - && info.borderColor.float32[2] == 1.0f - && !m_dxvkDevice->features().extCustomBorderColor.customBorderColors) { - // Then set the alpha to 1. - info.borderColor.float32[3] = 1.0f; - } - } - try { auto sampler = m_dxvkDevice->createSampler(info); diff --git a/src/dxvk/dxvk_cmdlist.h b/src/dxvk/dxvk_cmdlist.h index 60c35e90..366b9abf 100644 --- a/src/dxvk/dxvk_cmdlist.h +++ b/src/dxvk/dxvk_cmdlist.h @@ -281,6 +281,10 @@ namespace dxvk { m_resources.trackResource(DxvkLifetime(rc, Access)); } + void trackSampler(const Rc& sampler) { + m_resources.trackSampler(sampler); + } + /** * \brief Tracks a GPU event * diff --git a/src/dxvk/dxvk_context.cpp b/src/dxvk/dxvk_context.cpp index 04d40ed1..e34f1312 100644 --- a/src/dxvk/dxvk_context.cpp +++ b/src/dxvk/dxvk_context.cpp @@ -5260,7 +5260,7 @@ namespace dxvk { descriptorInfo.image.imageLayout = VK_IMAGE_LAYOUT_UNDEFINED; if (m_rcTracked.set(binding.resourceBinding)) - m_cmd->trackResource(res.sampler); + m_cmd->trackSampler(res.sampler); } else { descriptorInfo.image.sampler = m_common->dummyResources().samplerHandle(); descriptorInfo.image.imageView = VK_NULL_HANDLE; @@ -5312,7 +5312,7 @@ namespace dxvk { descriptorInfo.image.imageLayout = res.imageView->image()->info().layout; if (m_rcTracked.set(binding.resourceBinding)) { - m_cmd->trackResource(res.sampler); + m_cmd->trackSampler(res.sampler); m_cmd->trackResource(res.imageView->image()); } } else { diff --git a/src/dxvk/dxvk_device.cpp b/src/dxvk/dxvk_device.cpp index 8581294c..27a211aa 100644 --- a/src/dxvk/dxvk_device.cpp +++ b/src/dxvk/dxvk_device.cpp @@ -172,8 +172,8 @@ namespace dxvk { Rc DxvkDevice::createSampler( - const DxvkSamplerCreateInfo& createInfo) { - return new DxvkSampler(this, createInfo); + const DxvkSamplerKey& createInfo) { + return m_objects.samplerPool().createSampler(createInfo); } diff --git a/src/dxvk/dxvk_device.h b/src/dxvk/dxvk_device.h index 
49a51972..26ea98bc 100644 --- a/src/dxvk/dxvk_device.h +++ b/src/dxvk/dxvk_device.h @@ -330,7 +330,7 @@ namespace dxvk { * \returns Newly created sampler object */ Rc createSampler( - const DxvkSamplerCreateInfo& createInfo); + const DxvkSamplerKey& createInfo); /** * \brief Creates local allocation cache diff --git a/src/dxvk/dxvk_lifetime.cpp b/src/dxvk/dxvk_lifetime.cpp index 6b43f93e..2c09ec70 100644 --- a/src/dxvk/dxvk_lifetime.cpp +++ b/src/dxvk/dxvk_lifetime.cpp @@ -9,6 +9,7 @@ namespace dxvk { void DxvkLifetimeTracker::reset() { m_resources.clear(); m_allocations.clear(); + m_samplers.clear(); } } \ No newline at end of file diff --git a/src/dxvk/dxvk_lifetime.h b/src/dxvk/dxvk_lifetime.h index 5aa903ff..cb6ff3f0 100644 --- a/src/dxvk/dxvk_lifetime.h +++ b/src/dxvk/dxvk_lifetime.h @@ -3,6 +3,7 @@ #include #include "dxvk_resource.h" +#include "dxvk_sampler.h" namespace dxvk { @@ -106,6 +107,14 @@ namespace dxvk { DxvkLifetimeTracker(); ~DxvkLifetimeTracker(); + /** + * \brief Adds a sampler to track + * \param [in] res The sampler to track + */ + void trackSampler(const Rc& res) { + m_samplers.push_back(res); + } + /** * \brief Adds a resource to track * \param [in] res The resource to track @@ -132,6 +141,8 @@ namespace dxvk { private: + std::vector> m_samplers; + std::vector> m_resources; std::vector> m_allocations; diff --git a/src/dxvk/dxvk_objects.h b/src/dxvk/dxvk_objects.h index c8179d05..91508d1a 100644 --- a/src/dxvk/dxvk_objects.h +++ b/src/dxvk/dxvk_objects.h @@ -11,6 +11,7 @@ #include "dxvk_meta_resolve.h" #include "dxvk_pipemanager.h" #include "dxvk_renderpass.h" +#include "dxvk_sampler.h" #include "dxvk_unbound.h" #include "../util/util_lazy.h" @@ -25,6 +26,7 @@ namespace dxvk { : m_device (device), m_memoryManager (device), m_pipelineManager (device), + m_samplerPool (device), m_eventPool (device), m_queryPool (device), m_dummyResources (device) { @@ -39,6 +41,10 @@ namespace dxvk { return m_pipelineManager; } + DxvkSamplerPool& samplerPool() 
{ + return m_samplerPool; + } + DxvkGpuEventPool& eventPool() { return m_eventPool; } @@ -78,6 +84,7 @@ namespace dxvk { DxvkMemoryAllocator m_memoryManager; DxvkPipelineManager m_pipelineManager; + DxvkSamplerPool m_samplerPool; DxvkGpuEventPool m_eventPool; DxvkGpuQueryPool m_queryPool; diff --git a/src/dxvk/dxvk_sampler.cpp b/src/dxvk/dxvk_sampler.cpp index 49a35f47..7555034e 100644 --- a/src/dxvk/dxvk_sampler.cpp +++ b/src/dxvk/dxvk_sampler.cpp @@ -4,82 +4,216 @@ namespace dxvk { DxvkSampler::DxvkSampler( - DxvkDevice* device, - const DxvkSamplerCreateInfo& info) - : m_vkd(device->vkd()) { + DxvkSamplerPool* pool, + const DxvkSamplerKey& key) + : m_pool(pool), m_key(key) { + auto vk = m_pool->m_device->vkd(); + VkSamplerCustomBorderColorCreateInfoEXT borderColorInfo = { VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT }; - borderColorInfo.customBorderColor = info.borderColor; + borderColorInfo.customBorderColor = key.borderColor; VkSamplerReductionModeCreateInfo reductionInfo = { VK_STRUCTURE_TYPE_SAMPLER_REDUCTION_MODE_CREATE_INFO }; - reductionInfo.reductionMode = info.reductionMode; + reductionInfo.reductionMode = VkSamplerReductionMode(key.u.p.reduction); VkSamplerCreateInfo samplerInfo = { VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO }; - samplerInfo.flags = info.nonSeamless ? 
VK_SAMPLER_CREATE_NON_SEAMLESS_CUBE_MAP_BIT_EXT : 0; - samplerInfo.magFilter = info.magFilter; - samplerInfo.minFilter = info.minFilter; - samplerInfo.mipmapMode = info.mipmapMode; - samplerInfo.addressModeU = info.addressModeU; - samplerInfo.addressModeV = info.addressModeV; - samplerInfo.addressModeW = info.addressModeW; - samplerInfo.mipLodBias = info.mipmapLodBias; - samplerInfo.anisotropyEnable = info.useAnisotropy; - samplerInfo.maxAnisotropy = info.maxAnisotropy; - samplerInfo.compareEnable = info.compareToDepth; - samplerInfo.compareOp = info.compareOp; - samplerInfo.minLod = info.mipmapLodMin; - samplerInfo.maxLod = info.mipmapLodMax; - samplerInfo.borderColor = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; - samplerInfo.unnormalizedCoordinates = info.usePixelCoord; + samplerInfo.magFilter = VkFilter(key.u.p.magFilter); + samplerInfo.minFilter = VkFilter(key.u.p.minFilter); + samplerInfo.mipmapMode = VkSamplerMipmapMode(key.u.p.mipMode); + samplerInfo.addressModeU = VkSamplerAddressMode(key.u.p.addressU); + samplerInfo.addressModeV = VkSamplerAddressMode(key.u.p.addressV); + samplerInfo.addressModeW = VkSamplerAddressMode(key.u.p.addressW); + samplerInfo.mipLodBias = bit::decodeFixed(key.u.p.lodBias); + samplerInfo.anisotropyEnable = key.u.p.anisotropy > 0u; + samplerInfo.maxAnisotropy = float(key.u.p.anisotropy); + samplerInfo.compareEnable = key.u.p.compareEnable != 0u; + samplerInfo.compareOp = VkCompareOp(key.u.p.compareOp); + samplerInfo.minLod = bit::decodeFixed(key.u.p.minLod); + samplerInfo.maxLod = bit::decodeFixed(key.u.p.maxLod); + samplerInfo.borderColor = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; + samplerInfo.unnormalizedCoordinates = key.u.p.pixelCoord; - if (!device->features().core.features.samplerAnisotropy) + if (key.u.p.legacyCube && m_pool->m_device->features().extNonSeamlessCubeMap.nonSeamlessCubeMap) + samplerInfo.flags |= VK_SAMPLER_CREATE_NON_SEAMLESS_CUBE_MAP_BIT_EXT; + + if 
(!m_pool->m_device->features().core.features.samplerAnisotropy) samplerInfo.anisotropyEnable = VK_FALSE; - if (samplerInfo.addressModeU == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER - || samplerInfo.addressModeV == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER - || samplerInfo.addressModeW == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER) - samplerInfo.borderColor = getBorderColor(device, info); + if (key.u.p.hasBorder) + samplerInfo.borderColor = determineBorderColorType(); - if (samplerInfo.borderColor == VK_BORDER_COLOR_FLOAT_CUSTOM_EXT) + if (samplerInfo.borderColor == VK_BORDER_COLOR_FLOAT_CUSTOM_EXT + || samplerInfo.borderColor == VK_BORDER_COLOR_INT_CUSTOM_EXT) borderColorInfo.pNext = std::exchange(samplerInfo.pNext, &borderColorInfo); if (reductionInfo.reductionMode != VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE) reductionInfo.pNext = std::exchange(samplerInfo.pNext, &reductionInfo); - if (m_vkd->vkCreateSampler(m_vkd->device(), + if (vk->vkCreateSampler(vk->device(), &samplerInfo, nullptr, &m_sampler) != VK_SUCCESS) throw DxvkError("DxvkSampler::DxvkSampler: Failed to create sampler"); } - - + + DxvkSampler::~DxvkSampler() { - m_vkd->vkDestroySampler( - m_vkd->device(), m_sampler, nullptr); + auto vk = m_pool->m_device->vkd(); + + vk->vkDestroySampler(vk->device(), m_sampler, nullptr); } - VkBorderColor DxvkSampler::getBorderColor(const Rc& device, const DxvkSamplerCreateInfo& info) { - static const std::array, 3> s_borderColors = {{ + void DxvkSampler::release() { + m_pool->releaseSampler(this); + } + + + VkBorderColor DxvkSampler::determineBorderColorType() const { + static const std::array, 4> s_borderColors = {{ { { { 0.0f, 0.0f, 0.0f, 0.0f } }, VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK }, { { { 0.0f, 0.0f, 0.0f, 1.0f } }, VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK }, { { { 1.0f, 1.0f, 1.0f, 1.0f } }, VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE }, }}; - // Ignore G/B/A components for shadow samplers - size_t size = !info.compareToDepth - ? 
sizeof(VkClearColorValue) - : sizeof(float); + // Iterate over border colors and try to find an exact match + uint32_t componentCount = m_key.u.p.compareEnable ? 1u : 4u; for (const auto& e : s_borderColors) { - if (!std::memcmp(&e.first, &info.borderColor, size)) + bool allEqual = true; + + for (uint32_t i = 0; i < componentCount; i++) + allEqual &= m_key.borderColor.float32[i] == e.first.float32[i]; + + if (allEqual) return e.second; } - if (!device->features().extCustomBorderColor.customBorderColorWithoutFormat) { - Logger::warn("DXVK: Custom border colors not supported"); - return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; + // If custom border colors are supported, use that + if (m_pool->m_device->features().extCustomBorderColor.customBorderColorWithoutFormat) + return VK_BORDER_COLOR_FLOAT_CUSTOM_EXT; + + // Otherwise, use the sum of absolute differences to find the + // closest fallback value. Some D3D9 games may rely on this. + Logger::warn("DXVK: Custom border colors not supported"); + + VkBorderColor result = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; + + float minSad = -1.0f; + + for (const auto& e : s_borderColors) { + float sad = 0.0f; + + for (uint32_t i = 0; i < componentCount; i++) + sad += std::abs(m_key.borderColor.float32[i] - e.first.float32[i]); + + if (sad < minSad || minSad < 0.0f) { + minSad = sad; + result = e.second; + } } - return VK_BORDER_COLOR_FLOAT_CUSTOM_EXT; + return result; + } + + + + + DxvkSamplerPool::DxvkSamplerPool(DxvkDevice* device) + : m_device(device) { + + } + + + DxvkSamplerPool::~DxvkSamplerPool() { + m_samplers.clear(); + } + + + Rc DxvkSamplerPool::createSampler(const DxvkSamplerKey& key) { + std::unique_lock lock(m_mutex); + auto entry = m_samplers.find(key); + + if (entry != m_samplers.end()) { + DxvkSampler* sampler = &entry->second; + + // Remove the sampler from the LRU list if it's in there. 
Due + // to the way releasing samplers is implemented upon reaching + // a ref count of 0, it is possible that we reach this before + // the releasing thread inserted the sampler into the LRU list. + if (!sampler->m_refCount.fetch_add(1u, std::memory_order_acquire)) { + if (sampler->m_lruPrev) + sampler->m_lruPrev->m_lruNext = sampler->m_lruNext; + else if (m_lruHead == sampler) + m_lruHead = sampler->m_lruNext; + + if (sampler->m_lruNext) + sampler->m_lruNext->m_lruPrev = sampler->m_lruPrev; + else if (m_lruTail == sampler) + m_lruTail = sampler->m_lruPrev; + + sampler->m_lruPrev = nullptr; + sampler->m_lruNext = nullptr; + } + + // We already took a reference, forward the pointer as-is + return Rc::unsafeCreate(sampler); + } + + // If we're spamming sampler allocations, we might need + // to clean up unused ones here to stay within the limit + if (m_samplers.size() >= MaxSamplerCount) + destroyLeastRecentlyUsedSampler(); + + // Create new sampler object + return &m_samplers.emplace(std::piecewise_construct, + std::forward_as_tuple(key), + std::forward_as_tuple(this, key)).first->second; + } + + + void DxvkSamplerPool::releaseSampler(DxvkSampler* sampler) { + std::unique_lock lock(m_mutex); + + // Back off if another thread has re-acquired the sampler. This is + // safe since the ref count can only be incremented from zero when + // the pool is locked. + if (sampler->m_refCount.load()) + return; + + // It is also possible that two threads end up here while the ref + // count is zero. Make sure to not add the sampler to the LRU list + // more than once in that case. 
+ if (sampler->m_lruPrev || m_lruHead == sampler) + return; + + // Add sampler to the end of the LRU list + sampler->m_lruPrev = m_lruTail; + sampler->m_lruNext = nullptr; + + if (m_lruTail) + m_lruTail->m_lruNext = sampler; + else + m_lruHead = sampler; + + m_lruTail = sampler; + + // Try to keep some samplers available for subsequent allocations + if (m_samplers.size() > MinSamplerCount) + destroyLeastRecentlyUsedSampler(); + } + + + void DxvkSamplerPool::destroyLeastRecentlyUsedSampler() { + DxvkSampler* sampler = m_lruHead; + + if (sampler) { + m_lruHead = sampler->m_lruNext; + + if (m_lruHead) + m_lruHead->m_lruPrev = nullptr; + else + m_lruTail = nullptr; + + m_samplers.erase(sampler->key()); + } } } diff --git a/src/dxvk/dxvk_sampler.h b/src/dxvk/dxvk_sampler.h index 61d64a07..5ae7a475 100644 --- a/src/dxvk/dxvk_sampler.h +++ b/src/dxvk/dxvk_sampler.h @@ -1,50 +1,131 @@ #pragma once +#include + +#include "../util/util_bit.h" +#include "../util/thread.h" + #include "dxvk_resource.h" namespace dxvk { class DxvkDevice; - + class DxvkSamplerPool; + /** - * \brief Sampler properties + * \brief Sampler key + * + * Stores packed sampler properties in a way that + * can be reasonably efficiently used with a hash map. 
*/ - struct DxvkSamplerCreateInfo { - /// Texture filter propertoes - VkFilter magFilter; - VkFilter minFilter; - - /// Mipmapping properties - VkSamplerMipmapMode mipmapMode; - float mipmapLodBias; - float mipmapLodMin; - float mipmapLodMax; - - /// Anisotropic filtering - VkBool32 useAnisotropy; - float maxAnisotropy; - - /// Address modes - VkSamplerAddressMode addressModeU; - VkSamplerAddressMode addressModeV; - VkSamplerAddressMode addressModeW; - - /// Compare op for shadow textures - VkBool32 compareToDepth; - VkCompareOp compareOp; - - /// Reduction mode for min/max samplers - VkSamplerReductionMode reductionMode; - - /// Texture border color - VkClearColorValue borderColor; + struct DxvkSamplerKey { + union { + struct { + uint32_t minFilter : 1; + uint32_t magFilter : 1; + uint32_t mipMode : 1; + uint32_t anisotropy : 5; - /// Enables unnormalized coordinates - VkBool32 usePixelCoord; + uint32_t addressU : 3; + uint32_t addressV : 3; + uint32_t addressW : 3; + uint32_t hasBorder : 1; + + uint32_t lodBias : 14; + + uint32_t minLod : 12; + uint32_t maxLod : 12; + + uint32_t compareEnable : 1; + uint32_t compareOp : 3; + uint32_t reduction : 2; + uint32_t pixelCoord : 1; + uint32_t legacyCube : 1; + } p; + + uint32_t properties[2] = { 0u, 0u }; + } u; + + VkClearColorValue borderColor = { }; + + void setFilter(VkFilter min, VkFilter mag, VkSamplerMipmapMode mip) { + u.p.minFilter = uint32_t(min); + u.p.magFilter = uint32_t(mag); + u.p.mipMode = uint32_t(mip); + } + + void setAniso(uint32_t anisotropy) { + u.p.anisotropy = std::min(anisotropy, 16u); + } + + void setDepthCompare(bool enable, VkCompareOp op) { + u.p.compareEnable = uint32_t(enable); + u.p.compareOp = enable ? 
uint32_t(op) : 0u; + } + + void setReduction(VkSamplerReductionMode reduction) { + u.p.reduction = uint32_t(reduction); + } + + void setUsePixelCoordinates(bool enable) { + u.p.pixelCoord = uint32_t(enable); + } + + void setLegacyCubeFilter(bool enable) { + u.p.legacyCube = uint32_t(enable); + } + + void setAddressModes(VkSamplerAddressMode u_, VkSamplerAddressMode v_, VkSamplerAddressMode w_) { + u.p.addressU = u_; + u.p.addressV = v_; + u.p.addressW = w_; + u.p.hasBorder = uint32_t(u_ == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER + || v_ == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER + || w_ == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER); + } + + void setLodRange(float min, float max, float bias) { + u.p.minLod = bit::encodeFixed(min); + u.p.maxLod = bit::encodeFixed(max); + u.p.lodBias = bit::encodeFixed(bias); + } + + void setBorderColor(VkClearColorValue color) { + borderColor = color; + } + + bool eq(const DxvkSamplerKey& other) const { + bool eq = u.properties[0] == other.u.properties[0] + && u.properties[1] == other.u.properties[1]; + + if (eq && u.p.hasBorder) { + eq = borderColor.uint32[0] == other.borderColor.uint32[0] + && borderColor.uint32[1] == other.borderColor.uint32[1] + && borderColor.uint32[2] == other.borderColor.uint32[2] + && borderColor.uint32[3] == other.borderColor.uint32[3]; + } + + return eq; + } + + size_t hash() const { + DxvkHashState hash; + hash.add(u.properties[0]); + hash.add(u.properties[1]); + + if (u.p.hasBorder) { + hash.add(borderColor.uint32[0]); + hash.add(borderColor.uint32[1]); + hash.add(borderColor.uint32[2]); + hash.add(borderColor.uint32[3]); + } + + return hash; + } - /// Enables non seamless cube map filtering - VkBool32 nonSeamless; }; + + static_assert(sizeof(DxvkSamplerKey) == 24u); /** @@ -54,15 +135,33 @@ namespace dxvk { * a pipeline. Sampler objects provide parameters * for texture lookups within a shader. 
*/ - class DxvkSampler : public DxvkResource { - + class DxvkSampler { + friend class DxvkSamplerPool; public: DxvkSampler( - DxvkDevice* device, - const DxvkSamplerCreateInfo& info); + DxvkSamplerPool* pool, + const DxvkSamplerKey& key); + ~DxvkSampler(); - + + /** + * \brief Increments reference count + */ + force_inline void incRef() { + m_refCount.fetch_add(1u, std::memory_order_acquire); + } + + /** + * \brief Decrements reference count + * + * Recycles the sampler once the ref count reaches zero. + */ + force_inline void decRef() { + if (m_refCount.fetch_sub(1u, std::memory_order_relaxed) == 1u) + release(); + } + /** * \brief Sampler handle * \returns Sampler handle @@ -71,15 +170,77 @@ namespace dxvk { return m_sampler; } + /** + * \brief Sampler key + * \returns Sampler properties + */ + const DxvkSamplerKey& key() const { + return m_key; + } + private: - Rc m_vkd; - VkSampler m_sampler = VK_NULL_HANDLE; + std::atomic m_refCount = { 0u }; + + DxvkSamplerPool* m_pool = nullptr; + DxvkSamplerKey m_key = { }; + + VkSampler m_sampler = VK_NULL_HANDLE; + + DxvkSampler* m_lruPrev = nullptr; + DxvkSampler* m_lruNext = nullptr; + + void release(); + + VkBorderColor determineBorderColorType() const; - static VkBorderColor getBorderColor( - const Rc& device, - const DxvkSamplerCreateInfo& info); - }; + + /** + * \brief Sampler pool + * + * Manages unique samplers within a device. + */ + class DxvkSamplerPool { + friend DxvkSampler; + public: + + // The Vulkan limit for samplers is at least 4000. + // Keep some objects available for internal use. + constexpr static uint32_t MaxSamplerCount = 3584u; + + // Minimum number of samplers to keep alive. 
+ constexpr static uint32_t MinSamplerCount = 1024u; + + DxvkSamplerPool(DxvkDevice* device); + + ~DxvkSamplerPool(); + + /** + * \brief Creates sampler + * + * \param [in] key Sampler key + * \returns Sampler object + */ + Rc createSampler(const DxvkSamplerKey& key); + + private: + + DxvkDevice* m_device; + + dxvk::mutex m_mutex; + std::unordered_map m_samplers; + + DxvkSampler* m_lruHead = nullptr; + DxvkSampler* m_lruTail = nullptr; + + void releaseSampler(DxvkSampler* sampler); + + void destroyLeastRecentlyUsedSampler(); + + }; + + } diff --git a/src/dxvk/dxvk_swapchain_blitter.cpp b/src/dxvk/dxvk_swapchain_blitter.cpp index fc5b0dfb..4d57e47e 100644 --- a/src/dxvk/dxvk_swapchain_blitter.cpp +++ b/src/dxvk/dxvk_swapchain_blitter.cpp @@ -286,30 +286,23 @@ namespace dxvk { void DxvkSwapchainBlitter::createSampler() { - DxvkSamplerCreateInfo samplerInfo; - samplerInfo.magFilter = VK_FILTER_LINEAR; - samplerInfo.minFilter = VK_FILTER_LINEAR; - samplerInfo.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST; - samplerInfo.mipmapLodBias = 0.0f; - samplerInfo.mipmapLodMin = 0.0f; - samplerInfo.mipmapLodMax = 0.0f; - samplerInfo.useAnisotropy = VK_FALSE; - samplerInfo.maxAnisotropy = 1.0f; - samplerInfo.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; - samplerInfo.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; - samplerInfo.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; - samplerInfo.compareToDepth = VK_FALSE; - samplerInfo.compareOp = VK_COMPARE_OP_ALWAYS; - samplerInfo.reductionMode = VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE; - samplerInfo.borderColor = VkClearColorValue(); - samplerInfo.usePixelCoord = VK_TRUE; - samplerInfo.nonSeamless = VK_FALSE; + DxvkSamplerKey samplerInfo = { }; + samplerInfo.setFilter(VK_FILTER_LINEAR, VK_FILTER_LINEAR, + VK_SAMPLER_MIPMAP_MODE_NEAREST); + samplerInfo.setAddressModes( + VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, + VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, + VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER); + 
samplerInfo.setUsePixelCoordinates(true); + m_samplerPresent = m_device->createSampler(samplerInfo); - samplerInfo.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; - samplerInfo.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; - samplerInfo.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; - samplerInfo.usePixelCoord = VK_FALSE; + samplerInfo.setAddressModes( + VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, + VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, + VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE); + samplerInfo.setUsePixelCoordinates(false); + m_samplerGamma = m_device->createSampler(samplerInfo); } diff --git a/src/dxvk/dxvk_unbound.cpp b/src/dxvk/dxvk_unbound.cpp index fd2af2f3..eed31c01 100644 --- a/src/dxvk/dxvk_unbound.cpp +++ b/src/dxvk/dxvk_unbound.cpp @@ -54,25 +54,15 @@ namespace dxvk { Rc DxvkUnboundResources::createSampler() { - DxvkSamplerCreateInfo info; - info.minFilter = VK_FILTER_LINEAR; - info.magFilter = VK_FILTER_LINEAR; - info.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR; - info.mipmapLodBias = 0.0f; - info.mipmapLodMin = -256.0f; - info.mipmapLodMax = 256.0f; - info.useAnisotropy = VK_FALSE; - info.maxAnisotropy = 1.0f; - info.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; - info.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; - info.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; - info.compareToDepth = VK_FALSE; - info.compareOp = VK_COMPARE_OP_NEVER; - info.reductionMode = VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE; - info.borderColor = VkClearColorValue(); - info.usePixelCoord = VK_FALSE; - info.nonSeamless = VK_FALSE; - + DxvkSamplerKey info; + info.setFilter(VK_FILTER_LINEAR, VK_FILTER_LINEAR, VK_SAMPLER_MIPMAP_MODE_LINEAR); + info.setLodRange(-256.0f, 256.0f, 0.0f); + info.setAddressModes( + VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, + VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, + VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE); + info.setReduction(VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE); + return m_device->createSampler(info); } diff 
--git a/src/dxvk/hud/dxvk_hud_renderer.cpp b/src/dxvk/hud/dxvk_hud_renderer.cpp index 50bc6590..f34e97d9 100644 --- a/src/dxvk/hud/dxvk_hud_renderer.cpp +++ b/src/dxvk/hud/dxvk_hud_renderer.cpp @@ -321,24 +321,14 @@ namespace dxvk::hud { Rc HudRenderer::createFontSampler() { - DxvkSamplerCreateInfo info; - info.magFilter = VK_FILTER_LINEAR; - info.minFilter = VK_FILTER_LINEAR; - info.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST; - info.mipmapLodBias = 0.0f; - info.mipmapLodMin = 0.0f; - info.mipmapLodMax = 0.0f; - info.useAnisotropy = VK_FALSE; - info.maxAnisotropy = 1.0f; - info.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; - info.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; - info.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; - info.compareToDepth = VK_FALSE; - info.compareOp = VK_COMPARE_OP_NEVER; - info.reductionMode = VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE; - info.borderColor = VkClearColorValue(); - info.usePixelCoord = VK_TRUE; - info.nonSeamless = VK_FALSE; + DxvkSamplerKey info = { }; + info.setFilter(VK_FILTER_LINEAR, + VK_FILTER_LINEAR, VK_SAMPLER_MIPMAP_MODE_NEAREST); + info.setAddressModes( + VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, + VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, + VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE); + info.setUsePixelCoordinates(true); return m_device->createSampler(info); } diff --git a/src/util/rc/util_rc_ptr.h b/src/util/rc/util_rc_ptr.h index f475c8f8..29ef274a 100644 --- a/src/util/rc/util_rc_ptr.h +++ b/src/util/rc/util_rc_ptr.h @@ -1,5 +1,7 @@ #pragma once +#include +#include #include #include #include @@ -107,19 +109,47 @@ namespace dxvk { return m_object != nullptr; } + /** + * \brief Sets pointer without acquiring a reference + * + * Must only be used when a reference has been taken via + * other means. 
+ * \param [in] object Object pointer + */ void unsafeInsert(T* object) { this->decRef(); m_object = object; } + /** + * \brief Extracts raw pointer + * + * Sets the smart pointer to null without decrementing the + * reference count. Must only be used when the reference + * count is decremented in some other way. + * \returns Pointer to owned object + */ T* unsafeExtract() { return std::exchange(m_object, nullptr); } + /** + * \brief Creates smart pointer without taking reference + * + * Must only be used when a reference has been obtained via other means. + * \param [in] object Pointer to object to take ownership of + */ + static Rc unsafeCreate(T* object) { + return Rc(object, false); + } + private: T* m_object = nullptr; + explicit Rc(T* object, bool) + : m_object(object) { } + force_inline void incRef() const { if (m_object != nullptr) m_object->incRef(); @@ -145,6 +175,13 @@ namespace dxvk { template bool operator != (Tx* a, const Rc& b) { return b != a; } + struct RcHash { + template + size_t operator () (const Rc& rc) const { + return reinterpret_cast(rc.ptr()) / sizeof(T); + } + }; + } template