diff --git a/src/dxvk/dxvk_buffer.h b/src/dxvk/dxvk_buffer.h index 79969e3dc..5be696d6e 100644 --- a/src/dxvk/dxvk_buffer.h +++ b/src/dxvk/dxvk_buffer.h @@ -329,16 +329,28 @@ namespace dxvk { /** * \brief Allocates new buffer slice - * \returns The new buffer slice + * \returns The new backing resource */ Rc allocateSlice() { + return allocateSlice(nullptr); + } + + /** + * \brief Allocates new buffer slice with cache + * + * Uses the given cache to service small allocations without + * having to block the actual allocator if possible. + * \param [in] cache Optional allocation cache + * \returns The new buffer slice + */ + Rc allocateSlice(DxvkLocalAllocationCache* cache) { VkBufferCreateInfo info = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; info.flags = m_info.flags; info.usage = m_info.usage; info.size = m_info.size; info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - return m_allocator->createBufferResource(info, m_properties); + return m_allocator->createBufferResource(info, m_properties, cache); } /** diff --git a/src/dxvk/dxvk_device.cpp b/src/dxvk/dxvk_device.cpp index 151c44572..d9e7c5daf 100644 --- a/src/dxvk/dxvk_device.cpp +++ b/src/dxvk/dxvk_device.cpp @@ -182,8 +182,15 @@ namespace dxvk { const DxvkSamplerCreateInfo& createInfo) { return new DxvkSampler(this, createInfo); } - - + + + DxvkLocalAllocationCache DxvkDevice::createAllocationCache( + VkBufferUsageFlags bufferUsage, + VkMemoryPropertyFlags propertyFlags) { + return m_objects.memoryManager().createAllocationCache(bufferUsage, propertyFlags); + } + + Rc DxvkDevice::createSparsePageAllocator() { return new DxvkSparsePageAllocator(m_objects.memoryManager()); } diff --git a/src/dxvk/dxvk_device.h b/src/dxvk/dxvk_device.h index fa78942f8..cd4a08d20 100644 --- a/src/dxvk/dxvk_device.h +++ b/src/dxvk/dxvk_device.h @@ -343,6 +343,17 @@ namespace dxvk { Rc createSampler( const DxvkSamplerCreateInfo& createInfo); + /** + * \brief Creates local allocation cache + * + * \param [in] bufferUsage Required buffer usage + * \param [in] propertyFlags Memory properties + * \returns Allocation cache object + */ + DxvkLocalAllocationCache createAllocationCache( + VkBufferUsageFlags bufferUsage, + VkMemoryPropertyFlags propertyFlags); + /** * \brief Creates a sparse page allocator * \returns Sparse page allocator diff --git a/src/dxvk/dxvk_memory.cpp b/src/dxvk/dxvk_memory.cpp index b94894954..08f3c3db4 100644 --- a/src/dxvk/dxvk_memory.cpp +++ b/src/dxvk/dxvk_memory.cpp @@ -200,6 +200,57 @@ namespace dxvk { + DxvkResourceAllocation* DxvkLocalAllocationCache::allocateFromCache( + VkDeviceSize size) { + uint32_t poolIndex = computePoolIndex(size); + DxvkResourceAllocation* allocation = m_pools[poolIndex]; + + if (!allocation) + return nullptr; + + m_pools[poolIndex] = allocation->m_next; + allocation->m_next = nullptr; + return allocation; + } + + + DxvkResourceAllocation* DxvkLocalAllocationCache::assignCache( + VkDeviceSize size, + DxvkResourceAllocation* allocation) { + uint32_t poolIndex = computePoolIndex(size); + return std::exchange(m_pools[poolIndex], allocation); + } + + + void DxvkLocalAllocationCache::freeCache() { + if (m_allocator) + m_allocator->freeLocalCache(this); + } + + + uint32_t DxvkLocalAllocationCache::computePreferredAllocationCount( + VkDeviceSize size) { + uint32_t poolIndex = computePoolIndex(size); + uint32_t count = (DxvkPageAllocator::PageSize / MinSize) >> poolIndex; + + return std::max(count, MinAllocationCountPerPool); + } + + + uint32_t DxvkLocalAllocationCache::computePoolIndex( + VkDeviceSize size) { + return 64u - bit::lzcnt((std::max(size, MinSize) - 1u) / MinSize); + } + + + VkDeviceSize DxvkLocalAllocationCache::computeAllocationSize( + uint32_t index) { + return MinSize << index; + } + + + + DxvkMemoryAllocator::DxvkMemoryAllocator(DxvkDevice* device) : m_device(device) { VkPhysicalDeviceMemoryProperties memInfo = device->adapter()->memoryProperties(); @@ -418,7 +469,8 @@ namespace dxvk { Rc DxvkMemoryAllocator::createBufferResource( const VkBufferCreateInfo& createInfo, - VkMemoryPropertyFlags properties) { + VkMemoryPropertyFlags properties, + DxvkLocalAllocationCache* allocationCache) { Rc allocation; if (likely(!createInfo.flags && createInfo.sharingMode == VK_SHARING_MODE_EXCLUSIVE)) { @@ -430,10 +482,26 @@ namespace dxvk { if (unlikely(createInfo.usage & ~m_globalBufferUsageFlags)) memoryRequirements.memoryTypeBits = findGlobalBufferMemoryTypeMask(createInfo.usage); - // If there is at least one memory type that supports the required - // buffer usage flags and requested memory properties, suballocate - // from a global buffer. if (likely(memoryRequirements.memoryTypeBits)) { + // If the given allocation cache supports the memory types and usage + // flags that we need, try to use it to service this allocation. + if (allocationCache && createInfo.size <= DxvkLocalAllocationCache::MaxSize + && allocationCache->m_memoryTypes && !(allocationCache->m_memoryTypes & ~memoryRequirements.memoryTypeBits)) { + allocation = allocationCache->allocateFromCache(createInfo.size); + + if (likely(allocation)) + return allocation; + + // If the cache is currently empty for the required allocation size, + // make sure it's not. This will also initialize the shared caches + // for any relevant memory pools as necessary. + if (refillAllocationCache(allocationCache, memoryRequirements, properties)) + return allocationCache->allocateFromCache(createInfo.size); + } + + // If there is at least one memory type that supports the required + // buffer usage flags and requested memory properties, suballocate + // from a global buffer. allocation = allocateMemory(memoryRequirements, properties); if (likely(allocation && allocation->m_buffer)) @@ -630,6 +698,19 @@ namespace dxvk { } + DxvkLocalAllocationCache DxvkMemoryAllocator::createAllocationCache( + VkBufferUsageFlags bufferUsage, + VkMemoryPropertyFlags properties) { + uint32_t memoryTypeMask = m_globalBufferMemoryTypes; + + if (bufferUsage & ~m_globalBufferUsageFlags) + memoryTypeMask = findGlobalBufferMemoryTypeMask(bufferUsage); + + memoryTypeMask &= getMemoryTypeMask(properties); + return DxvkLocalAllocationCache(this, memoryTypeMask); + } + + DxvkDeviceMemory DxvkMemoryAllocator::allocateDeviceMemory( DxvkMemoryType& type, VkDeviceSize size, @@ -775,7 +856,7 @@ namespace dxvk { } - Rc DxvkMemoryAllocator::createAllocation( + DxvkResourceAllocation* DxvkMemoryAllocator::createAllocation( DxvkMemoryType& type, DxvkMemoryPool& pool, VkDeviceSize address, @@ -808,7 +889,7 @@ namespace dxvk { } - Rc DxvkMemoryAllocator::createAllocation( + DxvkResourceAllocation* DxvkMemoryAllocator::createAllocation( DxvkMemoryType& type, const DxvkDeviceMemory& memory) { type.stats.memoryUsed += memory.size; @@ -865,6 +946,34 @@ namespace dxvk { } + void DxvkMemoryAllocator::freeLocalCache( + DxvkLocalAllocationCache* cache) { + std::unique_lock lock(m_mutex); + + for (size_t i = 0; i < cache->m_pools.size(); i++) + freeCachedAllocationsLocked(std::exchange(cache->m_pools[i], nullptr)); + } + + + void DxvkMemoryAllocator::freeCachedAllocationsLocked( + DxvkResourceAllocation* allocation) { + while (allocation) { + auto& pool = allocation->m_mapPtr + ? allocation->m_type->mappedPool + : allocation->m_type->devicePool; + + // Cached allocations may have a reference count of 0, but they + // still own the memory, so make sure to release it here. + allocation->m_type->stats.memoryUsed -= allocation->m_size; + + if (unlikely(pool.free(allocation->m_address, allocation->m_size))) + freeEmptyChunksInPool(*allocation->m_type, pool, 0, high_resolution_clock::now()); + + m_allocationPool.free(std::exchange(allocation, allocation->m_next)); + } + } + + void DxvkMemoryAllocator::freeEmptyChunksInHeap( const DxvkMemoryHeap& heap, VkDeviceSize allocationSize, @@ -995,6 +1104,68 @@ namespace dxvk { } + bool DxvkMemoryAllocator::refillAllocationCache( + DxvkLocalAllocationCache* cache, + const VkMemoryRequirements& requirements, + VkMemoryPropertyFlags properties) { + VkDeviceSize allocationSize = (VkDeviceSize(-1) >> bit::lzcnt(requirements.size - 1u)) + 1u; + allocationSize = std::max(allocationSize, DxvkLocalAllocationCache::MinSize); + + // TODO implement shared caches per memory pool + + // No suitable allocations available from the shared cache, create some + // new ones so that subsequent allocations of this size category can be + // handled without locking the allocator. + uint32_t allocationCount = DxvkLocalAllocationCache::computePreferredAllocationCount(allocationSize); + + DxvkResourceAllocation* head = nullptr; + DxvkResourceAllocation* tail = nullptr; + + std::unique_lock lock(m_mutex); + + for (auto typeIndex : bit::BitMask(cache->m_memoryTypes)) { + auto& pool = (properties & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) + ? m_memTypes[typeIndex].mappedPool + : m_memTypes[typeIndex].devicePool; + + while (allocationCount) { + // Try to suballocate from existing chunks, but do not create + // any new chunks. Let the regular code path handle that case + // as necessary. + int64_t address = pool.alloc(allocationSize, requirements.alignment); + + if (address < 0) + break; + + // Add allocation to the list and mark it as cacheable, + // so it will get recycled as-is after use. + DxvkResourceAllocation* allocation = createAllocation( + m_memTypes[typeIndex], pool, address, allocationSize); + allocation->m_flags.set(DxvkAllocationFlag::Cacheable); + + if (tail) { + tail->m_next = allocation; + tail = allocation; + } else { + head = allocation; + tail = allocation; + } + + allocationCount--; + } + + if (!allocationCount) + break; + } + + if (!tail) + return false; + + tail->m_next = cache->assignCache(allocationSize, head); + return true; + } + + void DxvkMemoryAllocator::getAllocationStatsForPool( const DxvkMemoryType& type, const DxvkMemoryPool& pool, diff --git a/src/dxvk/dxvk_memory.h b/src/dxvk/dxvk_memory.h index d92813f75..1ce68e278 100644 --- a/src/dxvk/dxvk_memory.h +++ b/src/dxvk/dxvk_memory.h @@ -420,6 +420,7 @@ namespace dxvk { OwnsMemory = 0, OwnsBuffer = 1, OwnsImage = 2, + Cacheable = 3, }; using DxvkAllocationFlags = Flags; @@ -434,6 +435,8 @@ namespace dxvk { */ class alignas(CACHE_LINE_SIZE) DxvkResourceAllocation { friend DxvkMemoryAllocator; + friend class DxvkLocalAllocationCache; + friend class DxvkSharedAllocationCache; friend class DxvkMemory; public: @@ -579,6 +582,8 @@ namespace dxvk { DxvkMemoryAllocator* m_allocator = nullptr; DxvkMemoryType* m_type = nullptr; + DxvkResourceAllocation* m_next = nullptr; + void free(); static force_inline uint64_t getIncrement(DxvkAccess access) { @@ -604,7 +609,7 @@ namespace dxvk { ~DxvkResourceAllocationPool(); template - Rc create(Args&&... args) { + DxvkResourceAllocation* create(Args&&... args) { return new (alloc()) DxvkResourceAllocation(std::forward(args)...); } @@ -752,6 +757,87 @@ namespace dxvk { }; + /** + * \brief Local allocation cache + * + * Provides pre-allocated memory of supported power-of-two sizes + * in a non-thread safe manner. This is intended to be used for + * context classes in order to reduce lock contention. + */ + class DxvkLocalAllocationCache { + friend class DxvkMemoryAllocator; + public: + constexpr static uint32_t PoolCount = 8u; + constexpr static uint32_t MinAllocationCountPerPool = 8u; + + constexpr static VkDeviceSize MinSize = DxvkPoolAllocator::MinSize; + constexpr static VkDeviceSize MaxSize = MinSize << (PoolCount - 1u); + + DxvkLocalAllocationCache() = default; + + DxvkLocalAllocationCache( + DxvkMemoryAllocator* allocator, + uint32_t memoryTypes) + : m_allocator(allocator), m_memoryTypes(memoryTypes) { } + + DxvkLocalAllocationCache(DxvkLocalAllocationCache&& other) + : m_allocator(other.m_allocator), m_memoryTypes(other.m_memoryTypes), + m_pools(other.m_pools) { + other.m_allocator = nullptr; + other.m_memoryTypes = 0u; + other.m_pools = { }; + } + + DxvkLocalAllocationCache& operator = (DxvkLocalAllocationCache&& other) { + freeCache(); + + m_allocator = other.m_allocator; + m_memoryTypes = other.m_memoryTypes; + m_pools = other.m_pools; + + other.m_allocator = nullptr; + other.m_memoryTypes = 0u; + other.m_pools = { }; + return *this; + } + + ~DxvkLocalAllocationCache() { + freeCache(); + } + + /** + * \brief Computes preferred number of cached allocations + * + * Depends on size so that a large enough number of consecutive + * allocations can be handled by the local cache without wasting + * too much memory on larger allocations. + * \param [in] size Allocation size + */ + static uint32_t computePreferredAllocationCount( + VkDeviceSize size); + + private: + + DxvkMemoryAllocator* m_allocator = nullptr; + uint32_t m_memoryTypes = 0u; + + std::array m_pools = { }; + + DxvkResourceAllocation* allocateFromCache( + VkDeviceSize size); + + DxvkResourceAllocation* assignCache( + VkDeviceSize size, + DxvkResourceAllocation* allocation); + + void freeCache(); + + static uint32_t computePoolIndex( + VkDeviceSize size); + + }; + + /** * \brief Memory allocator * @@ -761,6 +847,7 @@ namespace dxvk { class DxvkMemoryAllocator { friend DxvkMemory; friend DxvkResourceAllocation; + friend DxvkLocalAllocationCache; constexpr static uint64_t DedicatedChunkAddress = 1ull << 63u; @@ -836,11 +923,13 @@ namespace dxvk { * may fall back to creating a dedicated Vulkan buffer. * \param [in] createInfo Buffer create info * \param [in] properties Memory property flags + * \param [in] allocationCache Optional allocation cache * \returns Buffer resource */ Rc createBufferResource( const VkBufferCreateInfo& createInfo, - VkMemoryPropertyFlags properties); + VkMemoryPropertyFlags properties, + DxvkLocalAllocationCache* allocationCache); /** * \brief Creates image resource @@ -855,6 +944,17 @@ namespace dxvk { VkMemoryPropertyFlags properties, const void* next); + /** + * \brief Creates local allocation cache for buffer resources + * + * \param [in] bufferUsage Required buffer usage flags + * \param [in] properties Required memory properties + * \returns Local allocation cache + */ + DxvkLocalAllocationCache createAllocationCache( + VkBufferUsageFlags bufferUsage, + VkMemoryPropertyFlags properties); + /** * \brief Queries memory stats * @@ -942,6 +1042,12 @@ namespace dxvk { void freeAllocation( DxvkResourceAllocation* allocation); + void freeLocalCache( + DxvkLocalAllocationCache* cache); + + void freeCachedAllocationsLocked( + DxvkResourceAllocation* allocation); + uint32_t countEmptyChunksInPool( const DxvkMemoryPool& pool) const; @@ -965,16 +1071,21 @@ namespace dxvk { DxvkDeviceMemory& memory, VkMemoryPropertyFlags properties); - Rc createAllocation( + DxvkResourceAllocation* createAllocation( DxvkMemoryType& type, DxvkMemoryPool& pool, VkDeviceSize address, VkDeviceSize size); - Rc createAllocation( + DxvkResourceAllocation* createAllocation( DxvkMemoryType& type, const DxvkDeviceMemory& memory); + bool refillAllocationCache( + DxvkLocalAllocationCache* cache, + const VkMemoryRequirements& requirements, + VkMemoryPropertyFlags properties); + void getAllocationStatsForPool( const DxvkMemoryType& type, const DxvkMemoryPool& pool,