From 27088beea8be959484c0c2528bb20ef33a22f673 Mon Sep 17 00:00:00 2001 From: Philip Rebohle Date: Sat, 21 Sep 2024 10:44:48 +0200 Subject: [PATCH] [dxvk] Use worker thread to periodically free unused memory System memory allocations typically peak very high while loading, but just sit there unused afterwards. This allows us to free them based on when they have last been used. Works well in practice since best-fit avoids using empty chunks as much as possible. --- src/dxvk/dxvk_memory.cpp | 117 ++++++++++++++++++++++++++++----------- src/dxvk/dxvk_memory.h | 32 +++++++++-- 2 files changed, 112 insertions(+), 37 deletions(-) diff --git a/src/dxvk/dxvk_memory.cpp b/src/dxvk/dxvk_memory.cpp index 5836451da..ce5919e9d 100644 --- a/src/dxvk/dxvk_memory.cpp +++ b/src/dxvk/dxvk_memory.cpp @@ -95,14 +95,24 @@ namespace dxvk { m_sparseMemoryTypes = determineSparseMemoryTypes(device); determineBufferUsageFlagsPerMemoryType(); + + // Start worker after setting up everything else + m_worker = dxvk::thread([this] { runWorker(); }); } DxvkMemoryAllocator::~DxvkMemoryAllocator() { auto vk = m_device->vkd(); + { std::unique_lock lock(m_mutex); + m_stopWorker = true; + m_cond.notify_one(); + } + + m_worker.join(); + for (uint32_t i = 0; i < m_memHeapCount; i++) - freeEmptyChunksInHeap(m_memHeaps[i], VkDeviceSize(-1)); + freeEmptyChunksInHeap(m_memHeaps[i], VkDeviceSize(-1), high_resolution_clock::time_point()); } @@ -180,14 +190,14 @@ namespace dxvk { size, selectedPool.maxChunkSize); if (freeChunkIndex >= 0) { - uint32_t poolChunkIndex = selectedPool.pageAllocator.addChunk(oppositePool.chunks[freeChunkIndex].size); + uint32_t poolChunkIndex = selectedPool.pageAllocator.addChunk(oppositePool.chunks[freeChunkIndex].memory.size); selectedPool.chunks.resize(std::max(selectedPool.chunks.size(), poolChunkIndex + 1u)); selectedPool.chunks[poolChunkIndex] = oppositePool.chunks[freeChunkIndex]; oppositePool.pageAllocator.removeChunk(freeChunkIndex); - oppositePool.chunks[freeChunkIndex] = DxvkDeviceMemory(); + oppositePool.chunks[freeChunkIndex] = DxvkMemoryChunk(); - mapDeviceMemory(selectedPool.chunks[poolChunkIndex], properties); + mapDeviceMemory(selectedPool.chunks[poolChunkIndex].memory, properties); address = selectedPool.alloc(size, requirements.alignment); @@ -261,7 +271,7 @@ namespace dxvk { auto vk = m_device->vkd(); // Preemptively free some unused allocations to reduce memory waste - freeEmptyChunksInHeap(*type.heap, size); + freeEmptyChunksInHeap(*type.heap, size, high_resolution_clock::now()); VkMemoryAllocateInfo memoryInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, next }; memoryInfo.allocationSize = size; @@ -303,7 +313,7 @@ namespace dxvk { result.size = size; if (vk->vkAllocateMemory(vk->device(), &memoryInfo, nullptr, &result.memory)) { - freeEmptyChunksInHeap(*type.heap, VkDeviceSize(-1)); + freeEmptyChunksInHeap(*type.heap, VkDeviceSize(-1), high_resolution_clock::time_point()); if (vk->vkAllocateMemory(vk->device(), &memoryInfo, nullptr, &result.memory)) return DxvkDeviceMemory(); @@ -385,7 +395,8 @@ namespace dxvk { uint32_t chunkIndex = pool.pageAllocator.addChunk(chunk.size); pool.chunks.resize(std::max(pool.chunks.size(), chunkIndex + 1u)); - pool.chunks[chunkIndex] = chunk; + pool.chunks[chunkIndex].memory = chunk; + pool.chunks[chunkIndex].unusedTime = high_resolution_clock::time_point(); return true; } @@ -400,13 +411,16 @@ namespace dxvk { m_device->notifyMemoryUse(type.properties.heapIndex, size); uint32_t chunkIndex = address >> DxvkPageAllocator::ChunkAddressBits; - const auto& chunk = pool.chunks[chunkIndex]; - void* mapPtr = chunk.mapPtr - ? reinterpret_cast(chunk.mapPtr) + (address & DxvkPageAllocator::ChunkAddressMask) + auto& chunk = pool.chunks[chunkIndex]; + chunk.unusedTime = high_resolution_clock::time_point(); + + void* mapPtr = chunk.memory.mapPtr + ? reinterpret_cast(chunk.memory.mapPtr) + (address & DxvkPageAllocator::ChunkAddressMask) : nullptr; - return DxvkMemory(this, &type, chunk.buffer, chunk.memory, address, size, mapPtr); + return DxvkMemory(this, &type, chunk.memory.buffer, + chunk.memory.memory, address, size, mapPtr); } @@ -443,7 +457,7 @@ namespace dxvk { : memory.m_type->devicePool; if (unlikely(pool.free(memory.m_address, memory.m_length))) - freeEmptyChunksInPool(*memory.m_type, pool, 0); + freeEmptyChunksInPool(*memory.m_type, pool, 0, high_resolution_clock::now()); } } @@ -462,12 +476,13 @@ namespace dxvk { void DxvkMemoryAllocator::freeEmptyChunksInHeap( const DxvkMemoryHeap& heap, - VkDeviceSize allocationSize) { + VkDeviceSize allocationSize, + high_resolution_clock::time_point time) { for (auto typeIndex : bit::BitMask(heap.memoryTypes)) { auto& type = m_memTypes[typeIndex]; - freeEmptyChunksInPool(type, type.devicePool, allocationSize); - freeEmptyChunksInPool(type, type.mappedPool, allocationSize); + freeEmptyChunksInPool(type, type.devicePool, allocationSize, time); + freeEmptyChunksInPool(type, type.mappedPool, allocationSize, time); } } @@ -475,7 +490,8 @@ namespace dxvk { void DxvkMemoryAllocator::freeEmptyChunksInPool( DxvkMemoryType& type, DxvkMemoryPool& pool, - VkDeviceSize allocationSize) { + VkDeviceSize allocationSize, + high_resolution_clock::time_point time) { // Allow for one unused max-size chunk on device-local memory types. // For system memory allocations, we need to be more lenient since // applications will frequently allocate staging buffers. @@ -486,38 +502,52 @@ namespace dxvk { && (&pool == &type.mappedPool)) maxUnusedMemory *= env::is32BitHostPlatform() ? 2u : 4u; + // Factor current memory allocation into the decision to free chunks VkDeviceSize heapBudget = (type.heap->properties.size * 4) / 5; VkDeviceSize heapAllocated = getMemoryStats(type.heap->index).memoryAllocated; VkDeviceSize unusedMemory = 0u; - for (uint32_t i = 0; i < pool.chunks.size(); i++) { - DxvkDeviceMemory chunk = pool.chunks[i]; + bool chunkFreed = false; - if (!chunk.memory || pool.pageAllocator.pagesUsed(i)) + for (uint32_t i = 0; i < pool.chunks.size(); i++) { + DxvkMemoryChunk& chunk = pool.chunks[i]; + + if (!chunk.memory.memory || pool.pageAllocator.pagesUsed(i)) continue; // Free the chunk if it is smaller than the current chunk size of // the pool, since it is unlikely to be useful for future allocations. // Also free if the pending allocation would exceed the heap budget. - bool shouldFree = chunk.size < pool.nextChunkSize + bool shouldFree = chunk.memory.size < pool.nextChunkSize || allocationSize + heapAllocated > heapBudget || allocationSize > heapBudget; - // If we don't free the chunk under these conditions, count it towards - // unused memory in the current memory pool. Once we exceed the limit, - // free any empty chunk we encounter. + // If we still don't free the chunk under these conditions, count it + // towards unused memory in the current memory pool. Once we exceed + // the limit, free any empty chunk we encounter. if (!shouldFree) { - unusedMemory += chunk.size; + unusedMemory += chunk.memory.size; shouldFree = unusedMemory > maxUnusedMemory; } - if (shouldFree) { - freeDeviceMemory(type, chunk); - heapAllocated -= chunk.size; + // Free chunks that have not been used in some time, but only free + // one chunk at a time and keep at least one empty chunk alive. + if (!shouldFree && time != high_resolution_clock::time_point()) { + if (chunk.unusedTime == high_resolution_clock::time_point() || chunkFreed) + chunk.unusedTime = time; + else if (unusedMemory > chunk.memory.size) + shouldFree = time - chunk.unusedTime >= std::chrono::seconds(20); + } - pool.chunks[i] = DxvkDeviceMemory(); + if (shouldFree) { + freeDeviceMemory(type, chunk.memory); + heapAllocated -= chunk.memory.size; + + chunk = DxvkMemoryChunk(); pool.pageAllocator.removeChunk(i); + + chunkFreed = true; } } } @@ -528,9 +558,9 @@ namespace dxvk { VkDeviceSize minSize, VkDeviceSize maxSize) const { for (uint32_t i = 0; i < pool.chunks.size(); i++) { - if (pool.chunks[i].memory - && pool.chunks[i].size >= minSize - && pool.chunks[i].size <= maxSize + const auto& chunk = pool.chunks[i].memory; + + if (chunk.memory && chunk.size >= minSize && chunk.size <= maxSize && !pool.pageAllocator.pagesUsed(i)) return int32_t(i); } @@ -582,13 +612,13 @@ namespace dxvk { auto& typeStats = stats.memoryTypes[type.index]; for (uint32_t i = 0; i < pool.chunks.size(); i++) { - if (!pool.chunks[i].memory) + if (!pool.chunks[i].memory.memory) continue; typeStats.chunkCount += 1u; auto& chunkStats = stats.chunks.emplace_back(); - chunkStats.capacity = pool.chunks[i].size; + chunkStats.capacity = pool.chunks[i].memory.size; chunkStats.used = pool.pageAllocator.pagesUsed(i) * DxvkPageAllocator::PageSize; chunkStats.pageMaskOffset = stats.pageMasks.size(); chunkStats.pageCount = pool.pageAllocator.pageCount(i); @@ -933,4 +963,25 @@ namespace dxvk { return bit::BitMask(mask); } + + void DxvkMemoryAllocator::runWorker() { + env::setThreadName("dxvk-memory"); + + std::unique_lock lock(m_mutex); + + while (true) { + m_cond.wait_for(lock, std::chrono::seconds(1u), + [this] { return m_stopWorker; }); + + if (m_stopWorker) + break; + + // Periodically free unused memory chunks + auto currentTime = high_resolution_clock::now(); + + for (uint32_t i = 0; i < m_memHeapCount; i++) + freeEmptyChunksInHeap(m_memHeaps[i], 0, currentTime); + } + } + } diff --git a/src/dxvk/dxvk_memory.h b/src/dxvk/dxvk_memory.h index 9df6555dd..b2c7f2c17 100644 --- a/src/dxvk/dxvk_memory.h +++ b/src/dxvk/dxvk_memory.h @@ -3,6 +3,8 @@ #include "dxvk_adapter.h" #include "dxvk_allocator.h" +#include "../util/util_time.h" + namespace dxvk { class DxvkMemoryAllocator; @@ -56,6 +58,20 @@ namespace dxvk { void* mapPtr = nullptr; }; + + /** + * \brief Memory chunk + * + * Stores a device memory object with some metadata. + */ + struct DxvkMemoryChunk { + /// Backing storage for this chunk + DxvkDeviceMemory memory; + /// Time when the chunk has been marked as unused. Must + /// be set to 0 when allocating memory from the chunk + high_resolution_clock::time_point unusedTime = { }; + }; + /** * \brief Memory pool @@ -68,7 +84,7 @@ namespace dxvk { constexpr static VkDeviceSize MinChunkSize = MaxChunkSize / 64u; /// Backing storage for allocated memory chunks - std::vector chunks; + std::vector chunks; /// Memory allocator covering the entire memory pool DxvkPageAllocator pageAllocator; /// Pool allocator that sits on top of the page allocator @@ -423,7 +439,8 @@ namespace dxvk { DxvkDevice* m_device; - dxvk::mutex m_mutex; + dxvk::mutex m_mutex; + dxvk::condition_variable m_cond; uint32_t m_memTypeCount = 0u; uint32_t m_memHeapCount = 0u; @@ -435,6 +452,9 @@ namespace dxvk { std::array m_memTypesByPropertyFlags = { }; + dxvk::thread m_worker; + bool m_stopWorker = false; + DxvkDeviceMemory allocateDeviceMemory( DxvkMemoryType& type, VkDeviceSize size, @@ -465,12 +485,14 @@ namespace dxvk { void freeEmptyChunksInHeap( const DxvkMemoryHeap& heap, - VkDeviceSize allocationSize); + VkDeviceSize allocationSize, + high_resolution_clock::time_point time); void freeEmptyChunksInPool( DxvkMemoryType& type, DxvkMemoryPool& pool, - VkDeviceSize allocationSize); + VkDeviceSize allocationSize, + high_resolution_clock::time_point time); int32_t findEmptyChunkInPool( const DxvkMemoryPool& pool, @@ -516,6 +538,8 @@ namespace dxvk { const VkMemoryRequirements& requirements, VkMemoryPropertyFlags properties) const; + void runWorker(); + }; }