1
0
mirror of https://github.com/doitsujin/dxvk.git synced 2025-01-19 14:52:10 +01:00

[dxvk] Use worker thread to periodically free unused memory

System memory allocations typically peak very high while loading, but
just sit there unused afterwards. The worker thread allows us to free
them based on when they were last used.

Works well in practice since best-fit avoids using empty chunks as much
as possible.
This commit is contained in:
Philip Rebohle 2024-09-21 10:44:48 +02:00 committed by Philip Rebohle
parent e1fd2bff2c
commit 27088beea8
2 changed files with 112 additions and 37 deletions

View File

@ -95,14 +95,24 @@ namespace dxvk {
m_sparseMemoryTypes = determineSparseMemoryTypes(device); m_sparseMemoryTypes = determineSparseMemoryTypes(device);
determineBufferUsageFlagsPerMemoryType(); determineBufferUsageFlagsPerMemoryType();
// Start worker after setting up everything else
m_worker = dxvk::thread([this] { runWorker(); });
} }
DxvkMemoryAllocator::~DxvkMemoryAllocator() { DxvkMemoryAllocator::~DxvkMemoryAllocator() {
auto vk = m_device->vkd(); auto vk = m_device->vkd();
{ std::unique_lock lock(m_mutex);
m_stopWorker = true;
m_cond.notify_one();
}
m_worker.join();
for (uint32_t i = 0; i < m_memHeapCount; i++) for (uint32_t i = 0; i < m_memHeapCount; i++)
freeEmptyChunksInHeap(m_memHeaps[i], VkDeviceSize(-1)); freeEmptyChunksInHeap(m_memHeaps[i], VkDeviceSize(-1), high_resolution_clock::time_point());
} }
@ -180,14 +190,14 @@ namespace dxvk {
size, selectedPool.maxChunkSize); size, selectedPool.maxChunkSize);
if (freeChunkIndex >= 0) { if (freeChunkIndex >= 0) {
uint32_t poolChunkIndex = selectedPool.pageAllocator.addChunk(oppositePool.chunks[freeChunkIndex].size); uint32_t poolChunkIndex = selectedPool.pageAllocator.addChunk(oppositePool.chunks[freeChunkIndex].memory.size);
selectedPool.chunks.resize(std::max<size_t>(selectedPool.chunks.size(), poolChunkIndex + 1u)); selectedPool.chunks.resize(std::max<size_t>(selectedPool.chunks.size(), poolChunkIndex + 1u));
selectedPool.chunks[poolChunkIndex] = oppositePool.chunks[freeChunkIndex]; selectedPool.chunks[poolChunkIndex] = oppositePool.chunks[freeChunkIndex];
oppositePool.pageAllocator.removeChunk(freeChunkIndex); oppositePool.pageAllocator.removeChunk(freeChunkIndex);
oppositePool.chunks[freeChunkIndex] = DxvkDeviceMemory(); oppositePool.chunks[freeChunkIndex] = DxvkMemoryChunk();
mapDeviceMemory(selectedPool.chunks[poolChunkIndex], properties); mapDeviceMemory(selectedPool.chunks[poolChunkIndex].memory, properties);
address = selectedPool.alloc(size, requirements.alignment); address = selectedPool.alloc(size, requirements.alignment);
@ -261,7 +271,7 @@ namespace dxvk {
auto vk = m_device->vkd(); auto vk = m_device->vkd();
// Preemptively free some unused allocations to reduce memory waste // Preemptively free some unused allocations to reduce memory waste
freeEmptyChunksInHeap(*type.heap, size); freeEmptyChunksInHeap(*type.heap, size, high_resolution_clock::now());
VkMemoryAllocateInfo memoryInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, next }; VkMemoryAllocateInfo memoryInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, next };
memoryInfo.allocationSize = size; memoryInfo.allocationSize = size;
@ -303,7 +313,7 @@ namespace dxvk {
result.size = size; result.size = size;
if (vk->vkAllocateMemory(vk->device(), &memoryInfo, nullptr, &result.memory)) { if (vk->vkAllocateMemory(vk->device(), &memoryInfo, nullptr, &result.memory)) {
freeEmptyChunksInHeap(*type.heap, VkDeviceSize(-1)); freeEmptyChunksInHeap(*type.heap, VkDeviceSize(-1), high_resolution_clock::time_point());
if (vk->vkAllocateMemory(vk->device(), &memoryInfo, nullptr, &result.memory)) if (vk->vkAllocateMemory(vk->device(), &memoryInfo, nullptr, &result.memory))
return DxvkDeviceMemory(); return DxvkDeviceMemory();
@ -385,7 +395,8 @@ namespace dxvk {
uint32_t chunkIndex = pool.pageAllocator.addChunk(chunk.size); uint32_t chunkIndex = pool.pageAllocator.addChunk(chunk.size);
pool.chunks.resize(std::max<size_t>(pool.chunks.size(), chunkIndex + 1u)); pool.chunks.resize(std::max<size_t>(pool.chunks.size(), chunkIndex + 1u));
pool.chunks[chunkIndex] = chunk; pool.chunks[chunkIndex].memory = chunk;
pool.chunks[chunkIndex].unusedTime = high_resolution_clock::time_point();
return true; return true;
} }
@ -400,13 +411,16 @@ namespace dxvk {
m_device->notifyMemoryUse(type.properties.heapIndex, size); m_device->notifyMemoryUse(type.properties.heapIndex, size);
uint32_t chunkIndex = address >> DxvkPageAllocator::ChunkAddressBits; uint32_t chunkIndex = address >> DxvkPageAllocator::ChunkAddressBits;
const auto& chunk = pool.chunks[chunkIndex];
void* mapPtr = chunk.mapPtr auto& chunk = pool.chunks[chunkIndex];
? reinterpret_cast<char*>(chunk.mapPtr) + (address & DxvkPageAllocator::ChunkAddressMask) chunk.unusedTime = high_resolution_clock::time_point();
void* mapPtr = chunk.memory.mapPtr
? reinterpret_cast<char*>(chunk.memory.mapPtr) + (address & DxvkPageAllocator::ChunkAddressMask)
: nullptr; : nullptr;
return DxvkMemory(this, &type, chunk.buffer, chunk.memory, address, size, mapPtr); return DxvkMemory(this, &type, chunk.memory.buffer,
chunk.memory.memory, address, size, mapPtr);
} }
@ -443,7 +457,7 @@ namespace dxvk {
: memory.m_type->devicePool; : memory.m_type->devicePool;
if (unlikely(pool.free(memory.m_address, memory.m_length))) if (unlikely(pool.free(memory.m_address, memory.m_length)))
freeEmptyChunksInPool(*memory.m_type, pool, 0); freeEmptyChunksInPool(*memory.m_type, pool, 0, high_resolution_clock::now());
} }
} }
@ -462,12 +476,13 @@ namespace dxvk {
void DxvkMemoryAllocator::freeEmptyChunksInHeap( void DxvkMemoryAllocator::freeEmptyChunksInHeap(
const DxvkMemoryHeap& heap, const DxvkMemoryHeap& heap,
VkDeviceSize allocationSize) { VkDeviceSize allocationSize,
high_resolution_clock::time_point time) {
for (auto typeIndex : bit::BitMask(heap.memoryTypes)) { for (auto typeIndex : bit::BitMask(heap.memoryTypes)) {
auto& type = m_memTypes[typeIndex]; auto& type = m_memTypes[typeIndex];
freeEmptyChunksInPool(type, type.devicePool, allocationSize); freeEmptyChunksInPool(type, type.devicePool, allocationSize, time);
freeEmptyChunksInPool(type, type.mappedPool, allocationSize); freeEmptyChunksInPool(type, type.mappedPool, allocationSize, time);
} }
} }
@ -475,7 +490,8 @@ namespace dxvk {
void DxvkMemoryAllocator::freeEmptyChunksInPool( void DxvkMemoryAllocator::freeEmptyChunksInPool(
DxvkMemoryType& type, DxvkMemoryType& type,
DxvkMemoryPool& pool, DxvkMemoryPool& pool,
VkDeviceSize allocationSize) { VkDeviceSize allocationSize,
high_resolution_clock::time_point time) {
// Allow for one unused max-size chunk on device-local memory types. // Allow for one unused max-size chunk on device-local memory types.
// For system memory allocations, we need to be more lenient since // For system memory allocations, we need to be more lenient since
// applications will frequently allocate staging buffers. // applications will frequently allocate staging buffers.
@ -486,38 +502,52 @@ namespace dxvk {
&& (&pool == &type.mappedPool)) && (&pool == &type.mappedPool))
maxUnusedMemory *= env::is32BitHostPlatform() ? 2u : 4u; maxUnusedMemory *= env::is32BitHostPlatform() ? 2u : 4u;
// Factor current memory allocation into the decision to free chunks
VkDeviceSize heapBudget = (type.heap->properties.size * 4) / 5; VkDeviceSize heapBudget = (type.heap->properties.size * 4) / 5;
VkDeviceSize heapAllocated = getMemoryStats(type.heap->index).memoryAllocated; VkDeviceSize heapAllocated = getMemoryStats(type.heap->index).memoryAllocated;
VkDeviceSize unusedMemory = 0u; VkDeviceSize unusedMemory = 0u;
for (uint32_t i = 0; i < pool.chunks.size(); i++) { bool chunkFreed = false;
DxvkDeviceMemory chunk = pool.chunks[i];
if (!chunk.memory || pool.pageAllocator.pagesUsed(i)) for (uint32_t i = 0; i < pool.chunks.size(); i++) {
DxvkMemoryChunk& chunk = pool.chunks[i];
if (!chunk.memory.memory || pool.pageAllocator.pagesUsed(i))
continue; continue;
// Free the chunk if it is smaller than the current chunk size of // Free the chunk if it is smaller than the current chunk size of
// the pool, since it is unlikely to be useful for future allocations. // the pool, since it is unlikely to be useful for future allocations.
// Also free if the pending allocation would exceed the heap budget. // Also free if the pending allocation would exceed the heap budget.
bool shouldFree = chunk.size < pool.nextChunkSize bool shouldFree = chunk.memory.size < pool.nextChunkSize
|| allocationSize + heapAllocated > heapBudget || allocationSize + heapAllocated > heapBudget
|| allocationSize > heapBudget; || allocationSize > heapBudget;
// If we don't free the chunk under these conditions, count it towards // If we still don't free the chunk under these conditions, count it
// unused memory in the current memory pool. Once we exceed the limit, // towards unused memory in the current memory pool. Once we exceed
// free any empty chunk we encounter. // the limit, free any empty chunk we encounter.
if (!shouldFree) { if (!shouldFree) {
unusedMemory += chunk.size; unusedMemory += chunk.memory.size;
shouldFree = unusedMemory > maxUnusedMemory; shouldFree = unusedMemory > maxUnusedMemory;
} }
if (shouldFree) { // Free chunks that have not been used in some time, but only free
freeDeviceMemory(type, chunk); // one chunk at a time and keep at least one empty chunk alive.
heapAllocated -= chunk.size; if (!shouldFree && time != high_resolution_clock::time_point()) {
if (chunk.unusedTime == high_resolution_clock::time_point() || chunkFreed)
chunk.unusedTime = time;
else if (unusedMemory > chunk.memory.size)
shouldFree = time - chunk.unusedTime >= std::chrono::seconds(20);
}
pool.chunks[i] = DxvkDeviceMemory(); if (shouldFree) {
freeDeviceMemory(type, chunk.memory);
heapAllocated -= chunk.memory.size;
chunk = DxvkMemoryChunk();
pool.pageAllocator.removeChunk(i); pool.pageAllocator.removeChunk(i);
chunkFreed = true;
} }
} }
} }
@ -528,9 +558,9 @@ namespace dxvk {
VkDeviceSize minSize, VkDeviceSize minSize,
VkDeviceSize maxSize) const { VkDeviceSize maxSize) const {
for (uint32_t i = 0; i < pool.chunks.size(); i++) { for (uint32_t i = 0; i < pool.chunks.size(); i++) {
if (pool.chunks[i].memory const auto& chunk = pool.chunks[i].memory;
&& pool.chunks[i].size >= minSize
&& pool.chunks[i].size <= maxSize if (chunk.memory && chunk.size >= minSize && chunk.size <= maxSize
&& !pool.pageAllocator.pagesUsed(i)) && !pool.pageAllocator.pagesUsed(i))
return int32_t(i); return int32_t(i);
} }
@ -582,13 +612,13 @@ namespace dxvk {
auto& typeStats = stats.memoryTypes[type.index]; auto& typeStats = stats.memoryTypes[type.index];
for (uint32_t i = 0; i < pool.chunks.size(); i++) { for (uint32_t i = 0; i < pool.chunks.size(); i++) {
if (!pool.chunks[i].memory) if (!pool.chunks[i].memory.memory)
continue; continue;
typeStats.chunkCount += 1u; typeStats.chunkCount += 1u;
auto& chunkStats = stats.chunks.emplace_back(); auto& chunkStats = stats.chunks.emplace_back();
chunkStats.capacity = pool.chunks[i].size; chunkStats.capacity = pool.chunks[i].memory.size;
chunkStats.used = pool.pageAllocator.pagesUsed(i) * DxvkPageAllocator::PageSize; chunkStats.used = pool.pageAllocator.pagesUsed(i) * DxvkPageAllocator::PageSize;
chunkStats.pageMaskOffset = stats.pageMasks.size(); chunkStats.pageMaskOffset = stats.pageMasks.size();
chunkStats.pageCount = pool.pageAllocator.pageCount(i); chunkStats.pageCount = pool.pageAllocator.pageCount(i);
@ -933,4 +963,25 @@ namespace dxvk {
return bit::BitMask(mask); return bit::BitMask(mask);
} }
// Entry point of the background thread started by the constructor.
// Takes m_mutex, then loops: wait on m_cond for up to one second,
// exit if m_stopWorker has been set, otherwise sweep all heaps.
void DxvkMemoryAllocator::runWorker() {
  env::setThreadName("dxvk-memory");

  std::unique_lock lock(m_mutex);
  auto stopRequested = [this] { return m_stopWorker; };

  for (;;) {
    // Timed wait that ends early once a stop has been requested
    m_cond.wait_for(lock, std::chrono::seconds(1u), stopRequested);

    if (stopRequested())
      return;

    // Pass the current time along so that the pool sweep can compare
    // it against each empty chunk's unusedTime. The allocation size
    // argument is zero since no allocation is pending here.
    auto now = high_resolution_clock::now();

    for (uint32_t heapIndex = 0; heapIndex < m_memHeapCount; heapIndex++)
      freeEmptyChunksInHeap(m_memHeaps[heapIndex], 0, now);
  }
}
} }

View File

@ -3,6 +3,8 @@
#include "dxvk_adapter.h" #include "dxvk_adapter.h"
#include "dxvk_allocator.h" #include "dxvk_allocator.h"
#include "../util/util_time.h"
namespace dxvk { namespace dxvk {
class DxvkMemoryAllocator; class DxvkMemoryAllocator;
@ -57,6 +59,20 @@ namespace dxvk {
}; };
/**
* \brief Memory chunk
*
* Stores a device memory object with some metadata.
* Memory pools keep one of these per chunk slot.
*/
struct DxvkMemoryChunk {
/// Backing storage for this chunk
DxvkDeviceMemory memory;
/// Time when the chunk has been marked as unused. A default-constructed
/// time point means that the chunk has not been marked yet. The allocator
/// resets the field to that value when allocating memory from the chunk.
high_resolution_clock::time_point unusedTime = { };
};
/** /**
* \brief Memory pool * \brief Memory pool
* *
@ -68,7 +84,7 @@ namespace dxvk {
constexpr static VkDeviceSize MinChunkSize = MaxChunkSize / 64u; constexpr static VkDeviceSize MinChunkSize = MaxChunkSize / 64u;
/// Backing storage for allocated memory chunks /// Backing storage for allocated memory chunks
std::vector<DxvkDeviceMemory> chunks; std::vector<DxvkMemoryChunk> chunks;
/// Memory allocator covering the entire memory pool /// Memory allocator covering the entire memory pool
DxvkPageAllocator pageAllocator; DxvkPageAllocator pageAllocator;
/// Pool allocator that sits on top of the page allocator /// Pool allocator that sits on top of the page allocator
@ -424,6 +440,7 @@ namespace dxvk {
DxvkDevice* m_device; DxvkDevice* m_device;
dxvk::mutex m_mutex; dxvk::mutex m_mutex;
dxvk::condition_variable m_cond;
uint32_t m_memTypeCount = 0u; uint32_t m_memTypeCount = 0u;
uint32_t m_memHeapCount = 0u; uint32_t m_memHeapCount = 0u;
@ -435,6 +452,9 @@ namespace dxvk {
std::array<uint32_t, 16> m_memTypesByPropertyFlags = { }; std::array<uint32_t, 16> m_memTypesByPropertyFlags = { };
dxvk::thread m_worker;
bool m_stopWorker = false;
DxvkDeviceMemory allocateDeviceMemory( DxvkDeviceMemory allocateDeviceMemory(
DxvkMemoryType& type, DxvkMemoryType& type,
VkDeviceSize size, VkDeviceSize size,
@ -465,12 +485,14 @@ namespace dxvk {
void freeEmptyChunksInHeap( void freeEmptyChunksInHeap(
const DxvkMemoryHeap& heap, const DxvkMemoryHeap& heap,
VkDeviceSize allocationSize); VkDeviceSize allocationSize,
high_resolution_clock::time_point time);
void freeEmptyChunksInPool( void freeEmptyChunksInPool(
DxvkMemoryType& type, DxvkMemoryType& type,
DxvkMemoryPool& pool, DxvkMemoryPool& pool,
VkDeviceSize allocationSize); VkDeviceSize allocationSize,
high_resolution_clock::time_point time);
int32_t findEmptyChunkInPool( int32_t findEmptyChunkInPool(
const DxvkMemoryPool& pool, const DxvkMemoryPool& pool,
@ -516,6 +538,8 @@ namespace dxvk {
const VkMemoryRequirements& requirements, const VkMemoryRequirements& requirements,
VkMemoryPropertyFlags properties) const; VkMemoryPropertyFlags properties) const;
void runWorker();
}; };
} }