mirror of
https://github.com/doitsujin/dxvk.git
synced 2025-01-19 14:52:10 +01:00
[dxvk] Use worker thread to periodically free unused memory
System memory allocations typically peak very high while loading, but just sit there unused afterwards. This allows us to free them based on when they have last been used. Works well in practice since best-fit avoids using empty chunks as much as possible.
This commit is contained in:
parent
e1fd2bff2c
commit
27088beea8
@ -95,14 +95,24 @@ namespace dxvk {
|
||||
m_sparseMemoryTypes = determineSparseMemoryTypes(device);
|
||||
|
||||
determineBufferUsageFlagsPerMemoryType();
|
||||
|
||||
// Start worker after setting up everything else
|
||||
m_worker = dxvk::thread([this] { runWorker(); });
|
||||
}
|
||||
|
||||
|
||||
DxvkMemoryAllocator::~DxvkMemoryAllocator() {
|
||||
auto vk = m_device->vkd();
|
||||
|
||||
{ std::unique_lock lock(m_mutex);
|
||||
m_stopWorker = true;
|
||||
m_cond.notify_one();
|
||||
}
|
||||
|
||||
m_worker.join();
|
||||
|
||||
for (uint32_t i = 0; i < m_memHeapCount; i++)
|
||||
freeEmptyChunksInHeap(m_memHeaps[i], VkDeviceSize(-1));
|
||||
freeEmptyChunksInHeap(m_memHeaps[i], VkDeviceSize(-1), high_resolution_clock::time_point());
|
||||
}
|
||||
|
||||
|
||||
@ -180,14 +190,14 @@ namespace dxvk {
|
||||
size, selectedPool.maxChunkSize);
|
||||
|
||||
if (freeChunkIndex >= 0) {
|
||||
uint32_t poolChunkIndex = selectedPool.pageAllocator.addChunk(oppositePool.chunks[freeChunkIndex].size);
|
||||
uint32_t poolChunkIndex = selectedPool.pageAllocator.addChunk(oppositePool.chunks[freeChunkIndex].memory.size);
|
||||
selectedPool.chunks.resize(std::max<size_t>(selectedPool.chunks.size(), poolChunkIndex + 1u));
|
||||
selectedPool.chunks[poolChunkIndex] = oppositePool.chunks[freeChunkIndex];
|
||||
|
||||
oppositePool.pageAllocator.removeChunk(freeChunkIndex);
|
||||
oppositePool.chunks[freeChunkIndex] = DxvkDeviceMemory();
|
||||
oppositePool.chunks[freeChunkIndex] = DxvkMemoryChunk();
|
||||
|
||||
mapDeviceMemory(selectedPool.chunks[poolChunkIndex], properties);
|
||||
mapDeviceMemory(selectedPool.chunks[poolChunkIndex].memory, properties);
|
||||
|
||||
address = selectedPool.alloc(size, requirements.alignment);
|
||||
|
||||
@ -261,7 +271,7 @@ namespace dxvk {
|
||||
auto vk = m_device->vkd();
|
||||
|
||||
// Preemptively free some unused allocations to reduce memory waste
|
||||
freeEmptyChunksInHeap(*type.heap, size);
|
||||
freeEmptyChunksInHeap(*type.heap, size, high_resolution_clock::now());
|
||||
|
||||
VkMemoryAllocateInfo memoryInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, next };
|
||||
memoryInfo.allocationSize = size;
|
||||
@ -303,7 +313,7 @@ namespace dxvk {
|
||||
result.size = size;
|
||||
|
||||
if (vk->vkAllocateMemory(vk->device(), &memoryInfo, nullptr, &result.memory)) {
|
||||
freeEmptyChunksInHeap(*type.heap, VkDeviceSize(-1));
|
||||
freeEmptyChunksInHeap(*type.heap, VkDeviceSize(-1), high_resolution_clock::time_point());
|
||||
|
||||
if (vk->vkAllocateMemory(vk->device(), &memoryInfo, nullptr, &result.memory))
|
||||
return DxvkDeviceMemory();
|
||||
@ -385,7 +395,8 @@ namespace dxvk {
|
||||
uint32_t chunkIndex = pool.pageAllocator.addChunk(chunk.size);
|
||||
|
||||
pool.chunks.resize(std::max<size_t>(pool.chunks.size(), chunkIndex + 1u));
|
||||
pool.chunks[chunkIndex] = chunk;
|
||||
pool.chunks[chunkIndex].memory = chunk;
|
||||
pool.chunks[chunkIndex].unusedTime = high_resolution_clock::time_point();
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -400,13 +411,16 @@ namespace dxvk {
|
||||
m_device->notifyMemoryUse(type.properties.heapIndex, size);
|
||||
|
||||
uint32_t chunkIndex = address >> DxvkPageAllocator::ChunkAddressBits;
|
||||
const auto& chunk = pool.chunks[chunkIndex];
|
||||
|
||||
void* mapPtr = chunk.mapPtr
|
||||
? reinterpret_cast<char*>(chunk.mapPtr) + (address & DxvkPageAllocator::ChunkAddressMask)
|
||||
auto& chunk = pool.chunks[chunkIndex];
|
||||
chunk.unusedTime = high_resolution_clock::time_point();
|
||||
|
||||
void* mapPtr = chunk.memory.mapPtr
|
||||
? reinterpret_cast<char*>(chunk.memory.mapPtr) + (address & DxvkPageAllocator::ChunkAddressMask)
|
||||
: nullptr;
|
||||
|
||||
return DxvkMemory(this, &type, chunk.buffer, chunk.memory, address, size, mapPtr);
|
||||
return DxvkMemory(this, &type, chunk.memory.buffer,
|
||||
chunk.memory.memory, address, size, mapPtr);
|
||||
}
|
||||
|
||||
|
||||
@ -443,7 +457,7 @@ namespace dxvk {
|
||||
: memory.m_type->devicePool;
|
||||
|
||||
if (unlikely(pool.free(memory.m_address, memory.m_length)))
|
||||
freeEmptyChunksInPool(*memory.m_type, pool, 0);
|
||||
freeEmptyChunksInPool(*memory.m_type, pool, 0, high_resolution_clock::now());
|
||||
}
|
||||
}
|
||||
|
||||
@ -462,12 +476,13 @@ namespace dxvk {
|
||||
|
||||
void DxvkMemoryAllocator::freeEmptyChunksInHeap(
|
||||
const DxvkMemoryHeap& heap,
|
||||
VkDeviceSize allocationSize) {
|
||||
VkDeviceSize allocationSize,
|
||||
high_resolution_clock::time_point time) {
|
||||
for (auto typeIndex : bit::BitMask(heap.memoryTypes)) {
|
||||
auto& type = m_memTypes[typeIndex];
|
||||
|
||||
freeEmptyChunksInPool(type, type.devicePool, allocationSize);
|
||||
freeEmptyChunksInPool(type, type.mappedPool, allocationSize);
|
||||
freeEmptyChunksInPool(type, type.devicePool, allocationSize, time);
|
||||
freeEmptyChunksInPool(type, type.mappedPool, allocationSize, time);
|
||||
}
|
||||
}
|
||||
|
||||
@ -475,7 +490,8 @@ namespace dxvk {
|
||||
void DxvkMemoryAllocator::freeEmptyChunksInPool(
|
||||
DxvkMemoryType& type,
|
||||
DxvkMemoryPool& pool,
|
||||
VkDeviceSize allocationSize) {
|
||||
VkDeviceSize allocationSize,
|
||||
high_resolution_clock::time_point time) {
|
||||
// Allow for one unused max-size chunk on device-local memory types.
|
||||
// For system memory allocations, we need to be more lenient since
|
||||
// applications will frequently allocate staging buffers.
|
||||
@ -486,38 +502,52 @@ namespace dxvk {
|
||||
&& (&pool == &type.mappedPool))
|
||||
maxUnusedMemory *= env::is32BitHostPlatform() ? 2u : 4u;
|
||||
|
||||
// Factor current memory allocation into the decision to free chunks
|
||||
VkDeviceSize heapBudget = (type.heap->properties.size * 4) / 5;
|
||||
VkDeviceSize heapAllocated = getMemoryStats(type.heap->index).memoryAllocated;
|
||||
|
||||
VkDeviceSize unusedMemory = 0u;
|
||||
|
||||
for (uint32_t i = 0; i < pool.chunks.size(); i++) {
|
||||
DxvkDeviceMemory chunk = pool.chunks[i];
|
||||
bool chunkFreed = false;
|
||||
|
||||
if (!chunk.memory || pool.pageAllocator.pagesUsed(i))
|
||||
for (uint32_t i = 0; i < pool.chunks.size(); i++) {
|
||||
DxvkMemoryChunk& chunk = pool.chunks[i];
|
||||
|
||||
if (!chunk.memory.memory || pool.pageAllocator.pagesUsed(i))
|
||||
continue;
|
||||
|
||||
// Free the chunk if it is smaller than the current chunk size of
|
||||
// the pool, since it is unlikely to be useful for future allocations.
|
||||
// Also free if the pending allocation would exceed the heap budget.
|
||||
bool shouldFree = chunk.size < pool.nextChunkSize
|
||||
bool shouldFree = chunk.memory.size < pool.nextChunkSize
|
||||
|| allocationSize + heapAllocated > heapBudget
|
||||
|| allocationSize > heapBudget;
|
||||
|
||||
// If we don't free the chunk under these conditions, count it towards
|
||||
// unused memory in the current memory pool. Once we exceed the limit,
|
||||
// free any empty chunk we encounter.
|
||||
// If we still don't free the chunk under these conditions, count it
|
||||
// towards unused memory in the current memory pool. Once we exceed
|
||||
// the limit, free any empty chunk we encounter.
|
||||
if (!shouldFree) {
|
||||
unusedMemory += chunk.size;
|
||||
unusedMemory += chunk.memory.size;
|
||||
shouldFree = unusedMemory > maxUnusedMemory;
|
||||
}
|
||||
|
||||
if (shouldFree) {
|
||||
freeDeviceMemory(type, chunk);
|
||||
heapAllocated -= chunk.size;
|
||||
// Free chunks that have not been used in some time, but only free
|
||||
// one chunk at a time and keep at least one empty chunk alive.
|
||||
if (!shouldFree && time != high_resolution_clock::time_point()) {
|
||||
if (chunk.unusedTime == high_resolution_clock::time_point() || chunkFreed)
|
||||
chunk.unusedTime = time;
|
||||
else if (unusedMemory > chunk.memory.size)
|
||||
shouldFree = time - chunk.unusedTime >= std::chrono::seconds(20);
|
||||
}
|
||||
|
||||
pool.chunks[i] = DxvkDeviceMemory();
|
||||
if (shouldFree) {
|
||||
freeDeviceMemory(type, chunk.memory);
|
||||
heapAllocated -= chunk.memory.size;
|
||||
|
||||
chunk = DxvkMemoryChunk();
|
||||
pool.pageAllocator.removeChunk(i);
|
||||
|
||||
chunkFreed = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -528,9 +558,9 @@ namespace dxvk {
|
||||
VkDeviceSize minSize,
|
||||
VkDeviceSize maxSize) const {
|
||||
for (uint32_t i = 0; i < pool.chunks.size(); i++) {
|
||||
if (pool.chunks[i].memory
|
||||
&& pool.chunks[i].size >= minSize
|
||||
&& pool.chunks[i].size <= maxSize
|
||||
const auto& chunk = pool.chunks[i].memory;
|
||||
|
||||
if (chunk.memory && chunk.size >= minSize && chunk.size <= maxSize
|
||||
&& !pool.pageAllocator.pagesUsed(i))
|
||||
return int32_t(i);
|
||||
}
|
||||
@ -582,13 +612,13 @@ namespace dxvk {
|
||||
auto& typeStats = stats.memoryTypes[type.index];
|
||||
|
||||
for (uint32_t i = 0; i < pool.chunks.size(); i++) {
|
||||
if (!pool.chunks[i].memory)
|
||||
if (!pool.chunks[i].memory.memory)
|
||||
continue;
|
||||
|
||||
typeStats.chunkCount += 1u;
|
||||
|
||||
auto& chunkStats = stats.chunks.emplace_back();
|
||||
chunkStats.capacity = pool.chunks[i].size;
|
||||
chunkStats.capacity = pool.chunks[i].memory.size;
|
||||
chunkStats.used = pool.pageAllocator.pagesUsed(i) * DxvkPageAllocator::PageSize;
|
||||
chunkStats.pageMaskOffset = stats.pageMasks.size();
|
||||
chunkStats.pageCount = pool.pageAllocator.pageCount(i);
|
||||
@ -933,4 +963,25 @@ namespace dxvk {
|
||||
return bit::BitMask(mask);
|
||||
}
|
||||
|
||||
|
||||
void DxvkMemoryAllocator::runWorker() {
|
||||
env::setThreadName("dxvk-memory");
|
||||
|
||||
std::unique_lock lock(m_mutex);
|
||||
|
||||
while (true) {
|
||||
m_cond.wait_for(lock, std::chrono::seconds(1u),
|
||||
[this] { return m_stopWorker; });
|
||||
|
||||
if (m_stopWorker)
|
||||
break;
|
||||
|
||||
// Periodically free unused memory chunks
|
||||
auto currentTime = high_resolution_clock::now();
|
||||
|
||||
for (uint32_t i = 0; i < m_memHeapCount; i++)
|
||||
freeEmptyChunksInHeap(m_memHeaps[i], 0, currentTime);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -3,6 +3,8 @@
|
||||
#include "dxvk_adapter.h"
|
||||
#include "dxvk_allocator.h"
|
||||
|
||||
#include "../util/util_time.h"
|
||||
|
||||
namespace dxvk {
|
||||
|
||||
class DxvkMemoryAllocator;
|
||||
@ -56,6 +58,20 @@ namespace dxvk {
|
||||
void* mapPtr = nullptr;
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* \brief Memory chunk
|
||||
*
|
||||
* Stores a device memory object with some metadata.
|
||||
*/
|
||||
struct DxvkMemoryChunk {
|
||||
/// Backing storage for this chunk
|
||||
DxvkDeviceMemory memory;
|
||||
/// Time when the chunk has been marked as unused. Must
|
||||
/// be set to 0 when allocating memory from the chunk
|
||||
high_resolution_clock::time_point unusedTime = { };
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* \brief Memory pool
|
||||
@ -68,7 +84,7 @@ namespace dxvk {
|
||||
constexpr static VkDeviceSize MinChunkSize = MaxChunkSize / 64u;
|
||||
|
||||
/// Backing storage for allocated memory chunks
|
||||
std::vector<DxvkDeviceMemory> chunks;
|
||||
std::vector<DxvkMemoryChunk> chunks;
|
||||
/// Memory allocator covering the entire memory pool
|
||||
DxvkPageAllocator pageAllocator;
|
||||
/// Pool allocator that sits on top of the page allocator
|
||||
@ -423,7 +439,8 @@ namespace dxvk {
|
||||
|
||||
DxvkDevice* m_device;
|
||||
|
||||
dxvk::mutex m_mutex;
|
||||
dxvk::mutex m_mutex;
|
||||
dxvk::condition_variable m_cond;
|
||||
|
||||
uint32_t m_memTypeCount = 0u;
|
||||
uint32_t m_memHeapCount = 0u;
|
||||
@ -435,6 +452,9 @@ namespace dxvk {
|
||||
|
||||
std::array<uint32_t, 16> m_memTypesByPropertyFlags = { };
|
||||
|
||||
dxvk::thread m_worker;
|
||||
bool m_stopWorker = false;
|
||||
|
||||
DxvkDeviceMemory allocateDeviceMemory(
|
||||
DxvkMemoryType& type,
|
||||
VkDeviceSize size,
|
||||
@ -465,12 +485,14 @@ namespace dxvk {
|
||||
|
||||
void freeEmptyChunksInHeap(
|
||||
const DxvkMemoryHeap& heap,
|
||||
VkDeviceSize allocationSize);
|
||||
VkDeviceSize allocationSize,
|
||||
high_resolution_clock::time_point time);
|
||||
|
||||
void freeEmptyChunksInPool(
|
||||
DxvkMemoryType& type,
|
||||
DxvkMemoryPool& pool,
|
||||
VkDeviceSize allocationSize);
|
||||
VkDeviceSize allocationSize,
|
||||
high_resolution_clock::time_point time);
|
||||
|
||||
int32_t findEmptyChunkInPool(
|
||||
const DxvkMemoryPool& pool,
|
||||
@ -516,6 +538,8 @@ namespace dxvk {
|
||||
const VkMemoryRequirements& requirements,
|
||||
VkMemoryPropertyFlags properties) const;
|
||||
|
||||
void runWorker();
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user