mirror of
https://github.com/doitsujin/dxvk.git
synced 2025-01-19 14:52:10 +01:00
[dxvk] Use worker thread to periodically free unused memory
System memory allocations typically peak very high while loading, but then just sit there unused afterwards. This allows us to free them based on when they were last used. This works well in practice since best-fit allocation avoids using empty chunks as much as possible.
This commit is contained in:
parent
e1fd2bff2c
commit
27088beea8
@ -95,14 +95,24 @@ namespace dxvk {
|
|||||||
m_sparseMemoryTypes = determineSparseMemoryTypes(device);
|
m_sparseMemoryTypes = determineSparseMemoryTypes(device);
|
||||||
|
|
||||||
determineBufferUsageFlagsPerMemoryType();
|
determineBufferUsageFlagsPerMemoryType();
|
||||||
|
|
||||||
|
// Start worker after setting up everything else
|
||||||
|
m_worker = dxvk::thread([this] { runWorker(); });
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
DxvkMemoryAllocator::~DxvkMemoryAllocator() {
|
DxvkMemoryAllocator::~DxvkMemoryAllocator() {
|
||||||
auto vk = m_device->vkd();
|
auto vk = m_device->vkd();
|
||||||
|
|
||||||
|
{ std::unique_lock lock(m_mutex);
|
||||||
|
m_stopWorker = true;
|
||||||
|
m_cond.notify_one();
|
||||||
|
}
|
||||||
|
|
||||||
|
m_worker.join();
|
||||||
|
|
||||||
for (uint32_t i = 0; i < m_memHeapCount; i++)
|
for (uint32_t i = 0; i < m_memHeapCount; i++)
|
||||||
freeEmptyChunksInHeap(m_memHeaps[i], VkDeviceSize(-1));
|
freeEmptyChunksInHeap(m_memHeaps[i], VkDeviceSize(-1), high_resolution_clock::time_point());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -180,14 +190,14 @@ namespace dxvk {
|
|||||||
size, selectedPool.maxChunkSize);
|
size, selectedPool.maxChunkSize);
|
||||||
|
|
||||||
if (freeChunkIndex >= 0) {
|
if (freeChunkIndex >= 0) {
|
||||||
uint32_t poolChunkIndex = selectedPool.pageAllocator.addChunk(oppositePool.chunks[freeChunkIndex].size);
|
uint32_t poolChunkIndex = selectedPool.pageAllocator.addChunk(oppositePool.chunks[freeChunkIndex].memory.size);
|
||||||
selectedPool.chunks.resize(std::max<size_t>(selectedPool.chunks.size(), poolChunkIndex + 1u));
|
selectedPool.chunks.resize(std::max<size_t>(selectedPool.chunks.size(), poolChunkIndex + 1u));
|
||||||
selectedPool.chunks[poolChunkIndex] = oppositePool.chunks[freeChunkIndex];
|
selectedPool.chunks[poolChunkIndex] = oppositePool.chunks[freeChunkIndex];
|
||||||
|
|
||||||
oppositePool.pageAllocator.removeChunk(freeChunkIndex);
|
oppositePool.pageAllocator.removeChunk(freeChunkIndex);
|
||||||
oppositePool.chunks[freeChunkIndex] = DxvkDeviceMemory();
|
oppositePool.chunks[freeChunkIndex] = DxvkMemoryChunk();
|
||||||
|
|
||||||
mapDeviceMemory(selectedPool.chunks[poolChunkIndex], properties);
|
mapDeviceMemory(selectedPool.chunks[poolChunkIndex].memory, properties);
|
||||||
|
|
||||||
address = selectedPool.alloc(size, requirements.alignment);
|
address = selectedPool.alloc(size, requirements.alignment);
|
||||||
|
|
||||||
@ -261,7 +271,7 @@ namespace dxvk {
|
|||||||
auto vk = m_device->vkd();
|
auto vk = m_device->vkd();
|
||||||
|
|
||||||
// Preemptively free some unused allocations to reduce memory waste
|
// Preemptively free some unused allocations to reduce memory waste
|
||||||
freeEmptyChunksInHeap(*type.heap, size);
|
freeEmptyChunksInHeap(*type.heap, size, high_resolution_clock::now());
|
||||||
|
|
||||||
VkMemoryAllocateInfo memoryInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, next };
|
VkMemoryAllocateInfo memoryInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, next };
|
||||||
memoryInfo.allocationSize = size;
|
memoryInfo.allocationSize = size;
|
||||||
@ -303,7 +313,7 @@ namespace dxvk {
|
|||||||
result.size = size;
|
result.size = size;
|
||||||
|
|
||||||
if (vk->vkAllocateMemory(vk->device(), &memoryInfo, nullptr, &result.memory)) {
|
if (vk->vkAllocateMemory(vk->device(), &memoryInfo, nullptr, &result.memory)) {
|
||||||
freeEmptyChunksInHeap(*type.heap, VkDeviceSize(-1));
|
freeEmptyChunksInHeap(*type.heap, VkDeviceSize(-1), high_resolution_clock::time_point());
|
||||||
|
|
||||||
if (vk->vkAllocateMemory(vk->device(), &memoryInfo, nullptr, &result.memory))
|
if (vk->vkAllocateMemory(vk->device(), &memoryInfo, nullptr, &result.memory))
|
||||||
return DxvkDeviceMemory();
|
return DxvkDeviceMemory();
|
||||||
@ -385,7 +395,8 @@ namespace dxvk {
|
|||||||
uint32_t chunkIndex = pool.pageAllocator.addChunk(chunk.size);
|
uint32_t chunkIndex = pool.pageAllocator.addChunk(chunk.size);
|
||||||
|
|
||||||
pool.chunks.resize(std::max<size_t>(pool.chunks.size(), chunkIndex + 1u));
|
pool.chunks.resize(std::max<size_t>(pool.chunks.size(), chunkIndex + 1u));
|
||||||
pool.chunks[chunkIndex] = chunk;
|
pool.chunks[chunkIndex].memory = chunk;
|
||||||
|
pool.chunks[chunkIndex].unusedTime = high_resolution_clock::time_point();
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -400,13 +411,16 @@ namespace dxvk {
|
|||||||
m_device->notifyMemoryUse(type.properties.heapIndex, size);
|
m_device->notifyMemoryUse(type.properties.heapIndex, size);
|
||||||
|
|
||||||
uint32_t chunkIndex = address >> DxvkPageAllocator::ChunkAddressBits;
|
uint32_t chunkIndex = address >> DxvkPageAllocator::ChunkAddressBits;
|
||||||
const auto& chunk = pool.chunks[chunkIndex];
|
|
||||||
|
|
||||||
void* mapPtr = chunk.mapPtr
|
auto& chunk = pool.chunks[chunkIndex];
|
||||||
? reinterpret_cast<char*>(chunk.mapPtr) + (address & DxvkPageAllocator::ChunkAddressMask)
|
chunk.unusedTime = high_resolution_clock::time_point();
|
||||||
|
|
||||||
|
void* mapPtr = chunk.memory.mapPtr
|
||||||
|
? reinterpret_cast<char*>(chunk.memory.mapPtr) + (address & DxvkPageAllocator::ChunkAddressMask)
|
||||||
: nullptr;
|
: nullptr;
|
||||||
|
|
||||||
return DxvkMemory(this, &type, chunk.buffer, chunk.memory, address, size, mapPtr);
|
return DxvkMemory(this, &type, chunk.memory.buffer,
|
||||||
|
chunk.memory.memory, address, size, mapPtr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -443,7 +457,7 @@ namespace dxvk {
|
|||||||
: memory.m_type->devicePool;
|
: memory.m_type->devicePool;
|
||||||
|
|
||||||
if (unlikely(pool.free(memory.m_address, memory.m_length)))
|
if (unlikely(pool.free(memory.m_address, memory.m_length)))
|
||||||
freeEmptyChunksInPool(*memory.m_type, pool, 0);
|
freeEmptyChunksInPool(*memory.m_type, pool, 0, high_resolution_clock::now());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -462,12 +476,13 @@ namespace dxvk {
|
|||||||
|
|
||||||
void DxvkMemoryAllocator::freeEmptyChunksInHeap(
|
void DxvkMemoryAllocator::freeEmptyChunksInHeap(
|
||||||
const DxvkMemoryHeap& heap,
|
const DxvkMemoryHeap& heap,
|
||||||
VkDeviceSize allocationSize) {
|
VkDeviceSize allocationSize,
|
||||||
|
high_resolution_clock::time_point time) {
|
||||||
for (auto typeIndex : bit::BitMask(heap.memoryTypes)) {
|
for (auto typeIndex : bit::BitMask(heap.memoryTypes)) {
|
||||||
auto& type = m_memTypes[typeIndex];
|
auto& type = m_memTypes[typeIndex];
|
||||||
|
|
||||||
freeEmptyChunksInPool(type, type.devicePool, allocationSize);
|
freeEmptyChunksInPool(type, type.devicePool, allocationSize, time);
|
||||||
freeEmptyChunksInPool(type, type.mappedPool, allocationSize);
|
freeEmptyChunksInPool(type, type.mappedPool, allocationSize, time);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -475,7 +490,8 @@ namespace dxvk {
|
|||||||
void DxvkMemoryAllocator::freeEmptyChunksInPool(
|
void DxvkMemoryAllocator::freeEmptyChunksInPool(
|
||||||
DxvkMemoryType& type,
|
DxvkMemoryType& type,
|
||||||
DxvkMemoryPool& pool,
|
DxvkMemoryPool& pool,
|
||||||
VkDeviceSize allocationSize) {
|
VkDeviceSize allocationSize,
|
||||||
|
high_resolution_clock::time_point time) {
|
||||||
// Allow for one unused max-size chunk on device-local memory types.
|
// Allow for one unused max-size chunk on device-local memory types.
|
||||||
// For system memory allocations, we need to be more lenient since
|
// For system memory allocations, we need to be more lenient since
|
||||||
// applications will frequently allocate staging buffers.
|
// applications will frequently allocate staging buffers.
|
||||||
@ -486,38 +502,52 @@ namespace dxvk {
|
|||||||
&& (&pool == &type.mappedPool))
|
&& (&pool == &type.mappedPool))
|
||||||
maxUnusedMemory *= env::is32BitHostPlatform() ? 2u : 4u;
|
maxUnusedMemory *= env::is32BitHostPlatform() ? 2u : 4u;
|
||||||
|
|
||||||
|
// Factor current memory allocation into the decision to free chunks
|
||||||
VkDeviceSize heapBudget = (type.heap->properties.size * 4) / 5;
|
VkDeviceSize heapBudget = (type.heap->properties.size * 4) / 5;
|
||||||
VkDeviceSize heapAllocated = getMemoryStats(type.heap->index).memoryAllocated;
|
VkDeviceSize heapAllocated = getMemoryStats(type.heap->index).memoryAllocated;
|
||||||
|
|
||||||
VkDeviceSize unusedMemory = 0u;
|
VkDeviceSize unusedMemory = 0u;
|
||||||
|
|
||||||
for (uint32_t i = 0; i < pool.chunks.size(); i++) {
|
bool chunkFreed = false;
|
||||||
DxvkDeviceMemory chunk = pool.chunks[i];
|
|
||||||
|
|
||||||
if (!chunk.memory || pool.pageAllocator.pagesUsed(i))
|
for (uint32_t i = 0; i < pool.chunks.size(); i++) {
|
||||||
|
DxvkMemoryChunk& chunk = pool.chunks[i];
|
||||||
|
|
||||||
|
if (!chunk.memory.memory || pool.pageAllocator.pagesUsed(i))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
// Free the chunk if it is smaller than the current chunk size of
|
// Free the chunk if it is smaller than the current chunk size of
|
||||||
// the pool, since it is unlikely to be useful for future allocations.
|
// the pool, since it is unlikely to be useful for future allocations.
|
||||||
// Also free if the pending allocation would exceed the heap budget.
|
// Also free if the pending allocation would exceed the heap budget.
|
||||||
bool shouldFree = chunk.size < pool.nextChunkSize
|
bool shouldFree = chunk.memory.size < pool.nextChunkSize
|
||||||
|| allocationSize + heapAllocated > heapBudget
|
|| allocationSize + heapAllocated > heapBudget
|
||||||
|| allocationSize > heapBudget;
|
|| allocationSize > heapBudget;
|
||||||
|
|
||||||
// If we don't free the chunk under these conditions, count it towards
|
// If we still don't free the chunk under these conditions, count it
|
||||||
// unused memory in the current memory pool. Once we exceed the limit,
|
// towards unused memory in the current memory pool. Once we exceed
|
||||||
// free any empty chunk we encounter.
|
// the limit, free any empty chunk we encounter.
|
||||||
if (!shouldFree) {
|
if (!shouldFree) {
|
||||||
unusedMemory += chunk.size;
|
unusedMemory += chunk.memory.size;
|
||||||
shouldFree = unusedMemory > maxUnusedMemory;
|
shouldFree = unusedMemory > maxUnusedMemory;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (shouldFree) {
|
// Free chunks that have not been used in some time, but only free
|
||||||
freeDeviceMemory(type, chunk);
|
// one chunk at a time and keep at least one empty chunk alive.
|
||||||
heapAllocated -= chunk.size;
|
if (!shouldFree && time != high_resolution_clock::time_point()) {
|
||||||
|
if (chunk.unusedTime == high_resolution_clock::time_point() || chunkFreed)
|
||||||
|
chunk.unusedTime = time;
|
||||||
|
else if (unusedMemory > chunk.memory.size)
|
||||||
|
shouldFree = time - chunk.unusedTime >= std::chrono::seconds(20);
|
||||||
|
}
|
||||||
|
|
||||||
pool.chunks[i] = DxvkDeviceMemory();
|
if (shouldFree) {
|
||||||
|
freeDeviceMemory(type, chunk.memory);
|
||||||
|
heapAllocated -= chunk.memory.size;
|
||||||
|
|
||||||
|
chunk = DxvkMemoryChunk();
|
||||||
pool.pageAllocator.removeChunk(i);
|
pool.pageAllocator.removeChunk(i);
|
||||||
|
|
||||||
|
chunkFreed = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -528,9 +558,9 @@ namespace dxvk {
|
|||||||
VkDeviceSize minSize,
|
VkDeviceSize minSize,
|
||||||
VkDeviceSize maxSize) const {
|
VkDeviceSize maxSize) const {
|
||||||
for (uint32_t i = 0; i < pool.chunks.size(); i++) {
|
for (uint32_t i = 0; i < pool.chunks.size(); i++) {
|
||||||
if (pool.chunks[i].memory
|
const auto& chunk = pool.chunks[i].memory;
|
||||||
&& pool.chunks[i].size >= minSize
|
|
||||||
&& pool.chunks[i].size <= maxSize
|
if (chunk.memory && chunk.size >= minSize && chunk.size <= maxSize
|
||||||
&& !pool.pageAllocator.pagesUsed(i))
|
&& !pool.pageAllocator.pagesUsed(i))
|
||||||
return int32_t(i);
|
return int32_t(i);
|
||||||
}
|
}
|
||||||
@ -582,13 +612,13 @@ namespace dxvk {
|
|||||||
auto& typeStats = stats.memoryTypes[type.index];
|
auto& typeStats = stats.memoryTypes[type.index];
|
||||||
|
|
||||||
for (uint32_t i = 0; i < pool.chunks.size(); i++) {
|
for (uint32_t i = 0; i < pool.chunks.size(); i++) {
|
||||||
if (!pool.chunks[i].memory)
|
if (!pool.chunks[i].memory.memory)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
typeStats.chunkCount += 1u;
|
typeStats.chunkCount += 1u;
|
||||||
|
|
||||||
auto& chunkStats = stats.chunks.emplace_back();
|
auto& chunkStats = stats.chunks.emplace_back();
|
||||||
chunkStats.capacity = pool.chunks[i].size;
|
chunkStats.capacity = pool.chunks[i].memory.size;
|
||||||
chunkStats.used = pool.pageAllocator.pagesUsed(i) * DxvkPageAllocator::PageSize;
|
chunkStats.used = pool.pageAllocator.pagesUsed(i) * DxvkPageAllocator::PageSize;
|
||||||
chunkStats.pageMaskOffset = stats.pageMasks.size();
|
chunkStats.pageMaskOffset = stats.pageMasks.size();
|
||||||
chunkStats.pageCount = pool.pageAllocator.pageCount(i);
|
chunkStats.pageCount = pool.pageAllocator.pageCount(i);
|
||||||
@ -933,4 +963,25 @@ namespace dxvk {
|
|||||||
return bit::BitMask(mask);
|
return bit::BitMask(mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void DxvkMemoryAllocator::runWorker() {
|
||||||
|
env::setThreadName("dxvk-memory");
|
||||||
|
|
||||||
|
std::unique_lock lock(m_mutex);
|
||||||
|
|
||||||
|
while (true) {
|
||||||
|
m_cond.wait_for(lock, std::chrono::seconds(1u),
|
||||||
|
[this] { return m_stopWorker; });
|
||||||
|
|
||||||
|
if (m_stopWorker)
|
||||||
|
break;
|
||||||
|
|
||||||
|
// Periodically free unused memory chunks
|
||||||
|
auto currentTime = high_resolution_clock::now();
|
||||||
|
|
||||||
|
for (uint32_t i = 0; i < m_memHeapCount; i++)
|
||||||
|
freeEmptyChunksInHeap(m_memHeaps[i], 0, currentTime);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -3,6 +3,8 @@
|
|||||||
#include "dxvk_adapter.h"
|
#include "dxvk_adapter.h"
|
||||||
#include "dxvk_allocator.h"
|
#include "dxvk_allocator.h"
|
||||||
|
|
||||||
|
#include "../util/util_time.h"
|
||||||
|
|
||||||
namespace dxvk {
|
namespace dxvk {
|
||||||
|
|
||||||
class DxvkMemoryAllocator;
|
class DxvkMemoryAllocator;
|
||||||
@ -57,6 +59,20 @@ namespace dxvk {
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Memory chunk
|
||||||
|
*
|
||||||
|
* Stores a device memory object with some metadata.
|
||||||
|
*/
|
||||||
|
struct DxvkMemoryChunk {
|
||||||
|
/// Backing storage for this chunk
|
||||||
|
DxvkDeviceMemory memory;
|
||||||
|
/// Time when the chunk has been marked as unused. Must
|
||||||
|
/// be set to 0 when allocating memory from the chunk
|
||||||
|
high_resolution_clock::time_point unusedTime = { };
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* \brief Memory pool
|
* \brief Memory pool
|
||||||
*
|
*
|
||||||
@ -68,7 +84,7 @@ namespace dxvk {
|
|||||||
constexpr static VkDeviceSize MinChunkSize = MaxChunkSize / 64u;
|
constexpr static VkDeviceSize MinChunkSize = MaxChunkSize / 64u;
|
||||||
|
|
||||||
/// Backing storage for allocated memory chunks
|
/// Backing storage for allocated memory chunks
|
||||||
std::vector<DxvkDeviceMemory> chunks;
|
std::vector<DxvkMemoryChunk> chunks;
|
||||||
/// Memory allocator covering the entire memory pool
|
/// Memory allocator covering the entire memory pool
|
||||||
DxvkPageAllocator pageAllocator;
|
DxvkPageAllocator pageAllocator;
|
||||||
/// Pool allocator that sits on top of the page allocator
|
/// Pool allocator that sits on top of the page allocator
|
||||||
@ -423,7 +439,8 @@ namespace dxvk {
|
|||||||
|
|
||||||
DxvkDevice* m_device;
|
DxvkDevice* m_device;
|
||||||
|
|
||||||
dxvk::mutex m_mutex;
|
dxvk::mutex m_mutex;
|
||||||
|
dxvk::condition_variable m_cond;
|
||||||
|
|
||||||
uint32_t m_memTypeCount = 0u;
|
uint32_t m_memTypeCount = 0u;
|
||||||
uint32_t m_memHeapCount = 0u;
|
uint32_t m_memHeapCount = 0u;
|
||||||
@ -435,6 +452,9 @@ namespace dxvk {
|
|||||||
|
|
||||||
std::array<uint32_t, 16> m_memTypesByPropertyFlags = { };
|
std::array<uint32_t, 16> m_memTypesByPropertyFlags = { };
|
||||||
|
|
||||||
|
dxvk::thread m_worker;
|
||||||
|
bool m_stopWorker = false;
|
||||||
|
|
||||||
DxvkDeviceMemory allocateDeviceMemory(
|
DxvkDeviceMemory allocateDeviceMemory(
|
||||||
DxvkMemoryType& type,
|
DxvkMemoryType& type,
|
||||||
VkDeviceSize size,
|
VkDeviceSize size,
|
||||||
@ -465,12 +485,14 @@ namespace dxvk {
|
|||||||
|
|
||||||
void freeEmptyChunksInHeap(
|
void freeEmptyChunksInHeap(
|
||||||
const DxvkMemoryHeap& heap,
|
const DxvkMemoryHeap& heap,
|
||||||
VkDeviceSize allocationSize);
|
VkDeviceSize allocationSize,
|
||||||
|
high_resolution_clock::time_point time);
|
||||||
|
|
||||||
void freeEmptyChunksInPool(
|
void freeEmptyChunksInPool(
|
||||||
DxvkMemoryType& type,
|
DxvkMemoryType& type,
|
||||||
DxvkMemoryPool& pool,
|
DxvkMemoryPool& pool,
|
||||||
VkDeviceSize allocationSize);
|
VkDeviceSize allocationSize,
|
||||||
|
high_resolution_clock::time_point time);
|
||||||
|
|
||||||
int32_t findEmptyChunkInPool(
|
int32_t findEmptyChunkInPool(
|
||||||
const DxvkMemoryPool& pool,
|
const DxvkMemoryPool& pool,
|
||||||
@ -516,6 +538,8 @@ namespace dxvk {
|
|||||||
const VkMemoryRequirements& requirements,
|
const VkMemoryRequirements& requirements,
|
||||||
VkMemoryPropertyFlags properties) const;
|
VkMemoryPropertyFlags properties) const;
|
||||||
|
|
||||||
|
void runWorker();
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user