Mirror of https://github.com/doitsujin/dxvk.git, synced 2024-12-02 01:24:11 +01:00
[dxvk] Implement shared allocation cache
Allows refilling local caches in constant time.
commit 2722a41675
parent 9a51849920
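The core idea behind the change: instead of recycling allocations one at a time, the shared cache stores complete free lists per size class, so a per-context local cache can be refilled by handing over a single list head under one lock, regardless of how many allocations that list contains. The following is a simplified, hypothetical sketch of that scheme, not the DXVK code itself; SharedListCache, Node, takeList and storeList are illustrative names, and the per-size-class free-list accumulation, statistics and timed cleanup of the actual implementation are omitted.

#include <array>
#include <cstdint>
#include <mutex>
#include <utility>

// Stand-in for a cached resource allocation; the real code links
// DxvkResourceAllocation objects through an m_next pointer.
struct Node {
  Node* next = nullptr;
};

// Stores complete free lists per size class. Consumers refill their
// thread-local caches by taking one whole list under a single lock,
// which is O(1) no matter how long the list is.
class SharedListCache {
public:
  static constexpr uint32_t PoolCount = 8;   // number of size classes
  static constexpr uint32_t PoolSize  = 12;  // lists retained per size class

  // Returns an entire free list for the given size class, or nullptr
  // if none is available and the caller has to allocate normally.
  Node* takeList(uint32_t poolIndex) {
    std::lock_guard lock(m_mutex);
    auto& pool = m_pools[poolIndex];

    if (!pool.listCount)
      return nullptr;

    return std::exchange(pool.lists[--pool.listCount], nullptr);
  }

  // Stores a complete free list. If the pool is already full, the list
  // is handed back to the caller, which should then free it for real.
  Node* storeList(uint32_t poolIndex, Node* head) {
    std::lock_guard lock(m_mutex);
    auto& pool = m_pools[poolIndex];

    if (pool.listCount < PoolSize) {
      pool.lists[pool.listCount++] = head;
      return nullptr;
    }

    return head;
  }

private:
  struct Pool {
    uint32_t listCount = 0;
    std::array<Node*, PoolSize> lists = { };
  };

  std::mutex m_mutex;
  std::array<Pool, PoolCount> m_pools = { };
};

In the commit itself, DxvkSharedAllocationCache plays this role, with a separate free-list stage guarded by m_freeMutex that batches individual frees into full lists before they are published under m_poolMutex.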
@@ -169,6 +169,14 @@ namespace dxvk {
   }
 
 
+  void DxvkResourceAllocation::destroyBufferViews() {
+    if (m_bufferViews) {
+      delete m_bufferViews;
+      m_bufferViews = nullptr;
+    }
+  }
+
+
   DxvkResourceAllocationPool::DxvkResourceAllocationPool() {
@@ -231,9 +239,9 @@ namespace dxvk {
 
   uint32_t DxvkLocalAllocationCache::computePreferredAllocationCount(
           VkDeviceSize                size) {
     uint32_t poolIndex = computePoolIndex(size);
-    uint32_t count = (DxvkPageAllocator::PageSize / MinSize) >> poolIndex;
+    uint32_t count = (PoolCapacityInBytes / MinSize) >> poolIndex;
 
-    return std::max(count, MinAllocationCountPerPool);
+    return std::max(count, 1u);
   }
 
 
@@ -251,6 +259,95 @@ namespace dxvk {
 
 
+  DxvkSharedAllocationCache::DxvkSharedAllocationCache(
+          DxvkMemoryAllocator*        allocator)
+  : m_allocator(allocator) {
+    for (uint32_t i = 0; i < m_pools.size(); i++) {
+      VkDeviceSize size = DxvkLocalAllocationCache::computeAllocationSize(i);
+      m_freeLists[i].capacity = DxvkLocalAllocationCache::computePreferredAllocationCount(size);
+    }
+  }
+
+
+  DxvkSharedAllocationCache::~DxvkSharedAllocationCache() {
+    for (const auto& freeList : m_freeLists)
+      m_allocator->freeCachedAllocations(freeList.head);
+
+    for (const auto& pool : m_pools) {
+      for (auto list : pool.lists)
+        m_allocator->freeCachedAllocations(list);
+    }
+  }
+
+
+  DxvkResourceAllocation* DxvkSharedAllocationCache::getAllocationList(
+          VkDeviceSize                allocationSize) {
+    uint32_t poolIndex = DxvkLocalAllocationCache::computePoolIndex(allocationSize);
+
+    // If there's a list ready for us, take the whole thing
+    std::unique_lock poolLock(m_poolMutex);
+    auto& pool = m_pools[poolIndex];
+
+    if (!pool.listCount)
+      return nullptr;
+
+    if (!(--pool.listCount))
+      pool.drainTime = high_resolution_clock::now();
+
+    return std::exchange(pool.lists[pool.listCount], nullptr);
+  }
+
+
+  DxvkResourceAllocation* DxvkSharedAllocationCache::freeAllocation(
+          DxvkResourceAllocation*     allocation) {
+    uint32_t poolIndex = DxvkLocalAllocationCache::computePoolIndex(allocation->m_size);
+
+    { std::unique_lock freeLock(m_freeMutex);
+      auto& list = m_freeLists[poolIndex];
+
+      allocation->m_next = list.head;
+      list.head = allocation;
+
+      if (++list.size < list.capacity)
+        return nullptr;
+
+      // Free list is full, try to add it to the list array
+      // so that subsequent allocations can use it.
+      list.head = nullptr;
+      list.size = 0u;
+    }
+
+    // Add free list to the pool if possible.
+    { std::unique_lock poolLock(m_poolMutex);
+      auto& pool = m_pools[poolIndex];
+
+      if (likely(pool.listCount < PoolSize)) {
+        pool.lists[pool.listCount++] = allocation;
+        return nullptr;
+      }
+
+      // If the pool is full, destroy the entire free list
+      return allocation;
+    }
+  }
+
+
+  void DxvkSharedAllocationCache::cleanupUnusedFromLockedAllocator(
+          high_resolution_clock::time_point time) {
+    std::unique_lock poolLock(m_poolMutex);
+
+    for (auto& pool : m_pools) {
+      if (pool.listCount && time - pool.drainTime >= std::chrono::seconds(1u)) {
+        m_allocator->freeCachedAllocationsLocked(std::exchange(
+          pool.lists[--pool.listCount], nullptr));
+        pool.drainTime = time;
+      }
+    }
+  }
+
+
+
   DxvkMemoryAllocator::DxvkMemoryAllocator(DxvkDevice* device)
   : m_device(device) {
     VkPhysicalDeviceMemoryProperties memInfo = device->adapter()->memoryProperties();
@@ -299,6 +396,14 @@ namespace dxvk {
 
     m_worker.join();
 
+    // Destroy shared caches so that any allocations
+    // that are still alive get returned to the device
+    for (uint32_t i = 0; i < m_memTypeCount; i++) {
+      if (m_memTypes[i].sharedCache)
+        delete m_memTypes[i].sharedCache;
+    }
+
     // Now that no allocations are alive, we can free chunks
     for (uint32_t i = 0; i < m_memHeapCount; i++)
       freeEmptyChunksInHeap(m_memHeaps[i], VkDeviceSize(-1), high_resolution_clock::time_point());
   }
@@ -485,8 +590,11 @@ namespace dxvk {
     if (likely(memoryRequirements.memoryTypeBits)) {
       // If the given allocation cache supports the memory types and usage
       // flags that we need, try to use it to service this allocation.
+      // Only use the allocation cache for mappable allocations since those
+      // are expected to happen frequently.
       if (allocationCache && createInfo.size <= DxvkLocalAllocationCache::MaxSize
-       && allocationCache->m_memoryTypes && !(allocationCache->m_memoryTypes & ~memoryRequirements.memoryTypeBits)) {
+       && allocationCache->m_memoryTypes && !(allocationCache->m_memoryTypes & ~memoryRequirements.memoryTypeBits)
+       && (properties & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) {
         allocation = allocationCache->allocateFromCache(createInfo.size);
 
         if (likely(allocation))
@@ -924,25 +1032,37 @@ namespace dxvk {
 
   void DxvkMemoryAllocator::freeAllocation(
           DxvkResourceAllocation*     allocation) {
-    std::unique_lock lock(m_mutex);
+    if (allocation->m_flags.test(DxvkAllocationFlag::Cacheable)) {
+      // Return cacheable allocations to the shared cache
+      allocation->destroyBufferViews();
 
-    if (likely(allocation->m_type)) {
-      allocation->m_type->stats.memoryUsed -= allocation->m_size;
+      if (allocation->m_type->sharedCache)
+        allocation = allocation->m_type->sharedCache->freeAllocation(allocation);
 
-      if (unlikely(allocation->m_flags.test(DxvkAllocationFlag::OwnsMemory))) {
-        // We free the actual allocation later, just update stats here.
-        allocation->m_type->stats.memoryAllocated -= allocation->m_size;
-      } else {
-        auto& pool = allocation->m_mapPtr
-          ? allocation->m_type->mappedPool
-          : allocation->m_type->devicePool;
+      // If we get a list of allocations back from the
+      // shared cache, free all of them in one go
+      freeCachedAllocations(allocation);
+    } else {
+      std::unique_lock lock(m_mutex);
 
-        if (unlikely(pool.free(allocation->m_address, allocation->m_size)))
-          freeEmptyChunksInPool(*allocation->m_type, pool, 0, high_resolution_clock::now());
+      if (likely(allocation->m_type)) {
+        allocation->m_type->stats.memoryUsed -= allocation->m_size;
+
+        if (unlikely(allocation->m_flags.test(DxvkAllocationFlag::OwnsMemory))) {
+          // We free the actual allocation later, just update stats here.
+          allocation->m_type->stats.memoryAllocated -= allocation->m_size;
+        } else {
+          auto& pool = allocation->m_mapPtr
+            ? allocation->m_type->mappedPool
+            : allocation->m_type->devicePool;
+
+          if (unlikely(pool.free(allocation->m_address, allocation->m_size)))
+            freeEmptyChunksInPool(*allocation->m_type, pool, 0, high_resolution_clock::now());
+        }
       }
-    }
 
-    m_allocationPool.free(allocation);
+      m_allocationPool.free(allocation);
+    }
   }
@@ -955,6 +1075,15 @@ namespace dxvk {
   }
 
 
   void DxvkMemoryAllocator::freeCachedAllocations(
           DxvkResourceAllocation*     allocation) {
+    if (allocation) {
+      std::unique_lock lock(m_mutex);
+      freeCachedAllocationsLocked(allocation);
+    }
+  }
+
+
+  void DxvkMemoryAllocator::freeCachedAllocationsLocked(
+          DxvkResourceAllocation*     allocation) {
     while (allocation) {
@@ -1105,42 +1234,58 @@ namespace dxvk {
 
   bool DxvkMemoryAllocator::refillAllocationCache(
           DxvkLocalAllocationCache*   cache,
     const VkMemoryRequirements&       requirements,
           VkMemoryPropertyFlags       properties) {
+    // Ensure that all cached allocations report a power-of-two size.
+    // The shared cache implementation currently relies on this.
     VkDeviceSize allocationSize = (VkDeviceSize(-1) >> bit::lzcnt(requirements.size - 1u)) + 1u;
     allocationSize = std::max(allocationSize, DxvkLocalAllocationCache::MinSize);
 
-    // TODO implement shared caches per memory pool
-
-    // No suitable allocations available from the shared cache, create some
-    // new ones so that subsequent allocations of this size category can be
-    // handled without locking the allocator.
+    // Maximum number of allocations when we miss in the shared cache
     uint32_t allocationCount = DxvkLocalAllocationCache::computePreferredAllocationCount(allocationSize);
 
-    DxvkResourceAllocation* head = nullptr;
-    DxvkResourceAllocation* tail = nullptr;
-
-    std::unique_lock lock(m_mutex);
-
     for (auto typeIndex : bit::BitMask(cache->m_memoryTypes)) {
-      auto& pool = (properties & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
-        ? m_memTypes[typeIndex].mappedPool
-        : m_memTypes[typeIndex].devicePool;
+      auto& memoryType = m_memTypes[typeIndex];
+
+      // Initialize shared cache on demand only
+      if (unlikely(!memoryType.sharedCache)) {
+        std::unique_lock lock(m_mutex);
+
+        if (!memoryType.sharedCache)
+          memoryType.sharedCache = new DxvkSharedAllocationCache(this);
+      }
+
+      // Try to grab a list of allocations from the shared cache first. If
+      // this succeeds, allocating several pages of memory is near instant.
+      DxvkResourceAllocation* allocation = memoryType.sharedCache->getAllocationList(allocationSize);
+
+      if (likely(allocation)) {
+        allocation = cache->assignCache(allocationSize, allocation);
+        freeCachedAllocations(allocation);
+        return true;
+      }
+
+      // Fill cache with the preferred allocation count of this size category so
+      // that subsequent allocations can be handled without locking the allocator.
+      DxvkResourceAllocation* head = nullptr;
+      DxvkResourceAllocation* tail = nullptr;
+
+      std::unique_lock lock(m_mutex);
+      auto& memoryPool = memoryType.mappedPool;
 
       while (allocationCount) {
         // Try to suballocate from existing chunks, but do not create
         // any new chunks. Let the regular code path handle that case
         // as necessary.
-        int64_t address = pool.alloc(allocationSize, requirements.alignment);
+        int64_t address = memoryPool.alloc(allocationSize, requirements.alignment);
 
         if (address < 0)
           break;
 
         // Add allocation to the list and mark it as cacheable,
         // so it will get recycled as-is after use.
-        DxvkResourceAllocation* allocation = createAllocation(
-          m_memTypes[typeIndex], pool, address, allocationSize);
+        allocation = createAllocation(memoryType, memoryPool, address, allocationSize);
         allocation->m_flags.set(DxvkAllocationFlag::Cacheable);
 
         if (tail) {
@@ -1154,15 +1299,13 @@ namespace dxvk {
         allocationCount--;
       }
 
-      if (!allocationCount)
-        break;
+      if (tail) {
+        tail->m_next = cache->assignCache(allocationSize, head);
+        return true;
+      }
     }
 
-    if (!tail)
-      return false;
-
-    tail->m_next = cache->assignCache(allocationSize, head);
-    return true;
+    return false;
   }
@@ -1588,6 +1731,12 @@ namespace dxvk {
 
       heapStats[i] = stats;
     }
+
+    // Periodically clean up unused cached allocations
+    for (uint32_t i = 0; i < m_memTypeCount; i++) {
+      if (m_memTypes[i].sharedCache)
+        m_memTypes[i].sharedCache->cleanupUnusedFromLockedAllocator(currentTime);
+    }
   }
 
   // Ensure adapter allocation statistics are consistent
 
@@ -13,6 +13,7 @@ namespace dxvk {
   class DxvkMemoryAllocator;
   class DxvkMemoryChunk;
   class DxvkSparsePageTable;
+  class DxvkSharedAllocationCache;
 
   /**
    * \brief Resource access flags
@@ -160,6 +161,8 @@ namespace dxvk {
 
     DxvkMemoryPool devicePool;
     DxvkMemoryPool mappedPool;
+
+    DxvkSharedAllocationCache* sharedCache = nullptr;
   };
 
 
@@ -584,6 +587,8 @@ namespace dxvk {
 
     DxvkResourceAllocation* m_next = nullptr;
 
+    void destroyBufferViews();
+
     void free();
 
     static force_inline uint64_t getIncrement(DxvkAccess access) {
@@ -765,10 +770,12 @@ namespace dxvk {
    * context classes in order to reduce lock contention.
    */
   class DxvkLocalAllocationCache {
-    friend class DxvkMemoryAllocator;
+    constexpr static VkDeviceSize PoolCapacityInBytes = 4u * DxvkPageAllocator::PageSize;
+
+    friend DxvkMemoryAllocator;
   public:
-    constexpr static uint32_t PoolCount = 8u;
-    constexpr static uint32_t MinAllocationCountPerPool = 8u;
+    // Cache allocations up to 128 kiB
+    constexpr static uint32_t PoolCount = 10u;
 
     constexpr static VkDeviceSize MinSize = DxvkPoolAllocator::MinSize;
     constexpr static VkDeviceSize MaxSize = MinSize << (PoolCount - 1u);
@@ -816,6 +823,24 @@ namespace dxvk {
     static uint32_t computePreferredAllocationCount(
             VkDeviceSize              size);
 
+    /**
+     * \brief Computes pool index for a given allocation size
+     *
+     * \param [in] size Allocation size
+     * \returns Pool index
+     */
+    static uint32_t computePoolIndex(
+            VkDeviceSize              size);
+
+    /**
+     * \brief Computes allocation size for a given index
+     *
+     * \param [in] poolIndex Pool index
+     * \returns Allocation size for the pool
+     */
+    static VkDeviceSize computeAllocationSize(
+            uint32_t                  index);
+
   private:
 
     DxvkMemoryAllocator* m_allocator = nullptr;
@@ -832,8 +857,74 @@ namespace dxvk {
 
     void freeCache();
 
-    static uint32_t computePoolIndex(
-            VkDeviceSize              size);
-
   };
 
 
+  /**
+   * \brief Shared allocation cache
+   *
+   * Accumulates small allocations in free lists
+   * that can be allocated in their entirety.
+   */
+  class DxvkSharedAllocationCache {
+    constexpr static uint32_t PoolCount = DxvkLocalAllocationCache::PoolCount;
+    constexpr static uint32_t PoolSize = env::is32BitHostPlatform() ? 6u : 12u;
+
+    friend DxvkMemoryAllocator;
+  public:
+
+    DxvkSharedAllocationCache(
+            DxvkMemoryAllocator*      allocator);
+
+    ~DxvkSharedAllocationCache();
+
+    /**
+     * \brief Retrieves list of cached allocations
+     *
+     * \param [in] allocationSize Required allocation size
+     * \returns Pointer to head of allocation list,
+     *    or \c nullptr if the cache is empty.
+     */
+    DxvkResourceAllocation* getAllocationList(
+            VkDeviceSize              allocationSize);
+
+    /**
+     * \brief Frees cacheable allocation
+     *
+     * \param [in] allocation Allocation to free
+     * \returns List to destroy if the cache is full. Usually,
+     *    \c nullptr if the allocation was successfully added.
+     */
+    DxvkResourceAllocation* freeAllocation(
+            DxvkResourceAllocation*   allocation);
+
+  private:
+
+    struct FreeList {
+      uint16_t size = 0u;
+      uint16_t capacity = 0u;
+
+      DxvkResourceAllocation* head = nullptr;
+    };
+
+    struct Pool {
+      uint32_t listCount = 0u;
+      std::array<DxvkResourceAllocation*, PoolSize> lists = { };
+      high_resolution_clock::time_point drainTime = { };
+    };
+
+    alignas(CACHE_LINE_SIZE)
+    DxvkMemoryAllocator* m_allocator = nullptr;
+
+    dxvk::mutex m_freeMutex;
+    std::array<FreeList, PoolCount> m_freeLists = { };
+
+    alignas(CACHE_LINE_SIZE)
+    dxvk::mutex m_poolMutex;
+    std::array<Pool, PoolCount> m_pools = { };
+
+    void cleanupUnusedFromLockedAllocator(
+            high_resolution_clock::time_point time);
+
+  };
 
@@ -848,6 +939,7 @@ namespace dxvk {
     friend DxvkMemory;
     friend DxvkResourceAllocation;
     friend DxvkLocalAllocationCache;
+    friend DxvkSharedAllocationCache;
 
     constexpr static uint64_t DedicatedChunkAddress = 1ull << 63u;
 
@@ -1045,6 +1137,9 @@ namespace dxvk {
     void freeLocalCache(
             DxvkLocalAllocationCache* cache);
 
     void freeCachedAllocations(
             DxvkResourceAllocation*   allocation);
 
+    void freeCachedAllocationsLocked(
+            DxvkResourceAllocation*   allocation);
+