1
0
mirror of https://github.com/doitsujin/dxvk.git synced 2025-01-19 14:52:10 +01:00

[dxvk] Implement local allocation cache

This commit is contained in:
Philip Rebohle 2024-09-24 12:01:43 +02:00 committed by Philip Rebohle
parent 93547aec8d
commit 4db0007af3
5 changed files with 326 additions and 14 deletions

View File

@ -329,16 +329,28 @@ namespace dxvk {
/**
 * \brief Allocates new buffer slice
 *
 * Convenience overload that forwards to the cache-aware
 * variant without an allocation cache.
 * \returns The new backing resource
 */
Rc<DxvkResourceAllocation> allocateSlice() {
return allocateSlice(nullptr);
}
/**
 * \brief Allocates new buffer slice with cache
 *
 * Uses the given cache to service small allocations without
 * having to block the actual allocator if possible.
 * \param [in] cache Optional allocation cache, may be \c nullptr
 * \returns The new buffer slice
 */
Rc<DxvkResourceAllocation> allocateSlice(DxvkLocalAllocationCache* cache) {
  // Describe a buffer that matches the properties of this buffer object
  VkBufferCreateInfo info = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
  info.flags = m_info.flags;
  info.usage = m_info.usage;
  info.size = m_info.size;
  info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;

  // Always pass the cache through to the allocator. Returning early
  // through the two-argument call would leave the cache unused.
  return m_allocator->createBufferResource(info, m_properties, cache);
}
/**

View File

@ -182,8 +182,15 @@ namespace dxvk {
const DxvkSamplerCreateInfo& createInfo) {
return new DxvkSampler(this, createInfo);
}
// Creates a local allocation cache by delegating to the memory manager.
DxvkLocalAllocationCache DxvkDevice::createAllocationCache(
        VkBufferUsageFlags          bufferUsage,
        VkMemoryPropertyFlags       propertyFlags) {
  auto&& memoryManager = m_objects.memoryManager();
  return memoryManager.createAllocationCache(bufferUsage, propertyFlags);
}
// Creates a sparse page allocator that is bound to the memory manager.
Rc<DxvkSparsePageAllocator> DxvkDevice::createSparsePageAllocator() {
  auto&& memoryManager = m_objects.memoryManager();
  Rc<DxvkSparsePageAllocator> pageAllocator = new DxvkSparsePageAllocator(memoryManager);
  return pageAllocator;
}

View File

@ -343,6 +343,17 @@ namespace dxvk {
Rc<DxvkSampler> createSampler(
const DxvkSamplerCreateInfo& createInfo);
/**
 * \brief Creates local allocation cache
 *
 * The request is forwarded to the memory manager of the device.
 * \param [in] bufferUsage Required buffer usage
 * \param [in] propertyFlags Memory properties
 * \returns Allocation cache object
 */
DxvkLocalAllocationCache createAllocationCache(
VkBufferUsageFlags bufferUsage,
VkMemoryPropertyFlags propertyFlags);
/**
* \brief Creates a sparse page allocator
* \returns Sparse page allocator

View File

@ -200,6 +200,57 @@ namespace dxvk {
// Pops the first allocation off the list of the pool that matches the
// given size. Returns nullptr if that pool currently has no entries.
DxvkResourceAllocation* DxvkLocalAllocationCache::allocateFromCache(
        VkDeviceSize                size) {
  DxvkResourceAllocation*& listHead = m_pools[computePoolIndex(size)];

  if (DxvkResourceAllocation* result = listHead) {
    // Unlink the node so that the caller gets a standalone allocation
    listHead = result->m_next;
    result->m_next = nullptr;
    return result;
  }

  return nullptr;
}
// Installs the given list as the new head of the pool that matches the
// given size, and hands the previously stored list back to the caller.
DxvkResourceAllocation* DxvkLocalAllocationCache::assignCache(
        VkDeviceSize                size,
        DxvkResourceAllocation*     allocation) {
  DxvkResourceAllocation*& slot = m_pools[computePoolIndex(size)];

  DxvkResourceAllocation* previous = slot;
  slot = allocation;
  return previous;
}
// Hands all cached allocations back to the allocator, if there is one.
void DxvkLocalAllocationCache::freeCache() {
  if (m_allocator == nullptr)
    return;

  m_allocator->freeLocalCache(this);
}
// Halves the per-page allocation count for every pool index step,
// but never returns fewer than MinAllocationCountPerPool entries.
uint32_t DxvkLocalAllocationCache::computePreferredAllocationCount(
        VkDeviceSize                size) {
  const uint32_t shift = computePoolIndex(size);
  const uint32_t scaled = (DxvkPageAllocator::PageSize / MinSize) >> shift;

  return scaled < MinAllocationCountPerPool
    ? MinAllocationCountPerPool
    : scaled;
}
// Maps an allocation size to a pool index. Sizes up to MinSize map to
// index 0; each further index is derived from the leading zero count of
// the size expressed in MinSize units.
uint32_t DxvkLocalAllocationCache::computePoolIndex(
        VkDeviceSize                size) {
  const VkDeviceSize clampedSize = std::max(size, MinSize);
  const VkDeviceSize unitCount = (clampedSize - 1u) / MinSize;

  return 64u - bit::lzcnt(unitCount);
}
// Returns the allocation size of the pool with the given index,
// which is MinSize shifted left by the index.
VkDeviceSize DxvkLocalAllocationCache::computeAllocationSize(
        uint32_t                    index) {
  VkDeviceSize poolSize = MinSize;
  poolSize <<= index;
  return poolSize;
}
DxvkMemoryAllocator::DxvkMemoryAllocator(DxvkDevice* device)
: m_device(device) {
VkPhysicalDeviceMemoryProperties memInfo = device->adapter()->memoryProperties();
@ -418,7 +469,8 @@ namespace dxvk {
Rc<DxvkResourceAllocation> DxvkMemoryAllocator::createBufferResource(
const VkBufferCreateInfo& createInfo,
VkMemoryPropertyFlags properties) {
VkMemoryPropertyFlags properties,
DxvkLocalAllocationCache* allocationCache) {
Rc<DxvkResourceAllocation> allocation;
if (likely(!createInfo.flags && createInfo.sharingMode == VK_SHARING_MODE_EXCLUSIVE)) {
@ -430,10 +482,26 @@ namespace dxvk {
if (unlikely(createInfo.usage & ~m_globalBufferUsageFlags))
memoryRequirements.memoryTypeBits = findGlobalBufferMemoryTypeMask(createInfo.usage);
// If there is at least one memory type that supports the required
// buffer usage flags and requested memory properties, suballocate
// from a global buffer.
if (likely(memoryRequirements.memoryTypeBits)) {
// If the given allocation cache supports the memory types and usage
// flags that we need, try to use it to service this allocation.
if (allocationCache && createInfo.size <= DxvkLocalAllocationCache::MaxSize
&& allocationCache->m_memoryTypes && !(allocationCache->m_memoryTypes & ~memoryRequirements.memoryTypeBits)) {
allocation = allocationCache->allocateFromCache(createInfo.size);
if (likely(allocation))
return allocation;
// If the cache is currently empty for the required allocation size,
// make sure it's not. This will also initialize the shared caches
// for any relevant memory pools as necessary.
if (refillAllocationCache(allocationCache, memoryRequirements, properties))
return allocationCache->allocateFromCache(createInfo.size);
}
// If there is at least one memory type that supports the required
// buffer usage flags and requested memory properties, suballocate
// from a global buffer.
allocation = allocateMemory(memoryRequirements, properties);
if (likely(allocation && allocation->m_buffer))
@ -630,6 +698,19 @@ namespace dxvk {
}
// Builds a local cache object that records this allocator and the mask
// of memory types derived from the buffer usage and memory properties.
DxvkLocalAllocationCache DxvkMemoryAllocator::createAllocationCache(
        VkBufferUsageFlags          bufferUsage,
        VkMemoryPropertyFlags       properties) {
  // Usage flags outside the global set need a dedicated type mask lookup
  const bool hasExtraUsage = (bufferUsage & ~m_globalBufferUsageFlags) != 0u;

  uint32_t typeMask = hasExtraUsage
    ? findGlobalBufferMemoryTypeMask(bufferUsage)
    : m_globalBufferMemoryTypes;

  typeMask &= getMemoryTypeMask(properties);
  return DxvkLocalAllocationCache(this, typeMask);
}
DxvkDeviceMemory DxvkMemoryAllocator::allocateDeviceMemory(
DxvkMemoryType& type,
VkDeviceSize size,
@ -775,7 +856,7 @@ namespace dxvk {
}
Rc<DxvkResourceAllocation> DxvkMemoryAllocator::createAllocation(
DxvkResourceAllocation* DxvkMemoryAllocator::createAllocation(
DxvkMemoryType& type,
DxvkMemoryPool& pool,
VkDeviceSize address,
@ -808,7 +889,7 @@ namespace dxvk {
}
Rc<DxvkResourceAllocation> DxvkMemoryAllocator::createAllocation(
DxvkResourceAllocation* DxvkMemoryAllocator::createAllocation(
DxvkMemoryType& type,
const DxvkDeviceMemory& memory) {
type.stats.memoryUsed += memory.size;
@ -865,6 +946,34 @@ namespace dxvk {
}
// Takes the allocator lock and releases every list stored in the
// given cache, leaving all of its pools empty.
void DxvkMemoryAllocator::freeLocalCache(
        DxvkLocalAllocationCache*   cache) {
  std::unique_lock lock(m_mutex);

  for (auto& poolHead : cache->m_pools) {
    // Detach the list from the cache before freeing it
    DxvkResourceAllocation* list = poolHead;
    poolHead = nullptr;

    freeCachedAllocationsLocked(list);
  }
}
// Walks the given linked list of cached allocations, returns their
// memory to the owning pool and recycles the allocation objects.
// The caller is expected to hold the allocator lock.
void DxvkMemoryAllocator::freeCachedAllocationsLocked(
        DxvkResourceAllocation*     allocation) {
  for (DxvkResourceAllocation* current = allocation; current; ) {
    DxvkMemoryType& type = *current->m_type;

    // Pick the pool based on whether the allocation has a map pointer
    auto& pool = current->m_mapPtr
      ? type.mappedPool
      : type.devicePool;

    // Cached allocations may have a reference count of 0, but they
    // still own the memory, so make sure to release it here.
    type.stats.memoryUsed -= current->m_size;

    const bool chunkFreed = pool.free(current->m_address, current->m_size);

    if (unlikely(chunkFreed))
      freeEmptyChunksInPool(type, pool, 0, high_resolution_clock::now());

    // Fetch the successor before the object itself gets recycled
    DxvkResourceAllocation* next = current->m_next;
    m_allocationPool.free(current);
    current = next;
  }
}
void DxvkMemoryAllocator::freeEmptyChunksInHeap(
const DxvkMemoryHeap& heap,
VkDeviceSize allocationSize,
@ -995,6 +1104,68 @@ namespace dxvk {
}
// Populates the local cache with a batch of allocations for the size
// category that the given requirements fall into.
//
// Only suballocates from chunks that already exist. Returns true if at
// least one allocation was added to the cache, and false if the caller
// has to fall back to the regular allocation path.
bool DxvkMemoryAllocator::refillAllocationCache(
        DxvkLocalAllocationCache*   cache,
  const VkMemoryRequirements&       requirements,
        VkMemoryPropertyFlags       properties) {
  // Round the requested size up to the next power of two. Sizes of 0 and 1
  // are skipped here: for a size of 1 the leading zero count of (size - 1)
  // is the full bit width, and shifting a 64-bit value by 64 bits is
  // undefined. Both cases are covered by the minimum size clamp below.
  VkDeviceSize allocationSize = requirements.size;

  if (allocationSize > 1u)
    allocationSize = (VkDeviceSize(-1) >> bit::lzcnt(allocationSize - 1u)) + 1u;

  allocationSize = std::max(allocationSize, DxvkLocalAllocationCache::MinSize);

  // TODO implement shared caches per memory pool

  // No suitable allocations available from the shared cache, create some
  // new ones so that subsequent allocations of this size category can be
  // handled without locking the allocator.
  uint32_t allocationCount = DxvkLocalAllocationCache::computePreferredAllocationCount(allocationSize);

  // Singly linked list of the newly created allocations
  DxvkResourceAllocation* head = nullptr;
  DxvkResourceAllocation* tail = nullptr;

  std::unique_lock lock(m_mutex);

  for (auto typeIndex : bit::BitMask(cache->m_memoryTypes)) {
    auto& pool = (properties & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
      ? m_memTypes[typeIndex].mappedPool
      : m_memTypes[typeIndex].devicePool;

    while (allocationCount) {
      // Try to suballocate from existing chunks, but do not create
      // any new chunks. Let the regular code path handle that case
      // as necessary.
      int64_t address = pool.alloc(allocationSize, requirements.alignment);

      if (address < 0)
        break;

      // Add allocation to the list and mark it as cacheable,
      // so it will get recycled as-is after use.
      DxvkResourceAllocation* allocation = createAllocation(
        m_memTypes[typeIndex], pool, address, allocationSize);
      allocation->m_flags.set(DxvkAllocationFlag::Cacheable);

      if (tail)
        tail->m_next = allocation;
      else
        head = allocation;

      tail = allocation;
      allocationCount--;
    }

    // Stop once the preferred number of allocations has been created
    if (!allocationCount)
      break;
  }

  if (!tail)
    return false;

  // Prepend the new list to whatever the cache held for this size
  tail->m_next = cache->assignCache(allocationSize, head);
  return true;
}
void DxvkMemoryAllocator::getAllocationStatsForPool(
const DxvkMemoryType& type,
const DxvkMemoryPool& pool,

View File

@ -420,6 +420,7 @@ namespace dxvk {
OwnsMemory = 0,
OwnsBuffer = 1,
OwnsImage = 2,
Cacheable = 3,
};
using DxvkAllocationFlags = Flags<DxvkAllocationFlag>;
@ -434,6 +435,8 @@ namespace dxvk {
*/
class alignas(CACHE_LINE_SIZE) DxvkResourceAllocation {
friend DxvkMemoryAllocator;
friend class DxvkLocalAllocationCache;
friend class DxvkSharedAllocationCache;
friend class DxvkMemory;
public:
@ -579,6 +582,8 @@ namespace dxvk {
DxvkMemoryAllocator* m_allocator = nullptr;
DxvkMemoryType* m_type = nullptr;
DxvkResourceAllocation* m_next = nullptr;
void free();
static force_inline uint64_t getIncrement(DxvkAccess access) {
@ -604,7 +609,7 @@ namespace dxvk {
~DxvkResourceAllocationPool();
template<typename... Args>
Rc<DxvkResourceAllocation> create(Args&&... args) {
DxvkResourceAllocation* create(Args&&... args) {
return new (alloc()) DxvkResourceAllocation(std::forward<Args>(args)...);
}
@ -752,6 +757,87 @@ namespace dxvk {
};
/**
 * \brief Local allocation cache
 *
 * Provides pre-allocated memory of supported power-of-two sizes
 * in a non-thread safe manner. This is intended to be used for
 * context classes in order to reduce lock contention.
 *
 * Objects of this class are movable but not copyable, since each
 * cache releases the allocations it holds when it is destroyed.
 */
class DxvkLocalAllocationCache {
  friend class DxvkMemoryAllocator;
public:

  /** Number of size categories managed by the cache */
  constexpr static uint32_t PoolCount = 8u;
  /** Lower bound for the number of allocations created per refill */
  constexpr static uint32_t MinAllocationCountPerPool = 8u;

  /** Allocation size of the first pool */
  constexpr static VkDeviceSize MinSize = DxvkPoolAllocator::MinSize;
  /** Allocation size of the last pool */
  constexpr static VkDeviceSize MaxSize = MinSize << (PoolCount - 1u);

  DxvkLocalAllocationCache() = default;

  DxvkLocalAllocationCache(
          DxvkMemoryAllocator*        allocator,
          uint32_t                    memoryTypes)
  : m_allocator(allocator), m_memoryTypes(memoryTypes) { }

  DxvkLocalAllocationCache(const DxvkLocalAllocationCache&) = delete;

  DxvkLocalAllocationCache(DxvkLocalAllocationCache&& other) noexcept
  : m_allocator(other.m_allocator), m_memoryTypes(other.m_memoryTypes),
    m_pools(other.m_pools) {
    // Leave the source empty so that it will not free anything
    other.m_allocator = nullptr;
    other.m_memoryTypes = 0u;
    other.m_pools = { };
  }

  DxvkLocalAllocationCache& operator = (const DxvkLocalAllocationCache&) = delete;

  DxvkLocalAllocationCache& operator = (DxvkLocalAllocationCache&& other) {
    // Without this check, self-assignment would free all cached
    // allocations and then detach the cache from its allocator.
    if (this != &other) {
      freeCache();

      m_allocator = other.m_allocator;
      m_memoryTypes = other.m_memoryTypes;
      m_pools = other.m_pools;

      other.m_allocator = nullptr;
      other.m_memoryTypes = 0u;
      other.m_pools = { };
    }

    return *this;
  }

  ~DxvkLocalAllocationCache() {
    freeCache();
  }

  /**
   * \brief Computes preferred number of cached allocations
   *
   * Depends on size so that a large enough number of consecutive
   * allocations can be handled by the local cache without wasting
   * too much memory on larger allocations.
   * \param [in] size Allocation size
   * \returns Preferred number of allocations for the given size
   */
  static uint32_t computePreferredAllocationCount(
          VkDeviceSize                size);

private:

  DxvkMemoryAllocator*  m_allocator   = nullptr;
  uint32_t              m_memoryTypes = 0u;

  // One linked list of cached allocations per size category
  std::array<DxvkResourceAllocation*, PoolCount> m_pools = { };

  // Removes and returns one allocation from the pool matching
  // the given size, or nullptr if that pool is empty.
  DxvkResourceAllocation* allocateFromCache(
          VkDeviceSize                size);

  // Replaces the list of the pool matching the given size
  // and returns the list that was stored there before.
  DxvkResourceAllocation* assignCache(
          VkDeviceSize                size,
          DxvkResourceAllocation*     allocation);

  // Returns all cached allocations to the allocator, if any
  void freeCache();

  // Maps an allocation size to the index of its pool
  static uint32_t computePoolIndex(
          VkDeviceSize                size);

  // Returns the allocation size used by the pool with the given index
  static VkDeviceSize computeAllocationSize(
          uint32_t                    index);

};
/**
* \brief Memory allocator
*
@ -761,6 +847,7 @@ namespace dxvk {
class DxvkMemoryAllocator {
friend DxvkMemory;
friend DxvkResourceAllocation;
friend DxvkLocalAllocationCache;
constexpr static uint64_t DedicatedChunkAddress = 1ull << 63u;
@ -836,11 +923,13 @@ namespace dxvk {
* may fall back to creating a dedicated Vulkan buffer.
* \param [in] createInfo Buffer create info
* \param [in] properties Memory property flags
* \param [in] allocationCache Optional allocation cache
* \returns Buffer resource
*/
Rc<DxvkResourceAllocation> createBufferResource(
const VkBufferCreateInfo& createInfo,
VkMemoryPropertyFlags properties);
VkMemoryPropertyFlags properties,
DxvkLocalAllocationCache* allocationCache);
/**
* \brief Creates image resource
@ -855,6 +944,17 @@ namespace dxvk {
VkMemoryPropertyFlags properties,
const void* next);
/**
 * \brief Creates local allocation cache for buffer resources
 *
 * The returned cache records the memory type mask that is derived
 * from the given buffer usage and memory property flags.
 * \param [in] bufferUsage Required buffer usage flags
 * \param [in] properties Required memory properties
 * \returns Local allocation cache
 */
DxvkLocalAllocationCache createAllocationCache(
VkBufferUsageFlags bufferUsage,
VkMemoryPropertyFlags properties);
/**
* \brief Queries memory stats
*
@ -942,6 +1042,12 @@ namespace dxvk {
void freeAllocation(
DxvkResourceAllocation* allocation);
void freeLocalCache(
DxvkLocalAllocationCache* cache);
void freeCachedAllocationsLocked(
DxvkResourceAllocation* allocation);
uint32_t countEmptyChunksInPool(
const DxvkMemoryPool& pool) const;
@ -965,16 +1071,21 @@ namespace dxvk {
DxvkDeviceMemory& memory,
VkMemoryPropertyFlags properties);
Rc<DxvkResourceAllocation> createAllocation(
DxvkResourceAllocation* createAllocation(
DxvkMemoryType& type,
DxvkMemoryPool& pool,
VkDeviceSize address,
VkDeviceSize size);
Rc<DxvkResourceAllocation> createAllocation(
DxvkResourceAllocation* createAllocation(
DxvkMemoryType& type,
const DxvkDeviceMemory& memory);
bool refillAllocationCache(
DxvkLocalAllocationCache* cache,
const VkMemoryRequirements& requirements,
VkMemoryPropertyFlags properties);
void getAllocationStatsForPool(
const DxvkMemoryType& type,
const DxvkMemoryPool& pool,