From 3faa1a76dad52f2c4eabb2f875f636a1f3a661c3 Mon Sep 17 00:00:00 2001 From: Philip Rebohle Date: Wed, 18 Sep 2024 16:47:27 +0200 Subject: [PATCH] [dxvk] Simplify memory chunk allocation Reduces overall waste of memory by reusing already allocated chunks more aggressively. --- src/dxvk/dxvk_buffer.cpp | 19 +---- src/dxvk/dxvk_image.cpp | 18 +---- src/dxvk/dxvk_memory.cpp | 170 +++++++++++++++++++-------------------- src/dxvk/dxvk_memory.h | 60 ++++---------- src/dxvk/dxvk_sparse.cpp | 4 +- 5 files changed, 101 insertions(+), 170 deletions(-) diff --git a/src/dxvk/dxvk_buffer.cpp b/src/dxvk/dxvk_buffer.cpp index 0b37a380d..5f3211d5d 100644 --- a/src/dxvk/dxvk_buffer.cpp +++ b/src/dxvk/dxvk_buffer.cpp @@ -126,24 +126,7 @@ namespace dxvk { memoryProperties.dedicated.buffer = handle.buffer; } - // Use high memory priority for GPU-writable resources - bool isGpuWritable = (m_info.access & ( - VK_ACCESS_SHADER_WRITE_BIT | - VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT)) != 0; - - DxvkMemoryFlags hints(DxvkMemoryFlag::GpuReadable); - - if (isGpuWritable) - hints.set(DxvkMemoryFlag::GpuWritable); - - // Staging buffers that can't even be used as a transfer destinations - // are likely short-lived, so we should put them on a separate memory - // pool in order to avoid fragmentation - if ((DxvkBarrierSet::getAccessTypes(m_info.access) == DxvkAccess::Read) - && (m_info.usage & VK_BUFFER_USAGE_TRANSFER_SRC_BIT)) - hints.set(DxvkMemoryFlag::Transient); - - handle.memory = m_memAlloc->alloc(memoryRequirements, memoryProperties, hints); + handle.memory = m_memAlloc->alloc(memoryRequirements, memoryProperties); if (!handle.buffer && (!handle.memory.buffer() || (handle.memory.getBufferUsage() & info.usage) != info.usage)) handle.buffer = createBuffer(info); diff --git a/src/dxvk/dxvk_image.cpp b/src/dxvk/dxvk_image.cpp index 8ddbd477a..5c5f06fe4 100644 --- a/src/dxvk/dxvk_image.cpp +++ b/src/dxvk/dxvk_image.cpp @@ -97,20 +97,7 @@ namespace dxvk { memoryProperties.dedicated.image = m_image.image; } - // Use high memory priority for GPU-writable resources - bool isGpuWritable = (m_info.access & ( - VK_ACCESS_SHADER_WRITE_BIT | - VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | - VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | - VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | - VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT)) != 0; - - DxvkMemoryFlags hints(DxvkMemoryFlag::GpuReadable); - - if (isGpuWritable) - hints.set(DxvkMemoryFlag::GpuWritable); - - m_image.memory = memAlloc.alloc(memoryRequirements, memoryProperties, hints); + m_image.memory = memAlloc.alloc(memoryRequirements, memoryProperties); // Try to bind the allocated memory slice to the image if (m_vkd->vkBindImageMemory(m_vkd->device(), m_image.image, @@ -140,8 +127,7 @@ namespace dxvk { core.size = SparseMemoryPageSize * properties.metadataPageCount; core.alignment = SparseMemoryPageSize; - m_image.memory = memAlloc.alloc(memoryRequirements, - memoryProperties, DxvkMemoryFlag::GpuReadable); + m_image.memory = memAlloc.alloc(memoryRequirements, memoryProperties); } } } diff --git a/src/dxvk/dxvk_memory.cpp b/src/dxvk/dxvk_memory.cpp index ce58f8070..e37f84de5 100644 --- a/src/dxvk/dxvk_memory.cpp +++ b/src/dxvk/dxvk_memory.cpp @@ -66,9 +66,8 @@ namespace dxvk { DxvkMemoryChunk::DxvkMemoryChunk( DxvkMemoryAllocator* alloc, DxvkMemoryType* type, - DxvkDeviceMemory memory, - DxvkMemoryFlags hints) - : m_alloc(alloc), m_type(type), m_memory(memory), m_hints(hints), + DxvkDeviceMemory memory) + : m_alloc(alloc), m_type(type), m_memory(memory), m_pageAllocator(memory.memSize), m_poolAllocator(m_pageAllocator) { @@ -85,12 +84,25 @@ namespace dxvk { DxvkMemory DxvkMemoryChunk::alloc( VkMemoryPropertyFlags flags, VkDeviceSize size, - VkDeviceSize align, - DxvkMemoryFlags hints) { - // Property flags must be compatible. This could - // be refined a bit in the future if necessary. - if (m_memory.memFlags != flags || !checkHints(hints)) - return DxvkMemory(); + VkDeviceSize align) { + if (likely(!isEmpty())) { + // If the chunk is in use, only accept allocations that do or do not need + // host access depending on whether the chunk is currently mapped in order + // to reduce the total amount of address space consumed for mapped chunks. + VkMemoryPropertyFlags got = m_memory.memPointer + ? VkMemoryPropertyFlags(VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) + : VkMemoryPropertyFlags(0u); + + if ((flags ^ got) & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) + return DxvkMemory(); + } else { + // Lazily map or unmap the chunk depending on what the first allocation + // actually needs. + if (flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) + mapChunk(); + else + unmapChunk(); + } size = dxvk::align(size, align); @@ -123,25 +135,41 @@ namespace dxvk { } - bool DxvkMemoryChunk::isCompatible(const Rc& other) const { - return other->m_memory.memFlags == m_memory.memFlags && other->m_hints == m_hints; + void DxvkMemoryChunk::mapChunk() { + if (m_memory.memPointer) + return; + + auto vk = m_alloc->device()->vkd(); + + VkResult vr = vk->vkMapMemory(vk->device(), m_memory.memHandle, + 0, m_memory.memSize, 0, &m_memory.memPointer); + + if (vr != VK_SUCCESS) + throw DxvkError(str::format("Failed to map memory: ", vr)); + + Logger::debug(str::format("Mapped memory range 0x", std::hex, + reinterpret_cast(m_memory.memPointer), " - 0x", + reinterpret_cast(m_memory.memPointer) + m_memory.memSize)); } - bool DxvkMemoryChunk::checkHints(DxvkMemoryFlags hints) const { - DxvkMemoryFlags mask( - DxvkMemoryFlag::Small, - DxvkMemoryFlag::GpuReadable, - DxvkMemoryFlag::GpuWritable, - DxvkMemoryFlag::Transient); + void DxvkMemoryChunk::unmapChunk() { + if (!m_memory.memPointer) + return; - if (hints.test(DxvkMemoryFlag::IgnoreConstraints)) - mask = DxvkMemoryFlags(); + auto vk = m_alloc->device()->vkd(); + vk->vkUnmapMemory(vk->device(), m_memory.memHandle); - return (m_hints & mask) == (hints & mask); + Logger::debug(str::format("Unmapped memory range 0x", std::hex, + reinterpret_cast(m_memory.memPointer), " - 0x", + reinterpret_cast(m_memory.memPointer) + m_memory.memSize)); + + m_memory.memPointer = nullptr; } + + DxvkMemoryAllocator::DxvkMemoryAllocator(DxvkDevice* device) : m_device (device), m_memProps (device->adapter()->memoryProperties()) { @@ -173,26 +201,12 @@ namespace dxvk { DxvkMemory DxvkMemoryAllocator::alloc( DxvkMemoryRequirements req, - DxvkMemoryProperties info, - DxvkMemoryFlags hints) { + DxvkMemoryProperties info) { std::lock_guard lock(m_mutex); - // Keep small allocations together to avoid fragmenting - // chunks for larger resources with lots of small gaps, - // as well as resources with potentially weird lifetimes - if (req.core.memoryRequirements.size <= SmallAllocationThreshold) { - hints.set(DxvkMemoryFlag::Small); - hints.clr(DxvkMemoryFlag::GpuWritable, DxvkMemoryFlag::GpuReadable); - } - - // Ignore most hints for host-visible allocations since they - // usually don't make much sense for those resources - if (info.flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) - hints = hints & DxvkMemoryFlag::Transient; - // If requested, try with a dedicated allocation first. if (info.dedicated.image || info.dedicated.buffer) { - DxvkMemory result = this->tryAlloc(req, info, hints); + DxvkMemory result = this->tryAlloc(req, info); if (result) return result; @@ -211,23 +225,13 @@ namespace dxvk { req.core.memoryRequirements.alignment = align(req.core.memoryRequirements.alignment, granularity); } - DxvkMemory result = this->tryAlloc(req, info, hints); - - if (result) - return result; - - // Retry without the hint constraints - hints.set(DxvkMemoryFlag::IgnoreConstraints); - result = this->tryAlloc(req, info, hints); + DxvkMemory result = this->tryAlloc(req, info); if (result) return result; } - // If that still didn't work, probe slower memory types as - // well, but re-enable restrictions to decrease fragmentation. - hints.clr(DxvkMemoryFlag::IgnoreConstraints); - + // If that still didn't work, probe slower memory types as well const VkMemoryPropertyFlags optionalFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT; @@ -235,7 +239,7 @@ namespace dxvk { if (info.flags & optionalFlags) { info.flags &= ~optionalFlags; - DxvkMemory result = this->tryAlloc(req, info, hints); + DxvkMemory result = this->tryAlloc(req, info); if (result) return result; @@ -251,8 +255,7 @@ namespace dxvk { DxvkMemory DxvkMemoryAllocator::tryAlloc( const DxvkMemoryRequirements& req, - const DxvkMemoryProperties& info, - DxvkMemoryFlags hints) { + const DxvkMemoryProperties& info) { DxvkMemory result; for (uint32_t i = 0; i < m_memProps.memoryTypeCount && !result; i++) { @@ -263,7 +266,7 @@ namespace dxvk { result = this->tryAllocFromType(&m_memTypes[i], req.core.memoryRequirements.size, req.core.memoryRequirements.alignment, - info, hints); + info); } } @@ -275,12 +278,11 @@ namespace dxvk { DxvkMemoryType* type, VkDeviceSize size, VkDeviceSize align, - const DxvkMemoryProperties& info, - DxvkMemoryFlags hints) { + const DxvkMemoryProperties& info) { constexpr VkDeviceSize DedicatedAllocationThreshold = 3; VkDeviceSize chunkSize = pickChunkSize(type->memTypeId, - DedicatedAllocationThreshold * size, hints); + DedicatedAllocationThreshold * size); DxvkMemory memory; @@ -298,7 +300,7 @@ namespace dxvk { if (!needsDedicatedAlocation && (!wantsDedicatedAllocation || heapBudgedExceeded)) { // Attempt to suballocate from existing chunks first for (uint32_t i = 0; i < type->chunks.size() && !memory; i++) - memory = type->chunks[i]->alloc(info.flags, size, align, hints); + memory = type->chunks[i]->alloc(info.flags, size, align); // If no existing chunk can accomodate the allocation, and if a dedicated // allocation is not preferred, create a new chunk and suballocate from it @@ -309,15 +311,15 @@ namespace dxvk { this->freeEmptyChunks(type->heap); for (uint32_t i = 0; i < 6 && (chunkSize >> i) >= size && !devMem.memHandle; i++) - devMem = tryAllocDeviceMemory(type, chunkSize >> i, info, hints); + devMem = tryAllocDeviceMemory(type, chunkSize >> i, info, true); if (devMem.memHandle) { - Rc chunk = new DxvkMemoryChunk(this, type, devMem, hints); - memory = chunk->alloc(info.flags, size, align, hints); + Rc chunk = new DxvkMemoryChunk(this, type, devMem); + memory = chunk->alloc(info.flags, size, align); type->chunks.push_back(std::move(chunk)); - adjustChunkSize(type->memTypeId, devMem.memSize, hints); + adjustChunkSize(type->memTypeId, devMem.memSize); } } } @@ -328,7 +330,7 @@ namespace dxvk { if (this->shouldFreeEmptyChunks(type->heap, size)) this->freeEmptyChunks(type->heap); - DxvkDeviceMemory devMem = this->tryAllocDeviceMemory(type, size, info, hints); + DxvkDeviceMemory devMem = this->tryAllocDeviceMemory(type, size, info, false); if (devMem.memHandle != VK_NULL_HANDLE) { memory = DxvkMemory(this, nullptr, type, @@ -349,31 +351,23 @@ namespace dxvk { DxvkMemoryType* type, VkDeviceSize size, DxvkMemoryProperties info, - DxvkMemoryFlags hints) { + bool isChunk) { auto vk = m_device->vkd(); bool useMemoryPriority = (info.flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) && (m_device->features().extMemoryPriority.memoryPriority); - - float priority = 0.0f; - - if (hints.test(DxvkMemoryFlag::GpuReadable)) - priority = 0.5f; - if (hints.test(DxvkMemoryFlag::GpuWritable)) - priority = 1.0f; bool dedicated = info.dedicated.buffer || info.dedicated.image; DxvkDeviceMemory result; result.memSize = size; - result.memFlags = info.flags; - result.priority = priority; VkMemoryAllocateFlagsInfo memoryFlags = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO }; memoryFlags.flags |= VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT; VkMemoryPriorityAllocateInfoEXT priorityInfo = { VK_STRUCTURE_TYPE_MEMORY_PRIORITY_ALLOCATE_INFO_EXT }; - priorityInfo.priority = priority; + priorityInfo.priority = (type->memType.propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) + ? 0.0f : (dedicated ? 1.0f : 0.5f); VkMemoryAllocateInfo memoryInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO }; memoryInfo.allocationSize = size; @@ -396,18 +390,20 @@ namespace dxvk { if (vk->vkAllocateMemory(vk->device(), &memoryInfo, nullptr, &result.memHandle)) return DxvkDeviceMemory(); - - if (info.flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) { - VkResult status = vk->vkMapMemory(vk->device(), result.memHandle, 0, VK_WHOLE_SIZE, 0, &result.memPointer); - if (status) { - Logger::err(str::format("DxvkMemoryAllocator: Mapping memory failed with ", status)); - vk->vkFreeMemory(vk->device(), result.memHandle, nullptr); - return DxvkDeviceMemory(); - } + if (!isChunk && (info.flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) { + VkResult vr = vk->vkMapMemory(vk->device(), result.memHandle, + 0, result.memSize, 0, &result.memPointer); + + if (vr != VK_SUCCESS) + throw DxvkError(str::format("Failed to map memory: ", vr)); + + Logger::debug(str::format("Mapped memory range 0x", std::hex, + reinterpret_cast(result.memPointer), " - 0x", + reinterpret_cast(result.memPointer) + result.memSize)); } - if (type->bufferUsage && !dedicated) { + if (type->bufferUsage && isChunk) { VkBuffer buffer = VK_NULL_HANDLE; VkBufferCreateInfo bufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; @@ -507,7 +503,7 @@ namespace dxvk { } - VkDeviceSize DxvkMemoryAllocator::pickChunkSize(uint32_t memTypeId, VkDeviceSize requiredSize, DxvkMemoryFlags hints) const { + VkDeviceSize DxvkMemoryAllocator::pickChunkSize(uint32_t memTypeId, VkDeviceSize requiredSize) const { VkMemoryType type = m_memProps.memoryTypes[memTypeId]; VkMemoryHeap heap = m_memProps.memoryHeaps[type.heapIndex]; @@ -516,9 +512,6 @@ namespace dxvk { while (chunkSize < requiredSize && chunkSize < MaxChunkSize) chunkSize <<= 1u; - if (hints.test(DxvkMemoryFlag::Small)) - chunkSize = std::min(chunkSize, 16 << 20); - // Try to waste a bit less system memory especially in // 32-bit applications due to address space constraints if (type.propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) @@ -535,14 +528,13 @@ namespace dxvk { void DxvkMemoryAllocator::adjustChunkSize( uint32_t memTypeId, - VkDeviceSize allocatedSize, - DxvkMemoryFlags hints) { + VkDeviceSize allocatedSize) { VkDeviceSize chunkSize = m_memTypes[memTypeId].chunkSize; // Don't bump chunk size if we reached the maximum or if // we already were unable to allocate a full chunk. if (chunkSize <= allocatedSize && chunkSize <= m_memTypes[memTypeId].heap->stats.memoryAllocated) - m_memTypes[memTypeId].chunkSize = pickChunkSize(memTypeId, chunkSize * 2, DxvkMemoryFlags()); + m_memTypes[memTypeId].chunkSize = pickChunkSize(memTypeId, chunkSize * 2); } @@ -562,7 +554,7 @@ namespace dxvk { uint32_t numEmptyChunks = 0; for (const auto& c : type->chunks) { - if (c != chunk && c->isEmpty() && c->isCompatible(chunk)) + if (c != chunk && c->isEmpty()) numEmptyChunks += 1; } diff --git a/src/dxvk/dxvk_memory.h b/src/dxvk/dxvk_memory.h index fae4ad14d..739abcb96 100644 --- a/src/dxvk/dxvk_memory.h +++ b/src/dxvk/dxvk_memory.h @@ -54,8 +54,6 @@ namespace dxvk { VkDeviceMemory memHandle = VK_NULL_HANDLE; void* memPointer = nullptr; VkDeviceSize memSize = 0; - VkMemoryPropertyFlags memFlags = 0; - float priority = 0.0f; }; @@ -199,23 +197,6 @@ namespace dxvk { void free(); }; - - - /** - * \brief Memory allocation flags - * - * Used to batch similar allocations into the same - * set of chunks, which may help with fragmentation. - */ - enum class DxvkMemoryFlag : uint32_t { - Small = 0, ///< Small allocation - GpuReadable = 1, ///< Medium-priority resource - GpuWritable = 2, ///< High-priority resource - Transient = 3, ///< Resource is short-lived - IgnoreConstraints = 4, ///< Ignore most allocation flags - }; - - using DxvkMemoryFlags = Flags; /** @@ -231,8 +212,7 @@ namespace dxvk { DxvkMemoryChunk( DxvkMemoryAllocator* alloc, DxvkMemoryType* type, - DxvkDeviceMemory memory, - DxvkMemoryFlags m_hints); + DxvkDeviceMemory memory); ~DxvkMemoryChunk(); @@ -258,8 +238,7 @@ namespace dxvk { DxvkMemory alloc( VkMemoryPropertyFlags flags, VkDeviceSize size, - VkDeviceSize align, - DxvkMemoryFlags hints); + VkDeviceSize align); /** * \brief Frees memory @@ -280,24 +259,19 @@ namespace dxvk { */ bool isEmpty() const; - /** - * \brief Checks whether hints and flags of another chunk match - * \param [in] other The chunk to compare to - */ - bool isCompatible(const Rc& other) const; - private: DxvkMemoryAllocator* m_alloc; DxvkMemoryType* m_type; DxvkDeviceMemory m_memory; - DxvkMemoryFlags m_hints; DxvkPageAllocator m_pageAllocator; DxvkPoolAllocator m_poolAllocator; - bool checkHints(DxvkMemoryFlags hints) const; - + void mapChunk(); + + void unmapChunk(); + }; @@ -341,6 +315,10 @@ namespace dxvk { DxvkMemoryAllocator(DxvkDevice* device); ~DxvkMemoryAllocator(); + DxvkDevice* device() const { + return m_device; + } + /** * \brief Memory type mask for sparse resources * \returns Sparse resource memory types @@ -354,13 +332,11 @@ namespace dxvk { * * \param [in] req Memory requirements * \param [in] info Memory properties - * \param [in] hints Memory hints * \returns Allocated memory slice */ DxvkMemory alloc( DxvkMemoryRequirements req, - DxvkMemoryProperties info, - DxvkMemoryFlags hints); + DxvkMemoryProperties info); /** * \brief Queries memory stats @@ -411,21 +387,19 @@ namespace dxvk { DxvkMemory tryAlloc( const DxvkMemoryRequirements& req, - const DxvkMemoryProperties& info, - DxvkMemoryFlags hints); + const DxvkMemoryProperties& info); DxvkMemory tryAllocFromType( DxvkMemoryType* type, VkDeviceSize size, VkDeviceSize align, - const DxvkMemoryProperties& info, - DxvkMemoryFlags hints); + const DxvkMemoryProperties& info); DxvkDeviceMemory tryAllocDeviceMemory( DxvkMemoryType* type, VkDeviceSize size, DxvkMemoryProperties info, - DxvkMemoryFlags hints); + bool isChunk); void free( const DxvkMemory& memory); @@ -442,13 +416,11 @@ namespace dxvk { VkDeviceSize pickChunkSize( uint32_t memTypeId, - VkDeviceSize requiredSize, - DxvkMemoryFlags hints) const; + VkDeviceSize requiredSize) const; void adjustChunkSize( uint32_t memTypeId, - VkDeviceSize allocatedSize, - DxvkMemoryFlags hints); + VkDeviceSize allocatedSize); bool shouldFreeChunk( const DxvkMemoryType* type, diff --git a/src/dxvk/dxvk_sparse.cpp b/src/dxvk/dxvk_sparse.cpp index a8547bcfe..7948c0658 100644 --- a/src/dxvk/dxvk_sparse.cpp +++ b/src/dxvk/dxvk_sparse.cpp @@ -153,9 +153,7 @@ namespace dxvk { DxvkMemoryProperties memoryProperties = { }; memoryProperties.flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; - DxvkMemory memory = m_memory->alloc(memoryRequirements, - memoryProperties, DxvkMemoryFlag::GpuReadable); - + DxvkMemory memory = m_memory->alloc(memoryRequirements, memoryProperties); return new DxvkSparsePage(std::move(memory)); }