diff --git a/src/dxvk/dxvk_image.cpp b/src/dxvk/dxvk_image.cpp index 5c5f06fe4..d5142e171 100644 --- a/src/dxvk/dxvk_image.cpp +++ b/src/dxvk/dxvk_image.cpp @@ -76,27 +76,29 @@ namespace dxvk { DxvkMemoryProperties memoryProperties = { }; memoryProperties.flags = m_memFlags; + if (memoryRequirements.dedicated.prefersDedicatedAllocation || m_shared) { + memoryProperties.dedicated = { VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO }; + memoryProperties.dedicated.image = m_image.image; + } + if (m_shared) { memoryRequirements.dedicated.prefersDedicatedAllocation = VK_TRUE; memoryRequirements.dedicated.requiresDedicatedAllocation = VK_TRUE; if (createInfo.sharing.mode == DxvkSharedHandleMode::Export) { memoryProperties.sharedExport = { VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO }; + memoryProperties.sharedExport.pNext = std::exchange(memoryProperties.dedicated.pNext, &memoryProperties.sharedExport); memoryProperties.sharedExport.handleTypes = createInfo.sharing.type; } if (createInfo.sharing.mode == DxvkSharedHandleMode::Import) { memoryProperties.sharedImportWin32 = { VK_STRUCTURE_TYPE_IMPORT_MEMORY_WIN32_HANDLE_INFO_KHR }; + memoryProperties.sharedImportWin32.pNext = std::exchange(memoryProperties.dedicated.pNext, &memoryProperties.sharedImportWin32); memoryProperties.sharedImportWin32.handleType = createInfo.sharing.type; memoryProperties.sharedImportWin32.handle = createInfo.sharing.handle; } } - if (memoryRequirements.dedicated.prefersDedicatedAllocation) { - memoryProperties.dedicated = { VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO }; - memoryProperties.dedicated.image = m_image.image; - } - m_image.memory = memAlloc.alloc(memoryRequirements, memoryProperties); // Try to bind the allocated memory slice to the image diff --git a/src/dxvk/dxvk_memory.cpp b/src/dxvk/dxvk_memory.cpp index 436ab04fb..5836451da 100644 --- a/src/dxvk/dxvk_memory.cpp +++ b/src/dxvk/dxvk_memory.cpp @@ -10,30 +10,29 @@ namespace dxvk { DxvkMemory::DxvkMemory() { } DxvkMemory::DxvkMemory( DxvkMemoryAllocator* alloc, - DxvkMemoryChunk* chunk, DxvkMemoryType* type, VkBuffer buffer, VkDeviceMemory memory, - VkDeviceSize offset, + VkDeviceSize address, VkDeviceSize length, void* mapPtr) : m_alloc (alloc), - m_chunk (chunk), m_type (type), m_buffer (buffer), m_memory (memory), - m_offset (offset), + m_address (address), m_length (length), - m_mapPtr (mapPtr) { } + m_mapPtr (mapPtr) { + + } DxvkMemory::DxvkMemory(DxvkMemory&& other) : m_alloc (std::exchange(other.m_alloc, nullptr)), - m_chunk (std::exchange(other.m_chunk, nullptr)), m_type (std::exchange(other.m_type, nullptr)), m_buffer (std::exchange(other.m_buffer, VkBuffer(VK_NULL_HANDLE))), m_memory (std::exchange(other.m_memory, VkDeviceMemory(VK_NULL_HANDLE))), - m_offset (std::exchange(other.m_offset, 0)), + m_address (std::exchange(other.m_address, 0)), m_length (std::exchange(other.m_length, 0)), m_mapPtr (std::exchange(other.m_mapPtr, nullptr)) { } @@ -41,11 +40,10 @@ namespace dxvk { DxvkMemory& DxvkMemory::operator = (DxvkMemory&& other) { this->free(); m_alloc = std::exchange(other.m_alloc, nullptr); - m_chunk = std::exchange(other.m_chunk, nullptr); m_type = std::exchange(other.m_type, nullptr); m_buffer = std::exchange(other.m_buffer, VkBuffer(VK_NULL_HANDLE)); m_memory = std::exchange(other.m_memory, VkDeviceMemory(VK_NULL_HANDLE)); - m_offset = std::exchange(other.m_offset, 0); + m_address = std::exchange(other.m_address, 0); m_length = std::exchange(other.m_length, 0); m_mapPtr = std::exchange(other.m_mapPtr, nullptr); return *this; @@ 
-61,141 +59,38 @@ namespace dxvk { if (m_alloc != nullptr) m_alloc->free(*this); } - - - DxvkMemoryChunk::DxvkMemoryChunk( - DxvkMemoryAllocator* alloc, - DxvkMemoryType* type, - DxvkDeviceMemory memory) - : m_alloc(alloc), m_type(type), m_memory(memory), - m_poolAllocator(m_pageAllocator) { - m_pageAllocator.addChunk(memory.memSize); - } - - - DxvkMemoryChunk::~DxvkMemoryChunk() { - // This call is technically not thread-safe, but it - // doesn't need to be since we don't free chunks - m_alloc->freeDeviceMemory(m_type, m_memory); - } - - - DxvkMemory DxvkMemoryChunk::alloc( - VkMemoryPropertyFlags flags, - VkDeviceSize size, - VkDeviceSize align) { - if (likely(!isEmpty())) { - // If the chunk is in use, only accept allocations that do or do not need - // host access depending on whether the chunk is currently mapped in order - // to reduce the total amount of address space consumed for mapped chunks. - VkMemoryPropertyFlags got = m_memory.memPointer - ? VkMemoryPropertyFlags(VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) - : VkMemoryPropertyFlags(0u); - - if ((flags ^ got) & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) - return DxvkMemory(); - } else { - // Lazily map or unmap the chunk depending on what the first allocation - // actually needs. - if (flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) - mapChunk(); - else - unmapChunk(); - } - - size = dxvk::align(size, align); - - int64_t address = size <= DxvkPoolAllocator::MaxSize - ? m_poolAllocator.alloc(size) - : m_pageAllocator.alloc(size, align); - - if (address < 0) - return DxvkMemory(); - - // Create the memory object with the aligned slice - return DxvkMemory(m_alloc, this, m_type, - m_memory.buffer, m_memory.memHandle, address, size, - reinterpret_cast(m_memory.memPointer) + address); - } - - - void DxvkMemoryChunk::free( - VkDeviceSize offset, - VkDeviceSize length) { - if (length <= DxvkPoolAllocator::MaxSize) - m_poolAllocator.free(offset, length); - else - m_pageAllocator.free(offset, length); - } - - - bool DxvkMemoryChunk::isEmpty() const { - return m_pageAllocator.pagesUsed(0u) == 0u; - } - - - void DxvkMemoryChunk::getAllocationStats(DxvkMemoryAllocationStats& stats) const { - auto& chunkStats = stats.chunks.emplace_back(); - chunkStats.capacity = uint64_t(m_pageAllocator.pageCount(0u)) * DxvkPageAllocator::PageSize; - chunkStats.used = uint64_t(m_pageAllocator.pagesUsed(0u)) * DxvkPageAllocator::PageSize; - chunkStats.pageMaskOffset = stats.pageMasks.size(); - chunkStats.pageCount = m_pageAllocator.pageCount(0u); - - stats.pageMasks.resize(chunkStats.pageMaskOffset + (chunkStats.pageCount + 31u) / 32u); - m_pageAllocator.getPageAllocationMask(0u, &stats.pageMasks.at(chunkStats.pageMaskOffset)); - } - - - void DxvkMemoryChunk::mapChunk() { - if (m_memory.memPointer) - return; - - auto vk = m_alloc->device()->vkd(); - - VkResult vr = vk->vkMapMemory(vk->device(), m_memory.memHandle, - 0, m_memory.memSize, 0, &m_memory.memPointer); - - if (vr != VK_SUCCESS) - throw DxvkError(str::format("Failed to map memory: ", vr)); - - Logger::debug(str::format("Mapped memory range 0x", std::hex, - reinterpret_cast(m_memory.memPointer), " - 0x", - reinterpret_cast(m_memory.memPointer) + m_memory.memSize)); - } - - - void DxvkMemoryChunk::unmapChunk() { - if (!m_memory.memPointer) - return; - - auto vk = m_alloc->device()->vkd(); - vk->vkUnmapMemory(vk->device(), m_memory.memHandle); - - Logger::debug(str::format("Unmapped memory range 0x", std::hex, - reinterpret_cast(m_memory.memPointer), " - 0x", - reinterpret_cast(m_memory.memPointer) + 
m_memory.memSize)); - - m_memory.memPointer = nullptr; - } DxvkMemoryAllocator::DxvkMemoryAllocator(DxvkDevice* device) - : m_device (device), - m_memProps (device->adapter()->memoryProperties()) { - for (uint32_t i = 0; i < m_memProps.memoryHeapCount; i++) - m_memHeaps[i].properties = m_memProps.memoryHeaps[i]; - - for (uint32_t i = 0; i < m_memProps.memoryTypeCount; i++) { - m_memTypes[i].heap = &m_memHeaps[m_memProps.memoryTypes[i].heapIndex]; - m_memTypes[i].heapId = m_memProps.memoryTypes[i].heapIndex; - m_memTypes[i].memType = m_memProps.memoryTypes[i]; - m_memTypes[i].memTypeId = i; - m_memTypes[i].chunkSize = MinChunkSize; - m_memTypes[i].bufferUsage = 0; + : m_device(device) { + VkPhysicalDeviceMemoryProperties memInfo = device->adapter()->memoryProperties(); + + m_memTypeCount = memInfo.memoryTypeCount; + m_memHeapCount = memInfo.memoryHeapCount; + + for (uint32_t i = 0; i < m_memHeapCount; i++) { + auto& heap = m_memHeaps[i]; + + heap.index = i; + heap.properties = memInfo.memoryHeaps[i]; } + for (uint32_t i = 0; i < m_memTypeCount; i++) { + auto& type = m_memTypes[i]; + + type.index = i; + type.properties = memInfo.memoryTypes[i]; + type.heap = &m_memHeaps[type.properties.heapIndex]; + type.heap->memoryTypes |= 1u << i; + + type.devicePool.maxChunkSize = determineMaxChunkSize(type, false); + type.mappedPool.maxChunkSize = determineMaxChunkSize(type, true); + } + + determineMemoryTypesWithPropertyFlags(); + if (device->features().core.features.sparseBinding) m_sparseMemoryTypes = determineSparseMemoryTypes(device); @@ -204,220 +99,223 @@ namespace dxvk { DxvkMemoryAllocator::~DxvkMemoryAllocator() { - + auto vk = m_device->vkd(); + + for (uint32_t i = 0; i < m_memHeapCount; i++) + freeEmptyChunksInHeap(m_memHeaps[i], VkDeviceSize(-1)); } DxvkMemory DxvkMemoryAllocator::alloc( DxvkMemoryRequirements req, - DxvkMemoryProperties info) { - std::lock_guard lock(m_mutex); - - // If requested, try with a dedicated allocation first. - if (info.dedicated.image || info.dedicated.buffer) { - DxvkMemory result = this->tryAlloc(req, info); - - if (result) - return result; - } - - // If possible, retry without a dedicated allocation - if (!req.dedicated.requiresDedicatedAllocation) { - info.dedicated.image = VK_NULL_HANDLE; - info.dedicated.buffer = VK_NULL_HANDLE; - - // If we're allocating tiled image memory, ensure - // that it will not overlap with buffer memory. 
- if (req.tiling == VK_IMAGE_TILING_OPTIMAL) { - VkDeviceSize granularity = m_device->properties().core.properties.limits.bufferImageGranularity; - req.core.memoryRequirements.size = align(req.core.memoryRequirements.size, granularity); - req.core.memoryRequirements.alignment = align(req.core.memoryRequirements.alignment, granularity); - } - - DxvkMemory result = this->tryAlloc(req, info); - - if (result) - return result; - } - - // If that still didn't work, probe slower memory types as well - const VkMemoryPropertyFlags optionalFlags = - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | - VK_MEMORY_PROPERTY_HOST_CACHED_BIT; - - if (info.flags & optionalFlags) { - info.flags &= ~optionalFlags; - - DxvkMemory result = this->tryAlloc(req, info); - - if (result) - return result; - } - - // We weren't able to allocate memory for this resource form any type - this->logMemoryError(req.core.memoryRequirements); - this->logMemoryStats(); - - throw DxvkError("DxvkMemoryAllocator: Memory allocation failed"); - } - - - DxvkMemory DxvkMemoryAllocator::tryAlloc( - const DxvkMemoryRequirements& req, const DxvkMemoryProperties& info) { - DxvkMemory result; - - for (uint32_t i = 0; i < m_memProps.memoryTypeCount && !result; i++) { - const bool supported = (req.core.memoryRequirements.memoryTypeBits & (1u << i)) != 0; - const bool adequate = (m_memTypes[i].memType.propertyFlags & info.flags) == info.flags; - - if (supported && adequate) { - result = this->tryAllocFromType(&m_memTypes[i], - req.core.memoryRequirements.size, - req.core.memoryRequirements.alignment, - info); - } + // Enforce that tiled images do not overlap with buffers in memory. + // Only changes anything for small images on older Nvidia hardware. + if (req.tiling == VK_IMAGE_TILING_OPTIMAL) { + req.core.memoryRequirements.alignment = std::max(req.core.memoryRequirements.alignment, + m_device->properties().core.properties.limits.bufferImageGranularity); } - - return result; - } - - - DxvkMemory DxvkMemoryAllocator::tryAllocFromType( - DxvkMemoryType* type, - VkDeviceSize size, - VkDeviceSize align, - const DxvkMemoryProperties& info) { - constexpr VkDeviceSize DedicatedAllocationThreshold = 3; - VkDeviceSize chunkSize = pickChunkSize(type->memTypeId, - DedicatedAllocationThreshold * size); + // If requested, try to create a dedicated allocation. If this + // fails, we may still fall back to a suballocation unless a + // dedicated allocation is explicitly required. 
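// --- Illustrative sketch, not part of the patch -----------------------------
// The new alloc() path is a small fallback ladder: a dedicated allocation if
// one was requested, then a regular suballocation, and finally the same
// request with VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT dropped so the resource can
// land in system memory under VRAM pressure. The helper below only models that
// ladder; allocWithFallback, tryDedicated and trySuballocate are hypothetical
// names, not DXVK API.
#include <cstdint>
#include <functional>
#include <optional>

struct Allocation { uint64_t address = 0; };
using TryAlloc = std::function<std::optional<Allocation>(uint32_t /* propertyFlags */)>;

std::optional<Allocation> allocWithFallback(uint32_t flags, uint32_t deviceLocalBit,
    bool wantsDedicated, bool requiresDedicated,
    const TryAlloc& tryDedicated, const TryAlloc& trySuballocate) {
  if (wantsDedicated) {
    if (auto a = tryDedicated(flags))
      return a;
    // A required dedicated allocation may only retry with DEVICE_LOCAL dropped.
    if (requiresDedicated && (flags & deviceLocalBit))
      return tryDedicated(flags & ~deviceLocalBit);
  }
  if (auto a = trySuballocate(flags))
    return a;
  // Last resort: give up on device-local placement.
  if (flags & deviceLocalBit)
    return trySuballocate(flags & ~deviceLocalBit);
  return std::nullopt;
}
// -----------------------------------------------------------------------------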
+ if (unlikely(info.dedicated.buffer || info.dedicated.image)) { + DxvkMemory memory = allocateDedicatedMemory( + req.core.memoryRequirements, info.flags, &info.dedicated); - DxvkMemory memory; + if (memory) + return memory; - // Require dedicated allocations for resources that use the Vulkan dedicated - // allocation bits, or are too large to fit into a single full-sized chunk - bool needsDedicatedAlocation = size >= chunkSize || info.dedicated.buffer || info.dedicated.image; - - // Prefer a dedicated allocation for very large resources in order to - // reduce fragmentation if a large number of those resources are in use - bool wantsDedicatedAllocation = DedicatedAllocationThreshold * size > chunkSize; - - // Try to reuse existing memory as much as possible in case the heap is nearly full - bool heapBudgedExceeded = 5 * type->stats.memoryUsed + size > 4 * type->heap->properties.size; - - if (!needsDedicatedAlocation && (!wantsDedicatedAllocation || heapBudgedExceeded)) { - // Attempt to suballocate from existing chunks first - for (uint32_t i = 0; i < type->chunks.size() && !memory; i++) - memory = type->chunks[i]->alloc(info.flags, size, align); - - // If no existing chunk can accomodate the allocation, and if a dedicated - // allocation is not preferred, create a new chunk and suballocate from it - if (!memory && !wantsDedicatedAllocation) { - DxvkDeviceMemory devMem; - - if (this->shouldFreeEmptyChunks(type->heapId, chunkSize)) - this->freeEmptyChunks(type->heap); - - for (uint32_t i = 0; i < 6 && (chunkSize >> i) >= size && !devMem.memHandle; i++) - devMem = tryAllocDeviceMemory(type, chunkSize >> i, info, true); - - if (devMem.memHandle) { - Rc chunk = new DxvkMemoryChunk(this, type, devMem); - memory = chunk->alloc(info.flags, size, align); - - type->chunks.push_back(std::move(chunk)); - - adjustChunkSize(type->memTypeId, devMem.memSize); - } + if (req.dedicated.requiresDedicatedAllocation && (info.flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)) { + return allocateDedicatedMemory(req.core.memoryRequirements, + info.flags & ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, &info.dedicated); } } - // If a dedicated allocation is required or preferred and we haven't managed - // to suballocate any memory before, try to create a dedicated allocation - if (!memory && (needsDedicatedAlocation || wantsDedicatedAllocation)) { - if (this->shouldFreeEmptyChunks(type->heapId, size)) - this->freeEmptyChunks(type->heap); + // Suballocate memory from an existing chunk + DxvkMemory memory = allocateMemory(req.core.memoryRequirements, info.flags); - DxvkDeviceMemory devMem = this->tryAllocDeviceMemory(type, size, info, false); - - if (devMem.memHandle != VK_NULL_HANDLE) { - memory = DxvkMemory(this, nullptr, type, - devMem.buffer, devMem.memHandle, 0, size, devMem.memPointer); - } - } - - if (memory) { - type->stats.memoryUsed += memory.m_length; - m_device->notifyMemoryUse(type->heapId, memory.m_length); + if (unlikely(!memory) && (info.flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)) { + memory = allocateMemory(req.core.memoryRequirements, + info.flags & ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); } return memory; } - - - DxvkDeviceMemory DxvkMemoryAllocator::tryAllocDeviceMemory( - DxvkMemoryType* type, - VkDeviceSize size, - DxvkMemoryProperties info, - bool isChunk) { - auto vk = m_device->vkd(); - bool useMemoryPriority = (info.flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) - && (m_device->features().extMemoryPriority.memoryPriority); - bool dedicated = info.dedicated.buffer || info.dedicated.image; + DxvkMemory 
DxvkMemoryAllocator::allocateMemory( + const VkMemoryRequirements& requirements, + VkMemoryPropertyFlags properties) { + std::lock_guard lock(m_mutex); - DxvkDeviceMemory result; - result.memSize = size; + // Ensure the allocation size is also aligned + VkDeviceSize size = align(requirements.size, requirements.alignment); - VkMemoryAllocateFlagsInfo memoryFlags = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO }; - memoryFlags.flags |= VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT; + for (auto typeIndex : getMemoryTypeMask(requirements, properties)) { + auto& type = m_memTypes[typeIndex]; - VkMemoryPriorityAllocateInfoEXT priorityInfo = { VK_STRUCTURE_TYPE_MEMORY_PRIORITY_ALLOCATE_INFO_EXT }; - priorityInfo.priority = (type->memType.propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) - ? 0.0f : (dedicated ? 1.0f : 0.5f); + // Use correct memory pool depending on property flags. This way we avoid + // wasting address space on fallback allocations, or on UMA devices that + // only expose one memory type. + auto& selectedPool = (properties & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) + ? type.mappedPool + : type.devicePool; - VkMemoryAllocateInfo memoryInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO }; - memoryInfo.allocationSize = size; - memoryInfo.memoryTypeIndex = type->memTypeId; + // Always try to suballocate first, even if the allocation is + // very large. We will decide what to do if this fails. + int64_t address = selectedPool.alloc(size, requirements.alignment); - if (info.sharedExport.handleTypes) - info.sharedExport.pNext = std::exchange(memoryInfo.pNext, &info.sharedExport); + if (likely(address >= 0)) + return createMemory(type, selectedPool, address, size); - if (info.sharedImportWin32.handleType) - info.sharedImportWin32.pNext = std::exchange(memoryInfo.pNext, &info.sharedImportWin32); + // If the memory type is host-visible, try to find an existing chunk + // in the other memory pool of the memory type and move over. + if (type.properties.propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) { + auto& oppositePool = (properties & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) + ? 
type.devicePool + : type.mappedPool; - if (info.dedicated.buffer || info.dedicated.image) - info.dedicated.pNext = std::exchange(memoryInfo.pNext, &info.dedicated); + int32_t freeChunkIndex = findEmptyChunkInPool(oppositePool, + size, selectedPool.maxChunkSize); - if (useMemoryPriority) - priorityInfo.pNext = std::exchange(memoryInfo.pNext, &priorityInfo); + if (freeChunkIndex >= 0) { + uint32_t poolChunkIndex = selectedPool.pageAllocator.addChunk(oppositePool.chunks[freeChunkIndex].size); + selectedPool.chunks.resize(std::max(selectedPool.chunks.size(), poolChunkIndex + 1u)); + selectedPool.chunks[poolChunkIndex] = oppositePool.chunks[freeChunkIndex]; - if (!info.dedicated.image && (type->bufferUsage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT)) - memoryFlags.pNext = std::exchange(memoryInfo.pNext, &memoryFlags); + oppositePool.pageAllocator.removeChunk(freeChunkIndex); + oppositePool.chunks[freeChunkIndex] = DxvkDeviceMemory(); - if (vk->vkAllocateMemory(vk->device(), &memoryInfo, nullptr, &result.memHandle)) - return DxvkDeviceMemory(); + mapDeviceMemory(selectedPool.chunks[poolChunkIndex], properties); - if (!isChunk && (info.flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) { - VkResult vr = vk->vkMapMemory(vk->device(), result.memHandle, - 0, result.memSize, 0, &result.memPointer); + address = selectedPool.alloc(size, requirements.alignment); - if (vr != VK_SUCCESS) - throw DxvkError(str::format("Failed to map memory: ", vr)); + if (likely(address >= 0)) + return createMemory(type, selectedPool, address, size); + } + } - Logger::debug(str::format("Mapped memory range 0x", std::hex, - reinterpret_cast(result.memPointer), " - 0x", - reinterpret_cast(result.memPointer) + result.memSize)); + // If the allocation is very large, use a dedicated allocation instead + // of creating a new chunk. This way we avoid excessive fragmentation, + // especially when a multiple such resources are created at once. + if (size * MinResourcesPerChunk > selectedPool.maxChunkSize) { + DxvkDeviceMemory memory = allocateDeviceMemory(type, requirements.size, nullptr); + + if (!memory.memory) + continue; + + mapDeviceMemory(memory, properties); + return createMemory(type, memory); + } + + // Try to allocate a new chunk that is large enough to hold + // multiple resources of the type we're tying to allocate. 
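// --- Illustrative sketch, not part of the patch -----------------------------
// Chunk sizing is geometric in both directions: the desired size doubles until
// the new chunk could hold at least MinResourcesPerChunk allocations of the
// requested size (larger requests were already diverted to a dedicated
// allocation above), and allocateChunkInPool halves it again whenever the
// driver rejects the allocation. Reduced model of that policy; the constants
// and the tryAllocateDeviceMemory callback are assumptions for the sketch.
#include <algorithm>
#include <cstdint>
#include <functional>

constexpr uint64_t SketchMinChunkSize = 4ull << 20;
constexpr uint64_t SketchMinResourcesPerChunk = 4u;

uint64_t pickChunkSize(uint64_t nextChunkSize, uint64_t allocationSize,
    const std::function<bool(uint64_t)>& tryAllocateDeviceMemory) {
  uint64_t desired = nextChunkSize;

  // Grow until several resources of this size fit into one chunk.
  while (desired < allocationSize * SketchMinResourcesPerChunk)
    desired *= 2u;

  // Geometric back-off, mirroring allocateChunkInPool().
  while (desired >= std::max(allocationSize, SketchMinChunkSize)) {
    if (tryAllocateDeviceMemory(desired))
      return desired;
    desired /= 2u;
  }
  return 0u; // no chunk of a useful size could be created
}
// -----------------------------------------------------------------------------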
+ VkDeviceSize desiredSize = selectedPool.nextChunkSize; + + while (desiredSize < size * MinResourcesPerChunk) + desiredSize *= 2u; + + if (allocateChunkInPool(type, selectedPool, properties, size, desiredSize)) { + address = selectedPool.alloc(size, requirements.alignment); + return createMemory(type, selectedPool, address, size); + } } - if (type->bufferUsage && isChunk) { + logMemoryError(requirements); + logMemoryStats(); + + return DxvkMemory(); + } + + + DxvkMemory DxvkMemoryAllocator::allocateDedicatedMemory( + const VkMemoryRequirements& requirements, + VkMemoryPropertyFlags properties, + const void* next) { + std::lock_guard lock(m_mutex); + + DxvkDeviceMemory memory = { }; + + for (auto typeIndex : getMemoryTypeMask(requirements, properties)) { + auto& type = m_memTypes[typeIndex]; + memory = allocateDeviceMemory(type, requirements.size, next); + + if (likely(memory.memory != VK_NULL_HANDLE)) { + mapDeviceMemory(memory, properties); + return createMemory(type, memory); + } + } + + logMemoryError(requirements); + logMemoryStats(); + + return DxvkMemory(); + } + + + DxvkDeviceMemory DxvkMemoryAllocator::allocateDeviceMemory( + DxvkMemoryType& type, + VkDeviceSize size, + const void* next) { + auto vk = m_device->vkd(); + + // Preemptively free some unused allocations to reduce memory waste + freeEmptyChunksInHeap(*type.heap, size); + + VkMemoryAllocateInfo memoryInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, next }; + memoryInfo.allocationSize = size; + memoryInfo.memoryTypeIndex = type.index; + + // Decide on a memory priority based on the memory type and allocation properties + VkMemoryPriorityAllocateInfoEXT priorityInfo = { VK_STRUCTURE_TYPE_MEMORY_PRIORITY_ALLOCATE_INFO_EXT }; + + if (type.properties.propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) { + if (type.properties.propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) { + // BAR allocation. Give this a low priority since these are typically useful + // when placed in system memory. + priorityInfo.priority = 0.0f; + } else if (next) { + // Dedicated allocation, may or may not be a shared resource. Assign this the + // highest priority since this is expected to be a high-bandwidth resource, + // such as a render target. + priorityInfo.priority = 1.0f; + } else { + // Standard priority for resource allocations + priorityInfo.priority = 0.5f; + } + + if (m_device->features().extMemoryPriority.memoryPriority) + priorityInfo.pNext = std::exchange(memoryInfo.pNext, &priorityInfo); + } + + // If buffers can be created on this memory type, also enable the device address bit + VkMemoryAllocateFlagsInfo memoryFlags = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO }; + + if (type.bufferUsage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) { + memoryFlags.pNext = std::exchange(memoryInfo.pNext, &memoryFlags); + memoryFlags.flags |= VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT; + } + + // Try to allocate memory. If this fails, free any remaining + // unused memory from the heap and try again.
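// --- Illustrative sketch, not part of the patch -----------------------------
// The std::exchange pattern used above prepends an extension struct to a
// Vulkan pNext chain in one line: the new struct inherits the old head of the
// chain and the base struct then points at the new struct. The same pattern
// chains the dedicated/export/import structs in dxvk_image.cpp. Toy example:
#include <utility>
#include <vulkan/vulkan.h>

void chainExample() {
  VkMemoryAllocateInfo memoryInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO };

  VkMemoryAllocateFlagsInfo       flagsInfo    = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO };
  VkMemoryPriorityAllocateInfoEXT priorityInfo = { VK_STRUCTURE_TYPE_MEMORY_PRIORITY_ALLOCATE_INFO_EXT };

  // After these two lines the chain is:
  //   memoryInfo -> priorityInfo -> flagsInfo -> nullptr
  flagsInfo.pNext    = std::exchange(memoryInfo.pNext, &flagsInfo);
  priorityInfo.pNext = std::exchange(memoryInfo.pNext, &priorityInfo);
}
// -----------------------------------------------------------------------------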
+ DxvkDeviceMemory result = { }; + result.size = size; + + if (vk->vkAllocateMemory(vk->device(), &memoryInfo, nullptr, &result.memory)) { + freeEmptyChunksInHeap(*type.heap, VkDeviceSize(-1)); + + if (vk->vkAllocateMemory(vk->device(), &memoryInfo, nullptr, &result.memory)) + return DxvkDeviceMemory(); + } + + // Create global buffer if the allocation supports it + if (type.bufferUsage && !next) { VkBuffer buffer = VK_NULL_HANDLE; VkBufferCreateInfo bufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; - bufferInfo.size = result.memSize; - bufferInfo.usage = type->bufferUsage; + bufferInfo.size = size; + bufferInfo.usage = type.bufferUsage; bufferInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE; VkResult status = vk->vkCreateBuffer(vk->device(), &bufferInfo, nullptr, &buffer); @@ -430,8 +328,8 @@ namespace dxvk { vk->vkGetBufferMemoryRequirements2(vk->device(), &memInfo, &requirements); if ((requirements.memoryRequirements.size == size) - && (requirements.memoryRequirements.memoryTypeBits & (1u << type->memTypeId))) { - status = vk->vkBindBufferMemory(vk->device(), buffer, result.memHandle, 0); + && (requirements.memoryRequirements.memoryTypeBits & (1u << type.index))) { + status = vk->vkBindBufferMemory(vk->device(), buffer, result.memory, 0); if (status == VK_SUCCESS) result.buffer = buffer; @@ -444,167 +342,286 @@ namespace dxvk { if (!result.buffer) { Logger::warn(str::format("Failed to create global buffer:", "\n size: ", std::dec, size, - "\n usage: ", std::hex, type->bufferUsage, - "\n type: ", std::dec, type->memTypeId)); + "\n usage: ", std::hex, type.bufferUsage, + "\n type: ", std::dec, type.index)); } } - type->stats.memoryAllocated += size; - m_device->notifyMemoryAlloc(type->heapId, size); + type.stats.memoryAllocated += size; + + m_device->notifyMemoryAlloc(type.properties.heapIndex, size); return result; } + bool DxvkMemoryAllocator::allocateChunkInPool( + DxvkMemoryType& type, + DxvkMemoryPool& pool, + VkMemoryPropertyFlags properties, + VkDeviceSize requiredSize, + VkDeviceSize desiredSize) { + // Try to allocate device memory. If the allocation fails, retry with + // a smaller size until we reach a point where we cannot service the + // allocation. + DxvkDeviceMemory chunk = { }; + + while (!chunk.memory && desiredSize >= std::max(requiredSize, DxvkMemoryPool::MinChunkSize)) { + chunk = allocateDeviceMemory(type, desiredSize, nullptr); + desiredSize /= 2u; + } + + if (!chunk.memory) + return false; + + mapDeviceMemory(chunk, properties); + + // If we expect the application to require more memory in the + // future, increase the chunk size for subsequent allocations. + if (pool.nextChunkSize < pool.maxChunkSize + && pool.nextChunkSize <= type.stats.memoryAllocated / 2u) + pool.nextChunkSize *= 2u; + + // Add the newly created chunk to the pool + uint32_t chunkIndex = pool.pageAllocator.addChunk(chunk.size); + + pool.chunks.resize(std::max(pool.chunks.size(), chunkIndex + 1u)); + pool.chunks[chunkIndex] = chunk; + return true; + } + + + DxvkMemory DxvkMemoryAllocator::createMemory( + DxvkMemoryType& type, + DxvkMemoryPool& pool, + VkDeviceSize address, + VkDeviceSize size) { + type.stats.memoryUsed += size; + + m_device->notifyMemoryUse(type.properties.heapIndex, size); + + uint32_t chunkIndex = address >> DxvkPageAllocator::ChunkAddressBits; + const auto& chunk = pool.chunks[chunkIndex]; + + void* mapPtr = chunk.mapPtr + ? 
reinterpret_cast(chunk.mapPtr) + (address & DxvkPageAllocator::ChunkAddressMask) + : nullptr; + + return DxvkMemory(this, &type, chunk.buffer, chunk.memory, address, size, mapPtr); + } + + + DxvkMemory DxvkMemoryAllocator::createMemory( + DxvkMemoryType& type, + const DxvkDeviceMemory& memory) { + type.stats.memoryUsed += memory.size; + + m_device->notifyMemoryUse(type.properties.heapIndex, memory.size); + + return DxvkMemory(this, &type, memory.buffer, memory.memory, + DedicatedChunkAddress, memory.size, memory.mapPtr); + } + + void DxvkMemoryAllocator::free( const DxvkMemory& memory) { std::lock_guard lock(m_mutex); memory.m_type->stats.memoryUsed -= memory.m_length; - if (memory.m_chunk != nullptr) { - this->freeChunkMemory( - memory.m_type, - memory.m_chunk, - memory.m_offset, - memory.m_length); - } else { + m_device->notifyMemoryUse(memory.m_type->properties.heapIndex, -memory.m_length); + + if (unlikely(memory.m_address == DedicatedChunkAddress)) { DxvkDeviceMemory devMem; - devMem.buffer = memory.m_buffer; - devMem.memHandle = memory.m_memory; - devMem.memPointer = nullptr; - devMem.memSize = memory.m_length; - this->freeDeviceMemory(memory.m_type, devMem); - } + devMem.buffer = memory.m_buffer; + devMem.memory = memory.m_memory; + devMem.mapPtr = nullptr; + devMem.size = memory.m_length; - m_device->notifyMemoryUse(memory.m_type->heapId, -memory.m_length); - } + this->freeDeviceMemory(*memory.m_type, devMem); + } else { + DxvkMemoryPool& pool = memory.m_mapPtr + ? memory.m_type->mappedPool + : memory.m_type->devicePool; - - void DxvkMemoryAllocator::freeChunkMemory( - DxvkMemoryType* type, - DxvkMemoryChunk* chunk, - VkDeviceSize offset, - VkDeviceSize length) { - chunk->free(offset, length); - - if (chunk->isEmpty()) { - Rc chunkRef = chunk; - - // Free the chunk if we have to, or at least put it at the end of - // the list so that chunks that are already in use and cannot be - // freed are prioritized for allocations to reduce memory pressure. 
- type->chunks.erase(std::remove(type->chunks.begin(), type->chunks.end(), chunkRef)); - - if (!this->shouldFreeChunk(type, chunkRef)) - type->chunks.push_back(std::move(chunkRef)); + if (unlikely(pool.free(memory.m_address, memory.m_length))) + freeEmptyChunksInPool(*memory.m_type, pool, 0); } } - + void DxvkMemoryAllocator::freeDeviceMemory( - DxvkMemoryType* type, + DxvkMemoryType& type, DxvkDeviceMemory memory) { auto vk = m_device->vkd(); vk->vkDestroyBuffer(vk->device(), memory.buffer, nullptr); - vk->vkFreeMemory(vk->device(), memory.memHandle, nullptr); + vk->vkFreeMemory(vk->device(), memory.memory, nullptr); - type->stats.memoryAllocated -= memory.memSize; - m_device->notifyMemoryAlloc(type->heapId, memory.memSize); + type.stats.memoryAllocated -= memory.size; + m_device->notifyMemoryAlloc(type.properties.heapIndex, memory.size); } - VkDeviceSize DxvkMemoryAllocator::pickChunkSize(uint32_t memTypeId, VkDeviceSize requiredSize) const { - VkMemoryType type = m_memProps.memoryTypes[memTypeId]; - VkMemoryHeap heap = m_memProps.memoryHeaps[type.heapIndex]; + void DxvkMemoryAllocator::freeEmptyChunksInHeap( + const DxvkMemoryHeap& heap, + VkDeviceSize allocationSize) { + for (auto typeIndex : bit::BitMask(heap.memoryTypes)) { + auto& type = m_memTypes[typeIndex]; - VkDeviceSize chunkSize = m_memTypes[memTypeId].chunkSize; - - while (chunkSize < requiredSize && chunkSize < MaxChunkSize) - chunkSize <<= 1u; - - // Try to waste a bit less system memory especially in - // 32-bit applications due to address space constraints - if (type.propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) - chunkSize = std::min((env::is32BitHostPlatform() ? 16 : 64) << 20, chunkSize); - - // Reduce the chunk size on small heaps so - // we can at least fit in 15 allocations - while (chunkSize * 15 > heap.size) - chunkSize >>= 1; - - return chunkSize; - } - - - void DxvkMemoryAllocator::adjustChunkSize( - uint32_t memTypeId, - VkDeviceSize allocatedSize) { - VkDeviceSize chunkSize = m_memTypes[memTypeId].chunkSize; - - // Don't bump chunk size if we reached the maximum or if - // we already were unable to allocate a full chunk. - if (chunkSize <= allocatedSize && chunkSize <= m_memTypes[memTypeId].stats.memoryAllocated) - m_memTypes[memTypeId].chunkSize = pickChunkSize(memTypeId, chunkSize * 2); - } - - - bool DxvkMemoryAllocator::shouldFreeChunk( - const DxvkMemoryType* type, - const Rc& chunk) const { - // Under memory pressure, we should start freeing everything. - if (this->shouldFreeEmptyChunks(type->heapId, 0)) - return true; - - // Free chunks that are below the current chunk size since it probably - // not going to be able to serve enough allocations to be useful. - if (chunk->size() < type->chunkSize) - return true; - - // Only keep a small number of chunks of each type around to save memory. - uint32_t numEmptyChunks = 0; - - for (const auto& c : type->chunks) { - if (c != chunk && c->isEmpty()) - numEmptyChunks += 1; + freeEmptyChunksInPool(type, type.devicePool, allocationSize); + freeEmptyChunksInPool(type, type.mappedPool, allocationSize); } - - // Be a bit more lenient on system memory since data uploads may otherwise - // lead to a large number of allocations and deallocations at runtime. - uint32_t maxEmptyChunks = env::is32BitHostPlatform() ? 
2 : 4; - - if ((type->memType.propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) - || !(type->memType.propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) - maxEmptyChunks = 1; - - return numEmptyChunks >= maxEmptyChunks; } - bool DxvkMemoryAllocator::shouldFreeEmptyChunks( - uint32_t heapIndex, - VkDeviceSize allocationSize) const { - VkDeviceSize budget = (m_memHeaps[heapIndex].properties.size * 4) / 5; + void DxvkMemoryAllocator::freeEmptyChunksInPool( + DxvkMemoryType& type, + DxvkMemoryPool& pool, + VkDeviceSize allocationSize) { + // Allow for one unused max-size chunk on device-local memory types. + // For system memory allocations, we need to be more lenient since + // applications will frequently allocate staging buffers. + VkDeviceSize maxUnusedMemory = pool.maxChunkSize; - DxvkMemoryStats stats = getMemoryStats(heapIndex); - return stats.memoryAllocated + allocationSize > budget; - } + if (!(type.properties.propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) + && (type.properties.propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) + && (&pool == &type.mappedPool)) + maxUnusedMemory *= env::is32BitHostPlatform() ? 2u : 4u; + VkDeviceSize heapBudget = (type.heap->properties.size * 4) / 5; + VkDeviceSize heapAllocated = getMemoryStats(type.heap->index).memoryAllocated; - void DxvkMemoryAllocator::freeEmptyChunks( - const DxvkMemoryHeap* heap) { - for (uint32_t i = 0; i < m_memProps.memoryTypeCount; i++) { - DxvkMemoryType* type = &m_memTypes[i]; + VkDeviceSize unusedMemory = 0u; - if (type->heap != heap) + for (uint32_t i = 0; i < pool.chunks.size(); i++) { + DxvkDeviceMemory chunk = pool.chunks[i]; + + if (!chunk.memory || pool.pageAllocator.pagesUsed(i)) continue; - type->chunks.erase( - std::remove_if(type->chunks.begin(), type->chunks.end(), - [] (const Rc& chunk) { return chunk->isEmpty(); }), - type->chunks.end()); + // Free the chunk if it is smaller than the current chunk size of + // the pool, since it is unlikely to be useful for future allocations. + // Also free if the pending allocation would exceed the heap budget. + bool shouldFree = chunk.size < pool.nextChunkSize + || allocationSize + heapAllocated > heapBudget + || allocationSize > heapBudget; + + // If we don't free the chunk under these conditions, count it towards + // unused memory in the current memory pool. Once we exceed the limit, + // free any empty chunk we encounter. 
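// --- Illustrative sketch, not part of the patch -----------------------------
// The retention policy for empty chunks, reduced to a single predicate: an
// empty chunk is released if it is too small to be reused, if the pending
// allocation would push the heap over its ~80% budget, or once the pool
// already retains more unused bytes than its allowance (one max-size chunk on
// device-local pools, a few on mapped system-memory pools).
#include <cstdint>

struct RetentionState {
  uint64_t unusedMemory    = 0; // empty-chunk bytes already kept in this pool
  uint64_t maxUnusedMemory = 0; // allowance for the pool
};

bool shouldFreeEmptyChunk(RetentionState& state,
    uint64_t chunkSize, uint64_t nextChunkSize,
    uint64_t pendingAllocation, uint64_t heapAllocated, uint64_t heapBudget) {
  if (chunkSize < nextChunkSize)
    return true; // unlikely to be useful for future allocations
  if (pendingAllocation + heapAllocated > heapBudget)
    return true; // make room for the pending allocation

  state.unusedMemory += chunkSize;
  return state.unusedMemory > state.maxUnusedMemory;
}
// -----------------------------------------------------------------------------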
+ if (!shouldFree) { + unusedMemory += chunk.size; + shouldFree = unusedMemory > maxUnusedMemory; + } + + if (shouldFree) { + freeDeviceMemory(type, chunk); + heapAllocated -= chunk.size; + + pool.chunks[i] = DxvkDeviceMemory(); + pool.pageAllocator.removeChunk(i); + } } } + int32_t DxvkMemoryAllocator::findEmptyChunkInPool( + const DxvkMemoryPool& pool, + VkDeviceSize minSize, + VkDeviceSize maxSize) const { + for (uint32_t i = 0; i < pool.chunks.size(); i++) { + if (pool.chunks[i].memory + && pool.chunks[i].size >= minSize + && pool.chunks[i].size <= maxSize + && !pool.pageAllocator.pagesUsed(i)) + return int32_t(i); + } + + return -1; + } + + + void DxvkMemoryAllocator::mapDeviceMemory( + DxvkDeviceMemory& memory, + VkMemoryPropertyFlags properties) { + if (properties & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) { + if (memory.mapPtr) + return; + + auto vk = m_device->vkd(); + + VkResult vr = vk->vkMapMemory(vk->device(), + memory.memory, 0, memory.size, 0, &memory.mapPtr); + + if (vr != VK_SUCCESS) { + throw DxvkError(str::format("Failed to map Vulkan memory: ", vr, + "\n size: ", memory.size, " bytes")); + } + + Logger::debug(str::format("Mapped memory region 0x", std::hex, + reinterpret_cast(memory.mapPtr), " - 0x", + reinterpret_cast(memory.mapPtr) + memory.size - 1u)); + } else { + if (!memory.mapPtr) + return; + + auto vk = m_device->vkd(); + vk->vkUnmapMemory(vk->device(), memory.memory); + + Logger::debug(str::format("Unmapped memory region 0x", std::hex, + reinterpret_cast(memory.mapPtr), " - 0x", + reinterpret_cast(memory.mapPtr) + memory.size - 1u)); + + memory.mapPtr = nullptr; + } + } + + + void DxvkMemoryAllocator::getAllocationStatsForPool( + const DxvkMemoryType& type, + const DxvkMemoryPool& pool, + DxvkMemoryAllocationStats& stats) { + auto& typeStats = stats.memoryTypes[type.index]; + + for (uint32_t i = 0; i < pool.chunks.size(); i++) { + if (!pool.chunks[i].memory) + continue; + + typeStats.chunkCount += 1u; + + auto& chunkStats = stats.chunks.emplace_back(); + chunkStats.capacity = pool.chunks[i].size; + chunkStats.used = pool.pageAllocator.pagesUsed(i) * DxvkPageAllocator::PageSize; + chunkStats.pageMaskOffset = stats.pageMasks.size(); + chunkStats.pageCount = pool.pageAllocator.pageCount(i); + + size_t maskCount = (chunkStats.pageCount + 31u) / 32u; + stats.pageMasks.resize(chunkStats.pageMaskOffset + maskCount); + + pool.pageAllocator.getPageAllocationMask(i, &stats.pageMasks[chunkStats.pageMaskOffset]); + } + } + + + VkDeviceSize DxvkMemoryAllocator::determineMaxChunkSize( + const DxvkMemoryType& type, + bool mappable) const { + VkDeviceSize size = DxvkMemoryPool::MaxChunkSize; + + // Prefer smaller chunks for host-visible allocations in order to + // reduce the amount of address space required. We compensate for + // the smaller size by allowing more unused memory on these heaps. + if (mappable) + size /= env::is32BitHostPlatform() ? 16u : 4u; + + // Ensure that we can at least do 15 allocations to fill + // the heap. Might be useful on systems with small BAR. 
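// --- Illustrative worked example, not part of the patch ---------------------
// Assuming the pool maximum is 256 MiB and a 64-bit process (the mapped-pool
// divisor is 16 on 32-bit hosts), a hypothetical 256 MiB host-visible,
// device-local heap (small BAR) ends up with 16 MiB mapped chunks:
// 256 MiB / 4 = 64 MiB, then halved twice until 15 chunks fit the heap.
#include <algorithm>
#include <cstdint>

constexpr uint64_t MiB = 1ull << 20;

constexpr uint64_t sketchMaxMappedChunkSize(uint64_t heapSize) {
  uint64_t size = (256 * MiB) / 4;  // smaller chunks for mapped pools
  while (15u * size > heapSize)     // leave room for at least 15 chunks
    size /= 2u;
  return std::max<uint64_t>(size, 4 * MiB); // clamp to the minimum chunk size
}

static_assert(sketchMaxMappedChunkSize(256 * MiB) == 16 * MiB);
// -----------------------------------------------------------------------------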
+ while (15u * size > type.heap->properties.size) + size /= 2u; + + // Always use at least the minimum chunk size + return std::max(size, DxvkMemoryPool::MinChunkSize); + } + + uint32_t DxvkMemoryAllocator::determineSparseMemoryTypes( DxvkDevice* device) const { auto vk = device->vkd(); @@ -706,7 +723,7 @@ namespace dxvk { while (typeMask) { uint32_t type = bit::tzcnt(typeMask); - if (type < m_memProps.memoryTypeCount) + if (type < m_memTypeCount) m_memTypes.at(type).bufferUsage |= bufferInfo.usage; typeMask &= typeMask - 1; @@ -718,7 +735,7 @@ namespace dxvk { // Only use a minimal set of usage flags for the global buffer if the // full combination of flags is not supported for whatever reason. - for (uint32_t i = 0; i < m_memProps.memoryTypeCount; i++) { + for (uint32_t i = 0; i < m_memTypeCount; i++) { bufferInfo.usage = m_memTypes[i].bufferUsage; if (!getBufferMemoryRequirements(bufferInfo, requirements) @@ -731,14 +748,55 @@ namespace dxvk { } + void DxvkMemoryAllocator::determineMemoryTypesWithPropertyFlags() { + // Initialize look-up table for memory type masks based on required property + // flags. This lets us avoid iterating over unsupported memory types + for (uint32_t i = 0; i < m_memTypesByPropertyFlags.size(); i++) { + VkMemoryPropertyFlags flags = VkMemoryPropertyFlags(i); + uint32_t mask = 0u; + + for (uint32_t j = 0; j < m_memTypeCount; j++) { + VkMemoryPropertyFlags typeFlags = m_memTypes[j].properties.propertyFlags; + + if ((typeFlags & flags) != flags) + continue; + + // Do not include device-local memory types if a non-device + // local one exists with the same required property flags. + if (mask && !(flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) + && (typeFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)) + continue; + + mask |= 1u << j; + } + + m_memTypesByPropertyFlags[i] = mask; + } + + // If there is no cached coherent memory type, reuse the uncached + // one. This is likely slow, but API front-ends are relying on it.
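// --- Illustrative sketch, not part of the patch -----------------------------
// The look-up table maps each VkMemoryPropertyFlags combination to a bitmask
// of memory types that provide all requested flags, so alloc() can iterate
// candidates with a couple of bit operations instead of re-testing every type.
// Reduced model; the table size is an assumption, and the extra rule that
// prefers non-device-local types over device-local ones is omitted here.
#include <array>
#include <cstdint>

constexpr uint32_t FlagBits = 5; // only the low property-flag bits are tabled

std::array<uint32_t, 1u << FlagBits> buildTypeMaskTable(
    const uint32_t* typePropertyFlags, uint32_t typeCount) {
  std::array<uint32_t, 1u << FlagBits> table = { };

  for (uint32_t flags = 0; flags < table.size(); flags++) {
    uint32_t mask = 0u;
    for (uint32_t j = 0; j < typeCount; j++) {
      if ((typePropertyFlags[j] & flags) == flags)
        mask |= 1u << j;
    }
    table[flags] = mask;
  }
  return table;
}

// Query mirroring getMemoryTypeMask(): restrict the resource's memoryTypeBits
// to the types that expose all requested property flags.
uint32_t typeMaskFor(const std::array<uint32_t, 1u << FlagBits>& table,
    uint32_t memoryTypeBits, uint32_t requestedFlags) {
  return memoryTypeBits & table[requestedFlags % table.size()];
}
// -----------------------------------------------------------------------------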
+ uint32_t hostCachedIndex = uint32_t( + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | + VK_MEMORY_PROPERTY_HOST_CACHED_BIT); + + uint32_t hostCoherentIndex = uint32_t( + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); + + if (!m_memTypesByPropertyFlags[hostCachedIndex]) + m_memTypesByPropertyFlags[hostCachedIndex] = m_memTypesByPropertyFlags[hostCoherentIndex]; + } + + DxvkMemoryStats DxvkMemoryAllocator::getMemoryStats(uint32_t heap) const { DxvkMemoryStats result = { }; - for (size_t i = 0; i < m_memProps.memoryTypeCount; i++) { - if (m_memTypes[i].heapId == heap) { - result.memoryAllocated += m_memTypes[i].stats.memoryAllocated; - result.memoryUsed += m_memTypes[i].stats.memoryUsed; - } + for (auto typeIndex : bit::BitMask(m_memHeaps[heap].memoryTypes)) { + const auto& type = m_memTypes[typeIndex]; + + result.memoryAllocated += type.stats.memoryAllocated; + result.memoryUsed += type.stats.memoryUsed; } return result; @@ -751,17 +809,18 @@ namespace dxvk { stats.chunks.clear(); stats.pageMasks.clear(); - for (uint32_t i = 0; i < m_memProps.memoryTypeCount; i++) { - const auto& type = m_memTypes[i]; + for (uint32_t i = 0; i < m_memTypeCount; i++) { + const auto& typeInfo = m_memTypes[i]; + auto& typeStats = stats.memoryTypes[i]; - stats.memoryTypes[i].properties = type.memType; - stats.memoryTypes[i].allocated = type.stats.memoryAllocated; - stats.memoryTypes[i].used = type.stats.memoryUsed; - stats.memoryTypes[i].chunkIndex = stats.chunks.size(); - stats.memoryTypes[i].chunkCount = type.chunks.size(); + typeStats.properties = typeInfo.properties; + typeStats.allocated = typeInfo.stats.memoryAllocated; + typeStats.used = typeInfo.stats.memoryUsed; + typeStats.chunkIndex = stats.chunks.size(); + typeStats.chunkCount = 0u; - for (const auto& chunk : type.chunks) - chunk->getAllocationStats(stats); + getAllocationStatsForPool(typeInfo, typeInfo.devicePool, stats); + getAllocationStatsForPool(typeInfo, typeInfo.mappedPool, stats); } } @@ -845,7 +904,7 @@ namespace dxvk { std::stringstream sstr; sstr << "Heap Size (MiB) Allocated Used Reserved Budget" << std::endl; - for (uint32_t i = 0; i < m_memProps.memoryHeapCount; i++) { + for (uint32_t i = 0; i < m_memHeapCount; i++) { DxvkMemoryStats stats = getMemoryStats(i); sstr << std::setw(2) << i << ": " @@ -864,4 +923,14 @@ namespace dxvk { Logger::err(sstr.str()); } + + bit::BitMask DxvkMemoryAllocator::getMemoryTypeMask( + const VkMemoryRequirements& requirements, + VkMemoryPropertyFlags properties) const { + uint32_t mask = requirements.memoryTypeBits; + mask &= m_memTypesByPropertyFlags[uint32_t(properties) % uint32_t(m_memTypesByPropertyFlags.size())]; + + return bit::BitMask(mask); + } + } diff --git a/src/dxvk/dxvk_memory.h b/src/dxvk/dxvk_memory.h index df64eba23..9df6555dd 100644 --- a/src/dxvk/dxvk_memory.h +++ b/src/dxvk/dxvk_memory.h @@ -50,13 +50,51 @@ namespace dxvk { * be persistently mapped. */ struct DxvkDeviceMemory { - VkBuffer buffer = VK_NULL_HANDLE; - VkDeviceMemory memHandle = VK_NULL_HANDLE; - void* memPointer = nullptr; - VkDeviceSize memSize = 0; + VkBuffer buffer = VK_NULL_HANDLE; + VkDeviceMemory memory = VK_NULL_HANDLE; + VkDeviceSize size = 0; + void* mapPtr = nullptr; }; + /** + * \brief Memory pool + * + * Stores a list of memory chunks, as well as an allocator + * over the entire pool. 
+ */ + struct DxvkMemoryPool { + constexpr static VkDeviceSize MaxChunkSize = DxvkPageAllocator::MaxChunkSize; + constexpr static VkDeviceSize MinChunkSize = MaxChunkSize / 64u; + + /// Backing storage for allocated memory chunks + std::vector chunks; + /// Memory allocator covering the entire memory pool + DxvkPageAllocator pageAllocator; + /// Pool allocator that sits on top of the page allocator + DxvkPoolAllocator poolAllocator = { pageAllocator }; + /// Minimum desired allocation size for the next chunk. + /// Always a power of two. + VkDeviceSize nextChunkSize = MinChunkSize; + /// Maximum chunk size for the memory pool. Hard limit. + VkDeviceSize maxChunkSize = MaxChunkSize; + + force_inline int64_t alloc(uint64_t size, uint64_t align) { + if (size <= DxvkPoolAllocator::MaxSize) + return poolAllocator.alloc(size); + else + return pageAllocator.alloc(size, align); + } + + force_inline bool free(uint64_t address, uint64_t size) { + if (size <= DxvkPoolAllocator::MaxSize) + return poolAllocator.free(address, size); + else + return pageAllocator.free(address, size); + } + }; + + /** * \brief Memory heap * @@ -64,6 +102,8 @@ namespace dxvk { * its properties as well as allocation statistics. */ struct DxvkMemoryHeap { + uint32_t index = 0u; + uint32_t memoryTypes = 0u; VkMemoryHeap properties = { }; }; @@ -76,21 +116,20 @@ namespace dxvk { * this memory type. */ struct DxvkMemoryType { - DxvkMemoryHeap* heap = nullptr; - uint32_t heapId = 0u; + uint32_t index = 0u; + VkMemoryType properties = { }; - VkMemoryType memType = { }; - uint32_t memTypeId = 0u; + DxvkMemoryHeap* heap = nullptr; DxvkMemoryStats stats = { }; - VkDeviceSize chunkSize = 0u; VkBufferUsageFlags bufferUsage = 0u; - std::vector> chunks; + DxvkMemoryPool devicePool; + DxvkMemoryPool mappedPool; }; - - + + /** * \brief Memory type statistics */ @@ -147,11 +186,10 @@ namespace dxvk { DxvkMemory(); DxvkMemory( DxvkMemoryAllocator* alloc, - DxvkMemoryChunk* chunk, DxvkMemoryType* type, VkBuffer buffer, VkDeviceMemory memory, - VkDeviceSize offset, + VkDeviceSize address, VkDeviceSize length, void* mapPtr); DxvkMemory (DxvkMemory&& other); @@ -187,7 +225,7 @@ namespace dxvk { * \returns Offset into device memory */ VkDeviceSize offset() const { - return m_offset; + return m_address & DxvkPageAllocator::ChunkAddressMask; } /** @@ -230,101 +268,16 @@ namespace dxvk { private: DxvkMemoryAllocator* m_alloc = nullptr; - DxvkMemoryChunk* m_chunk = nullptr; DxvkMemoryType* m_type = nullptr; VkBuffer m_buffer = VK_NULL_HANDLE; VkDeviceMemory m_memory = VK_NULL_HANDLE; - VkDeviceSize m_offset = 0; + VkDeviceSize m_address = 0; VkDeviceSize m_length = 0; void* m_mapPtr = nullptr; void free(); }; - - - /** - * \brief Memory chunk - * - * A single chunk of memory that provides a - * sub-allocator. This is not thread-safe. - */ - class DxvkMemoryChunk : public RcObject { - - public: - - DxvkMemoryChunk( - DxvkMemoryAllocator* alloc, - DxvkMemoryType* type, - DxvkDeviceMemory memory); - - ~DxvkMemoryChunk(); - - /** - * \brief Queries chunk size - * \returns Chunk size - */ - VkDeviceSize size() const { - return m_memory.memSize; - } - - /** - * \brief Allocates memory from the chunk - * - * On failure, this returns a slice with - * \c VK_NULL_HANDLE as the memory handle. 
- * \param [in] flags Requested memory type flags - * \param [in] size Number of bytes to allocate - * \param [in] align Required alignment - * \param [in] hints Memory category - * \returns The allocated memory slice - */ - DxvkMemory alloc( - VkMemoryPropertyFlags flags, - VkDeviceSize size, - VkDeviceSize align); - - /** - * \brief Frees memory - * - * Returns a slice back to the chunk. - * Called automatically when a memory - * slice runs out of scope. - * \param [in] offset Slice offset - * \param [in] length Slice length - */ - void free( - VkDeviceSize offset, - VkDeviceSize length); - - /** - * \brief Checks whether the chunk is being used - * \returns \c true if there are no allocations left - */ - bool isEmpty() const; - - /** - * \brief Retrieves allocation stats for this chunk - * - * Adds overall stats and the page mask to the given structure. - * \param [out] stats Allocation stats - */ - void getAllocationStats(DxvkMemoryAllocationStats& stats) const; - - private: - - DxvkMemoryAllocator* m_alloc; - DxvkMemoryType* m_type; - DxvkDeviceMemory m_memory; - - DxvkPageAllocator m_pageAllocator; - DxvkPoolAllocator m_poolAllocator; - - void mapChunk(); - - void unmapChunk(); - - }; /** @@ -358,10 +311,14 @@ namespace dxvk { friend class DxvkMemory; friend class DxvkMemoryChunk; + constexpr static uint64_t DedicatedChunkAddress = 1ull << 63u; + constexpr static VkDeviceSize SmallAllocationThreshold = 256 << 10; constexpr static VkDeviceSize MinChunkSize = 4ull << 20; constexpr static VkDeviceSize MaxChunkSize = 256ull << 20; + + constexpr static VkDeviceSize MinResourcesPerChunk = 4u; public: DxvkMemoryAllocator(DxvkDevice* device); @@ -381,15 +338,44 @@ namespace dxvk { /** * \brief Allocates device memory - * + * + * Legacy interface for memory allocation, to be removed. * \param [in] req Memory requirements * \param [in] info Memory properties * \returns Allocated memory slice */ DxvkMemory alloc( DxvkMemoryRequirements req, - DxvkMemoryProperties info); - + const DxvkMemoryProperties& info); + + /** + * \brief Allocates memory for a regular resource + * + * This method should be used when a dedicated allocation is + * not required. Very large resources may still be placed in + * a dedicated allocation. + * \param [in] requirements Memory requirements + * \param [in] properties Memory property flags. Some of + * these may be ignored in case of memory pressure. + */ + DxvkMemory allocateMemory( + const VkMemoryRequirements& requirements, + VkMemoryPropertyFlags properties); + + /** + * \brief Allocates memory for a resource + * + * Will always create a dedicated allocation. + * \param [in] requirements Memory requirements + * \param [in] properties Memory property flags. Some of + * these may be ignored in case of memory pressure. 
+ * \param [in] next Further memory properties + */ + DxvkMemory allocateDedicatedMemory( + const VkMemoryRequirements& requirements, + VkMemoryPropertyFlags properties, + const void* next); + /** * \brief Queries memory stats * @@ -435,73 +421,101 @@ namespace dxvk { private: - DxvkDevice* m_device; - VkPhysicalDeviceMemoryProperties m_memProps; - - dxvk::mutex m_mutex; - std::array m_memHeaps = { }; + DxvkDevice* m_device; + + dxvk::mutex m_mutex; + + uint32_t m_memTypeCount = 0u; + uint32_t m_memHeapCount = 0u; + std::array m_memTypes = { }; + std::array m_memHeaps = { }; uint32_t m_sparseMemoryTypes = 0u; - DxvkMemory tryAlloc( - const DxvkMemoryRequirements& req, - const DxvkMemoryProperties& info); - - DxvkMemory tryAllocFromType( - DxvkMemoryType* type, - VkDeviceSize size, - VkDeviceSize align, - const DxvkMemoryProperties& info); - - DxvkDeviceMemory tryAllocDeviceMemory( - DxvkMemoryType* type, - VkDeviceSize size, - DxvkMemoryProperties info, - bool isChunk); - + std::array m_memTypesByPropertyFlags = { }; + + DxvkDeviceMemory allocateDeviceMemory( + DxvkMemoryType& type, + VkDeviceSize size, + const void* next); + + bool allocateChunkInPool( + DxvkMemoryType& type, + DxvkMemoryPool& pool, + VkMemoryPropertyFlags properties, + VkDeviceSize requiredSize, + VkDeviceSize desiredSize); + + DxvkMemory createMemory( + DxvkMemoryType& type, + DxvkMemoryPool& pool, + VkDeviceSize address, + VkDeviceSize size); + void free( const DxvkMemory& memory); - - void freeChunkMemory( - DxvkMemoryType* type, - DxvkMemoryChunk* chunk, - VkDeviceSize offset, - VkDeviceSize length); - + void freeDeviceMemory( - DxvkMemoryType* type, + DxvkMemoryType& type, DxvkDeviceMemory memory); - VkDeviceSize pickChunkSize( - uint32_t memTypeId, - VkDeviceSize requiredSize) const; + uint32_t countEmptyChunksInPool( + const DxvkMemoryPool& pool) const; - void adjustChunkSize( - uint32_t memTypeId, - VkDeviceSize allocatedSize); + void freeEmptyChunksInHeap( + const DxvkMemoryHeap& heap, + VkDeviceSize allocationSize); - bool shouldFreeChunk( - const DxvkMemoryType* type, - const Rc& chunk) const; + void freeEmptyChunksInPool( + DxvkMemoryType& type, + DxvkMemoryPool& pool, + VkDeviceSize allocationSize); - bool shouldFreeEmptyChunks( - uint32_t heapIndex, - VkDeviceSize allocationSize) const; + int32_t findEmptyChunkInPool( + const DxvkMemoryPool& pool, + VkDeviceSize minSize, + VkDeviceSize maxSize) const; - void freeEmptyChunks( - const DxvkMemoryHeap* heap); + void mapDeviceMemory( + DxvkDeviceMemory& memory, + VkMemoryPropertyFlags properties); + + DxvkMemory createMemory( + DxvkMemoryType& type, + const DxvkMemoryPool& pool, + VkDeviceSize address, + VkDeviceSize size); + + DxvkMemory createMemory( + DxvkMemoryType& type, + const DxvkDeviceMemory& memory); + + void getAllocationStatsForPool( + const DxvkMemoryType& type, + const DxvkMemoryPool& pool, + DxvkMemoryAllocationStats& stats); + + VkDeviceSize determineMaxChunkSize( + const DxvkMemoryType& type, + bool mappable) const; uint32_t determineSparseMemoryTypes( DxvkDevice* device) const; void determineBufferUsageFlagsPerMemoryType(); + void determineMemoryTypesWithPropertyFlags(); + void logMemoryError( const VkMemoryRequirements& req) const; void logMemoryStats() const; + bit::BitMask getMemoryTypeMask( + const VkMemoryRequirements& requirements, + VkMemoryPropertyFlags properties) const; + }; }
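A note on the new addressing scheme used throughout this patch: a suballocation is no longer identified by a chunk pointer plus offset, but by a single 64-bit address whose upper bits select the chunk within the pool and whose lower bits store the byte offset inside that chunk, with a sentinel value reserved for dedicated allocations. The sketch below is not part of the patch and assumes a 32-bit split; the actual DxvkPageAllocator::ChunkAddressBits and ChunkAddressMask values are defined elsewhere in the tree and not visible in this diff.

#include <cassert>
#include <cstdint>

// Assumed bit split for illustration only.
constexpr uint32_t ChunkAddressBits = 32;
constexpr uint64_t ChunkAddressMask = (1ull << ChunkAddressBits) - 1u;
constexpr uint64_t DedicatedChunkAddress = 1ull << 63u; // matches the new allocator constant

constexpr uint64_t makeAddress(uint32_t chunkIndex, uint64_t offsetInChunk) {
  return (uint64_t(chunkIndex) << ChunkAddressBits) | offsetInChunk;
}

int main() {
  uint64_t address = makeAddress(3u, 0x10000u);

  // createMemory() recovers the chunk from the upper bits...
  assert((address >> ChunkAddressBits) == 3u);
  // ...and DxvkMemory::offset() recovers the byte offset from the lower bits.
  assert((address & ChunkAddressMask) == 0x10000u);
  // free() compares against the sentinel to detect dedicated allocations.
  assert(address != DedicatedChunkAddress);
  return 0;
}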