1
0
mirror of https://github.com/doitsujin/dxvk.git synced 2025-01-31 14:52:11 +01:00

[dxvk] Rewrite memory allocator

Uses a single allocator pair per memory type in order to make memory
allocations on the entire memory pool faster, and further reduce
fragmentation by applying the best fit strategy to the whole memory
pool.
This commit is contained in:
Philip Rebohle 2024-09-20 22:18:51 +02:00 committed by Philip Rebohle
parent 3a4dadb528
commit e1fd2bff2c
3 changed files with 689 additions and 604 deletions

View File

@ -76,27 +76,29 @@ namespace dxvk {
DxvkMemoryProperties memoryProperties = { };
memoryProperties.flags = m_memFlags;
if (memoryRequirements.dedicated.prefersDedicatedAllocation || m_shared) {
memoryProperties.dedicated = { VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO };
memoryProperties.dedicated.image = m_image.image;
}
if (m_shared) {
memoryRequirements.dedicated.prefersDedicatedAllocation = VK_TRUE;
memoryRequirements.dedicated.requiresDedicatedAllocation = VK_TRUE;
if (createInfo.sharing.mode == DxvkSharedHandleMode::Export) {
memoryProperties.sharedExport = { VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO };
memoryProperties.sharedExport.pNext = std::exchange(memoryProperties.dedicated.pNext, &memoryProperties.sharedExport);
memoryProperties.sharedExport.handleTypes = createInfo.sharing.type;
}
if (createInfo.sharing.mode == DxvkSharedHandleMode::Import) {
memoryProperties.sharedImportWin32 = { VK_STRUCTURE_TYPE_IMPORT_MEMORY_WIN32_HANDLE_INFO_KHR };
memoryProperties.sharedImportWin32.pNext = std::exchange(memoryProperties.dedicated.pNext, &memoryProperties.sharedImportWin32);
memoryProperties.sharedImportWin32.handleType = createInfo.sharing.type;
memoryProperties.sharedImportWin32.handle = createInfo.sharing.handle;
}
}
if (memoryRequirements.dedicated.prefersDedicatedAllocation) {
memoryProperties.dedicated = { VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO };
memoryProperties.dedicated.image = m_image.image;
}
m_image.memory = memAlloc.alloc(memoryRequirements, memoryProperties);
// Try to bind the allocated memory slice to the image

View File

@ -10,30 +10,29 @@ namespace dxvk {
DxvkMemory::DxvkMemory() { }
DxvkMemory::DxvkMemory(
DxvkMemoryAllocator* alloc,
DxvkMemoryChunk* chunk,
DxvkMemoryType* type,
VkBuffer buffer,
VkDeviceMemory memory,
VkDeviceSize offset,
VkDeviceSize address,
VkDeviceSize length,
void* mapPtr)
: m_alloc (alloc),
m_chunk (chunk),
m_type (type),
m_buffer (buffer),
m_memory (memory),
m_offset (offset),
m_address (address),
m_length (length),
m_mapPtr (mapPtr) { }
m_mapPtr (mapPtr) {
}
// Move constructor: transfers ownership of the memory slice from 'other'
// and resets the source to its empty default state, so that the moved-from
// object's destructor will not free the underlying allocation a second time.
DxvkMemory::DxvkMemory(DxvkMemory&& other)
: m_alloc (std::exchange(other.m_alloc, nullptr)),
m_chunk (std::exchange(other.m_chunk, nullptr)),
m_type (std::exchange(other.m_type, nullptr)),
m_buffer (std::exchange(other.m_buffer, VkBuffer(VK_NULL_HANDLE))),
m_memory (std::exchange(other.m_memory, VkDeviceMemory(VK_NULL_HANDLE))),
m_offset (std::exchange(other.m_offset, 0)),
m_address (std::exchange(other.m_address, 0)),
m_length (std::exchange(other.m_length, 0)),
m_mapPtr (std::exchange(other.m_mapPtr, nullptr)) { }
@ -41,11 +40,10 @@ namespace dxvk {
DxvkMemory& DxvkMemory::operator = (DxvkMemory&& other) {
this->free();
m_alloc = std::exchange(other.m_alloc, nullptr);
m_chunk = std::exchange(other.m_chunk, nullptr);
m_type = std::exchange(other.m_type, nullptr);
m_buffer = std::exchange(other.m_buffer, VkBuffer(VK_NULL_HANDLE));
m_memory = std::exchange(other.m_memory, VkDeviceMemory(VK_NULL_HANDLE));
m_offset = std::exchange(other.m_offset, 0);
m_address = std::exchange(other.m_address, 0);
m_length = std::exchange(other.m_length, 0);
m_mapPtr = std::exchange(other.m_mapPtr, nullptr);
return *this;
@ -61,141 +59,38 @@ namespace dxvk {
if (m_alloc != nullptr)
m_alloc->free(*this);
}
// Binds the chunk to its parent allocator, memory type and backing device
// memory, and registers the full memory range with the page allocator so
// it becomes available for suballocation. The pool allocator is layered
// on top of the page allocator to service small allocations.
DxvkMemoryChunk::DxvkMemoryChunk(
DxvkMemoryAllocator* alloc,
DxvkMemoryType* type,
DxvkDeviceMemory memory)
: m_alloc(alloc), m_type(type), m_memory(memory),
m_poolAllocator(m_pageAllocator) {
m_pageAllocator.addChunk(memory.memSize);
}
// Returns the backing device memory to the parent allocator on destruction.
DxvkMemoryChunk::~DxvkMemoryChunk() {
// This call is technically not thread-safe, but it
// doesn't need to be since we don't free chunks
m_alloc->freeDeviceMemory(m_type, m_memory);
}
// Tries to suballocate a slice of the given size and alignment from this
// chunk. A chunk that is already in use only serves requests whose
// host-visibility matches its current mapped state; an empty chunk is
// lazily mapped or unmapped to match the first allocation. Returns an
// empty DxvkMemory if the request cannot be satisfied here.
DxvkMemory DxvkMemoryChunk::alloc(
VkMemoryPropertyFlags flags,
VkDeviceSize size,
VkDeviceSize align) {
if (likely(!isEmpty())) {
// If the chunk is in use, only accept allocations that do or do not need
// host access depending on whether the chunk is currently mapped in order
// to reduce the total amount of address space consumed for mapped chunks.
VkMemoryPropertyFlags got = m_memory.memPointer
? VkMemoryPropertyFlags(VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
: VkMemoryPropertyFlags(0u);
if ((flags ^ got) & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
return DxvkMemory();
} else {
// Lazily map or unmap the chunk depending on what the first allocation
// actually needs.
if (flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
mapChunk();
else
unmapChunk();
}
size = dxvk::align(size, align);
// Small allocations go through the pool allocator, large ones are taken
// directly from the page allocator; a negative address signals failure.
int64_t address = size <= DxvkPoolAllocator::MaxSize
? m_poolAllocator.alloc(size)
: m_pageAllocator.alloc(size, align);
if (address < 0)
return DxvkMemory();
// Create the memory object with the aligned slice
// NOTE(review): if the chunk is unmapped, memPointer is null and this
// pointer arithmetic produces a non-null bogus mapPtr — presumably callers
// never dereference it for non-host-visible allocations; verify.
return DxvkMemory(m_alloc, this, m_type,
m_memory.buffer, m_memory.memHandle, address, size,
reinterpret_cast<char*>(m_memory.memPointer) + address);
}
// Releases a previously allocated slice back to the chunk. Small slices
// are returned to the pool allocator and large ones to the underlying
// page allocator, mirroring the size threshold used in alloc().
void DxvkMemoryChunk::free(
VkDeviceSize offset,
VkDeviceSize length) {
bool isPoolAllocation = length <= DxvkPoolAllocator::MaxSize;
if (isPoolAllocation)
m_poolAllocator.free(offset, length);
else
m_pageAllocator.free(offset, length);
}
// A chunk is considered empty when no pages are currently allocated from it.
bool DxvkMemoryChunk::isEmpty() const {
return !m_pageAllocator.pagesUsed(0u);
}
// Appends this chunk's capacity, usage and per-page allocation bitmask to
// the given stats object. The bitmask is stored as packed 32-bit words in
// stats.pageMasks and located via pageMaskOffset / pageCount.
void DxvkMemoryChunk::getAllocationStats(DxvkMemoryAllocationStats& stats) const {
auto& chunkStats = stats.chunks.emplace_back();
chunkStats.capacity = uint64_t(m_pageAllocator.pageCount(0u)) * DxvkPageAllocator::PageSize;
chunkStats.used = uint64_t(m_pageAllocator.pagesUsed(0u)) * DxvkPageAllocator::PageSize;
chunkStats.pageMaskOffset = stats.pageMasks.size();
chunkStats.pageCount = m_pageAllocator.pageCount(0u);
// One 32-bit mask word covers 32 pages; round up to whole words
stats.pageMasks.resize(chunkStats.pageMaskOffset + (chunkStats.pageCount + 31u) / 32u);
m_pageAllocator.getPageAllocationMask(0u, &stats.pageMasks.at(chunkStats.pageMaskOffset));
}
// Maps the chunk's entire device memory range into the process address
// space. No-op if the chunk is already mapped; throws DxvkError if
// vkMapMemory fails.
void DxvkMemoryChunk::mapChunk() {
if (m_memory.memPointer)
return;
auto vk = m_alloc->device()->vkd();
VkResult vr = vk->vkMapMemory(vk->device(), m_memory.memHandle,
0, m_memory.memSize, 0, &m_memory.memPointer);
if (vr != VK_SUCCESS)
throw DxvkError(str::format("Failed to map memory: ", vr));
Logger::debug(str::format("Mapped memory range 0x", std::hex,
reinterpret_cast<uintptr_t>(m_memory.memPointer), " - 0x",
reinterpret_cast<uintptr_t>(m_memory.memPointer) + m_memory.memSize));
}
// Unmaps the chunk's device memory and clears the cached map pointer.
// No-op if the chunk is not currently mapped.
void DxvkMemoryChunk::unmapChunk() {
if (!m_memory.memPointer)
return;
auto vk = m_alloc->device()->vkd();
vk->vkUnmapMemory(vk->device(), m_memory.memHandle);
Logger::debug(str::format("Unmapped memory range 0x", std::hex,
reinterpret_cast<uintptr_t>(m_memory.memPointer), " - 0x",
reinterpret_cast<uintptr_t>(m_memory.memPointer) + m_memory.memSize));
m_memory.memPointer = nullptr;
}
DxvkMemoryAllocator::DxvkMemoryAllocator(DxvkDevice* device)
: m_device (device),
m_memProps (device->adapter()->memoryProperties()) {
for (uint32_t i = 0; i < m_memProps.memoryHeapCount; i++)
m_memHeaps[i].properties = m_memProps.memoryHeaps[i];
for (uint32_t i = 0; i < m_memProps.memoryTypeCount; i++) {
m_memTypes[i].heap = &m_memHeaps[m_memProps.memoryTypes[i].heapIndex];
m_memTypes[i].heapId = m_memProps.memoryTypes[i].heapIndex;
m_memTypes[i].memType = m_memProps.memoryTypes[i];
m_memTypes[i].memTypeId = i;
m_memTypes[i].chunkSize = MinChunkSize;
m_memTypes[i].bufferUsage = 0;
: m_device(device) {
VkPhysicalDeviceMemoryProperties memInfo = device->adapter()->memoryProperties();
m_memTypeCount = memInfo.memoryTypeCount;
m_memHeapCount = memInfo.memoryHeapCount;
for (uint32_t i = 0; i < m_memHeapCount; i++) {
auto& heap = m_memHeaps[i];
heap.index = i;
heap.properties = memInfo.memoryHeaps[i];
}
for (uint32_t i = 0; i < m_memTypeCount; i++) {
auto& type = m_memTypes[i];
type.index = i;
type.properties = memInfo.memoryTypes[i];
type.heap = &m_memHeaps[type.properties.heapIndex];
type.heap->memoryTypes |= 1u << i;
type.devicePool.maxChunkSize = determineMaxChunkSize(type, false);
type.mappedPool.maxChunkSize = determineMaxChunkSize(type, true);
}
determineMemoryTypesWithPropertyFlags();
if (device->features().core.features.sparseBinding)
m_sparseMemoryTypes = determineSparseMemoryTypes(device);
@ -204,220 +99,223 @@ namespace dxvk {
// Releases all remaining device memory on shutdown. Passing the maximum
// possible allocation size makes freeEmptyChunksInHeap exceed any heap
// budget check and therefore release every empty chunk in each heap.
// Fix: removed the unused local 'vk' (m_device->vkd()) — nothing in this
// body uses the dispatch table directly.
DxvkMemoryAllocator::~DxvkMemoryAllocator() {
for (uint32_t i = 0; i < m_memHeapCount; i++)
freeEmptyChunksInHeap(m_memHeaps[i], VkDeviceSize(-1));
}
// Allocates memory for a resource, trying progressively less restrictive
// strategies: a dedicated allocation if one was requested, then a regular
// suballocation, and finally a retry with optional property flags
// (device-local / host-cached) stripped. Throws DxvkError if every
// attempt fails.
DxvkMemory DxvkMemoryAllocator::alloc(
DxvkMemoryRequirements req,
DxvkMemoryProperties info) {
std::lock_guard<dxvk::mutex> lock(m_mutex);
// If requested, try with a dedicated allocation first.
if (info.dedicated.image || info.dedicated.buffer) {
DxvkMemory result = this->tryAlloc(req, info);
if (result)
return result;
}
// If possible, retry without a dedicated allocation
if (!req.dedicated.requiresDedicatedAllocation) {
info.dedicated.image = VK_NULL_HANDLE;
info.dedicated.buffer = VK_NULL_HANDLE;
// If we're allocating tiled image memory, ensure
// that it will not overlap with buffer memory.
if (req.tiling == VK_IMAGE_TILING_OPTIMAL) {
VkDeviceSize granularity = m_device->properties().core.properties.limits.bufferImageGranularity;
req.core.memoryRequirements.size = align(req.core.memoryRequirements.size, granularity);
req.core.memoryRequirements.alignment = align(req.core.memoryRequirements.alignment, granularity);
}
DxvkMemory result = this->tryAlloc(req, info);
if (result)
return result;
}
// If that still didn't work, probe slower memory types as well
const VkMemoryPropertyFlags optionalFlags =
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
if (info.flags & optionalFlags) {
info.flags &= ~optionalFlags;
DxvkMemory result = this->tryAlloc(req, info);
if (result)
return result;
}
// We weren't able to allocate memory for this resource from any type
this->logMemoryError(req.core.memoryRequirements);
this->logMemoryStats();
throw DxvkError("DxvkMemoryAllocator: Memory allocation failed");
}
DxvkMemory DxvkMemoryAllocator::tryAlloc(
const DxvkMemoryRequirements& req,
const DxvkMemoryProperties& info) {
DxvkMemory result;
for (uint32_t i = 0; i < m_memProps.memoryTypeCount && !result; i++) {
const bool supported = (req.core.memoryRequirements.memoryTypeBits & (1u << i)) != 0;
const bool adequate = (m_memTypes[i].memType.propertyFlags & info.flags) == info.flags;
if (supported && adequate) {
result = this->tryAllocFromType(&m_memTypes[i],
req.core.memoryRequirements.size,
req.core.memoryRequirements.alignment,
info);
}
// Enforce that tiled images do not overlap with buffers in memory.
// Only changes anything for small images on older Nvidia hardware.
if (req.tiling == VK_IMAGE_TILING_OPTIMAL) {
req.core.memoryRequirements.alignment = std::max(req.core.memoryRequirements.alignment,
m_device->properties().core.properties.limits.bufferImageGranularity);
}
return result;
}
DxvkMemory DxvkMemoryAllocator::tryAllocFromType(
DxvkMemoryType* type,
VkDeviceSize size,
VkDeviceSize align,
const DxvkMemoryProperties& info) {
constexpr VkDeviceSize DedicatedAllocationThreshold = 3;
VkDeviceSize chunkSize = pickChunkSize(type->memTypeId,
DedicatedAllocationThreshold * size);
// If requested, try to create a dedicated allocation. If this
// fails, we may still fall back to a suballocation unless a
// dedicated allocation is explicitly required.
if (unlikely(info.dedicated.buffer || info.dedicated.image)) {
DxvkMemory memory = allocateDedicatedMemory(
req.core.memoryRequirements, info.flags, &info.dedicated);
DxvkMemory memory;
if (memory)
return memory;
// Require dedicated allocations for resources that use the Vulkan dedicated
// allocation bits, or are too large to fit into a single full-sized chunk
bool needsDedicatedAlocation = size >= chunkSize || info.dedicated.buffer || info.dedicated.image;
// Prefer a dedicated allocation for very large resources in order to
// reduce fragmentation if a large number of those resources are in use
bool wantsDedicatedAllocation = DedicatedAllocationThreshold * size > chunkSize;
// Try to reuse existing memory as much as possible in case the heap is nearly full
bool heapBudgedExceeded = 5 * type->stats.memoryUsed + size > 4 * type->heap->properties.size;
if (!needsDedicatedAlocation && (!wantsDedicatedAllocation || heapBudgedExceeded)) {
// Attempt to suballocate from existing chunks first
for (uint32_t i = 0; i < type->chunks.size() && !memory; i++)
memory = type->chunks[i]->alloc(info.flags, size, align);
// If no existing chunk can accomodate the allocation, and if a dedicated
// allocation is not preferred, create a new chunk and suballocate from it
if (!memory && !wantsDedicatedAllocation) {
DxvkDeviceMemory devMem;
if (this->shouldFreeEmptyChunks(type->heapId, chunkSize))
this->freeEmptyChunks(type->heap);
for (uint32_t i = 0; i < 6 && (chunkSize >> i) >= size && !devMem.memHandle; i++)
devMem = tryAllocDeviceMemory(type, chunkSize >> i, info, true);
if (devMem.memHandle) {
Rc<DxvkMemoryChunk> chunk = new DxvkMemoryChunk(this, type, devMem);
memory = chunk->alloc(info.flags, size, align);
type->chunks.push_back(std::move(chunk));
adjustChunkSize(type->memTypeId, devMem.memSize);
}
if (req.dedicated.requiresDedicatedAllocation && (info.flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)) {
return allocateDedicatedMemory(req.core.memoryRequirements,
info.flags & ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, &info.dedicated);
}
}
// If a dedicated allocation is required or preferred and we haven't managed
// to suballocate any memory before, try to create a dedicated allocation
if (!memory && (needsDedicatedAlocation || wantsDedicatedAllocation)) {
if (this->shouldFreeEmptyChunks(type->heapId, size))
this->freeEmptyChunks(type->heap);
// Suballocate memory from an existing chunk
DxvkMemory memory = allocateMemory(req.core.memoryRequirements, info.flags);
DxvkDeviceMemory devMem = this->tryAllocDeviceMemory(type, size, info, false);
if (devMem.memHandle != VK_NULL_HANDLE) {
memory = DxvkMemory(this, nullptr, type,
devMem.buffer, devMem.memHandle, 0, size, devMem.memPointer);
}
}
if (memory) {
type->stats.memoryUsed += memory.m_length;
m_device->notifyMemoryUse(type->heapId, memory.m_length);
if (unlikely(!memory) && (info.flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)) {
memory = allocateMemory(req.core.memoryRequirements,
info.flags & ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
}
return memory;
}
DxvkDeviceMemory DxvkMemoryAllocator::tryAllocDeviceMemory(
DxvkMemoryType* type,
VkDeviceSize size,
DxvkMemoryProperties info,
bool isChunk) {
auto vk = m_device->vkd();
bool useMemoryPriority = (info.flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)
&& (m_device->features().extMemoryPriority.memoryPriority);
bool dedicated = info.dedicated.buffer || info.dedicated.image;
DxvkMemory DxvkMemoryAllocator::allocateMemory(
const VkMemoryRequirements& requirements,
VkMemoryPropertyFlags properties) {
std::lock_guard<dxvk::mutex> lock(m_mutex);
DxvkDeviceMemory result;
result.memSize = size;
// Ensure the allocation size is also aligned
VkDeviceSize size = align(requirements.size, requirements.alignment);
VkMemoryAllocateFlagsInfo memoryFlags = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO };
memoryFlags.flags |= VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT;
for (auto typeIndex : getMemoryTypeMask(requirements, properties)) {
auto& type = m_memTypes[typeIndex];
VkMemoryPriorityAllocateInfoEXT priorityInfo = { VK_STRUCTURE_TYPE_MEMORY_PRIORITY_ALLOCATE_INFO_EXT };
priorityInfo.priority = (type->memType.propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
? 0.0f : (dedicated ? 1.0f : 0.5f);
// Use correct memory pool depending on property flags. This way we avoid
// wasting address space on fallback allocations, or on UMA devices that
// only expose one memory type.
auto& selectedPool = (properties & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
? type.mappedPool
: type.devicePool;
VkMemoryAllocateInfo memoryInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO };
memoryInfo.allocationSize = size;
memoryInfo.memoryTypeIndex = type->memTypeId;
// Always try to suballocate first, even if the allocation is
// very large. We will decide what to do if this fails.
int64_t address = selectedPool.alloc(size, requirements.alignment);
if (info.sharedExport.handleTypes)
info.sharedExport.pNext = std::exchange(memoryInfo.pNext, &info.sharedExport);
if (likely(address >= 0))
return createMemory(type, selectedPool, address, size);
if (info.sharedImportWin32.handleType)
info.sharedImportWin32.pNext = std::exchange(memoryInfo.pNext, &info.sharedImportWin32);
// If the memory type is host-visible, try to find an existing chunk
// in the other memory pool of the memory type and move over.
if (type.properties.propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
auto& oppositePool = (properties & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
? type.devicePool
: type.mappedPool;
if (info.dedicated.buffer || info.dedicated.image)
info.dedicated.pNext = std::exchange(memoryInfo.pNext, &info.dedicated);
int32_t freeChunkIndex = findEmptyChunkInPool(oppositePool,
size, selectedPool.maxChunkSize);
if (useMemoryPriority)
priorityInfo.pNext = std::exchange(memoryInfo.pNext, &priorityInfo);
if (freeChunkIndex >= 0) {
uint32_t poolChunkIndex = selectedPool.pageAllocator.addChunk(oppositePool.chunks[freeChunkIndex].size);
selectedPool.chunks.resize(std::max<size_t>(selectedPool.chunks.size(), poolChunkIndex + 1u));
selectedPool.chunks[poolChunkIndex] = oppositePool.chunks[freeChunkIndex];
if (!info.dedicated.image && (type->bufferUsage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT))
memoryFlags.pNext = std::exchange(memoryInfo.pNext, &memoryFlags);
oppositePool.pageAllocator.removeChunk(freeChunkIndex);
oppositePool.chunks[freeChunkIndex] = DxvkDeviceMemory();
if (vk->vkAllocateMemory(vk->device(), &memoryInfo, nullptr, &result.memHandle))
return DxvkDeviceMemory();
mapDeviceMemory(selectedPool.chunks[poolChunkIndex], properties);
if (!isChunk && (info.flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) {
VkResult vr = vk->vkMapMemory(vk->device(), result.memHandle,
0, result.memSize, 0, &result.memPointer);
address = selectedPool.alloc(size, requirements.alignment);
if (vr != VK_SUCCESS)
throw DxvkError(str::format("Failed to map memory: ", vr));
if (likely(address >= 0))
return createMemory(type, selectedPool, address, size);
}
}
Logger::debug(str::format("Mapped memory range 0x", std::hex,
reinterpret_cast<uintptr_t>(result.memPointer), " - 0x",
reinterpret_cast<uintptr_t>(result.memPointer) + result.memSize));
// If the allocation is very large, use a dedicated allocation instead
// of creating a new chunk. This way we avoid excessive fragmentation,
// especially when a multiple such resources are created at once.
if (size * MinResourcesPerChunk > selectedPool.maxChunkSize) {
DxvkDeviceMemory memory = allocateDeviceMemory(type, requirements.size, nullptr);
if (!memory.memory)
continue;
mapDeviceMemory(memory, properties);
return createMemory(type, memory);
}
// Try to allocate a new chunk that is large enough to hold
// multiple resources of the type we're tying to allocate.
VkDeviceSize desiredSize = selectedPool.nextChunkSize;
while (desiredSize < size * MinResourcesPerChunk)
desiredSize *= 2u;
if (allocateChunkInPool(type, selectedPool, properties, size, desiredSize)) {
address = selectedPool.alloc(size, requirements.alignment);
return createMemory(type, selectedPool, address, size);
}
}
if (type->bufferUsage && isChunk) {
logMemoryError(requirements);
logMemoryStats();
return DxvkMemory();
}
// Creates a dedicated device memory allocation for the given requirements.
// Iterates over all compatible memory types and returns the first
// successful allocation, mapping it if the requested properties include
// host visibility. Logs diagnostics and returns an empty DxvkMemory if
// no memory type could service the request.
DxvkMemory DxvkMemoryAllocator::allocateDedicatedMemory(
const VkMemoryRequirements& requirements,
VkMemoryPropertyFlags properties,
const void* next) {
std::lock_guard<dxvk::mutex> lock(m_mutex);
DxvkDeviceMemory memory = { };
for (auto typeIndex : getMemoryTypeMask(requirements, properties)) {
auto& type = m_memTypes[typeIndex];
memory = allocateDeviceMemory(type, requirements.size, next);
if (likely(memory.memory != VK_NULL_HANDLE)) {
mapDeviceMemory(memory, properties);
return createMemory(type, memory);
}
}
logMemoryError(requirements);
logMemoryStats();
return DxvkMemory();
}
DxvkDeviceMemory DxvkMemoryAllocator::allocateDeviceMemory(
DxvkMemoryType& type,
VkDeviceSize size,
const void* next) {
auto vk = m_device->vkd();
// Preemptively free some unused allocations to reduce memory waste
freeEmptyChunksInHeap(*type.heap, size);
VkMemoryAllocateInfo memoryInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, next };
memoryInfo.allocationSize = size;
memoryInfo.memoryTypeIndex = type.index;
// Decide on a memory priority based on the memory type and allocation properties
VkMemoryPriorityAllocateInfoEXT priorityInfo = { VK_STRUCTURE_TYPE_MEMORY_PRIORITY_ALLOCATE_INFO_EXT };
if (type.properties.propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) {
if (type.properties.propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
// BAR allocation. Give this a low priority since these are typically useful
// when when placed in system memory.
priorityInfo.priority = 0.0f;
} else if (next) {
// Dedicated allocation, may or may not be a shared resource. Assign this the
// highest priority since this is expected to be a high-bandwidth resource,
// such as a render target.
priorityInfo.priority = 1.0f;
} else {
// Standard priority for resource allocations
priorityInfo.priority = 0.5f;
}
if (m_device->features().extMemoryPriority.memoryPriority)
priorityInfo.pNext = std::exchange(memoryInfo.pNext, &priorityInfo);
}
// If buffers can be created on this memory type, also enable the device address bit
VkMemoryAllocateFlagsInfo memoryFlags = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO };
if (type.bufferUsage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) {
memoryFlags.pNext = std::exchange(memoryInfo.pNext, &memoryFlags);
memoryFlags.flags |= VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT;
}
// Try to allocate memory. If this fails, free any remaining
// unused memory from the heap and try again.
DxvkDeviceMemory result = { };
result.size = size;
if (vk->vkAllocateMemory(vk->device(), &memoryInfo, nullptr, &result.memory)) {
freeEmptyChunksInHeap(*type.heap, VkDeviceSize(-1));
if (vk->vkAllocateMemory(vk->device(), &memoryInfo, nullptr, &result.memory))
return DxvkDeviceMemory();
}
// Create global buffer if the allocation supports it
if (type.bufferUsage && !next) {
VkBuffer buffer = VK_NULL_HANDLE;
VkBufferCreateInfo bufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
bufferInfo.size = result.memSize;
bufferInfo.usage = type->bufferUsage;
bufferInfo.size = size;
bufferInfo.usage = type.bufferUsage;
bufferInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
VkResult status = vk->vkCreateBuffer(vk->device(), &bufferInfo, nullptr, &buffer);
@ -430,8 +328,8 @@ namespace dxvk {
vk->vkGetBufferMemoryRequirements2(vk->device(), &memInfo, &requirements);
if ((requirements.memoryRequirements.size == size)
&& (requirements.memoryRequirements.memoryTypeBits & (1u << type->memTypeId))) {
status = vk->vkBindBufferMemory(vk->device(), buffer, result.memHandle, 0);
&& (requirements.memoryRequirements.memoryTypeBits & (1u << type.index))) {
status = vk->vkBindBufferMemory(vk->device(), buffer, result.memory, 0);
if (status == VK_SUCCESS)
result.buffer = buffer;
@ -444,167 +342,286 @@ namespace dxvk {
if (!result.buffer) {
Logger::warn(str::format("Failed to create global buffer:",
"\n size: ", std::dec, size,
"\n usage: ", std::hex, type->bufferUsage,
"\n type: ", std::dec, type->memTypeId));
"\n usage: ", std::hex, type.bufferUsage,
"\n type: ", std::dec, type.index));
}
}
type->stats.memoryAllocated += size;
m_device->notifyMemoryAlloc(type->heapId, size);
type.stats.memoryAllocated += size;
m_device->notifyMemoryAlloc(type.properties.heapIndex, size);
return result;
}
// Allocates a new device memory chunk for the given pool, halving the
// desired size on failure until it would drop below the required size
// (or the minimum chunk size). On success the chunk is mapped if the
// properties demand it and registered with the pool's page allocator.
// Returns false if no chunk could be allocated.
bool DxvkMemoryAllocator::allocateChunkInPool(
DxvkMemoryType& type,
DxvkMemoryPool& pool,
VkMemoryPropertyFlags properties,
VkDeviceSize requiredSize,
VkDeviceSize desiredSize) {
// Try to allocate device memory. If the allocation fails, retry with
// a smaller size until we reach a point where we cannot service the
// allocation.
DxvkDeviceMemory chunk = { };
while (!chunk.memory && desiredSize >= std::max(requiredSize, DxvkMemoryPool::MinChunkSize)) {
chunk = allocateDeviceMemory(type, desiredSize, nullptr);
desiredSize /= 2u;
}
if (!chunk.memory)
return false;
mapDeviceMemory(chunk, properties);
// If we expect the application to require more memory in the
// future, increase the chunk size for subsequent allocations.
if (pool.nextChunkSize < pool.maxChunkSize
&& pool.nextChunkSize <= type.stats.memoryAllocated / 2u)
pool.nextChunkSize *= 2u;
// Add the newly created chunk to the pool
uint32_t chunkIndex = pool.pageAllocator.addChunk(chunk.size);
pool.chunks.resize(std::max<size_t>(pool.chunks.size(), chunkIndex + 1u));
pool.chunks[chunkIndex] = chunk;
return true;
}
// Wraps a successful pool suballocation into a DxvkMemory object and
// updates usage statistics. The owning chunk index is encoded in the
// upper bits of the address; the map pointer is offset into the chunk's
// mapping when one exists, and null otherwise.
DxvkMemory DxvkMemoryAllocator::createMemory(
DxvkMemoryType& type,
DxvkMemoryPool& pool,
VkDeviceSize address,
VkDeviceSize size) {
type.stats.memoryUsed += size;
m_device->notifyMemoryUse(type.properties.heapIndex, size);
uint32_t chunkIndex = address >> DxvkPageAllocator::ChunkAddressBits;
const auto& chunk = pool.chunks[chunkIndex];
void* mapPtr = chunk.mapPtr
? reinterpret_cast<char*>(chunk.mapPtr) + (address & DxvkPageAllocator::ChunkAddressMask)
: nullptr;
return DxvkMemory(this, &type, chunk.buffer, chunk.memory, address, size, mapPtr);
}
// Wraps a dedicated device memory allocation into a DxvkMemory object and
// updates usage statistics. The special DedicatedChunkAddress marker lets
// free() distinguish dedicated allocations from chunk suballocations.
DxvkMemory DxvkMemoryAllocator::createMemory(
DxvkMemoryType& type,
const DxvkDeviceMemory& memory) {
type.stats.memoryUsed += memory.size;
m_device->notifyMemoryUse(type.properties.heapIndex, memory.size);
return DxvkMemory(this, &type, memory.buffer, memory.memory,
DedicatedChunkAddress, memory.size, memory.mapPtr);
}
void DxvkMemoryAllocator::free(
const DxvkMemory& memory) {
std::lock_guard<dxvk::mutex> lock(m_mutex);
memory.m_type->stats.memoryUsed -= memory.m_length;
if (memory.m_chunk != nullptr) {
this->freeChunkMemory(
memory.m_type,
memory.m_chunk,
memory.m_offset,
memory.m_length);
} else {
m_device->notifyMemoryUse(memory.m_type->properties.heapIndex, -memory.m_length);
if (unlikely(memory.m_address == DedicatedChunkAddress)) {
DxvkDeviceMemory devMem;
devMem.buffer = memory.m_buffer;
devMem.memHandle = memory.m_memory;
devMem.memPointer = nullptr;
devMem.memSize = memory.m_length;
this->freeDeviceMemory(memory.m_type, devMem);
}
devMem.buffer = memory.m_buffer;
devMem.memory = memory.m_memory;
devMem.mapPtr = nullptr;
devMem.size = memory.m_length;
m_device->notifyMemoryUse(memory.m_type->heapId, -memory.m_length);
}
this->freeDeviceMemory(*memory.m_type, devMem);
} else {
DxvkMemoryPool& pool = memory.m_mapPtr
? memory.m_type->mappedPool
: memory.m_type->devicePool;
void DxvkMemoryAllocator::freeChunkMemory(
DxvkMemoryType* type,
DxvkMemoryChunk* chunk,
VkDeviceSize offset,
VkDeviceSize length) {
chunk->free(offset, length);
if (chunk->isEmpty()) {
Rc<DxvkMemoryChunk> chunkRef = chunk;
// Free the chunk if we have to, or at least put it at the end of
// the list so that chunks that are already in use and cannot be
// freed are prioritized for allocations to reduce memory pressure.
type->chunks.erase(std::remove(type->chunks.begin(), type->chunks.end(), chunkRef));
if (!this->shouldFreeChunk(type, chunkRef))
type->chunks.push_back(std::move(chunkRef));
if (unlikely(pool.free(memory.m_address, memory.m_length)))
freeEmptyChunksInPool(*memory.m_type, pool, 0);
}
}
void DxvkMemoryAllocator::freeDeviceMemory(
DxvkMemoryType* type,
DxvkMemoryType& type,
DxvkDeviceMemory memory) {
auto vk = m_device->vkd();
vk->vkDestroyBuffer(vk->device(), memory.buffer, nullptr);
vk->vkFreeMemory(vk->device(), memory.memHandle, nullptr);
vk->vkFreeMemory(vk->device(), memory.memory, nullptr);
type->stats.memoryAllocated -= memory.memSize;
m_device->notifyMemoryAlloc(type->heapId, memory.memSize);
type.stats.memoryAllocated -= memory.size;
m_device->notifyMemoryAlloc(type.properties.heapIndex, memory.size);
}
VkDeviceSize DxvkMemoryAllocator::pickChunkSize(uint32_t memTypeId, VkDeviceSize requiredSize) const {
VkMemoryType type = m_memProps.memoryTypes[memTypeId];
VkMemoryHeap heap = m_memProps.memoryHeaps[type.heapIndex];
void DxvkMemoryAllocator::freeEmptyChunksInHeap(
const DxvkMemoryHeap& heap,
VkDeviceSize allocationSize) {
for (auto typeIndex : bit::BitMask(heap.memoryTypes)) {
auto& type = m_memTypes[typeIndex];
VkDeviceSize chunkSize = m_memTypes[memTypeId].chunkSize;
while (chunkSize < requiredSize && chunkSize < MaxChunkSize)
chunkSize <<= 1u;
// Try to waste a bit less system memory especially in
// 32-bit applications due to address space constraints
if (type.propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
chunkSize = std::min<VkDeviceSize>((env::is32BitHostPlatform() ? 16 : 64) << 20, chunkSize);
// Reduce the chunk size on small heaps so
// we can at least fit in 15 allocations
while (chunkSize * 15 > heap.size)
chunkSize >>= 1;
return chunkSize;
}
void DxvkMemoryAllocator::adjustChunkSize(
uint32_t memTypeId,
VkDeviceSize allocatedSize) {
VkDeviceSize chunkSize = m_memTypes[memTypeId].chunkSize;
// Don't bump chunk size if we reached the maximum or if
// we already were unable to allocate a full chunk.
if (chunkSize <= allocatedSize && chunkSize <= m_memTypes[memTypeId].stats.memoryAllocated)
m_memTypes[memTypeId].chunkSize = pickChunkSize(memTypeId, chunkSize * 2);
}
bool DxvkMemoryAllocator::shouldFreeChunk(
const DxvkMemoryType* type,
const Rc<DxvkMemoryChunk>& chunk) const {
// Under memory pressure, we should start freeing everything.
if (this->shouldFreeEmptyChunks(type->heapId, 0))
return true;
// Free chunks that are below the current chunk size since it probably
// not going to be able to serve enough allocations to be useful.
if (chunk->size() < type->chunkSize)
return true;
// Only keep a small number of chunks of each type around to save memory.
uint32_t numEmptyChunks = 0;
for (const auto& c : type->chunks) {
if (c != chunk && c->isEmpty())
numEmptyChunks += 1;
freeEmptyChunksInPool(type, type.devicePool, allocationSize);
freeEmptyChunksInPool(type, type.mappedPool, allocationSize);
}
// Be a bit more lenient on system memory since data uploads may otherwise
// lead to a large number of allocations and deallocations at runtime.
uint32_t maxEmptyChunks = env::is32BitHostPlatform() ? 2 : 4;
if ((type->memType.propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)
|| !(type->memType.propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT))
maxEmptyChunks = 1;
return numEmptyChunks >= maxEmptyChunks;
}
bool DxvkMemoryAllocator::shouldFreeEmptyChunks(
uint32_t heapIndex,
VkDeviceSize allocationSize) const {
VkDeviceSize budget = (m_memHeaps[heapIndex].properties.size * 4) / 5;
void DxvkMemoryAllocator::freeEmptyChunksInPool(
DxvkMemoryType& type,
DxvkMemoryPool& pool,
VkDeviceSize allocationSize) {
// Allow for one unused max-size chunk on device-local memory types.
// For system memory allocations, we need to be more lenient since
// applications will frequently allocate staging buffers.
VkDeviceSize maxUnusedMemory = pool.maxChunkSize;
DxvkMemoryStats stats = getMemoryStats(heapIndex);
return stats.memoryAllocated + allocationSize > budget;
}
if (!(type.properties.propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)
&& (type.properties.propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
&& (&pool == &type.mappedPool))
maxUnusedMemory *= env::is32BitHostPlatform() ? 2u : 4u;
VkDeviceSize heapBudget = (type.heap->properties.size * 4) / 5;
VkDeviceSize heapAllocated = getMemoryStats(type.heap->index).memoryAllocated;
void DxvkMemoryAllocator::freeEmptyChunks(
const DxvkMemoryHeap* heap) {
for (uint32_t i = 0; i < m_memProps.memoryTypeCount; i++) {
DxvkMemoryType* type = &m_memTypes[i];
VkDeviceSize unusedMemory = 0u;
if (type->heap != heap)
for (uint32_t i = 0; i < pool.chunks.size(); i++) {
DxvkDeviceMemory chunk = pool.chunks[i];
if (!chunk.memory || pool.pageAllocator.pagesUsed(i))
continue;
type->chunks.erase(
std::remove_if(type->chunks.begin(), type->chunks.end(),
[] (const Rc<DxvkMemoryChunk>& chunk) { return chunk->isEmpty(); }),
type->chunks.end());
// Free the chunk if it is smaller than the current chunk size of
// the pool, since it is unlikely to be useful for future allocations.
// Also free if the pending allocation would exceed the heap budget.
bool shouldFree = chunk.size < pool.nextChunkSize
|| allocationSize + heapAllocated > heapBudget
|| allocationSize > heapBudget;
// If we don't free the chunk under these conditions, count it towards
// unused memory in the current memory pool. Once we exceed the limit,
// free any empty chunk we encounter.
if (!shouldFree) {
unusedMemory += chunk.size;
shouldFree = unusedMemory > maxUnusedMemory;
}
if (shouldFree) {
freeDeviceMemory(type, chunk);
heapAllocated -= chunk.size;
pool.chunks[i] = DxvkDeviceMemory();
pool.pageAllocator.removeChunk(i);
}
}
}
int32_t DxvkMemoryAllocator::findEmptyChunkInPool(
  const DxvkMemoryPool&             pool,
        VkDeviceSize                minSize,
        VkDeviceSize                maxSize) const {
  // Scan the pool for a live chunk whose size lies within the requested
  // bounds and that currently has no pages allocated from it.
  for (uint32_t chunkIndex = 0; chunkIndex < pool.chunks.size(); chunkIndex++) {
    const auto& chunk = pool.chunks[chunkIndex];

    // Skip slots whose backing memory has been freed
    if (!chunk.memory)
      continue;

    // Chunk must be large enough for the allocation, but small
    // enough to not exceed the caller's size limit
    if (chunk.size < minSize || chunk.size > maxSize)
      continue;

    // Chunk must be completely unused
    if (pool.pageAllocator.pagesUsed(chunkIndex))
      continue;

    return int32_t(chunkIndex);
  }

  // No suitable empty chunk in this pool
  return -1;
}
void DxvkMemoryAllocator::mapDeviceMemory(
        DxvkDeviceMemory&           memory,
        VkMemoryPropertyFlags       properties) {
  auto vk = m_device->vkd();

  if (!(properties & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) {
    // Memory is not host-visible: release any existing mapping
    if (!memory.mapPtr)
      return;

    vk->vkUnmapMemory(vk->device(), memory.memory);

    Logger::debug(str::format("Unmapped memory region 0x", std::hex,
      reinterpret_cast<uintptr_t>(memory.mapPtr), " - 0x",
      reinterpret_cast<uintptr_t>(memory.mapPtr) + memory.size - 1u));

    memory.mapPtr = nullptr;
  } else {
    // Host-visible memory: establish a mapping of the
    // entire memory object unless one already exists
    if (memory.mapPtr)
      return;

    VkResult status = vk->vkMapMemory(vk->device(),
      memory.memory, 0, memory.size, 0, &memory.mapPtr);

    if (status != VK_SUCCESS) {
      throw DxvkError(str::format("Failed to map Vulkan memory: ", status,
        "\n size: ", memory.size, " bytes"));
    }

    Logger::debug(str::format("Mapped memory region 0x", std::hex,
      reinterpret_cast<uintptr_t>(memory.mapPtr), " - 0x",
      reinterpret_cast<uintptr_t>(memory.mapPtr) + memory.size - 1u));
  }
}
void DxvkMemoryAllocator::getAllocationStatsForPool(
const DxvkMemoryType& type,
const DxvkMemoryPool& pool,
DxvkMemoryAllocationStats& stats) {
auto& typeStats = stats.memoryTypes[type.index];
for (uint32_t i = 0; i < pool.chunks.size(); i++) {
if (!pool.chunks[i].memory)
continue;
typeStats.chunkCount += 1u;
auto& chunkStats = stats.chunks.emplace_back();
chunkStats.capacity = pool.chunks[i].size;
chunkStats.used = pool.pageAllocator.pagesUsed(i) * DxvkPageAllocator::PageSize;
chunkStats.pageMaskOffset = stats.pageMasks.size();
chunkStats.pageCount = pool.pageAllocator.pageCount(i);
size_t maskCount = (chunkStats.pageCount + 31u) / 32u;
stats.pageMasks.resize(chunkStats.pageMaskOffset + maskCount);
pool.pageAllocator.getPageAllocationMask(i, &stats.pageMasks[chunkStats.pageMaskOffset]);
}
}
VkDeviceSize DxvkMemoryAllocator::determineMaxChunkSize(
  const DxvkMemoryType&             type,
        bool                        mappable) const {
  // Start out with the largest chunk size that the allocator supports
  VkDeviceSize chunkSize = DxvkMemoryPool::MaxChunkSize;

  // Host-visible chunks consume CPU address space, so keep them
  // smaller; the more lenient unused-memory limits on host-visible
  // pools compensate for the reduced chunk size.
  if (mappable)
    chunkSize /= env::is32BitHostPlatform() ? 16u : 4u;

  // Halve the chunk size until at least 15 chunks fit into the
  // heap. This can matter on systems with a small BAR.
  while (15u * chunkSize > type.heap->properties.size)
    chunkSize /= 2u;

  // Never go below the minimum supported chunk size
  return std::max(chunkSize, DxvkMemoryPool::MinChunkSize);
}
uint32_t DxvkMemoryAllocator::determineSparseMemoryTypes(
DxvkDevice* device) const {
auto vk = device->vkd();
@ -706,7 +723,7 @@ namespace dxvk {
while (typeMask) {
uint32_t type = bit::tzcnt(typeMask);
if (type < m_memProps.memoryTypeCount)
if (type < m_memTypeCount)
m_memTypes.at(type).bufferUsage |= bufferInfo.usage;
typeMask &= typeMask - 1;
@ -718,7 +735,7 @@ namespace dxvk {
// Only use a minimal set of usage flags for the global buffer if the
// full combination of flags is not supported for whatever reason.
for (uint32_t i = 0; i < m_memProps.memoryTypeCount; i++) {
for (uint32_t i = 0; i < m_memTypeCount; i++) {
bufferInfo.usage = m_memTypes[i].bufferUsage;
if (!getBufferMemoryRequirements(bufferInfo, requirements)
@ -731,14 +748,55 @@ namespace dxvk {
}
void DxvkMemoryAllocator::determineMemoryTypesWithPropertyFlags() {
// Initialize look-up table for memory type masks based on required property
// flags. This lets us avoid iterating over unsupported memory types
for (uint32_t i = 0; i < m_memTypesByPropertyFlags.size(); i++) {
VkMemoryPropertyFlags flags = VkMemoryPropertyFlags(i);
uint32_t mask = 0u;
for (uint32_t j = 0; j < m_memTypeCount; j++) {
VkMemoryPropertyFlags typeFlags = m_memTypes[j].properties.propertyFlags;
if ((typeFlags & flags) != flags)
continue;
// Do not include device-local memory types if a non-device
// local one exists with the same required propery flags.
if (mask && !(flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)
&& (typeFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT))
continue;
mask |= 1u << j;
}
m_memTypesByPropertyFlags[i] = mask;
}
// If there is no cached coherent memory type, reuse the uncached
// one. This is likely slow, but API front-ends are relying on it.
uint32_t hostCachedIndex = uint32_t(
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
uint32_t hostCoherentIndex = uint32_t(
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
if (!m_memTypesByPropertyFlags[hostCachedIndex])
m_memTypesByPropertyFlags[hostCachedIndex] = m_memTypesByPropertyFlags[hostCoherentIndex];
}
DxvkMemoryStats DxvkMemoryAllocator::getMemoryStats(uint32_t heap) const {
DxvkMemoryStats result = { };
for (size_t i = 0; i < m_memProps.memoryTypeCount; i++) {
if (m_memTypes[i].heapId == heap) {
result.memoryAllocated += m_memTypes[i].stats.memoryAllocated;
result.memoryUsed += m_memTypes[i].stats.memoryUsed;
}
for (auto typeIndex : bit::BitMask(m_memHeaps[heap].memoryTypes)) {
const auto& type = m_memTypes[typeIndex];
result.memoryAllocated += type.stats.memoryAllocated;
result.memoryUsed += type.stats.memoryUsed;
}
return result;
@ -751,17 +809,18 @@ namespace dxvk {
stats.chunks.clear();
stats.pageMasks.clear();
for (uint32_t i = 0; i < m_memProps.memoryTypeCount; i++) {
const auto& type = m_memTypes[i];
for (uint32_t i = 0; i < m_memTypeCount; i++) {
const auto& typeInfo = m_memTypes[i];
auto& typeStats = stats.memoryTypes[i];
stats.memoryTypes[i].properties = type.memType;
stats.memoryTypes[i].allocated = type.stats.memoryAllocated;
stats.memoryTypes[i].used = type.stats.memoryUsed;
stats.memoryTypes[i].chunkIndex = stats.chunks.size();
stats.memoryTypes[i].chunkCount = type.chunks.size();
typeStats.properties = typeInfo.properties;
typeStats.allocated = typeInfo.stats.memoryAllocated;
typeStats.used = typeInfo.stats.memoryUsed;
typeStats.chunkIndex = stats.chunks.size();
typeStats.chunkCount = 0u;
for (const auto& chunk : type.chunks)
chunk->getAllocationStats(stats);
getAllocationStatsForPool(typeInfo, typeInfo.devicePool, stats);
getAllocationStatsForPool(typeInfo, typeInfo.mappedPool, stats);
}
}
@ -845,7 +904,7 @@ namespace dxvk {
std::stringstream sstr;
sstr << "Heap Size (MiB) Allocated Used Reserved Budget" << std::endl;
for (uint32_t i = 0; i < m_memProps.memoryHeapCount; i++) {
for (uint32_t i = 0; i < m_memHeapCount; i++) {
DxvkMemoryStats stats = getMemoryStats(i);
sstr << std::setw(2) << i << ": "
@ -864,4 +923,14 @@ namespace dxvk {
Logger::err(sstr.str());
}
bit::BitMask DxvkMemoryAllocator::getMemoryTypeMask(
  const VkMemoryRequirements&       requirements,
        VkMemoryPropertyFlags       properties) const {
  // Restrict the resource's supported memory types to those that
  // provide the requested property flags. The table index only
  // uses the low property bits covered by the look-up table size.
  uint32_t lutIndex = uint32_t(properties) % uint32_t(m_memTypesByPropertyFlags.size());
  uint32_t typeMask = requirements.memoryTypeBits & m_memTypesByPropertyFlags[lutIndex];

  return bit::BitMask(typeMask);
}
}

View File

@ -50,13 +50,51 @@ namespace dxvk {
* be persistently mapped.
*/
struct DxvkDeviceMemory {
VkBuffer buffer = VK_NULL_HANDLE;
VkDeviceMemory memHandle = VK_NULL_HANDLE;
void* memPointer = nullptr;
VkDeviceSize memSize = 0;
VkBuffer buffer = VK_NULL_HANDLE;
VkDeviceMemory memory = VK_NULL_HANDLE;
VkDeviceSize size = 0;
void* mapPtr = nullptr;
};
/**
* \brief Memory pool
*
* Stores a list of memory chunks, as well as an allocator
* over the entire pool.
*/
struct DxvkMemoryPool {
  /// Largest chunk size supported by the underlying page allocator
  constexpr static VkDeviceSize MaxChunkSize = DxvkPageAllocator::MaxChunkSize;
  /// Smallest chunk size used by the pool (1/64 of the maximum)
  constexpr static VkDeviceSize MinChunkSize = MaxChunkSize / 64u;
  /// Backing storage for allocated memory chunks
  std::vector<DxvkDeviceMemory> chunks;
  /// Memory allocator covering the entire memory pool
  DxvkPageAllocator pageAllocator;
  /// Pool allocator that sits on top of the page allocator
  DxvkPoolAllocator poolAllocator = { pageAllocator };
  /// Minimum desired allocation size for the next chunk.
  /// Always a power of two.
  VkDeviceSize nextChunkSize = MinChunkSize;
  /// Maximum chunk size for the memory pool. Hard limit.
  VkDeviceSize maxChunkSize = MaxChunkSize;

  /// Allocates memory from the pool. Requests up to
  /// DxvkPoolAllocator::MaxSize are served by the pool allocator,
  /// larger requests go straight to the page allocator.
  force_inline int64_t alloc(uint64_t size, uint64_t align) {
    if (size <= DxvkPoolAllocator::MaxSize)
      return poolAllocator.alloc(size);
    else
      return pageAllocator.alloc(size, align);
  }

  /// Returns a previously allocated range to the pool. Must be
  /// called with the same size that was passed to alloc so the
  /// request is routed to the allocator that served it.
  force_inline bool free(uint64_t address, uint64_t size) {
    if (size <= DxvkPoolAllocator::MaxSize)
      return poolAllocator.free(address, size);
    else
      return pageAllocator.free(address, size);
  }
};
/**
* \brief Memory heap
*
@ -64,6 +102,8 @@ namespace dxvk {
* its properties as well as allocation statistics.
*/
struct DxvkMemoryHeap {
  /// Heap index, as passed to getMemoryStats
  uint32_t index = 0u;
  /// Bit mask of memory type indices located on this heap
  uint32_t memoryTypes = 0u;
  /// Vulkan heap properties (size and flags)
  VkMemoryHeap properties = { };
};
@ -76,21 +116,20 @@ namespace dxvk {
* this memory type.
*/
struct DxvkMemoryType {
DxvkMemoryHeap* heap = nullptr;
uint32_t heapId = 0u;
uint32_t index = 0u;
VkMemoryType properties = { };
VkMemoryType memType = { };
uint32_t memTypeId = 0u;
DxvkMemoryHeap* heap = nullptr;
DxvkMemoryStats stats = { };
VkDeviceSize chunkSize = 0u;
VkBufferUsageFlags bufferUsage = 0u;
std::vector<Rc<DxvkMemoryChunk>> chunks;
DxvkMemoryPool devicePool;
DxvkMemoryPool mappedPool;
};
/**
* \brief Memory type statistics
*/
@ -147,11 +186,10 @@ namespace dxvk {
DxvkMemory();
DxvkMemory(
DxvkMemoryAllocator* alloc,
DxvkMemoryChunk* chunk,
DxvkMemoryType* type,
VkBuffer buffer,
VkDeviceMemory memory,
VkDeviceSize offset,
VkDeviceSize address,
VkDeviceSize length,
void* mapPtr);
DxvkMemory (DxvkMemory&& other);
@ -187,7 +225,7 @@ namespace dxvk {
* \returns Offset into device memory
*/
VkDeviceSize offset() const {
return m_offset;
return m_address & DxvkPageAllocator::ChunkAddressMask;
}
/**
@ -230,101 +268,16 @@ namespace dxvk {
private:
DxvkMemoryAllocator* m_alloc = nullptr;
DxvkMemoryChunk* m_chunk = nullptr;
DxvkMemoryType* m_type = nullptr;
VkBuffer m_buffer = VK_NULL_HANDLE;
VkDeviceMemory m_memory = VK_NULL_HANDLE;
VkDeviceSize m_offset = 0;
VkDeviceSize m_address = 0;
VkDeviceSize m_length = 0;
void* m_mapPtr = nullptr;
void free();
};
/**
* \brief Memory chunk
*
* A single chunk of memory that provides a
* sub-allocator. This is not thread-safe.
*/
class DxvkMemoryChunk : public RcObject {

public:

  DxvkMemoryChunk(
    DxvkMemoryAllocator*  alloc,
    DxvkMemoryType*       type,
    DxvkDeviceMemory      memory);

  ~DxvkMemoryChunk();

  /**
   * \brief Queries chunk size
   * \returns Chunk size
   */
  VkDeviceSize size() const {
    return m_memory.memSize;
  }

  /**
   * \brief Allocates memory from the chunk
   *
   * On failure, this returns a slice with
   * \c VK_NULL_HANDLE as the memory handle.
   * \param [in] flags Requested memory type flags
   * \param [in] size Number of bytes to allocate
   * \param [in] align Required alignment
   * \returns The allocated memory slice
   */
  DxvkMemory alloc(
    VkMemoryPropertyFlags flags,
    VkDeviceSize          size,
    VkDeviceSize          align);

  /**
   * \brief Frees memory
   *
   * Returns a slice back to the chunk.
   * Called automatically when a memory
   * slice runs out of scope.
   * \param [in] offset Slice offset
   * \param [in] length Slice length
   */
  void free(
    VkDeviceSize          offset,
    VkDeviceSize          length);

  /**
   * \brief Checks whether the chunk is being used
   * \returns \c true if there are no allocations left
   */
  bool isEmpty() const;

  /**
   * \brief Retrieves allocation stats for this chunk
   *
   * Adds overall stats and the page mask to the given structure.
   * \param [out] stats Allocation stats
   */
  void getAllocationStats(DxvkMemoryAllocationStats& stats) const;

private:

  DxvkMemoryAllocator*  m_alloc;          ///< Owning allocator
  DxvkMemoryType*       m_type;           ///< Memory type this chunk was allocated from
  DxvkDeviceMemory      m_memory;         ///< Backing device memory object
  DxvkPageAllocator     m_pageAllocator;  ///< Page-granular sub-allocator for this chunk
  DxvkPoolAllocator     m_poolAllocator;  ///< Pool allocator for small allocations

  // Map / unmap the backing memory object for host access
  void mapChunk();

  void unmapChunk();

};
/**
@ -358,10 +311,14 @@ namespace dxvk {
friend class DxvkMemory;
friend class DxvkMemoryChunk;
constexpr static uint64_t DedicatedChunkAddress = 1ull << 63u;
constexpr static VkDeviceSize SmallAllocationThreshold = 256 << 10;
constexpr static VkDeviceSize MinChunkSize = 4ull << 20;
constexpr static VkDeviceSize MaxChunkSize = 256ull << 20;
constexpr static VkDeviceSize MinResourcesPerChunk = 4u;
public:
DxvkMemoryAllocator(DxvkDevice* device);
@ -381,15 +338,44 @@ namespace dxvk {
/**
* \brief Allocates device memory
*
*
* Legacy interface for memory allocation, to be removed.
* \param [in] req Memory requirements
* \param [in] info Memory properties
* \returns Allocated memory slice
*/
DxvkMemory alloc(
DxvkMemoryRequirements req,
DxvkMemoryProperties info);
const DxvkMemoryProperties& info);
/**
* \brief Allocates memory for a regular resource
*
* This method should be used when a dedicated allocation is
* not required. Very large resources may still be placed in
* a dedicated allocation.
* \param [in] requirements Memory requirements
* \param [in] properties Memory property flags. Some of
* these may be ignored in case of memory pressure.
*/
DxvkMemory allocateMemory(
const VkMemoryRequirements& requirements,
VkMemoryPropertyFlags properties);
/**
* \brief Allocates memory for a resource
*
* Will always create a dedicated allocation.
* \param [in] requirements Memory requirements
* \param [in] properties Memory property flags. Some of
* these may be ignored in case of memory pressure.
* \param [in] next Further memory properties
*/
DxvkMemory allocateDedicatedMemory(
const VkMemoryRequirements& requirements,
VkMemoryPropertyFlags properties,
const void* next);
/**
* \brief Queries memory stats
*
@ -435,73 +421,101 @@ namespace dxvk {
private:
DxvkDevice* m_device;
VkPhysicalDeviceMemoryProperties m_memProps;
dxvk::mutex m_mutex;
std::array<DxvkMemoryHeap, VK_MAX_MEMORY_HEAPS> m_memHeaps = { };
DxvkDevice* m_device;
dxvk::mutex m_mutex;
uint32_t m_memTypeCount = 0u;
uint32_t m_memHeapCount = 0u;
std::array<DxvkMemoryType, VK_MAX_MEMORY_TYPES> m_memTypes = { };
std::array<DxvkMemoryHeap, VK_MAX_MEMORY_HEAPS> m_memHeaps = { };
uint32_t m_sparseMemoryTypes = 0u;
DxvkMemory tryAlloc(
const DxvkMemoryRequirements& req,
const DxvkMemoryProperties& info);
DxvkMemory tryAllocFromType(
DxvkMemoryType* type,
VkDeviceSize size,
VkDeviceSize align,
const DxvkMemoryProperties& info);
DxvkDeviceMemory tryAllocDeviceMemory(
DxvkMemoryType* type,
VkDeviceSize size,
DxvkMemoryProperties info,
bool isChunk);
std::array<uint32_t, 16> m_memTypesByPropertyFlags = { };
DxvkDeviceMemory allocateDeviceMemory(
DxvkMemoryType& type,
VkDeviceSize size,
const void* next);
bool allocateChunkInPool(
DxvkMemoryType& type,
DxvkMemoryPool& pool,
VkMemoryPropertyFlags properties,
VkDeviceSize requiredSize,
VkDeviceSize desiredSize);
DxvkMemory createMemory(
DxvkMemoryType& type,
DxvkMemoryPool& pool,
VkDeviceSize address,
VkDeviceSize size);
void free(
const DxvkMemory& memory);
void freeChunkMemory(
DxvkMemoryType* type,
DxvkMemoryChunk* chunk,
VkDeviceSize offset,
VkDeviceSize length);
void freeDeviceMemory(
DxvkMemoryType* type,
DxvkMemoryType& type,
DxvkDeviceMemory memory);
VkDeviceSize pickChunkSize(
uint32_t memTypeId,
VkDeviceSize requiredSize) const;
uint32_t countEmptyChunksInPool(
const DxvkMemoryPool& pool) const;
void adjustChunkSize(
uint32_t memTypeId,
VkDeviceSize allocatedSize);
void freeEmptyChunksInHeap(
const DxvkMemoryHeap& heap,
VkDeviceSize allocationSize);
bool shouldFreeChunk(
const DxvkMemoryType* type,
const Rc<DxvkMemoryChunk>& chunk) const;
void freeEmptyChunksInPool(
DxvkMemoryType& type,
DxvkMemoryPool& pool,
VkDeviceSize allocationSize);
bool shouldFreeEmptyChunks(
uint32_t heapIndex,
VkDeviceSize allocationSize) const;
int32_t findEmptyChunkInPool(
const DxvkMemoryPool& pool,
VkDeviceSize minSize,
VkDeviceSize maxSize) const;
void freeEmptyChunks(
const DxvkMemoryHeap* heap);
void mapDeviceMemory(
DxvkDeviceMemory& memory,
VkMemoryPropertyFlags properties);
DxvkMemory createMemory(
DxvkMemoryType& type,
const DxvkMemoryPool& pool,
VkDeviceSize address,
VkDeviceSize size);
DxvkMemory createMemory(
DxvkMemoryType& type,
const DxvkDeviceMemory& memory);
void getAllocationStatsForPool(
const DxvkMemoryType& type,
const DxvkMemoryPool& pool,
DxvkMemoryAllocationStats& stats);
VkDeviceSize determineMaxChunkSize(
const DxvkMemoryType& type,
bool mappable) const;
uint32_t determineSparseMemoryTypes(
DxvkDevice* device) const;
void determineBufferUsageFlagsPerMemoryType();
void determineMemoryTypesWithPropertyFlags();
void logMemoryError(
const VkMemoryRequirements& req) const;
void logMemoryStats() const;
bit::BitMask getMemoryTypeMask(
const VkMemoryRequirements& requirements,
VkMemoryPropertyFlags properties) const;
};
}