diff --git a/src/dxvk/dxvk_memory.cpp b/src/dxvk/dxvk_memory.cpp
index 08f3c3db4..ef9624f21 100644
--- a/src/dxvk/dxvk_memory.cpp
+++ b/src/dxvk/dxvk_memory.cpp
@@ -169,6 +169,14 @@ namespace dxvk {
   }
 
 
+  void DxvkResourceAllocation::destroyBufferViews() {
+    if (m_bufferViews) {
+      delete m_bufferViews;
+      m_bufferViews = nullptr;
+    }
+  }
+
+
 
 
   DxvkResourceAllocationPool::DxvkResourceAllocationPool() {
@@ -231,9 +239,9 @@ namespace dxvk {
   uint32_t DxvkLocalAllocationCache::computePreferredAllocationCount(
           VkDeviceSize                size) {
     uint32_t poolIndex = computePoolIndex(size);
-    uint32_t count = (DxvkPageAllocator::PageSize / MinSize) >> poolIndex;
+    uint32_t count = (PoolCapacityInBytes / MinSize) >> poolIndex;
 
-    return std::max(count, MinAllocationCountPerPool);
+    return std::max(count, 1u);
   }
 
 
@@ -251,6 +259,95 @@ namespace dxvk {
 
 
 
+  DxvkSharedAllocationCache::DxvkSharedAllocationCache(
+          DxvkMemoryAllocator*        allocator)
+  : m_allocator(allocator) {
+    for (uint32_t i = 0; i < m_pools.size(); i++) {
+      VkDeviceSize size = DxvkLocalAllocationCache::computeAllocationSize(i);
+      m_freeLists[i].capacity = DxvkLocalAllocationCache::computePreferredAllocationCount(size);
+    }
+  }
+
+
+  DxvkSharedAllocationCache::~DxvkSharedAllocationCache() {
+    for (const auto& freeList : m_freeLists)
+      m_allocator->freeCachedAllocations(freeList.head);
+
+    for (const auto& pool : m_pools) {
+      for (auto list : pool.lists)
+        m_allocator->freeCachedAllocations(list);
+    }
+  }
+
+
+  DxvkResourceAllocation* DxvkSharedAllocationCache::getAllocationList(
+          VkDeviceSize                allocationSize) {
+    uint32_t poolIndex = DxvkLocalAllocationCache::computePoolIndex(allocationSize);
+
+    // If there's a list ready for us, take the whole thing
+    std::unique_lock poolLock(m_poolMutex);
+    auto& pool = m_pools[poolIndex];
+
+    if (!pool.listCount)
+      return nullptr;
+
+    if (!(--pool.listCount))
+      pool.drainTime = high_resolution_clock::now();
+
+    return std::exchange(pool.lists[pool.listCount], nullptr);
+  }
+
+
+  DxvkResourceAllocation* DxvkSharedAllocationCache::freeAllocation(
+          DxvkResourceAllocation*     allocation) {
+    uint32_t poolIndex = DxvkLocalAllocationCache::computePoolIndex(allocation->m_size);
+
+    { std::unique_lock freeLock(m_freeMutex);
+      auto& list = m_freeLists[poolIndex];
+
+      allocation->m_next = list.head;
+      list.head = allocation;
+
+      if (++list.size < list.capacity)
+        return nullptr;
+
+      // Free list is full, try to add it to the list array
+      // so that subsequent allocations can use it.
+      list.head = nullptr;
+      list.size = 0u;
+    }
+
+    // Add free list to the pool if possible.
+    { std::unique_lock poolLock(m_poolMutex);
+      auto& pool = m_pools[poolIndex];
+
+      if (likely(pool.listCount < PoolSize)) {
+        pool.lists[pool.listCount++] = allocation;
+        return nullptr;
+      }
+
+      // If the pool is full, destroy the entire free list
+      return allocation;
+    }
+  }
+
+
+  void DxvkSharedAllocationCache::cleanupUnusedFromLockedAllocator(
+          high_resolution_clock::time_point time) {
+    std::unique_lock poolLock(m_poolMutex);
+
+    for (auto& pool : m_pools) {
+      if (pool.listCount && time - pool.drainTime >= std::chrono::seconds(1u)) {
+        m_allocator->freeCachedAllocationsLocked(std::exchange(
+          pool.lists[--pool.listCount], nullptr));
+        pool.drainTime = time;
+      }
+    }
+  }
+
+
+
+
   DxvkMemoryAllocator::DxvkMemoryAllocator(DxvkDevice* device)
   : m_device(device) {
     VkPhysicalDeviceMemoryProperties memInfo = device->adapter()->memoryProperties();
@@ -299,6 +396,14 @@ namespace dxvk {
 
     m_worker.join();
 
+    // Destroy shared caches so that any allocations
+    // that are still alive get returned to the device
+    for (uint32_t i = 0; i < m_memTypeCount; i++) {
+      if (m_memTypes[i].sharedCache)
+        delete m_memTypes[i].sharedCache;
+    }
+
+    // Now that no allocations are alive, we can free chunks
     for (uint32_t i = 0; i < m_memHeapCount; i++)
       freeEmptyChunksInHeap(m_memHeaps[i], VkDeviceSize(-1), high_resolution_clock::time_point());
   }
@@ -485,8 +590,11 @@ namespace dxvk {
       if (likely(memoryRequirements.memoryTypeBits)) {
         // If the given allocation cache supports the memory types and usage
         // flags that we need, try to use it to service this allocation.
+        // Only use the allocation cache for mappable allocations since those
+        // are expected to happen frequently.
         if (allocationCache && createInfo.size <= DxvkLocalAllocationCache::MaxSize
-         && allocationCache->m_memoryTypes && !(allocationCache->m_memoryTypes & ~memoryRequirements.memoryTypeBits)) {
+         && allocationCache->m_memoryTypes && !(allocationCache->m_memoryTypes & ~memoryRequirements.memoryTypeBits)
+         && (properties & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) {
           allocation = allocationCache->allocateFromCache(createInfo.size);
 
           if (likely(allocation))
@@ -924,25 +1032,37 @@ namespace dxvk {
 
   void DxvkMemoryAllocator::freeAllocation(
           DxvkResourceAllocation* allocation) {
-    std::unique_lock lock(m_mutex);
+    if (allocation->m_flags.test(DxvkAllocationFlag::Cacheable)) {
+      // Return cacheable allocations to the shared cache
+      allocation->destroyBufferViews();
 
-    if (likely(allocation->m_type)) {
-      allocation->m_type->stats.memoryUsed -= allocation->m_size;
+      if (allocation->m_type->sharedCache)
+        allocation = allocation->m_type->sharedCache->freeAllocation(allocation);
 
-      if (unlikely(allocation->m_flags.test(DxvkAllocationFlag::OwnsMemory))) {
-        // We free the actual allocation later, just update stats here.
-        allocation->m_type->stats.memoryAllocated -= allocation->m_size;
-      } else {
-        auto& pool = allocation->m_mapPtr
-          ? allocation->m_type->mappedPool
-          : allocation->m_type->devicePool;
+      // If we get a list of allocations back from the
+      // shared cache, free all of them in one go
+      freeCachedAllocations(allocation);
+    } else {
+      std::unique_lock lock(m_mutex);
 
-        if (unlikely(pool.free(allocation->m_address, allocation->m_size)))
-          freeEmptyChunksInPool(*allocation->m_type, pool, 0, high_resolution_clock::now());
+      if (likely(allocation->m_type)) {
+        allocation->m_type->stats.memoryUsed -= allocation->m_size;
+
+        if (unlikely(allocation->m_flags.test(DxvkAllocationFlag::OwnsMemory))) {
+          // We free the actual allocation later, just update stats here.
+          allocation->m_type->stats.memoryAllocated -= allocation->m_size;
+        } else {
+          auto& pool = allocation->m_mapPtr
+            ? allocation->m_type->mappedPool
+            : allocation->m_type->devicePool;
+
+          if (unlikely(pool.free(allocation->m_address, allocation->m_size)))
+            freeEmptyChunksInPool(*allocation->m_type, pool, 0, high_resolution_clock::now());
+        }
       }
-    }
 
-    m_allocationPool.free(allocation);
+      m_allocationPool.free(allocation);
+    }
   }
 
 
@@ -955,6 +1075,15 @@ namespace dxvk {
   }
 
 
+  void DxvkMemoryAllocator::freeCachedAllocations(
+          DxvkResourceAllocation* allocation) {
+    if (allocation) {
+      std::unique_lock lock(m_mutex);
+      freeCachedAllocationsLocked(allocation);
+    }
+  }
+
+
   void DxvkMemoryAllocator::freeCachedAllocationsLocked(
           DxvkResourceAllocation* allocation) {
     while (allocation) {
@@ -1105,42 +1234,58 @@ namespace dxvk {
 
 
   bool DxvkMemoryAllocator::refillAllocationCache(
-          DxvkLocalAllocationCache* cache,
-    const VkMemoryRequirements& requirements,
-          VkMemoryPropertyFlags properties) {
+          DxvkLocalAllocationCache*   cache,
+    const VkMemoryRequirements&       requirements,
+          VkMemoryPropertyFlags       properties) {
+    // Ensure that all cached allocations report a power-of-two size.
+    // The shared cache implementation currently relies on this.
     VkDeviceSize allocationSize = (VkDeviceSize(-1) >> bit::lzcnt(requirements.size - 1u)) + 1u;
     allocationSize = std::max(allocationSize, DxvkLocalAllocationCache::MinSize);
 
-    // TODO implement shared caches per memory pool
-
-    // No suitable allocations available from the shared cache, create some
-    // new ones so that subsequent allocations of this size category can be
-    // handled without locking the allocator.
+    // Maximum number of allocations when we miss in the shared cache
     uint32_t allocationCount = DxvkLocalAllocationCache::computePreferredAllocationCount(allocationSize);
 
-    DxvkResourceAllocation* head = nullptr;
-    DxvkResourceAllocation* tail = nullptr;
-
-    std::unique_lock lock(m_mutex);
-
     for (auto typeIndex : bit::BitMask(cache->m_memoryTypes)) {
-      auto& pool = (properties & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
-        ? m_memTypes[typeIndex].mappedPool
-        : m_memTypes[typeIndex].devicePool;
+      auto& memoryType = m_memTypes[typeIndex];
+
+      // Initialize shared cache on demand only
+      if (unlikely(!memoryType.sharedCache)) {
+        std::unique_lock lock(m_mutex);
+
+        if (!memoryType.sharedCache)
+          memoryType.sharedCache = new DxvkSharedAllocationCache(this);
+      }
+
+      // Try to grab a list of allocations from the shared cache first. If
+      // this succeeds, allocating several pages of memory is near instant.
+      DxvkResourceAllocation* allocation = memoryType.sharedCache->getAllocationList(allocationSize);
+
+      if (likely(allocation)) {
+        allocation = cache->assignCache(allocationSize, allocation);
+        freeCachedAllocations(allocation);
+        return true;
+      }
+
+      // Fill cache with the preferred allocation count of this size category so
+      // that subsequent allocations can be handled without locking the allocator.
+      DxvkResourceAllocation* head = nullptr;
+      DxvkResourceAllocation* tail = nullptr;
+
+      std::unique_lock lock(m_mutex);
+      auto& memoryPool = memoryType.mappedPool;
 
       while (allocationCount) {
         // Try to suballocate from existing chunks, but do not create
         // any new chunks. Let the regular code path handle that case
         // as necessary.
-        int64_t address = pool.alloc(allocationSize, requirements.alignment);
+        int64_t address = memoryPool.alloc(allocationSize, requirements.alignment);
 
         if (address < 0)
           break;
 
         // Add allocation to the list and mark it as cacheable,
         // so it will get recycled as-is after use.
-        DxvkResourceAllocation* allocation = createAllocation(
-          m_memTypes[typeIndex], pool, address, allocationSize);
+        allocation = createAllocation(memoryType, memoryPool, address, allocationSize);
         allocation->m_flags.set(DxvkAllocationFlag::Cacheable);
 
         if (tail) {
@@ -1154,15 +1299,13 @@ namespace dxvk {
         allocationCount--;
       }
 
-      if (!allocationCount)
-        break;
+      if (tail) {
+        tail->m_next = cache->assignCache(allocationSize, head);
+        return true;
+      }
     }
 
-    if (!tail)
-      return false;
-
-    tail->m_next = cache->assignCache(allocationSize, head);
-    return true;
+    return false;
   }
 
 
@@ -1588,6 +1731,12 @@ namespace dxvk {
 
         heapStats[i] = stats;
       }
+
+      // Periodically clean up unused cached allocations
+      for (uint32_t i = 0; i < m_memTypeCount; i++) {
+        if (m_memTypes[i].sharedCache)
+          m_memTypes[i].sharedCache->cleanupUnusedFromLockedAllocator(currentTime);
+      }
     }
 
     // Ensure adapter allocation statistics are consistent
diff --git a/src/dxvk/dxvk_memory.h b/src/dxvk/dxvk_memory.h
index 1ce68e278..acac2bf0e 100644
--- a/src/dxvk/dxvk_memory.h
+++ b/src/dxvk/dxvk_memory.h
@@ -13,6 +13,7 @@ namespace dxvk {
   class DxvkMemoryAllocator;
   class DxvkMemoryChunk;
   class DxvkSparsePageTable;
+  class DxvkSharedAllocationCache;
 
   /**
    * \brief Resource access flags
@@ -160,6 +161,8 @@ namespace dxvk {
 
     DxvkMemoryPool    devicePool;
     DxvkMemoryPool    mappedPool;
+
+    DxvkSharedAllocationCache* sharedCache = nullptr;
   };
 
 
@@ -584,6 +587,8 @@ namespace dxvk {
 
     DxvkResourceAllocation*     m_next = nullptr;
 
+    void destroyBufferViews();
+
     void free();
 
     static force_inline uint64_t getIncrement(DxvkAccess access) {
@@ -765,10 +770,12 @@ namespace dxvk {
    * context classes in order to reduce lock contention.
    */
   class DxvkLocalAllocationCache {
-    friend class DxvkMemoryAllocator;
+    constexpr static VkDeviceSize PoolCapacityInBytes = 4u * DxvkPageAllocator::PageSize;
+
+    friend DxvkMemoryAllocator;
   public:
-    constexpr static uint32_t PoolCount = 8u;
-    constexpr static uint32_t MinAllocationCountPerPool = 8u;
+    // Cache allocations up to 128 kiB
+    constexpr static uint32_t PoolCount = 10u;
 
     constexpr static VkDeviceSize MinSize = DxvkPoolAllocator::MinSize;
     constexpr static VkDeviceSize MaxSize = MinSize << (PoolCount - 1u);
@@ -816,6 +823,24 @@ namespace dxvk {
     static uint32_t computePreferredAllocationCount(
             VkDeviceSize                size);
 
+    /**
+     * \brief Computes pool index for a given allocation size
+     *
+     * \param [in] size Allocation size
+     * \returns Pool index
+     */
+    static uint32_t computePoolIndex(
+            VkDeviceSize                size);
+
+    /**
+     * \brief Computes allocation size for a given index
+     *
+     * \param [in] poolIndex Pool index
+     * \returns Allocation size for the pool
+     */
+    static VkDeviceSize computeAllocationSize(
+            uint32_t                    index);
+
   private:
 
     DxvkMemoryAllocator*  m_allocator   = nullptr;
@@ -832,8 +857,74 @@ namespace dxvk {
 
     void freeCache();
 
-    static uint32_t computePoolIndex(
-            VkDeviceSize                size);
+  };
+
+
+  /**
+   * \brief Shared allocation cache
+   *
+   * Accumulates small allocations in free lists
+   * that can be allocated in their entirety.
+   */
+  class DxvkSharedAllocationCache {
+    constexpr static uint32_t PoolCount = DxvkLocalAllocationCache::PoolCount;
+    constexpr static uint32_t PoolSize = env::is32BitHostPlatform() ? 6u : 12u;
+
+    friend DxvkMemoryAllocator;
+  public:
+
+    DxvkSharedAllocationCache(
+            DxvkMemoryAllocator*        allocator);
+
+    ~DxvkSharedAllocationCache();
+
+    /**
+     * \brief Retrieves list of cached allocations
+     *
+     * \param [in] allocationSize Required allocation size
+     * \returns Pointer to head of allocation list,
+     *    or \c nullptr if the cache is empty.
+     */
+    DxvkResourceAllocation* getAllocationList(
+            VkDeviceSize                allocationSize);
+
+    /**
+     * \brief Frees cacheable allocation
+     *
+     * \param [in] allocation Allocation to free
+     * \returns List to destroy if the cache is full. Usually,
+     *    \c nullptr if the allocation was successfully added.
+     */
+    DxvkResourceAllocation* freeAllocation(
+            DxvkResourceAllocation*     allocation);
+
+  private:
+
+    struct FreeList {
+      uint16_t size = 0u;
+      uint16_t capacity = 0u;
+
+      DxvkResourceAllocation* head = nullptr;
+    };
+
+    struct Pool {
+      uint32_t listCount = 0u;
+      std::array<DxvkResourceAllocation*, PoolSize> lists = { };
+      high_resolution_clock::time_point drainTime = { };
+    };
+
+    alignas(CACHE_LINE_SIZE)
+    DxvkMemoryAllocator*        m_allocator = nullptr;
+
+    dxvk::mutex                 m_freeMutex;
+    std::array<FreeList, PoolCount> m_freeLists = { };
+
+    alignas(CACHE_LINE_SIZE)
+    dxvk::mutex                 m_poolMutex;
+    std::array<Pool, PoolCount> m_pools = { };
+
+    void cleanupUnusedFromLockedAllocator(
+            high_resolution_clock::time_point time);
 
   };
 
@@ -848,6 +939,7 @@ namespace dxvk {
     friend DxvkMemory;
     friend DxvkResourceAllocation;
     friend DxvkLocalAllocationCache;
+    friend DxvkSharedAllocationCache;
 
     constexpr static uint64_t DedicatedChunkAddress = 1ull << 63u;
 
@@ -1045,6 +1137,9 @@ namespace dxvk {
     void freeLocalCache(
             DxvkLocalAllocationCache* cache);
 
+    void freeCachedAllocations(
+            DxvkResourceAllocation* allocation);
+
     void freeCachedAllocationsLocked(
             DxvkResourceAllocation* allocation);