1
0
mirror of https://github.com/doitsujin/dxvk.git synced 2025-02-20 19:54:19 +01:00

[dxvk] Replace allocation priority with allocation flags

This commit is contained in:
Philip Rebohle 2022-01-12 16:22:05 +01:00
parent d34bbdb58e
commit 15ab07ab94
No known key found for this signature in database
GPG Key ID: C8CC613427A31C99
4 changed files with 115 additions and 54 deletions

View File

@ -102,11 +102,15 @@ namespace dxvk {
bool isGpuWritable = (m_info.access & (
VK_ACCESS_SHADER_WRITE_BIT |
VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT)) != 0;
float priority = isGpuWritable ? 1.0f : 0.5f;
DxvkMemoryFlags hints(DxvkMemoryFlag::GpuReadable);
if (isGpuWritable)
hints.set(DxvkMemoryFlag::GpuWritable);
// Ask driver whether we should be using a dedicated allocation
handle.memory = m_memAlloc->alloc(&memReq.memoryRequirements,
dedicatedRequirements, dedMemoryAllocInfo, m_memFlags, priority);
dedicatedRequirements, dedMemoryAllocInfo, m_memFlags, hints);
if (vkd->vkBindBufferMemory(vkd->device(), handle.buffer,
handle.memory.memory(), handle.memory.offset()) != VK_SUCCESS)

View File

@ -86,7 +86,7 @@ namespace dxvk {
m_vkd->vkGetImageMemoryRequirements2(
m_vkd->device(), &memReqInfo, &memReq);
if (info.tiling != VK_IMAGE_TILING_LINEAR && !dedicatedRequirements.prefersDedicatedAllocation) {
memReq.memoryRequirements.size = align(memReq.memoryRequirements.size, memAlloc.bufferImageGranularity());
memReq.memoryRequirements.alignment = align(memReq.memoryRequirements.alignment , memAlloc.bufferImageGranularity());
@ -100,11 +100,14 @@ namespace dxvk {
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT)) != 0;
float priority = isGpuWritable ? 1.0f : 0.5f;
DxvkMemoryFlags hints(DxvkMemoryFlag::GpuReadable);
if (isGpuWritable)
hints.set(DxvkMemoryFlag::GpuWritable);
// Ask driver whether we should be using a dedicated allocation
m_image.memory = memAlloc.alloc(&memReq.memoryRequirements,
dedicatedRequirements, dedMemoryAllocInfo, memFlags, priority);
dedicatedRequirements, dedMemoryAllocInfo, memFlags, hints);
// Try to bind the allocated memory slice to the image
if (m_vkd->vkBindImageMemory(m_vkd->device(), m_image.image,

View File

@ -60,8 +60,9 @@ namespace dxvk {
DxvkMemoryChunk::DxvkMemoryChunk(
DxvkMemoryAllocator* alloc,
DxvkMemoryType* type,
DxvkDeviceMemory memory)
: m_alloc(alloc), m_type(type), m_memory(memory) {
DxvkDeviceMemory memory,
DxvkMemoryFlags hints)
: m_alloc(alloc), m_type(type), m_memory(memory), m_hints(hints) {
// Mark the entire chunk as free
m_freeList.push_back(FreeSlice { 0, memory.memSize });
}
@ -78,11 +79,10 @@ namespace dxvk {
VkMemoryPropertyFlags flags,
VkDeviceSize size,
VkDeviceSize align,
float priority) {
DxvkMemoryFlags hints) {
// Property flags must be compatible. This could
// be refined a bit in the future if necessary.
if (m_memory.memFlags != flags
|| m_memory.priority != priority)
if (m_memory.memFlags != flags || !checkHints(hints))
return DxvkMemory();
// If the chunk is full, return
@ -160,6 +160,19 @@ namespace dxvk {
}
/**
 * \brief Checks whether allocation hints are compatible with this chunk
 *
 * Compares the \c Small, \c GpuReadable and \c GpuWritable bits of
 * the requested hints against the hints the chunk was created with.
 * If \c IgnoreConstraints is set in the request, the comparison mask
 * is empty and no category bits take part in the comparison.
 * \param [in] hints Hints of the requested allocation
 * \returns \c true if the masked hints are equal
 */
bool DxvkMemoryChunk::checkHints(DxvkMemoryFlags hints) const {
  const bool ignoreConstraints = hints.test(DxvkMemoryFlag::IgnoreConstraints);

  // Bits that have to agree between the chunk and the request
  const DxvkMemoryFlags relevantBits = ignoreConstraints
    ? DxvkMemoryFlags()
    : DxvkMemoryFlags(
        DxvkMemoryFlag::Small,
        DxvkMemoryFlag::GpuReadable,
        DxvkMemoryFlag::GpuWritable);

  const DxvkMemoryFlags chunkBits   = m_hints & relevantBits;
  const DxvkMemoryFlags requestBits = hints   & relevantBits;
  return chunkBits == requestBits;
}
DxvkMemoryAllocator::DxvkMemoryAllocator(const DxvkDevice* device)
: m_vkd (device->vkd()),
m_device (device),
@ -182,7 +195,6 @@ namespace dxvk {
m_memTypes[i].heapId = m_memProps.memoryTypes[i].heapIndex;
m_memTypes[i].memType = m_memProps.memoryTypes[i];
m_memTypes[i].memTypeId = i;
m_memTypes[i].chunkSize = pickChunkSize(i);
}
/* Work around an issue on Nvidia drivers where using the entire
@ -197,10 +209,8 @@ namespace dxvk {
for (uint32_t i = 0; i < m_memProps.memoryTypeCount; i++) {
VkMemoryPropertyFlags flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
if ((m_memTypes[i].memType.propertyFlags & flags) == flags) {
if ((m_memTypes[i].memType.propertyFlags & flags) == flags)
m_memTypes[i].heap->budget = 32 << 20;
m_memTypes[i].chunkSize = 1 << 20;
}
}
}
}
@ -217,19 +227,38 @@ namespace dxvk {
const VkMemoryDedicatedRequirements& dedAllocReq,
const VkMemoryDedicatedAllocateInfo& dedAllocInfo,
VkMemoryPropertyFlags flags,
float priority) {
DxvkMemoryFlags hints) {
std::lock_guard<dxvk::mutex> lock(m_mutex);
// Keep small allocations together to avoid fragmenting
// chunks for larger resources with lots of small gaps,
// as well as resources with potentially weird lifetimes
if (req->size <= SmallAllocationThreshold) {
hints.set(DxvkMemoryFlag::Small);
hints.clr(DxvkMemoryFlag::GpuWritable, DxvkMemoryFlag::GpuReadable);
}
// Ignore all hints for host-visible allocations since they
// usually don't make much sense for those resources
if (flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
hints = DxvkMemoryFlags();
// Try to allocate from a memory type which supports the given flags exactly
auto dedAllocPtr = dedAllocReq.prefersDedicatedAllocation ? &dedAllocInfo : nullptr;
DxvkMemory result = this->tryAlloc(req, dedAllocPtr, flags, priority);
DxvkMemory result = this->tryAlloc(req, dedAllocPtr, flags, hints);
// If the first attempt failed, try ignoring the dedicated allocation
if (!result && dedAllocPtr && !dedAllocReq.requiresDedicatedAllocation) {
result = this->tryAlloc(req, nullptr, flags, priority);
result = this->tryAlloc(req, nullptr, flags, hints);
dedAllocPtr = nullptr;
}
// Retry without the hint constraints
if (!result) {
hints.set(DxvkMemoryFlag::IgnoreConstraints);
result = this->tryAlloc(req, nullptr, flags, hints);
}
// If that still didn't work, probe slower memory types as well
VkMemoryPropertyFlags optFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT
| VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
@ -239,7 +268,7 @@ namespace dxvk {
remFlags |= optFlags & -optFlags;
optFlags &= ~remFlags;
result = this->tryAlloc(req, dedAllocPtr, flags & ~remFlags, priority);
result = this->tryAlloc(req, dedAllocPtr, flags & ~remFlags, hints);
}
if (!result) {
@ -276,7 +305,7 @@ namespace dxvk {
const VkMemoryRequirements* req,
const VkMemoryDedicatedAllocateInfo* dedAllocInfo,
VkMemoryPropertyFlags flags,
float priority) {
DxvkMemoryFlags hints) {
DxvkMemory result;
for (uint32_t i = 0; i < m_memProps.memoryTypeCount && !result; i++) {
@ -285,7 +314,7 @@ namespace dxvk {
if (supported && adequate) {
result = this->tryAllocFromType(&m_memTypes[i],
flags, req->size, req->alignment, priority, dedAllocInfo);
flags, req->size, req->alignment, hints, dedAllocInfo);
}
}
@ -298,35 +327,31 @@ namespace dxvk {
VkMemoryPropertyFlags flags,
VkDeviceSize size,
VkDeviceSize align,
float priority,
DxvkMemoryFlags hints,
const VkMemoryDedicatedAllocateInfo* dedAllocInfo) {
// Prevent unnecessary external host memory fragmentation
bool isDeviceLocal = (flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) != 0;
if (!isDeviceLocal)
priority = 0.0f;
VkDeviceSize chunkSize = pickChunkSize(type->memTypeId, hints);
DxvkMemory memory;
if (size >= type->chunkSize || dedAllocInfo) {
if (size >= chunkSize || dedAllocInfo) {
DxvkDeviceMemory devMem = this->tryAllocDeviceMemory(
type, flags, size, priority, dedAllocInfo);
type, flags, size, hints, dedAllocInfo);
if (devMem.memHandle != VK_NULL_HANDLE)
memory = DxvkMemory(this, nullptr, type, devMem.memHandle, 0, size, devMem.memPointer);
} else {
for (uint32_t i = 0; i < type->chunks.size() && !memory; i++)
memory = type->chunks[i]->alloc(flags, size, align, priority);
memory = type->chunks[i]->alloc(flags, size, align, hints);
if (!memory) {
DxvkDeviceMemory devMem;
for (uint32_t i = 0; i < 6 && (type->chunkSize >> i) >= size && !devMem.memHandle; i++)
devMem = tryAllocDeviceMemory(type, flags, type->chunkSize >> i, priority, nullptr);
for (uint32_t i = 0; i < 6 && (chunkSize >> i) >= size && !devMem.memHandle; i++)
devMem = tryAllocDeviceMemory(type, flags, chunkSize >> i, hints, nullptr);
if (devMem.memHandle) {
Rc<DxvkMemoryChunk> chunk = new DxvkMemoryChunk(this, type, devMem);
memory = chunk->alloc(flags, size, align, priority);
Rc<DxvkMemoryChunk> chunk = new DxvkMemoryChunk(this, type, devMem, hints);
memory = chunk->alloc(flags, size, align, hints);
type->chunks.push_back(std::move(chunk));
}
@ -344,7 +369,7 @@ namespace dxvk {
DxvkMemoryType* type,
VkMemoryPropertyFlags flags,
VkDeviceSize size,
float priority,
DxvkMemoryFlags hints,
const VkMemoryDedicatedAllocateInfo* dedAllocInfo) {
bool useMemoryPriority = (flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)
&& (m_device->features().extMemoryPriority.memoryPriority);
@ -352,6 +377,13 @@ namespace dxvk {
if (type->heap->budget && type->heap->stats.memoryAllocated + size > type->heap->budget)
return DxvkDeviceMemory();
float priority = 0.0f;
if (hints.test(DxvkMemoryFlag::GpuReadable))
priority = 0.5f;
if (hints.test(DxvkMemoryFlag::GpuWritable))
priority = 1.0f;
DxvkDeviceMemory result;
result.memSize = size;
result.memFlags = flags;
@ -433,19 +465,20 @@ namespace dxvk {
}
VkDeviceSize DxvkMemoryAllocator::pickChunkSize(uint32_t memTypeId) const {
VkDeviceSize DxvkMemoryAllocator::pickChunkSize(uint32_t memTypeId, DxvkMemoryFlags hints) const {
VkMemoryType type = m_memProps.memoryTypes[memTypeId];
VkMemoryHeap heap = m_memProps.memoryHeaps[type.heapIndex];
// Default to a chunk size of 128 MiB
VkDeviceSize chunkSize = 128 << 20;
// Try to waste a bit less system memory in 32-bit
// applications due to address space constraints
if (env::is32BitHostPlatform()) {
if (type.propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
chunkSize = 32 << 20;
}
if (hints.test(DxvkMemoryFlag::Small))
chunkSize = 16 << 20;
// Try to waste a bit less system memory especially in
// 32-bit applications due to address space constraints
if (type.propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
chunkSize = 16 << 20;
// Reduce the chunk size on small heaps so
// we can at least fit in 15 allocations

View File

@ -62,8 +62,6 @@ namespace dxvk {
VkMemoryType memType;
uint32_t memTypeId;
VkDeviceSize chunkSize;
std::vector<Rc<DxvkMemoryChunk>> chunks;
};
@ -155,6 +153,22 @@ namespace dxvk {
void free();
};
/**
 * \brief Memory allocation flags
 *
 * Categorizes allocations so that similar ones can be
 * batched into the same set of chunks, which may help
 * with fragmentation.
 */
enum class DxvkMemoryFlag : uint32_t {
  /** Small allocation */
  Small             = 0,
  /** Medium-priority resource */
  GpuReadable       = 1,
  /** High-priority resource */
  GpuWritable       = 2,
  /** Ignore most allocation flags */
  IgnoreConstraints = 3,
};
using DxvkMemoryFlags = Flags<DxvkMemoryFlag>;
/**
@ -170,7 +184,8 @@ namespace dxvk {
DxvkMemoryChunk(
DxvkMemoryAllocator* alloc,
DxvkMemoryType* type,
DxvkDeviceMemory memory);
DxvkDeviceMemory memory,
DxvkMemoryFlags m_hints);
~DxvkMemoryChunk();
@ -179,17 +194,17 @@ namespace dxvk {
*
* On failure, this returns a slice with
* \c VK_NULL_HANDLE as the memory handle.
* \param [in] flags Requested memory flags
* \param [in] flags Requested memory type flags
* \param [in] size Number of bytes to allocate
* \param [in] align Required alignment
* \param [in] priority Requested priority
* \param [in] hints Memory category
* \returns The allocated memory slice
*/
DxvkMemory alloc(
VkMemoryPropertyFlags flags,
VkDeviceSize size,
VkDeviceSize align,
float priority);
DxvkMemoryFlags hints);
/**
* \brief Frees memory
@ -220,8 +235,11 @@ namespace dxvk {
DxvkMemoryAllocator* m_alloc;
DxvkMemoryType* m_type;
DxvkDeviceMemory m_memory;
DxvkMemoryFlags m_hints;
std::vector<FreeSlice> m_freeList;
bool checkHints(DxvkMemoryFlags hints) const;
};
@ -235,6 +253,8 @@ namespace dxvk {
class DxvkMemoryAllocator {
friend class DxvkMemory;
friend class DxvkMemoryChunk;
constexpr static VkDeviceSize SmallAllocationThreshold = 256 << 10;
public:
DxvkMemoryAllocator(const DxvkDevice* device);
@ -259,7 +279,7 @@ namespace dxvk {
* \param [in] dedAllocReq Dedicated allocation requirements
* \param [in] dedAllocInfo Dedicated allocation info
* \param [in] flags Memory type flags
* \param [in] priority Device-local memory priority
* \param [in] hints Memory hints
* \returns Allocated memory slice
*/
DxvkMemory alloc(
@ -267,7 +287,7 @@ namespace dxvk {
const VkMemoryDedicatedRequirements& dedAllocReq,
const VkMemoryDedicatedAllocateInfo& dedAllocInfo,
VkMemoryPropertyFlags flags,
float priority);
DxvkMemoryFlags hints);
/**
* \brief Queries memory stats
@ -296,21 +316,21 @@ namespace dxvk {
const VkMemoryRequirements* req,
const VkMemoryDedicatedAllocateInfo* dedAllocInfo,
VkMemoryPropertyFlags flags,
float priority);
DxvkMemoryFlags hints);
DxvkMemory tryAllocFromType(
DxvkMemoryType* type,
VkMemoryPropertyFlags flags,
VkDeviceSize size,
VkDeviceSize align,
float priority,
DxvkMemoryFlags hints,
const VkMemoryDedicatedAllocateInfo* dedAllocInfo);
DxvkDeviceMemory tryAllocDeviceMemory(
DxvkMemoryType* type,
VkMemoryPropertyFlags flags,
VkDeviceSize size,
float priority,
DxvkMemoryFlags hints,
const VkMemoryDedicatedAllocateInfo* dedAllocInfo);
void free(
@ -327,7 +347,8 @@ namespace dxvk {
DxvkDeviceMemory memory);
VkDeviceSize pickChunkSize(
uint32_t memTypeId) const;
uint32_t memTypeId,
DxvkMemoryFlags hints) const;
};