From c3dbb6429f0fef00695c175cee0080bdd99352ab Mon Sep 17 00:00:00 2001
From: Robin Kertels
Date: Thu, 2 Jun 2022 03:22:22 +0200
Subject: [PATCH] [d3d9] Implement memory allocator for memory mapped files

---
Review notes (text between "---" and the diff is ignored by git am):
 - Restored the line breaks and the template arguments / include targets
   that were missing from this copy of the patch. Context-line whitespace,
   the include targets and the std::atomic value type are best-effort
   reconstructions.
 - D3D9MemoryChunk::Alloc returns the grown size instead of the requested one.
 - D3D9MemoryChunk::Map returns nullptr and rolls back its statistics when
   MapViewOfFile fails.
 - D3D9Memory move assignment tolerates self-assignment.
 - The blob ids on the "index" lines are those of the original patch.

 src/d3d9/d3d9_device.cpp |  27 ++--
 src/d3d9/d3d9_device.h   |   7 +
 src/d3d9/d3d9_mem.cpp    | 322 +++++++++++++++++++++++++++++++++++++++
 src/d3d9/d3d9_mem.h      | 176 +++++++++++++++++++++
 src/d3d9/meson.build     |   3 +-
 5 files changed, 521 insertions(+), 14 deletions(-)
 create mode 100644 src/d3d9/d3d9_mem.cpp
 create mode 100644 src/d3d9/d3d9_mem.h

diff --git a/src/d3d9/d3d9_device.cpp b/src/d3d9/d3d9_device.cpp
index 6f7a3ddee..4b9954181 100644
--- a/src/d3d9/d3d9_device.cpp
+++ b/src/d3d9/d3d9_device.cpp
@@ -39,19 +39,20 @@ namespace dxvk {
           HWND                   hFocusWindow,
           DWORD                  BehaviorFlags,
           Rc<DxvkDevice>         dxvkDevice)
-    : m_parent         ( pParent )
-    , m_deviceType     ( DeviceType )
-    , m_window         ( hFocusWindow )
-    , m_behaviorFlags  ( BehaviorFlags )
-    , m_adapter        ( pAdapter )
-    , m_dxvkDevice     ( dxvkDevice )
-    , m_shaderModules  ( new D3D9ShaderModuleSet )
-    , m_stagingBuffer  ( dxvkDevice, StagingBufferSize )
-    , m_d3d9Options    ( dxvkDevice, pParent->GetInstance()->config() )
-    , m_multithread    ( BehaviorFlags & D3DCREATE_MULTITHREADED )
-    , m_isSWVP         ( (BehaviorFlags & D3DCREATE_SOFTWARE_VERTEXPROCESSING) ? true : false )
-    , m_csThread       ( dxvkDevice, dxvkDevice->createContext(DxvkContextType::Primary) )
-    , m_csChunk        ( AllocCsChunk() ) {
+    : m_parent          ( pParent )
+    , m_deviceType      ( DeviceType )
+    , m_window          ( hFocusWindow )
+    , m_behaviorFlags   ( BehaviorFlags )
+    , m_adapter         ( pAdapter )
+    , m_dxvkDevice      ( dxvkDevice )
+    , m_memoryAllocator ( )
+    , m_shaderModules   ( new D3D9ShaderModuleSet )
+    , m_stagingBuffer   ( dxvkDevice, StagingBufferSize )
+    , m_d3d9Options     ( dxvkDevice, pParent->GetInstance()->config() )
+    , m_multithread     ( BehaviorFlags & D3DCREATE_MULTITHREADED )
+    , m_isSWVP          ( (BehaviorFlags & D3DCREATE_SOFTWARE_VERTEXPROCESSING) ? true : false )
+    , m_csThread        ( dxvkDevice, dxvkDevice->createContext(DxvkContextType::Primary) )
+    , m_csChunk         ( AllocCsChunk() ) {
     // If we can SWVP, then we use an extended constant set
     // as SWVP has many more slots available than HWVP.
     bool canSWVP = CanSWVP();
diff --git a/src/d3d9/d3d9_device.h b/src/d3d9/d3d9_device.h
index 8090f1b55..2d1e3dcc8 100644
--- a/src/d3d9/d3d9_device.h
+++ b/src/d3d9/d3d9_device.h
@@ -11,6 +11,7 @@
 #include "d3d9_adapter.h"
 #include "d3d9_constant_buffer.h"
 #include "d3d9_constant_set.h"
+#include "d3d9_mem.h"
 
 #include "d3d9_state.h"
 
@@ -929,6 +930,10 @@ namespace dxvk {
       return m_samplerCount.load();
     }
 
+    D3D9MemoryAllocator* GetAllocator() {
+      return &m_memoryAllocator;
+    }
+
   private:
 
     DxvkCsChunkRef AllocCsChunk() {
@@ -1148,6 +1153,8 @@ namespace dxvk {
     D3D9Adapter*                    m_adapter;
     Rc<DxvkDevice>                  m_dxvkDevice;
 
+    D3D9MemoryAllocator             m_memoryAllocator;
+
     uint32_t                        m_frameLatency = DefaultFrameLatency;
 
     D3D9Initializer*                m_initializer = nullptr;
diff --git a/src/d3d9/d3d9_mem.cpp b/src/d3d9/d3d9_mem.cpp
new file mode 100644
index 000000000..c288fd3ac
--- /dev/null
+++ b/src/d3d9/d3d9_mem.cpp
@@ -0,0 +1,322 @@
+#include "d3d9_mem.h"
+#include "../util/util_string.h"
+#include "../util/util_math.h"
+#include "../util/log/log.h"
+#include "../util/util_likely.h"
+#include <algorithm>
+#include <utility>
+
+#ifdef D3D9_ALLOW_UNMAPPING
+#include <sysinfoapi.h>
+#endif
+
+namespace dxvk {
+
+#ifdef D3D9_ALLOW_UNMAPPING
+  D3D9MemoryAllocator::D3D9MemoryAllocator() {
+    SYSTEM_INFO sysInfo;
+    GetSystemInfo(&sysInfo);
+    m_allocationGranularity = sysInfo.dwAllocationGranularity;
+  }
+
+  // Sub-allocates Size bytes (rounded up to CACHE_LINE_SIZE) from the first
+  // chunk with enough free space, creating a new chunk if none has room.
+  D3D9Memory D3D9MemoryAllocator::Alloc(uint32_t Size) {
+    std::lock_guard<dxvk::mutex> lock(m_mutex);
+
+    uint32_t alignedSize = align(Size, CACHE_LINE_SIZE);
+    for (auto& chunk : m_chunks) {
+      D3D9Memory memory = chunk->Alloc(alignedSize);
+      if (memory) {
+        m_usedMemory += memory.GetSize();
+        return memory;
+      }
+    }
+
+    uint32_t chunkSize = std::max(D3D9ChunkSize, alignedSize);
+    m_allocatedMemory += chunkSize;
+
+    D3D9MemoryChunk* chunk = new D3D9MemoryChunk(this, chunkSize);
+    std::unique_ptr<D3D9MemoryChunk> uniqueChunk(chunk);
+    D3D9Memory memory = uniqueChunk->Alloc(alignedSize);
+    m_usedMemory += memory.GetSize();
+
+    m_chunks.push_back(std::move(uniqueChunk));
+    return memory;
+  }
+
+  // Destroys Chunk and removes it from the chunk list.
+  void D3D9MemoryAllocator::FreeChunk(D3D9MemoryChunk *Chunk) {
+    std::lock_guard<dxvk::mutex> lock(m_mutex);
+
+    m_allocatedMemory -= Chunk->Size();
+
+    m_chunks.erase(std::remove_if(m_chunks.begin(), m_chunks.end(), [&](auto& item) {
+      return item.get() == Chunk;
+    }), m_chunks.end());
+  }
+
+  void D3D9MemoryAllocator::NotifyMapped(uint32_t Size) {
+    m_mappedMemory += Size;
+  }
+
+  void D3D9MemoryAllocator::NotifyUnmapped(uint32_t Size) {
+    m_mappedMemory -= Size;
+  }
+
+  void D3D9MemoryAllocator::NotifyFreed(uint32_t Size) {
+    m_usedMemory -= Size;
+  }
+
+  uint32_t D3D9MemoryAllocator::MappedMemory() {
+    return m_mappedMemory.load();
+  }
+
+  uint32_t D3D9MemoryAllocator::UsedMemory() {
+    return m_usedMemory.load();
+  }
+
+  uint32_t D3D9MemoryAllocator::AllocatedMemory() {
+    return m_allocatedMemory.load();
+  }
+
+  // Creates a pagefile-backed file mapping of Size bytes. Views are mapped
+  // lazily in pages of 16 allocation granules (see Map).
+  D3D9MemoryChunk::D3D9MemoryChunk(D3D9MemoryAllocator* Allocator, uint32_t Size)
+    : m_allocator(Allocator), m_size(Size), m_mappingGranularity(m_allocator->MemoryGranularity() * 16) {
+    m_mapping = CreateFileMappingA(INVALID_HANDLE_VALUE, nullptr, PAGE_READWRITE | SEC_COMMIT, 0, Size, nullptr);
+    m_freeRanges.push_back({ 0, Size });
+    m_mappingRanges.resize(((Size + m_mappingGranularity - 1) / m_mappingGranularity));
+  }
+
+  D3D9MemoryChunk::~D3D9MemoryChunk() {
+    std::lock_guard<dxvk::mutex> lock(m_mutex);
+
+    CloseHandle(m_mapping);
+  }
+
+  // Maps the allocation into the address space and returns a pointer to its
+  // first byte, or nullptr if MapViewOfFile fails.
+  void* D3D9MemoryChunk::Map(D3D9Memory* memory) {
+    std::lock_guard<dxvk::mutex> lock(m_mutex);
+
+    uint32_t alignedOffset = alignDown(memory->GetOffset(), m_mappingGranularity);
+    uint32_t alignmentDelta = memory->GetOffset() - alignedOffset;
+    uint32_t alignedSize = memory->GetSize() + alignmentDelta;
+    if (alignedSize > m_mappingGranularity) {
+      // The allocation crosses the boundary of the internal mapping page it's a part of
+      // so we map it on its own.
+      alignedOffset = alignDown(memory->GetOffset(), m_allocator->MemoryGranularity());
+      alignmentDelta = memory->GetOffset() - alignedOffset;
+      alignedSize = memory->GetSize() + alignmentDelta;
+
+      m_allocator->NotifyMapped(alignedSize);
+      uint8_t* basePtr = static_cast<uint8_t*>(MapViewOfFile(m_mapping, FILE_MAP_ALL_ACCESS, 0, alignedOffset, alignedSize));
+      if (unlikely(basePtr == nullptr)) {
+        DWORD error = GetLastError();
+        Logger::err(str::format("Mapping non-persisted file failed: ", error, ", Mapped memory: ", m_allocator->MappedMemory()));
+        // Nothing was mapped, so take the size back out of the statistics.
+        m_allocator->NotifyUnmapped(alignedSize);
+        return nullptr;
+      }
+      return basePtr + alignmentDelta;
+    }
+
+    // For small allocations we map the entire mapping page to minimize the overhead from having to align the offset to 65k bytes.
+    // This should hopefully also reduce the amount of MapViewOfFile calls we do for tiny allocations.
+    auto& mappingRange = m_mappingRanges[memory->GetOffset() / m_mappingGranularity];
+    if (unlikely(mappingRange.refCount == 0)) {
+      m_allocator->NotifyMapped(m_mappingGranularity);
+      mappingRange.ptr = static_cast<uint8_t*>(MapViewOfFile(m_mapping, FILE_MAP_ALL_ACCESS, 0, alignedOffset, m_mappingGranularity));
+      if (unlikely(mappingRange.ptr == nullptr)) {
+        DWORD error = GetLastError();
+        LPTSTR buffer = nullptr;
+        FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM, nullptr, error, MAKELANGID(LANG_NEUTRAL, SUBLANG_NEUTRAL), (LPTSTR)&buffer, 0, nullptr);
+        Logger::err(str::format("Mapping non-persisted file failed: ", error, ", Mapped memory: ", m_allocator->MappedMemory(), ", Msg: ", buffer));
+        if (buffer) {
+          LocalFree(buffer);
+        }
+        // Bail out without taking a reference: continuing would hand out
+        // nullptr + alignmentDelta, which callers cannot tell from a valid pointer.
+        m_allocator->NotifyUnmapped(m_mappingGranularity);
+        return nullptr;
+      }
+    }
+    mappingRange.refCount++;
+    uint8_t* basePtr = static_cast<uint8_t*>(mappingRange.ptr);
+    return basePtr + alignmentDelta;
+  }
+
+  // Releases the mapping created by Map for this allocation.
+  void D3D9MemoryChunk::Unmap(D3D9Memory* memory) {
+    std::lock_guard<dxvk::mutex> lock(m_mutex);
+
+    uint32_t alignedOffset = alignDown(memory->GetOffset(), m_mappingGranularity);
+    uint32_t alignmentDelta = memory->GetOffset() - alignedOffset;
+    uint32_t alignedSize = memory->GetSize() + alignmentDelta;
+    if (alignedSize > m_mappingGranularity) {
+      // Single use mapping
+      alignedOffset = alignDown(memory->GetOffset(), m_allocator->MemoryGranularity());
+      alignmentDelta = memory->GetOffset() - alignedOffset;
+      alignedSize = memory->GetSize() + alignmentDelta;
+
+      uint8_t* basePtr = static_cast<uint8_t*>(memory->Ptr()) - alignmentDelta;
+      UnmapViewOfFile(basePtr);
+      m_allocator->NotifyUnmapped(alignedSize);
+      return;
+    }
+    auto& mappingRange = m_mappingRanges[memory->GetOffset() / m_mappingGranularity];
+    mappingRange.refCount--;
+    if (unlikely(mappingRange.refCount == 0)) {
+      UnmapViewOfFile(mappingRange.ptr);
+      mappingRange.ptr = nullptr;
+      m_allocator->NotifyUnmapped(m_mappingGranularity);
+    }
+  }
+
+  // First-fit allocation of Size bytes from the free list. Returns an empty
+  // D3D9Memory if no free range is large enough.
+  D3D9Memory D3D9MemoryChunk::Alloc(uint32_t Size) {
+    std::lock_guard<dxvk::mutex> lock(m_mutex);
+
+    uint32_t offset = 0;
+    uint32_t size = 0;
+
+    for (auto range = m_freeRanges.begin(); range != m_freeRanges.end(); range++) {
+      if (range->length >= Size) {
+        offset = range->offset;
+        size = Size;
+        range->offset += Size;
+        range->length -= Size;
+        if (range->length < (4 << 10)) {
+          // Remainders under 4 KiB are not kept as free ranges; they are
+          // handed to this allocation instead.
+          size += range->length;
+          m_freeRanges.erase(range);
+        }
+        break;
+      }
+    }
+
+    // Use the grown size, not the requested Size: otherwise the absorbed
+    // remainder is never returned by Free() and IsEmpty() can never be true again.
+    if (size != 0)
+      return D3D9Memory(this, offset, size);
+
+    return {};
+  }
+
+  // Returns the allocation's range to the free list, merging it with
+  // adjacent free ranges.
+  void D3D9MemoryChunk::Free(D3D9Memory *Memory) {
+    std::lock_guard<dxvk::mutex> lock(m_mutex);
+
+    uint32_t offset = Memory->GetOffset();
+    uint32_t size = Memory->GetSize();
+
+    auto curr = m_freeRanges.begin();
+
+    // shamelessly stolen from dxvk_memory.cpp
+    while (curr != m_freeRanges.end()) {
+      if (curr->offset == offset + size) {
+        size += curr->length;
+        curr = m_freeRanges.erase(curr);
+      } else if (curr->offset + curr->length == offset) {
+        offset -= curr->length;
+        size += curr->length;
+        curr = m_freeRanges.erase(curr);
+      } else {
+        curr++;
+      }
+    }
+
+    m_freeRanges.push_back({ offset, size });
+    m_allocator->NotifyFreed(Memory->GetSize());
+  }
+
+  bool D3D9MemoryChunk::IsEmpty() {
+    std::lock_guard<dxvk::mutex> lock(m_mutex);
+
+    return m_freeRanges.size() == 1
+        && m_freeRanges[0].length == m_size;
+  }
+
+  D3D9MemoryAllocator* D3D9MemoryChunk::Allocator() const {
+    return m_allocator;
+  }
+
+  HANDLE D3D9MemoryChunk::FileHandle() const {
+    return m_mapping;
+  }
+
+
+  D3D9Memory::D3D9Memory(D3D9MemoryChunk* Chunk, size_t Offset, size_t Size)
+    : m_chunk(Chunk), m_offset(Offset), m_size(Size) {}
+
+  D3D9Memory::D3D9Memory(D3D9Memory&& other)
+    : m_chunk(std::exchange(other.m_chunk, nullptr)),
+      m_ptr(std::exchange(other.m_ptr, nullptr)),
+      m_offset(std::exchange(other.m_offset, 0)),
+      m_size(std::exchange(other.m_size, 0)) {}
+
+  D3D9Memory::~D3D9Memory() {
+    this->Free();
+  }
+
+  D3D9Memory& D3D9Memory::operator = (D3D9Memory&& other) {
+    // Self-move must not free the allocation that is about to be kept.
+    if (unlikely(this == &other))
+      return *this;
+
+    this->Free();
+
+    m_chunk = std::exchange(other.m_chunk, nullptr);
+    m_ptr = std::exchange(other.m_ptr, nullptr);
+    m_offset = std::exchange(other.m_offset, 0);
+    m_size = std::exchange(other.m_size, 0);
+    return *this;
+  }
+
+  // Unmaps and releases the allocation; destroys the chunk once it is empty.
+  void D3D9Memory::Free() {
+    if (unlikely(m_chunk == nullptr))
+      return;
+
+    if (m_ptr != nullptr)
+      Unmap();
+
+    m_chunk->Free(this);
+    // NOTE(review): IsEmpty() and FreeChunk() take their locks separately, so
+    // a concurrent Alloc() could hand out memory from this chunk in between.
+    if (m_chunk->IsEmpty()) {
+      D3D9MemoryAllocator* allocator = m_chunk->Allocator();
+      allocator->FreeChunk(m_chunk);
+    }
+    m_chunk = nullptr;
+  }
+
+  void D3D9Memory::Map() {
+    if (unlikely(m_ptr != nullptr))
+      return;
+
+    if (unlikely(m_chunk == nullptr))
+      return;
+
+    m_ptr = m_chunk->Map(this);
+  }
+
+  void D3D9Memory::Unmap() {
+    if (unlikely(m_ptr == nullptr))
+      return;
+
+    m_chunk->Unmap(this);
+    m_ptr = nullptr;
+  }
+
+  void* D3D9Memory::Ptr() {
+    return m_ptr;
+  }
+
+#endif
+
+}
diff --git a/src/d3d9/d3d9_mem.h b/src/d3d9/d3d9_mem.h
new file mode 100644
index 000000000..f2ffde0a6
--- /dev/null
+++ b/src/d3d9/d3d9_mem.h
@@ -0,0 +1,176 @@
+
+#pragma once
+
+#include <atomic>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <vector>
+
+#include "../util/thread.h"
+
+#if defined(_WIN32) && !defined(_WIN64)
+  #define D3D9_ALLOW_UNMAPPING
+#endif
+
+#ifdef D3D9_ALLOW_UNMAPPING
+  #define WIN32_LEAN_AND_MEAN
+  #include <windows.h>
+#endif
+
+namespace dxvk {
+
+  class D3D9MemoryAllocator;
+  class D3D9Memory;
+
+#ifdef D3D9_ALLOW_UNMAPPING
+
+  class D3D9MemoryChunk;
+
+  // Minimum size of the file mapping backing one chunk (64 MiB).
+  constexpr uint32_t D3D9ChunkSize = 64 << 20;
+
+  // Free byte range inside a chunk.
+  struct D3D9MemoryRange {
+    uint32_t offset;
+    uint32_t length;
+  };
+
+  // Shared, reference-counted view of one mapping page of a chunk.
+  struct D3D9MappingRange {
+    uint32_t refCount = 0;
+    void* ptr = nullptr;
+  };
+
+  // One file mapping that allocations are carved out of. Views into it are
+  // created on Map() and released on Unmap().
+  class D3D9MemoryChunk {
+    friend D3D9MemoryAllocator;
+
+  public:
+    ~D3D9MemoryChunk();
+
+    D3D9MemoryChunk (const D3D9MemoryChunk&) = delete;
+    D3D9MemoryChunk& operator = (const D3D9MemoryChunk&) = delete;
+
+    D3D9MemoryChunk (D3D9MemoryChunk&& other) = delete;
+    D3D9MemoryChunk& operator = (D3D9MemoryChunk&& other) = delete;
+
+#ifdef D3D9_MEM_MAP_CHUNKS
+    void IncMapCounter();
+    void DecMapCounter();
+    void* Ptr() const { return m_ptr; }
+#endif
+    D3D9Memory Alloc(uint32_t Size);
+    void Free(D3D9Memory* Memory);
+    bool IsEmpty();
+    uint32_t Size() const { return m_size; }
+    D3D9MemoryAllocator* Allocator() const;
+    HANDLE FileHandle() const;
+    void* Map(D3D9Memory* memory);
+    void Unmap(D3D9Memory* memory);
+
+  private:
+    D3D9MemoryChunk(D3D9MemoryAllocator* Allocator, uint32_t Size);
+
+    dxvk::mutex m_mutex;
+    D3D9MemoryAllocator* m_allocator;
+    HANDLE m_mapping;
+    uint32_t m_size;
+    uint32_t m_mappingGranularity;
+    std::vector<D3D9MemoryRange> m_freeRanges;
+    std::vector<D3D9MappingRange> m_mappingRanges;
+
+#ifdef D3D9_MEM_MAP_CHUNKS
+    uint32_t m_mapCounter = 0;
+    void* m_ptr;
+#endif
+  };
+
+  // Move-only handle to a sub-allocation of a chunk. The destructor unmaps
+  // the memory and returns it to the chunk.
+  class D3D9Memory {
+    friend D3D9MemoryChunk;
+
+  public:
+    D3D9Memory() {}
+    ~D3D9Memory();
+
+    D3D9Memory (const D3D9Memory&) = delete;
+    D3D9Memory& operator = (const D3D9Memory&) = delete;
+
+    D3D9Memory (D3D9Memory&& other);
+    D3D9Memory& operator = (D3D9Memory&& other);
+
+    operator bool() const { return m_chunk != nullptr; }
+
+    void Map();
+    void Unmap();
+    void* Ptr();
+    D3D9MemoryChunk* GetChunk() const { return m_chunk; }
+    size_t GetOffset() const { return m_offset; }
+    size_t GetSize() const { return m_size; }
+
+  private:
+    D3D9Memory(D3D9MemoryChunk* Chunk, size_t Offset, size_t Size);
+    void Free();
+
+    D3D9MemoryChunk* m_chunk = nullptr;
+    void* m_ptr = nullptr;
+    size_t m_offset = 0;
+    size_t m_size = 0;
+  };
+
+  // Owns the chunks, hands out D3D9Memory allocations and keeps running
+  // totals of mapped, used and allocated bytes.
+  class D3D9MemoryAllocator {
+    friend D3D9MemoryChunk;
+
+  public:
+    D3D9MemoryAllocator();
+    ~D3D9MemoryAllocator() = default;
+    D3D9Memory Alloc(uint32_t Size);
+    void FreeChunk(D3D9MemoryChunk* Chunk);
+    void NotifyMapped(uint32_t Size);
+    void NotifyUnmapped(uint32_t Size);
+    void NotifyFreed(uint32_t Size);
+    uint32_t MappedMemory();
+    uint32_t UsedMemory();
+    uint32_t AllocatedMemory();
+    uint32_t MemoryGranularity() { return m_allocationGranularity; }
+
+  private:
+    dxvk::mutex m_mutex;
+    std::vector<std::unique_ptr<D3D9MemoryChunk>> m_chunks;
+    std::atomic<size_t> m_mappedMemory = 0;
+    std::atomic<size_t> m_allocatedMemory = 0;
+    std::atomic<size_t> m_usedMemory = 0;
+    uint32_t m_allocationGranularity;
+  };
+
+#else
+  // No-op stand-ins for builds without D3D9_ALLOW_UNMAPPING.
+  class D3D9Memory {
+  public:
+    operator bool() const { return false; }
+
+    void Map() {}
+    void Unmap() {}
+    void* Ptr() { return nullptr; }
+
+  private:
+    void Free();
+  };
+
+  class D3D9MemoryAllocator {
+
+  public:
+    D3D9Memory Alloc(uint32_t Size) { return { }; }
+    uint32_t MappedMemory() { return 0; }
+    uint32_t UsedMemory() { return 0; }
+    uint32_t AllocatedMemory() { return 0; }
+  };
+
+#endif
+
+}
diff --git a/src/d3d9/meson.build b/src/d3d9/meson.build
index 476525cc4..2641f58ee 100644
--- a/src/d3d9/meson.build
+++ b/src/d3d9/meson.build
@@ -40,7 +40,8 @@ d3d9_src = [
   'd3d9_swvp_emu.cpp',
   'd3d9_format_helpers.cpp',
   'd3d9_hud.cpp',
-  'd3d9_annotation.cpp'
+  'd3d9_annotation.cpp',
+  'd3d9_mem.cpp'
 ]
 
 d3d9_dll = shared_library('d3d9'+dll_ext, d3d9_src, glsl_generator.process(d3d9_shaders), d3d9_res,