From fc3d3ae331c0a0e4a094fc716fa82c616cc8ba7d Mon Sep 17 00:00:00 2001 From: Philip Rebohle Date: Fri, 21 Feb 2025 13:48:03 +0100 Subject: [PATCH] [dxvk,d3d11] Refactor CS command data allocation Allows us to allocate a (potentially growing) array of arbitrary data structures for a CS command. --- src/d3d11/d3d11_cmd.h | 18 ++--- src/d3d11/d3d11_context.cpp | 83 +++++++++++----------- src/d3d11/d3d11_context.h | 29 ++++---- src/dxvk/dxvk_cs.h | 135 ++++++++++++++++++++++++++++-------- 4 files changed, 169 insertions(+), 96 deletions(-) diff --git a/src/d3d11/d3d11_cmd.h b/src/d3d11/d3d11_cmd.h index 52f2f4dbc..635f63b72 100644 --- a/src/d3d11/d3d11_cmd.h +++ b/src/d3d11/d3d11_cmd.h @@ -10,23 +10,13 @@ namespace dxvk { * Used to identify the type of command * data most recently added to a CS chunk. */ - enum class D3D11CmdType { + enum class D3D11CmdType : uint32_t { + None, DrawIndirect, DrawIndirectIndexed, }; - /** - * \brief Command data header - * - * Stores the command type. All command - * data structs must inherit this struct. - */ - struct D3D11CmdData { - D3D11CmdType type; - }; - - /** * \brief Indirect draw command data * @@ -34,10 +24,10 @@ namespace dxvk { * the first draw, as well as the number of * draws to execute. 
*/ - struct D3D11CmdDrawIndirectData : public D3D11CmdData { + struct D3D11CmdDrawIndirectData { uint32_t offset; uint32_t count; uint32_t stride; }; -} \ No newline at end of file +} diff --git a/src/d3d11/d3d11_context.cpp b/src/d3d11/d3d11_context.cpp index ae2edcc49..c99c40178 100644 --- a/src/d3d11/d3d11_context.cpp +++ b/src/d3d11/d3d11_context.cpp @@ -19,8 +19,7 @@ namespace dxvk { m_flags (ContextFlags), m_staging (Device, StagingBufferSize), m_csFlags (CsFlags), - m_csChunk (AllocCsChunk()), - m_cmdData (nullptr) { + m_csChunk (AllocCsChunk()) { // Create local allocation cache with the same properties // that we will use for common dynamic buffer types uint32_t cachedDynamic = pParent->GetOptions()->cachedDynamicResources; @@ -1125,28 +1124,28 @@ namespace dxvk { if (unlikely(HasDirtyGraphicsBindings())) ApplyDirtyGraphicsBindings(); - // If possible, batch up multiple indirect draw calls of - // the same type into one single multiDrawIndirect call - auto cmdData = static_cast(m_cmdData); - auto stride = 0u; + // If possible, batch multiple indirect draw calls into one single multidraw call + if (m_csDataType == D3D11CmdType::DrawIndirectIndexed) { + auto cmdData = static_cast(m_csData->first()); + auto stride = GetIndirectCommandStride(cmdData, AlignedByteOffsetForArgs, sizeof(VkDrawIndexedIndirectCommand)); - if (cmdData && cmdData->type == D3D11CmdType::DrawIndirectIndexed) - stride = GetIndirectCommandStride(cmdData, AlignedByteOffsetForArgs, sizeof(VkDrawIndexedIndirectCommand)); - - if (stride) { - cmdData->count += 1; - cmdData->stride = stride; - } else { - cmdData = EmitCsCmd( - [] (DxvkContext* ctx, const D3D11CmdDrawIndirectData* data) { - ctx->drawIndexedIndirect(data->offset, data->count, data->stride, true); - }); - - cmdData->type = D3D11CmdType::DrawIndirectIndexed; - cmdData->offset = AlignedByteOffsetForArgs; - cmdData->count = 1; - cmdData->stride = 0; + if (stride) { + cmdData->count += 1; + cmdData->stride = stride; + return; + } } + 
+ // Need to start a new draw sequence + EmitCsCmd(D3D11CmdType::DrawIndirectIndexed, 1u, + [] (DxvkContext* ctx, const D3D11CmdDrawIndirectData* data, size_t) { + ctx->drawIndexedIndirect(data->offset, data->count, data->stride, true); + }); + + auto cmdData = new (m_csData->first()) D3D11CmdDrawIndirectData(); + cmdData->offset = AlignedByteOffsetForArgs; + cmdData->count = 1; + cmdData->stride = 0; } @@ -1163,28 +1162,28 @@ namespace dxvk { if (unlikely(HasDirtyGraphicsBindings())) ApplyDirtyGraphicsBindings(); - // If possible, batch up multiple indirect draw calls of - // the same type into one single multiDrawIndirect call - auto cmdData = static_cast(m_cmdData); - auto stride = 0u; + // If possible, batch multiple indirect draw calls into one single multidraw call + if (m_csDataType == D3D11CmdType::DrawIndirect) { + auto cmdData = static_cast(m_csData->first()); + auto stride = GetIndirectCommandStride(cmdData, AlignedByteOffsetForArgs, sizeof(VkDrawIndirectCommand)); - if (cmdData && cmdData->type == D3D11CmdType::DrawIndirect) - stride = GetIndirectCommandStride(cmdData, AlignedByteOffsetForArgs, sizeof(VkDrawIndirectCommand)); - - if (stride) { - cmdData->count += 1; - cmdData->stride = stride; - } else { - cmdData = EmitCsCmd( - [] (DxvkContext* ctx, const D3D11CmdDrawIndirectData* data) { - ctx->drawIndirect(data->offset, data->count, data->stride, true); - }); - - cmdData->type = D3D11CmdType::DrawIndirect; - cmdData->offset = AlignedByteOffsetForArgs; - cmdData->count = 1; - cmdData->stride = 0; + if (stride) { + cmdData->count += 1; + cmdData->stride = stride; + return; + } } + + // Need to start a new draw sequence + EmitCsCmd(D3D11CmdType::DrawIndirect, 1u, + [] (DxvkContext* ctx, const D3D11CmdDrawIndirectData* data, size_t) { + ctx->drawIndirect(data->offset, data->count, data->stride, true); + }); + + auto cmdData = new (m_csData->first()) D3D11CmdDrawIndirectData(); + cmdData->offset = AlignedByteOffsetForArgs; + cmdData->count = 1; + 
cmdData->stride = 0; } diff --git a/src/d3d11/d3d11_context.h b/src/d3d11/d3d11_context.h index d4b06e2d8..02dd816ab 100644 --- a/src/d3d11/d3d11_context.h +++ b/src/d3d11/d3d11_context.h @@ -793,9 +793,11 @@ namespace dxvk { DxvkStagingBuffer m_staging; + D3D11CmdType m_csDataType = D3D11CmdType::None; + DxvkCsChunkFlags m_csFlags; DxvkCsChunkRef m_csChunk; - D3D11CmdData* m_cmdData; + DxvkCsDataBlock* m_csData = nullptr; DxvkLocalAllocationCache m_allocationCache; @@ -1152,7 +1154,10 @@ namespace dxvk { template void EmitCs(Cmd&& command) { - m_cmdData = nullptr; + if (unlikely(m_csDataType != D3D11CmdType::None)) { + m_csData = nullptr; + m_csDataType = D3D11CmdType::None; + } if (unlikely(!m_csChunk->push(command))) { GetTypedContext()->EmitCsChunk(std::move(m_csChunk)); @@ -1165,12 +1170,12 @@ namespace dxvk { } } - template - M* EmitCsCmd(Cmd&& command, Args&&... args) { - M* data = m_csChunk->pushCmd( - command, std::forward(args)...); + template + void EmitCsCmd(D3D11CmdType type, size_t count, Cmd&& command) { + m_csDataType = type; + m_csData = m_csChunk->pushCmd(command, count); - if (unlikely(!data)) { + if (unlikely(!m_csData)) { GetTypedContext()->EmitCsChunk(std::move(m_csChunk)); m_csChunk = AllocCsChunk(); @@ -1179,19 +1184,17 @@ namespace dxvk { // We must record this command after the potential // flush since the caller may still access the data - data = m_csChunk->pushCmd( - command, std::forward(args)...); + m_csData = m_csChunk->pushCmd(command, count); } - - m_cmdData = data; - return data; } void FlushCsChunk() { if (likely(!m_csChunk->empty())) { + m_csData = nullptr; + m_csDataType = D3D11CmdType::None; + GetTypedContext()->EmitCsChunk(std::move(m_csChunk)); m_csChunk = AllocCsChunk(); - m_cmdData = nullptr; } } diff --git a/src/dxvk/dxvk_cs.h b/src/dxvk/dxvk_cs.h index 48f12b623..1a102e1d0 100644 --- a/src/dxvk/dxvk_cs.h +++ b/src/dxvk/dxvk_cs.h @@ -11,7 +11,9 @@ #include "dxvk_context.h" namespace dxvk { - + + constexpr static size_t 
DxvkCsChunkSize = 16384; + /** * \brief Command stream operation * @@ -86,6 +88,41 @@ namespace dxvk { }; + /** + * \brief Command data block + * + * Provides functionality to allocate a potentially growing + * array of structures for a command to traverse. + */ + class DxvkCsDataBlock { + friend class DxvkCsChunk; + public: + + /** + * \brief Number of structures allocated + * \returns Number of structures allocated + */ + size_t count() const { + return m_structCount; + } + + /** + * \brief Retrieves pointer to first structure + * \returns Untyped pointer to first structure + */ + void* first() { + return reinterpret_cast(this) + m_dataOffset; + } + + private: + + uint32_t m_dataOffset = 0u; + uint16_t m_structSize = 0u; + uint16_t m_structCount = 0u; + + }; + + /** * \brief Typed command with metadata * @@ -98,26 +135,33 @@ namespace dxvk { public: - template - DxvkCsDataCmd(T&& cmd, Args&&... args) - : m_command (std::move(cmd)), - m_data (std::forward(args)...) { } - + DxvkCsDataCmd(T&& cmd) + : m_command(std::move(cmd)) { } + + ~DxvkCsDataCmd() { + auto data = reinterpret_cast(m_data.first()); + + for (size_t i = 0; i < m_data.count(); i++) + data[i].~M(); + } + DxvkCsDataCmd (DxvkCsDataCmd&&) = delete; DxvkCsDataCmd& operator = (DxvkCsDataCmd&&) = delete; void exec(DxvkContext* ctx) { - m_command(ctx, &m_data); + // No const here so that the function can move objects efficiently + m_command(ctx, reinterpret_cast(m_data.first()), m_data.count()); } - M* data() { + DxvkCsDataBlock* data() { return &m_data; } private: - T m_command; - M m_data; + alignas(M) + T m_command; + DxvkCsDataBlock m_data; }; @@ -140,12 +184,12 @@ namespace dxvk { * Stores a list of commands. 
*/ class DxvkCsChunk : public RcObject { - constexpr static size_t MaxBlockSize = 16384; + public: - + DxvkCsChunk(); ~DxvkCsChunk(); - + /** * \brief Checks whether the chunk is empty * \returns \c true if the chunk is empty @@ -167,7 +211,7 @@ namespace dxvk { template bool push(T& command) { using FuncType = DxvkCsTypedCmd; - void* ptr = alloc(); + void* ptr = alloc(0u); if (unlikely(!ptr)) return false; @@ -186,23 +230,60 @@ namespace dxvk { * \brief Adds a command with data to the chunk * * \param [in] command The command to add - * \param [in] args Constructor args for the data object + * \param [in] count Number of items to allocate. Should be at least + * 1 in order to avoid the possibility of an empty command. Note + * that all allocated structures \e must be initialized before + * handing off the command to the worker thread. * \returns Pointer to the data object, or \c nullptr */ - template - M* pushCmd(T& command, Args&&... args) { + template + DxvkCsDataBlock* pushCmd(T& command, size_t count) { + size_t dataSize = count * sizeof(M); + + // DxvkCsDataCmd is aligned to M using FuncType = DxvkCsDataCmd; - void* ptr = alloc(); + void* ptr = alloc(dataSize); if (unlikely(!ptr)) return nullptr; - auto next = new (ptr) FuncType(std::move(command), std::forward(args)...); + // Command data is always packed tightly after the function object + auto next = new (ptr) FuncType(std::move(command)); append(next); - return next->data(); + // Do some cursed pointer math here so that the block can figure out + // where its data is stored based on its own address. This saves a + // decent amount of CS chunk memory compared to storing a pointer. 
+ auto block = next->data(); + block->m_dataOffset = reinterpret_cast(&m_data[m_commandOffset - dataSize]) + - reinterpret_cast(block); + block->m_structSize = sizeof(M); + block->m_structCount = count; + return block; } - + + /** + * \brief Allocates more storage for a data block + * + * The data block \e must be owned by the last command added to + * the CS chunk, or this may overwrite subsequent command data. + * \param [in] block Data block + * \param [in] count Number of structures to allocate + * \returns Pointer to first allocated structure, or \c nullptr + */ + void* pushData(DxvkCsDataBlock* block, uint32_t count) { + uint32_t dataSize = block->m_structSize * count; + + if (unlikely(m_commandOffset + dataSize > DxvkCsChunkSize)) + return nullptr; + + void* ptr = &m_data[m_commandOffset]; + m_commandOffset += dataSize; + + block->m_structCount += count; + return ptr; + } + /** * \brief Initializes chunk for recording * \param [in] flags Chunk flags @@ -237,18 +318,18 @@ namespace dxvk { DxvkCsChunkFlags m_flags; alignas(64) - char m_data[MaxBlockSize]; + char m_data[DxvkCsChunkSize]; template - void* alloc() { + void* alloc(size_t extra) { if (alignof(T) > alignof(DxvkCsCmd)) m_commandOffset = dxvk::align(m_commandOffset, alignof(T)); - if (unlikely(m_commandOffset + sizeof(T) > MaxBlockSize)) + if (unlikely(m_commandOffset + sizeof(T) + extra > DxvkCsChunkSize)) return nullptr; void* result = &m_data[m_commandOffset]; - m_commandOffset += sizeof(T); + m_commandOffset += sizeof(T) + extra; return result; } @@ -420,7 +501,7 @@ namespace dxvk { * commands on a DXVK context. */ class DxvkCsThread { - + public: constexpr static uint64_t SynchronizeAll = ~0ull; @@ -515,5 +596,5 @@ namespace dxvk { void threadFunc(); }; - + }