1
0
mirror of https://github.com/doitsujin/dxvk.git synced 2025-02-27 04:54:15 +01:00

[dxvk,d3d11] Refactor CS command data allocation

Allows us to allocate a (potentially growing) array of
arbitrary data structures for a CS command.
This commit is contained in:
Philip Rebohle 2025-02-21 13:48:03 +01:00 committed by Philip Rebohle
parent 20dc389ab7
commit fc3d3ae331
4 changed files with 169 additions and 96 deletions

View File

@ -10,23 +10,13 @@ namespace dxvk {
* Used to identify the type of command
* data most recently added to a CS chunk.
*/
enum class D3D11CmdType {
enum class D3D11CmdType : uint32_t {
None,
DrawIndirect,
DrawIndirectIndexed,
};
/**
* \brief Command data header
*
* Stores the command type. All command
* data structs must inherit this struct.
*/
struct D3D11CmdData {
D3D11CmdType type;
};
/**
* \brief Indirect draw command data
*
@ -34,10 +24,10 @@ namespace dxvk {
* the first draw, as well as the number of
* draws to execute.
*/
struct D3D11CmdDrawIndirectData : public D3D11CmdData {
struct D3D11CmdDrawIndirectData {
uint32_t offset;
uint32_t count;
uint32_t stride;
};
}
}

View File

@ -19,8 +19,7 @@ namespace dxvk {
m_flags (ContextFlags),
m_staging (Device, StagingBufferSize),
m_csFlags (CsFlags),
m_csChunk (AllocCsChunk()),
m_cmdData (nullptr) {
m_csChunk (AllocCsChunk()) {
// Create local allocation cache with the same properties
// that we will use for common dynamic buffer types
uint32_t cachedDynamic = pParent->GetOptions()->cachedDynamicResources;
@ -1125,28 +1124,28 @@ namespace dxvk {
if (unlikely(HasDirtyGraphicsBindings()))
ApplyDirtyGraphicsBindings();
// If possible, batch up multiple indirect draw calls of
// the same type into one single multiDrawIndirect call
auto cmdData = static_cast<D3D11CmdDrawIndirectData*>(m_cmdData);
auto stride = 0u;
// If possible, batch multiple indirect draw calls into one single multidraw call
if (m_csDataType == D3D11CmdType::DrawIndirectIndexed) {
auto cmdData = static_cast<D3D11CmdDrawIndirectData*>(m_csData->first());
auto stride = GetIndirectCommandStride(cmdData, AlignedByteOffsetForArgs, sizeof(VkDrawIndexedIndirectCommand));
if (cmdData && cmdData->type == D3D11CmdType::DrawIndirectIndexed)
stride = GetIndirectCommandStride(cmdData, AlignedByteOffsetForArgs, sizeof(VkDrawIndexedIndirectCommand));
if (stride) {
cmdData->count += 1;
cmdData->stride = stride;
} else {
cmdData = EmitCsCmd<D3D11CmdDrawIndirectData>(
[] (DxvkContext* ctx, const D3D11CmdDrawIndirectData* data) {
ctx->drawIndexedIndirect(data->offset, data->count, data->stride, true);
});
cmdData->type = D3D11CmdType::DrawIndirectIndexed;
cmdData->offset = AlignedByteOffsetForArgs;
cmdData->count = 1;
cmdData->stride = 0;
if (stride) {
cmdData->count += 1;
cmdData->stride = stride;
return;
}
}
// Need to start a new draw sequence
EmitCsCmd<D3D11CmdDrawIndirectData>(D3D11CmdType::DrawIndirectIndexed, 1u,
[] (DxvkContext* ctx, const D3D11CmdDrawIndirectData* data, size_t) {
ctx->drawIndexedIndirect(data->offset, data->count, data->stride, true);
});
auto cmdData = new (m_csData->first()) D3D11CmdDrawIndirectData();
cmdData->offset = AlignedByteOffsetForArgs;
cmdData->count = 1;
cmdData->stride = 0;
}
@ -1163,28 +1162,28 @@ namespace dxvk {
if (unlikely(HasDirtyGraphicsBindings()))
ApplyDirtyGraphicsBindings();
// If possible, batch up multiple indirect draw calls of
// the same type into one single multiDrawIndirect call
auto cmdData = static_cast<D3D11CmdDrawIndirectData*>(m_cmdData);
auto stride = 0u;
// If possible, batch multiple indirect draw calls into one single multidraw call
if (m_csDataType == D3D11CmdType::DrawIndirect) {
auto cmdData = static_cast<D3D11CmdDrawIndirectData*>(m_csData->first());
auto stride = GetIndirectCommandStride(cmdData, AlignedByteOffsetForArgs, sizeof(VkDrawIndirectCommand));
if (cmdData && cmdData->type == D3D11CmdType::DrawIndirect)
stride = GetIndirectCommandStride(cmdData, AlignedByteOffsetForArgs, sizeof(VkDrawIndirectCommand));
if (stride) {
cmdData->count += 1;
cmdData->stride = stride;
} else {
cmdData = EmitCsCmd<D3D11CmdDrawIndirectData>(
[] (DxvkContext* ctx, const D3D11CmdDrawIndirectData* data) {
ctx->drawIndirect(data->offset, data->count, data->stride, true);
});
cmdData->type = D3D11CmdType::DrawIndirect;
cmdData->offset = AlignedByteOffsetForArgs;
cmdData->count = 1;
cmdData->stride = 0;
if (stride) {
cmdData->count += 1;
cmdData->stride = stride;
return;
}
}
// Need to start a new draw sequence
EmitCsCmd<D3D11CmdDrawIndirectData>(D3D11CmdType::DrawIndirect, 1u,
[] (DxvkContext* ctx, const D3D11CmdDrawIndirectData* data, size_t) {
ctx->drawIndirect(data->offset, data->count, data->stride, true);
});
auto cmdData = new (m_csData->first()) D3D11CmdDrawIndirectData();
cmdData->offset = AlignedByteOffsetForArgs;
cmdData->count = 1;
cmdData->stride = 0;
}

View File

@ -793,9 +793,11 @@ namespace dxvk {
DxvkStagingBuffer m_staging;
D3D11CmdType m_csDataType = D3D11CmdType::None;
DxvkCsChunkFlags m_csFlags;
DxvkCsChunkRef m_csChunk;
D3D11CmdData* m_cmdData;
DxvkCsDataBlock* m_csData = nullptr;
DxvkLocalAllocationCache m_allocationCache;
@ -1152,7 +1154,10 @@ namespace dxvk {
template<bool AllowFlush = true, typename Cmd>
void EmitCs(Cmd&& command) {
m_cmdData = nullptr;
if (unlikely(m_csDataType != D3D11CmdType::None)) {
m_csData = nullptr;
m_csDataType = D3D11CmdType::None;
}
if (unlikely(!m_csChunk->push(command))) {
GetTypedContext()->EmitCsChunk(std::move(m_csChunk));
@ -1165,12 +1170,12 @@ namespace dxvk {
}
}
template<typename M, bool AllowFlush = true, typename Cmd, typename... Args>
M* EmitCsCmd(Cmd&& command, Args&&... args) {
M* data = m_csChunk->pushCmd<M, Cmd, Args...>(
command, std::forward<Args>(args)...);
template<typename M, bool AllowFlush = true, typename Cmd>
void EmitCsCmd(D3D11CmdType type, size_t count, Cmd&& command) {
m_csDataType = type;
m_csData = m_csChunk->pushCmd<M, Cmd>(command, count);
if (unlikely(!data)) {
if (unlikely(!m_csData)) {
GetTypedContext()->EmitCsChunk(std::move(m_csChunk));
m_csChunk = AllocCsChunk();
@ -1179,19 +1184,17 @@ namespace dxvk {
// We must record this command after the potential
// flush since the caller may still access the data
data = m_csChunk->pushCmd<M, Cmd, Args...>(
command, std::forward<Args>(args)...);
m_csData = m_csChunk->pushCmd<M, Cmd>(command, count);
}
m_cmdData = data;
return data;
}
void FlushCsChunk() {
if (likely(!m_csChunk->empty())) {
m_csData = nullptr;
m_csDataType = D3D11CmdType::None;
GetTypedContext()->EmitCsChunk(std::move(m_csChunk));
m_csChunk = AllocCsChunk();
m_cmdData = nullptr;
}
}

View File

@ -11,7 +11,9 @@
#include "dxvk_context.h"
namespace dxvk {
constexpr static size_t DxvkCsChunkSize = 16384;
/**
* \brief Command stream operation
*
@ -86,6 +88,41 @@ namespace dxvk {
};
/**
 * \brief Command data block
 *
 * Small header describing an array of structures that a command
 * can traverse. Instead of a pointer, it stores the byte distance
 * between the block object itself and the first structure, along
 * with the size and the number of structures. All fields are
 * private and get written by \c DxvkCsChunk, which is a friend.
 */
class DxvkCsDataBlock {
  friend class DxvkCsChunk;
public:

  /**
   * \brief Queries structure count
   * \returns Number of structures currently in the array
   */
  size_t count() const {
    return static_cast<size_t>(m_structCount);
  }

  /**
   * \brief Locates the structure array
   *
   * The address is computed by adding the stored byte
   * offset to the address of the block object itself.
   * \returns Untyped pointer to first structure
   */
  void* first() {
    char* self = reinterpret_cast<char*>(this);
    return self + m_dataOffset;
  }

private:

  // Byte offset from this object to the first structure
  uint32_t m_dataOffset = 0u;
  // Size of one structure, in bytes
  uint16_t m_structSize = 0u;
  // Number of structures in the array
  uint16_t m_structCount = 0u;

};
/**
* \brief Typed command with metadata
*
@ -98,26 +135,33 @@ namespace dxvk {
public:
template<typename... Args>
DxvkCsDataCmd(T&& cmd, Args&&... args)
: m_command (std::move(cmd)),
m_data (std::forward<Args>(args)...) { }
DxvkCsDataCmd(T&& cmd)
: m_command(std::move(cmd)) { }
~DxvkCsDataCmd() {
  // The structures referenced by the data block are not members of
  // this object, so their destructors have to be invoked explicitly,
  // in ascending order from the first structure to the last one.
  M* item = reinterpret_cast<M*>(m_data.first());
  M* const end = item + m_data.count();

  while (item != end) {
    item->~M();
    ++item;
  }
}
DxvkCsDataCmd (DxvkCsDataCmd&&) = delete;
DxvkCsDataCmd& operator = (DxvkCsDataCmd&&) = delete;
void exec(DxvkContext* ctx) {
m_command(ctx, &m_data);
// No const here so that the function can move objects efficiently
m_command(ctx, reinterpret_cast<M*>(m_data.first()), m_data.count());
}
M* data() {
DxvkCsDataBlock* data() {
return &m_data;
}
private:
T m_command;
M m_data;
alignas(M)
T m_command;
DxvkCsDataBlock m_data;
};
@ -140,12 +184,12 @@ namespace dxvk {
* Stores a list of commands.
*/
class DxvkCsChunk : public RcObject {
constexpr static size_t MaxBlockSize = 16384;
public:
DxvkCsChunk();
~DxvkCsChunk();
/**
* \brief Checks whether the chunk is empty
* \returns \c true if the chunk is empty
@ -167,7 +211,7 @@ namespace dxvk {
template<typename T>
bool push(T& command) {
using FuncType = DxvkCsTypedCmd<T>;
void* ptr = alloc<FuncType>();
void* ptr = alloc<FuncType>(0u);
if (unlikely(!ptr))
return false;
@ -186,23 +230,60 @@ namespace dxvk {
* \brief Adds a command with data to the chunk
*
* \param [in] command The command to add
* \param [in] args Constructor args for the data object
* \param [in] count Number of items to allocate. Should be at least
* 1 in order to avoid the possibility of an empty command. Note
* that all allocated structures \e must be initialized before
* handing off the command to the worker thread.
* \returns Pointer to the data object, or \c nullptr
*/
template<typename M, typename T, typename... Args>
M* pushCmd(T& command, Args&&... args) {
template<typename M, typename T>
DxvkCsDataBlock* pushCmd(T& command, size_t count) {
size_t dataSize = count * sizeof(M);
// DxvkCsDataCmd is aligned to M
using FuncType = DxvkCsDataCmd<T, M>;
void* ptr = alloc<FuncType>();
void* ptr = alloc<FuncType>(dataSize);
if (unlikely(!ptr))
return nullptr;
auto next = new (ptr) FuncType(std::move(command), std::forward<Args>(args)...);
// Command data is always packed tightly after the function object
auto next = new (ptr) FuncType(std::move(command));
append(next);
return next->data();
// Do some cursed pointer math here so that the block can figure out
// where its data is stored based on its own address. This saves a
// decent amount of CS chunk memory compared to storing a pointer.
auto block = next->data();
block->m_dataOffset = reinterpret_cast<uintptr_t>(&m_data[m_commandOffset - dataSize])
- reinterpret_cast<uintptr_t>(block);
block->m_structSize = sizeof(M);
block->m_structCount = count;
return block;
}
/**
 * \brief Allocates more storage for a data block
 *
 * Reserves room for \c count additional structures at the current
 * end of the chunk and adds them to the structure count of the block.
 * The data block \e must be owned by the last command added to the
 * CS chunk, or this may overwrite subsequent command data.
 *
 * This function does not construct any objects in the new storage.
 * \param [in] block Data block
 * \param [in] count Number of structures to allocate
 * \returns Pointer to first allocated structure, or \c nullptr
 *    if the chunk does not have enough space left.
 */
void* pushData(DxvkCsDataBlock* block, uint32_t count) {
  // Do the size math in 64 bits. In 32-bit arithmetic, a large count
  // could wrap the byte size around to a small number that passes the
  // capacity check even though the request does not fit at all.
  uint64_t dataSize = static_cast<uint64_t>(block->m_structSize) * static_cast<uint64_t>(count);

  if (unlikely(static_cast<uint64_t>(m_commandOffset) + dataSize > static_cast<uint64_t>(DxvkCsChunkSize)))
    return nullptr;

  // New structures start exactly where the chunk currently ends
  void* ptr = &m_data[m_commandOffset];

  // dataSize is bounded by the chunk size at this point
  m_commandOffset += static_cast<size_t>(dataSize);
  block->m_structCount += count;
  return ptr;
}
/**
* \brief Initializes chunk for recording
* \param [in] flags Chunk flags
@ -237,18 +318,18 @@ namespace dxvk {
DxvkCsChunkFlags m_flags;
alignas(64)
char m_data[MaxBlockSize];
char m_data[DxvkCsChunkSize];
template<typename T>
void* alloc() {
void* alloc(size_t extra) {
if (alignof(T) > alignof(DxvkCsCmd))
m_commandOffset = dxvk::align(m_commandOffset, alignof(T));
if (unlikely(m_commandOffset + sizeof(T) > MaxBlockSize))
if (unlikely(m_commandOffset + sizeof(T) + extra > DxvkCsChunkSize))
return nullptr;
void* result = &m_data[m_commandOffset];
m_commandOffset += sizeof(T);
m_commandOffset += sizeof(T) + extra;
return result;
}
@ -420,7 +501,7 @@ namespace dxvk {
* commands on a DXVK context.
*/
class DxvkCsThread {
public:
constexpr static uint64_t SynchronizeAll = ~0ull;
@ -515,5 +596,5 @@ namespace dxvk {
void threadFunc();
};
}