mirror of
https://github.com/doitsujin/dxvk.git
synced 2025-02-27 04:54:15 +01:00
[dxvk,d3d11] Refactor CS command data allocation
Allows us to allocate a (potentially growing) array of arbitrary data structures for a CS command.
This commit is contained in:
parent
20dc389ab7
commit
fc3d3ae331
@ -10,23 +10,13 @@ namespace dxvk {
|
||||
* Used to identify the type of command
|
||||
* data most recently added to a CS chunk.
|
||||
*/
|
||||
enum class D3D11CmdType {
|
||||
enum class D3D11CmdType : uint32_t {
|
||||
None,
|
||||
DrawIndirect,
|
||||
DrawIndirectIndexed,
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* \brief Command data header
|
||||
*
|
||||
* Stores the command type. All command
|
||||
* data structs must inherit this struct.
|
||||
*/
|
||||
struct D3D11CmdData {
|
||||
D3D11CmdType type;
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* \brief Indirect draw command data
|
||||
*
|
||||
@ -34,10 +24,10 @@ namespace dxvk {
|
||||
* the first draw, as well as the number of
|
||||
* draws to execute.
|
||||
*/
|
||||
struct D3D11CmdDrawIndirectData : public D3D11CmdData {
|
||||
struct D3D11CmdDrawIndirectData {
|
||||
uint32_t offset;
|
||||
uint32_t count;
|
||||
uint32_t stride;
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
|
@ -19,8 +19,7 @@ namespace dxvk {
|
||||
m_flags (ContextFlags),
|
||||
m_staging (Device, StagingBufferSize),
|
||||
m_csFlags (CsFlags),
|
||||
m_csChunk (AllocCsChunk()),
|
||||
m_cmdData (nullptr) {
|
||||
m_csChunk (AllocCsChunk()) {
|
||||
// Create local allocation cache with the same properties
|
||||
// that we will use for common dynamic buffer types
|
||||
uint32_t cachedDynamic = pParent->GetOptions()->cachedDynamicResources;
|
||||
@ -1125,28 +1124,28 @@ namespace dxvk {
|
||||
if (unlikely(HasDirtyGraphicsBindings()))
|
||||
ApplyDirtyGraphicsBindings();
|
||||
|
||||
// If possible, batch up multiple indirect draw calls of
|
||||
// the same type into one single multiDrawIndirect call
|
||||
auto cmdData = static_cast<D3D11CmdDrawIndirectData*>(m_cmdData);
|
||||
auto stride = 0u;
|
||||
// If possible, batch multiple indirect draw calls into one single multidraw call
|
||||
if (m_csDataType == D3D11CmdType::DrawIndirectIndexed) {
|
||||
auto cmdData = static_cast<D3D11CmdDrawIndirectData*>(m_csData->first());
|
||||
auto stride = GetIndirectCommandStride(cmdData, AlignedByteOffsetForArgs, sizeof(VkDrawIndexedIndirectCommand));
|
||||
|
||||
if (cmdData && cmdData->type == D3D11CmdType::DrawIndirectIndexed)
|
||||
stride = GetIndirectCommandStride(cmdData, AlignedByteOffsetForArgs, sizeof(VkDrawIndexedIndirectCommand));
|
||||
|
||||
if (stride) {
|
||||
cmdData->count += 1;
|
||||
cmdData->stride = stride;
|
||||
} else {
|
||||
cmdData = EmitCsCmd<D3D11CmdDrawIndirectData>(
|
||||
[] (DxvkContext* ctx, const D3D11CmdDrawIndirectData* data) {
|
||||
ctx->drawIndexedIndirect(data->offset, data->count, data->stride, true);
|
||||
});
|
||||
|
||||
cmdData->type = D3D11CmdType::DrawIndirectIndexed;
|
||||
cmdData->offset = AlignedByteOffsetForArgs;
|
||||
cmdData->count = 1;
|
||||
cmdData->stride = 0;
|
||||
if (stride) {
|
||||
cmdData->count += 1;
|
||||
cmdData->stride = stride;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Need to start a new draw sequence
|
||||
EmitCsCmd<D3D11CmdDrawIndirectData>(D3D11CmdType::DrawIndirectIndexed, 1u,
|
||||
[] (DxvkContext* ctx, const D3D11CmdDrawIndirectData* data, size_t) {
|
||||
ctx->drawIndexedIndirect(data->offset, data->count, data->stride, true);
|
||||
});
|
||||
|
||||
auto cmdData = new (m_csData->first()) D3D11CmdDrawIndirectData();
|
||||
cmdData->offset = AlignedByteOffsetForArgs;
|
||||
cmdData->count = 1;
|
||||
cmdData->stride = 0;
|
||||
}
|
||||
|
||||
|
||||
@ -1163,28 +1162,28 @@ namespace dxvk {
|
||||
if (unlikely(HasDirtyGraphicsBindings()))
|
||||
ApplyDirtyGraphicsBindings();
|
||||
|
||||
// If possible, batch up multiple indirect draw calls of
|
||||
// the same type into one single multiDrawIndirect call
|
||||
auto cmdData = static_cast<D3D11CmdDrawIndirectData*>(m_cmdData);
|
||||
auto stride = 0u;
|
||||
// If possible, batch multiple indirect draw calls into one single multidraw call
|
||||
if (m_csDataType == D3D11CmdType::DrawIndirect) {
|
||||
auto cmdData = static_cast<D3D11CmdDrawIndirectData*>(m_csData->first());
|
||||
auto stride = GetIndirectCommandStride(cmdData, AlignedByteOffsetForArgs, sizeof(VkDrawIndirectCommand));
|
||||
|
||||
if (cmdData && cmdData->type == D3D11CmdType::DrawIndirect)
|
||||
stride = GetIndirectCommandStride(cmdData, AlignedByteOffsetForArgs, sizeof(VkDrawIndirectCommand));
|
||||
|
||||
if (stride) {
|
||||
cmdData->count += 1;
|
||||
cmdData->stride = stride;
|
||||
} else {
|
||||
cmdData = EmitCsCmd<D3D11CmdDrawIndirectData>(
|
||||
[] (DxvkContext* ctx, const D3D11CmdDrawIndirectData* data) {
|
||||
ctx->drawIndirect(data->offset, data->count, data->stride, true);
|
||||
});
|
||||
|
||||
cmdData->type = D3D11CmdType::DrawIndirect;
|
||||
cmdData->offset = AlignedByteOffsetForArgs;
|
||||
cmdData->count = 1;
|
||||
cmdData->stride = 0;
|
||||
if (stride) {
|
||||
cmdData->count += 1;
|
||||
cmdData->stride = stride;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Need to start a new draw sequence
|
||||
EmitCsCmd<D3D11CmdDrawIndirectData>(D3D11CmdType::DrawIndirect, 1u,
|
||||
[] (DxvkContext* ctx, const D3D11CmdDrawIndirectData* data, size_t) {
|
||||
ctx->drawIndirect(data->offset, data->count, data->stride, true);
|
||||
});
|
||||
|
||||
auto cmdData = new (m_csData->first()) D3D11CmdDrawIndirectData();
|
||||
cmdData->offset = AlignedByteOffsetForArgs;
|
||||
cmdData->count = 1;
|
||||
cmdData->stride = 0;
|
||||
}
|
||||
|
||||
|
||||
|
@ -793,9 +793,11 @@ namespace dxvk {
|
||||
|
||||
DxvkStagingBuffer m_staging;
|
||||
|
||||
D3D11CmdType m_csDataType = D3D11CmdType::None;
|
||||
|
||||
DxvkCsChunkFlags m_csFlags;
|
||||
DxvkCsChunkRef m_csChunk;
|
||||
D3D11CmdData* m_cmdData;
|
||||
DxvkCsDataBlock* m_csData = nullptr;
|
||||
|
||||
DxvkLocalAllocationCache m_allocationCache;
|
||||
|
||||
@ -1152,7 +1154,10 @@ namespace dxvk {
|
||||
|
||||
template<bool AllowFlush = true, typename Cmd>
|
||||
void EmitCs(Cmd&& command) {
|
||||
m_cmdData = nullptr;
|
||||
if (unlikely(m_csDataType != D3D11CmdType::None)) {
|
||||
m_csData = nullptr;
|
||||
m_csDataType = D3D11CmdType::None;
|
||||
}
|
||||
|
||||
if (unlikely(!m_csChunk->push(command))) {
|
||||
GetTypedContext()->EmitCsChunk(std::move(m_csChunk));
|
||||
@ -1165,12 +1170,12 @@ namespace dxvk {
|
||||
}
|
||||
}
|
||||
|
||||
template<typename M, bool AllowFlush = true, typename Cmd, typename... Args>
|
||||
M* EmitCsCmd(Cmd&& command, Args&&... args) {
|
||||
M* data = m_csChunk->pushCmd<M, Cmd, Args...>(
|
||||
command, std::forward<Args>(args)...);
|
||||
template<typename M, bool AllowFlush = true, typename Cmd>
|
||||
void EmitCsCmd(D3D11CmdType type, size_t count, Cmd&& command) {
|
||||
m_csDataType = type;
|
||||
m_csData = m_csChunk->pushCmd<M, Cmd>(command, count);
|
||||
|
||||
if (unlikely(!data)) {
|
||||
if (unlikely(!m_csData)) {
|
||||
GetTypedContext()->EmitCsChunk(std::move(m_csChunk));
|
||||
m_csChunk = AllocCsChunk();
|
||||
|
||||
@ -1179,19 +1184,17 @@ namespace dxvk {
|
||||
|
||||
// We must record this command after the potential
|
||||
// flush since the caller may still access the data
|
||||
data = m_csChunk->pushCmd<M, Cmd, Args...>(
|
||||
command, std::forward<Args>(args)...);
|
||||
m_csData = m_csChunk->pushCmd<M, Cmd>(command, count);
|
||||
}
|
||||
|
||||
m_cmdData = data;
|
||||
return data;
|
||||
}
|
||||
|
||||
void FlushCsChunk() {
|
||||
if (likely(!m_csChunk->empty())) {
|
||||
m_csData = nullptr;
|
||||
m_csDataType = D3D11CmdType::None;
|
||||
|
||||
GetTypedContext()->EmitCsChunk(std::move(m_csChunk));
|
||||
m_csChunk = AllocCsChunk();
|
||||
m_cmdData = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -11,7 +11,9 @@
|
||||
#include "dxvk_context.h"
|
||||
|
||||
namespace dxvk {
|
||||
|
||||
|
||||
constexpr static size_t DxvkCsChunkSize = 16384;
|
||||
|
||||
/**
|
||||
* \brief Command stream operation
|
||||
*
|
||||
@ -86,6 +88,41 @@ namespace dxvk {
|
||||
};
|
||||
|
||||
|
||||
/**
 * \brief Command data block
 *
 * Header describing an array of structures that belongs to
 * a CS command. The array may grow after the command has
 * been recorded, and the command iterates over it.
 */
class DxvkCsDataBlock {
  friend class DxvkCsChunk;

public:

  /**
   * \brief Queries structure count
   * \returns Number of structures currently in the array
   */
  size_t count() const {
    return static_cast<size_t>(m_structCount);
  }

  /**
   * \brief Locates the start of the structure array
   *
   * The address is derived from the address of this
   * object plus the stored byte offset.
   * \returns Untyped pointer to first structure
   */
  void* first() {
    char* self = reinterpret_cast<char*>(this);
    return self + m_dataOffset;
  }

private:

  // Byte distance from this object to its first structure
  uint32_t m_dataOffset   = 0u;
  // Size of a single structure, in bytes
  uint16_t m_structSize   = 0u;
  // Number of structures in the array
  uint16_t m_structCount  = 0u;

};
|
||||
|
||||
|
||||
/**
|
||||
* \brief Typed command with metadata
|
||||
*
|
||||
@ -98,26 +135,33 @@ namespace dxvk {
|
||||
|
||||
public:
|
||||
|
||||
template<typename... Args>
|
||||
DxvkCsDataCmd(T&& cmd, Args&&... args)
|
||||
: m_command (std::move(cmd)),
|
||||
m_data (std::forward<Args>(args)...) { }
|
||||
|
||||
DxvkCsDataCmd(T&& cmd)
|
||||
: m_command(std::move(cmd)) { }
|
||||
|
||||
~DxvkCsDataCmd() {
|
||||
auto data = reinterpret_cast<M*>(m_data.first());
|
||||
|
||||
for (size_t i = 0; i < m_data.count(); i++)
|
||||
data[i].~M();
|
||||
}
|
||||
|
||||
DxvkCsDataCmd (DxvkCsDataCmd&&) = delete;
|
||||
DxvkCsDataCmd& operator = (DxvkCsDataCmd&&) = delete;
|
||||
|
||||
void exec(DxvkContext* ctx) {
|
||||
m_command(ctx, &m_data);
|
||||
// No const here so that the function can move objects efficiently
|
||||
m_command(ctx, reinterpret_cast<M*>(m_data.first()), m_data.count());
|
||||
}
|
||||
|
||||
M* data() {
|
||||
DxvkCsDataBlock* data() {
|
||||
return &m_data;
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
T m_command;
|
||||
M m_data;
|
||||
alignas(M)
|
||||
T m_command;
|
||||
DxvkCsDataBlock m_data;
|
||||
|
||||
};
|
||||
|
||||
@ -140,12 +184,12 @@ namespace dxvk {
|
||||
* Stores a list of commands.
|
||||
*/
|
||||
class DxvkCsChunk : public RcObject {
|
||||
constexpr static size_t MaxBlockSize = 16384;
|
||||
|
||||
public:
|
||||
|
||||
|
||||
DxvkCsChunk();
|
||||
~DxvkCsChunk();
|
||||
|
||||
|
||||
/**
|
||||
* \brief Checks whether the chunk is empty
|
||||
* \returns \c true if the chunk is empty
|
||||
@ -167,7 +211,7 @@ namespace dxvk {
|
||||
template<typename T>
|
||||
bool push(T& command) {
|
||||
using FuncType = DxvkCsTypedCmd<T>;
|
||||
void* ptr = alloc<FuncType>();
|
||||
void* ptr = alloc<FuncType>(0u);
|
||||
|
||||
if (unlikely(!ptr))
|
||||
return false;
|
||||
@ -186,23 +230,60 @@ namespace dxvk {
|
||||
* \brief Adds a command with data to the chunk
|
||||
*
|
||||
* \param [in] command The command to add
|
||||
* \param [in] args Constructor args for the data object
|
||||
* \param [in] count Number of items to allocate. Should be at least
|
||||
* 1 in order to avoid the possibility of an empty command. Note
|
||||
* that all allocated structures \e must be initialized before
|
||||
* handing off the command to the worker thread.
|
||||
* \returns Pointer to the data object, or \c nullptr
|
||||
*/
|
||||
template<typename M, typename T, typename... Args>
|
||||
M* pushCmd(T& command, Args&&... args) {
|
||||
template<typename M, typename T>
|
||||
DxvkCsDataBlock* pushCmd(T& command, size_t count) {
|
||||
size_t dataSize = count * sizeof(M);
|
||||
|
||||
// DxvkCsDataCmd is aligned to M
|
||||
using FuncType = DxvkCsDataCmd<T, M>;
|
||||
void* ptr = alloc<FuncType>();
|
||||
void* ptr = alloc<FuncType>(dataSize);
|
||||
|
||||
if (unlikely(!ptr))
|
||||
return nullptr;
|
||||
|
||||
auto next = new (ptr) FuncType(std::move(command), std::forward<Args>(args)...);
|
||||
// Command data is always packed tightly after the function object
|
||||
auto next = new (ptr) FuncType(std::move(command));
|
||||
append(next);
|
||||
|
||||
return next->data();
|
||||
// Do some cursed pointer math here so that the block can figure out
|
||||
// where its data is stored based on its own address. This saves a
|
||||
// decent amount of CS chunk memory compared to storing a pointer.
|
||||
auto block = next->data();
|
||||
block->m_dataOffset = reinterpret_cast<uintptr_t>(&m_data[m_commandOffset - dataSize])
|
||||
- reinterpret_cast<uintptr_t>(block);
|
||||
block->m_structSize = sizeof(M);
|
||||
block->m_structCount = count;
|
||||
return block;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* \brief Allocates more storage for a data block
|
||||
*
|
||||
* The data block \e must be owned by the last command added to
|
||||
* the CS chunk, or this may overwrite subsequent command data.
|
||||
* \param [in] block Data block
|
||||
* \param [in] count Number of structures to allocate
|
||||
* \returns Pointer to first allocated structure, or \c nullptr
|
||||
*/
|
||||
void* pushData(DxvkCsDataBlock* block, uint32_t count) {
  // Compute the byte size in 64 bits. A 32-bit product of struct size
  // and count could wrap around and slip past the capacity check below.
  uint64_t dataSize = static_cast<uint64_t>(block->m_structSize) * static_cast<uint64_t>(count);

  // Not enough room left in the chunk, let the caller deal with it
  if (unlikely(static_cast<uint64_t>(m_commandOffset) + dataSize > DxvkCsChunkSize))
    return nullptr;

  // New structures are placed at the current write offset of the chunk.
  // The cast below is lossless since dataSize is bounded by the chunk size.
  void* ptr = &m_data[m_commandOffset];
  m_commandOffset += static_cast<size_t>(dataSize);

  block->m_structCount += count;
  return ptr;
}
|
||||
|
||||
/**
|
||||
* \brief Initializes chunk for recording
|
||||
* \param [in] flags Chunk flags
|
||||
@ -237,18 +318,18 @@ namespace dxvk {
|
||||
DxvkCsChunkFlags m_flags;
|
||||
|
||||
alignas(64)
|
||||
char m_data[MaxBlockSize];
|
||||
char m_data[DxvkCsChunkSize];
|
||||
|
||||
template<typename T>
|
||||
void* alloc() {
|
||||
void* alloc(size_t extra) {
|
||||
if (alignof(T) > alignof(DxvkCsCmd))
|
||||
m_commandOffset = dxvk::align(m_commandOffset, alignof(T));
|
||||
|
||||
if (unlikely(m_commandOffset + sizeof(T) > MaxBlockSize))
|
||||
if (unlikely(m_commandOffset + sizeof(T) + extra > DxvkCsChunkSize))
|
||||
return nullptr;
|
||||
|
||||
void* result = &m_data[m_commandOffset];
|
||||
m_commandOffset += sizeof(T);
|
||||
m_commandOffset += sizeof(T) + extra;
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -420,7 +501,7 @@ namespace dxvk {
|
||||
* commands on a DXVK context.
|
||||
*/
|
||||
class DxvkCsThread {
|
||||
|
||||
|
||||
public:
|
||||
|
||||
constexpr static uint64_t SynchronizeAll = ~0ull;
|
||||
@ -515,5 +596,5 @@ namespace dxvk {
|
||||
void threadFunc();
|
||||
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user