1
0
mirror of https://github.com/doitsujin/dxvk.git synced 2025-03-14 04:29:15 +01:00

[d3d9] Build shader constant UBOs on CS thread

This commit is contained in:
Robin Kertels 2025-03-07 03:51:12 +01:00
parent 35e2ee3518
commit eb999a0194
No known key found for this signature in database
GPG Key ID: 3824904F14D40757
14 changed files with 524 additions and 210 deletions

View File

@ -35,7 +35,6 @@ namespace dxvk {
}
D3D9ConstantBuffer::~D3D9ConstantBuffer() {
}
@ -136,4 +135,116 @@ namespace dxvk {
device->properties().extRobustness2.robustUniformBufferAccessSizeAlignment);
}
// Constant Buffer living on the CS thread

// Default constructor: performs no work of its own, so every member keeps
// its in-class default (no device, no buffer, zero size and alignment).
D3D9CSConstantBuffer::D3D9CSConstantBuffer() {
}
// Convenience constructor for shader constant buffers. It only delegates to
// the generic constructor, passing:
//  - usage:  UNIFORM_BUFFER | STORAGE_BUFFER
//  - stages: the Vulkan stage flags returned by GetShaderStage(ShaderStage)
//  - slot:   the binding computed by computeResourceSlotId for a
//            DxsoBindingType::ConstantBuffer of type BufferType
// Size and UseDeviceLocalBuffer are forwarded unchanged.
D3D9CSConstantBuffer::D3D9CSConstantBuffer(
const Rc<DxvkDevice>& Device,
DxsoProgramType ShaderStage,
DxsoConstantBuffers BufferType,
VkDeviceSize Size,
bool UseDeviceLocalBuffer)
: D3D9CSConstantBuffer(Device, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, GetShaderStage(ShaderStage),
computeResourceSlotId(ShaderStage, DxsoBindingType::ConstantBuffer, BufferType),
Size, UseDeviceLocalBuffer) {
}
// Generic constructor. Only records the parameters and queries the required
// alignment from the device; the backing buffer is not created here.
// Alloc and AllocSlice create it on first use via createBuffer.
D3D9CSConstantBuffer::D3D9CSConstantBuffer(
const Rc<DxvkDevice>& Device,
VkBufferUsageFlags Usage,
VkShaderStageFlags Stages,
uint32_t ResourceSlot,
VkDeviceSize Size,
bool UseDeviceLocalBuffer)
: m_device (Device)
, m_binding (ResourceSlot)
, m_usage (Usage)
, m_stages (Stages)
, m_size (Size)
// getAlignment only reads device properties, not members of this object
, m_align (getAlignment(Device))
, m_useDeviceLocalBuffer(UseDeviceLocalBuffer) {
}
// Empty destructor body: no explicit cleanup is performed here.
D3D9CSConstantBuffer::~D3D9CSConstantBuffer() {
}
/**
 * \brief Sub-allocates a region from the constant buffer
 *
 * Creates the backing buffer on first use. The requested size is
 * rounded up to the buffer alignment. If the rounded size does not
 * fit behind the current write offset, fresh backing storage is
 * allocated for the buffer and writing restarts at offset zero.
 * The allocated range is bound as the uniform buffer range for
 * this buffer's stages and binding slot.
 * \param [in] ctx Context used for binding and invalidation
 * \param [in] size Number of bytes to allocate
 * \returns Map pointer of the allocated region
 */
void* D3D9CSConstantBuffer::Alloc(DxvkContext* ctx, VkDeviceSize size) {
  if (unlikely(m_buffer == nullptr))
    m_slice = this->createBuffer(ctx);

  const VkDeviceSize alignedSize = align(size, m_align);

  if (unlikely(m_offset + alignedSize > m_size)) {
    // Not enough room left: replace the storage and start over.
    // The context receives its own reference; m_slice stays valid.
    m_slice  = m_buffer->allocateStorage();
    m_offset = 0;

    ctx->invalidateBuffer(m_buffer, Rc<DxvkResourceAllocation>(m_slice));
  }

  const VkDeviceSize regionOffset = m_offset;
  ctx->bindUniformBufferRange(m_stages, m_binding, regionOffset, alignedSize);

  char* storageBase = reinterpret_cast<char*>(m_slice->mapPtr());
  m_offset = regionOffset + alignedSize;
  return storageBase + regionOffset;
}
/**
 * \brief Allocates a full buffer slice
 *
 * Creates the backing buffer on first use, otherwise allocates
 * new storage for it. The resulting slice is handed to the context
 * through invalidateBuffer and also kept in m_slice.
 * \param [in] ctx Context used for buffer creation and invalidation
 * \returns Map pointer of the slice
 */
void* D3D9CSConstantBuffer::AllocSlice(DxvkContext* ctx) {
  if (unlikely(m_buffer == nullptr))
    m_slice = this->createBuffer(ctx);
  else
    m_slice = m_buffer->allocateStorage();

  // Pass a separate reference instead of moving m_slice itself:
  // m_slice is dereferenced right below and must not be left in a
  // moved-from state. Alloc keeps its own reference the same way.
  ctx->invalidateBuffer(m_buffer, Rc<DxvkResourceAllocation>(m_slice));
  return m_slice->mapPtr();
}
/**
 * \brief Creates the backing buffer and binds it
 *
 * The buffer size is the configured size rounded up to the
 * alignment. Usage comes from m_usage, and the access mask is
 * derived from the usage bits that are set. Memory is always
 * host-visible and host-coherent, and additionally device-local
 * if that was requested at construction.
 * \param [in] ctx Context the new buffer gets bound on
 * \returns Current storage of the newly created buffer
 */
Rc<DxvkResourceAllocation> D3D9CSConstantBuffer::createBuffer(DxvkContext* ctx) {
  DxvkBufferCreateInfo info;
  info.size      = align(m_size, m_align);
  info.usage     = m_usage;
  info.stages    = util::pipelineStages(m_stages);
  info.debugName = "Constant buffer";

  // Uniform usage implies uniform reads, storage usage implies shader reads
  info.access = ((m_usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT) ? VK_ACCESS_UNIFORM_READ_BIT : 0)
              | ((m_usage & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT) ? VK_ACCESS_SHADER_READ_BIT  : 0);

  const VkMemoryPropertyFlags memoryFlags
    = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
    | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT
    | (m_useDeviceLocalBuffer ? VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT : 0);

  m_buffer = m_device->createBuffer(info, memoryFlags);
  ctx->bindUniformBuffer(m_stages, m_binding, DxvkBufferSlice(m_buffer));

  return m_buffer->storage();
}
/**
 * \brief Computes the alignment used for allocations
 *
 * \param [in] device Device whose properties are queried
 * \returns The largest of the minimum uniform buffer offset
 *    alignment, the minimum storage buffer offset alignment
 *    and the robust uniform buffer access size alignment
 */
VkDeviceSize D3D9CSConstantBuffer::getAlignment(const Rc<DxvkDevice>& device) const {
  const auto& limits = device->properties().core.properties.limits;

  const VkDeviceSize offsetAlignment = std::max(
    limits.minUniformBufferOffsetAlignment,
    limits.minStorageBufferOffsetAlignment);

  return std::max(offsetAlignment,
    device->properties().extRobustness2.robustUniformBufferAccessSizeAlignment);
}
}

View File

@ -1,6 +1,7 @@
#pragma once
#include "../dxvk/dxvk_buffer.h"
#include "../dxvk/dxvk_context.h"
#include "../dxso/dxso_util.h"
@ -82,4 +83,80 @@ namespace dxvk {
};
/**
* \brief Constant buffer living on the CS thread
*
* Every method that touches the buffer takes the \c DxvkContext to
* operate on. The backing buffer is created lazily by the first
* call to \ref Alloc or \ref AllocSlice.
*/
class D3D9CSConstantBuffer {
public:
/**
* \brief Creates an empty object without device or buffer
*/
D3D9CSConstantBuffer();
/**
* \brief Creates a constant buffer for a shader stage
*
* Delegates to the generic constructor with uniform and
* storage buffer usage and a binding slot derived from
* the shader stage and buffer type.
* \param [in] Device Device to create the buffer on
* \param [in] ShaderStage Shader stage that reads the buffer
* \param [in] BufferType Constant buffer type, selects the binding
* \param [in] Size Buffer size in bytes, before alignment
* \param [in] UseDeviceLocalBuffer Whether to request device-local memory
*/
D3D9CSConstantBuffer(
const Rc<DxvkDevice>& Device,
DxsoProgramType ShaderStage,
DxsoConstantBuffers BufferType,
VkDeviceSize Size,
bool UseDeviceLocalBuffer);
/**
* \brief Creates a constant buffer with explicit parameters
*
* \param [in] Device Device to create the buffer on
* \param [in] Usage Buffer usage flags
* \param [in] Stages Shader stages the buffer is bound to
* \param [in] ResourceSlot Binding slot
* \param [in] Size Buffer size in bytes, before alignment
* \param [in] UseDeviceLocalBuffer Whether to request device-local memory
*/
D3D9CSConstantBuffer(
const Rc<DxvkDevice>& Device,
VkBufferUsageFlags Usage,
VkShaderStageFlags Stages,
uint32_t ResourceSlot,
VkDeviceSize Size,
bool UseDeviceLocalBuffer);
~D3D9CSConstantBuffer();
/**
* \brief Queries alignment
*
* Useful to pad copies with initialized data.
* \returns Data alignment
*/
VkDeviceSize GetAlignment() const {
return m_align;
}
/**
* \brief Allocates a given amount of memory
*
* The size is rounded up to the buffer alignment and the
* allocated range is bound on the given context.
* \param [in] ctx Context to bind and invalidate the buffer on
* \param [in] size Number of bytes to allocate
* \returns Map pointer of the allocated region
*/
void* Alloc(DxvkContext* ctx, VkDeviceSize size);
/**
* \brief Allocates a full buffer slice
*
* This must not be called if \ref Alloc is used.
* \param [in] ctx Context to invalidate the buffer on
* \returns Map pointer of the allocated region
*/
void* AllocSlice(DxvkContext* ctx);
private:
// Device used to create the backing buffer
Rc<DxvkDevice> m_device;
// Binding slot, usage flags and shader stages given at construction
uint32_t m_binding = 0u;
VkBufferUsageFlags m_usage = 0u;
VkShaderStageFlags m_stages = 0u;
// Requested buffer size and the alignment applied to allocations
VkDeviceSize m_size = 0ull;
VkDeviceSize m_align = 0ull;
// Write offset for the next Alloc call within the current slice
VkDeviceSize m_offset = 0ull;
bool m_useDeviceLocalBuffer = false;
// Backing buffer (null until first allocation) and its current slice
Rc<DxvkBuffer> m_buffer = nullptr;
Rc<DxvkResourceAllocation> m_slice = nullptr;
// Creates m_buffer, binds it on ctx and returns its storage
Rc<DxvkResourceAllocation> createBuffer(DxvkContext* ctx);
// Computes the allocation alignment from the device properties
VkDeviceSize getAlignment(const Rc<DxvkDevice>& device) const;
};
}

View File

@ -40,15 +40,30 @@ namespace dxvk {
};
struct D3D9SwvpConstantBuffers {
D3D9ConstantBuffer intBuffer;
D3D9ConstantBuffer boolBuffer;
D3D9CSConstantBuffer intBuffer;
D3D9CSConstantBuffer boolBuffer;
};
struct D3D9ConstantSets {
D3D9SwvpConstantBuffers swvp;
D3D9ConstantBuffer buffer;
template<typename ShaderConstantsStorage>
struct D3D9CSShaderConstants {
ShaderConstantsStorage constants;
// Primary buffer (contains HWVP or pixel shaders: Ints + Floats, SWVP: Floats)
D3D9CSConstantBuffer buffer;
// Secondary buffers for SWVP (one for Ints, one for Bools)
D3D9SwvpConstantBuffers swvp;
// Shader related
DxsoShaderMetaInfo meta = {};
DxsoDefinedConstants shaderDefinedConsts;
// Tracking
bool dirty = true;
uint32_t floatConstsCount = 0;
// The highest changed int and bool constants are only tracked for SWVP.
// For HWVP or pixel shaders, the maximum amount is only 16 anyway.
uint32_t intConstsCount = 0;
uint32_t boolConstsCount = 0;
};
}

View File

@ -115,10 +115,10 @@ namespace dxvk {
if (!useRobustConstantAccess) {
// Disable optimized constant copies, we always have to copy all constants.
m_vsFloatConstsCount = m_vsLayout.floatCount;
m_vsIntConstsCount = m_vsLayout.intCount;
m_vsBoolConstsCount = m_vsLayout.boolCount;
m_psFloatConstsCount = m_psLayout.floatCount;
m_csVSConsts.floatConstsCount = m_vsLayout.floatCount;
m_csVSConsts.intConstsCount = m_vsLayout.intCount;
m_csVSConsts.boolConstsCount = m_vsLayout.boolCount;
m_csPSConsts.floatConstsCount = m_psLayout.floatCount;
if (supportsRobustness2) {
Logger::warn("Disabling robust constant buffer access because of alignment.");
@ -3379,15 +3379,23 @@ namespace dxvk {
bool oldCopies = oldShader && oldShader->GetMeta().needsConstantCopies;
bool newCopies = newShader && newShader->GetMeta().needsConstantCopies;
m_consts[DxsoProgramTypes::VertexShader].dirty |= oldCopies || newCopies || !oldShader;
m_consts[DxsoProgramTypes::VertexShader].meta = newShader ? newShader->GetMeta() : DxsoShaderMetaInfo();
if (newShader && oldShader) {
m_consts[DxsoProgramTypes::VertexShader].dirty
|= newShader->GetMeta().maxConstIndexF > oldShader->GetMeta().maxConstIndexF
bool dirty = oldCopies || newCopies || !oldShader;
dirty |= newShader && oldShader && (
newShader->GetMeta().maxConstIndexF > oldShader->GetMeta().maxConstIndexF
|| newShader->GetMeta().maxConstIndexI > oldShader->GetMeta().maxConstIndexI
|| newShader->GetMeta().maxConstIndexB > oldShader->GetMeta().maxConstIndexB;
}
|| newShader->GetMeta().maxConstIndexB > oldShader->GetMeta().maxConstIndexB
);
EmitCs([
&cShaderConsts = m_csVSConsts,
cDirty = dirty,
cMeta = newShader ? newShader->GetMeta() : DxsoShaderMetaInfo(),
cShaderDefinedConsts = newShader ? newShader->GetConstants() : DxsoDefinedConstants()
](DxvkContext* ctx) {
cShaderConsts.meta = cMeta;
cShaderConsts.dirty |= cDirty;
cShaderConsts.shaderDefinedConsts = cShaderDefinedConsts;
});
m_state.vertexShader = shader;
@ -3754,15 +3762,23 @@ namespace dxvk {
bool oldCopies = oldShader && oldShader->GetMeta().needsConstantCopies;
bool newCopies = newShader && newShader->GetMeta().needsConstantCopies;
m_consts[DxsoProgramTypes::PixelShader].dirty |= oldCopies || newCopies || !oldShader;
m_consts[DxsoProgramTypes::PixelShader].meta = newShader ? newShader->GetMeta() : DxsoShaderMetaInfo();
if (newShader && oldShader) {
m_consts[DxsoProgramTypes::PixelShader].dirty
|= newShader->GetMeta().maxConstIndexF > oldShader->GetMeta().maxConstIndexF
bool dirty = oldCopies || newCopies || !oldShader;
dirty |= newShader && oldShader && (
newShader->GetMeta().maxConstIndexF > oldShader->GetMeta().maxConstIndexF
|| newShader->GetMeta().maxConstIndexI > oldShader->GetMeta().maxConstIndexI
|| newShader->GetMeta().maxConstIndexB > oldShader->GetMeta().maxConstIndexB;
}
|| newShader->GetMeta().maxConstIndexB > oldShader->GetMeta().maxConstIndexB
);
EmitCs([
&cShaderConsts = m_csPSConsts,
cDirty = dirty,
cMeta = newShader ? newShader->GetMeta() : DxsoShaderMetaInfo(),
cShaderDefinedConsts = newShader ? newShader->GetConstants() : DxsoDefinedConstants()
](DxvkContext* ctx) {
cShaderConsts.meta = cMeta;
cShaderConsts.dirty |= cDirty;
cShaderConsts.shaderDefinedConsts = cShaderDefinedConsts;
});
m_state.pixelShader = shader;
@ -5783,25 +5799,37 @@ namespace dxvk {
constexpr VkDeviceSize DefaultConstantBufferSize = 1024ull << 10;
constexpr VkDeviceSize SmallConstantBufferSize = 64ull << 10;
m_consts[DxsoProgramTypes::VertexShader].buffer = D3D9ConstantBuffer(this,
DxsoProgramType::VertexShader,
DxsoConstantBuffers::VSConstantBuffer,
DefaultConstantBufferSize);
EmitCs([
cDevice = m_dxvkDevice,
&cCSVSConsts = m_csVSConsts,
&cCSPSConsts = m_csPSConsts,
cUseDeviceLocalBuffers = m_d3d9Options.deviceLocalConstantBuffers
] (DxvkContext* ctx) {
cCSVSConsts.buffer = D3D9CSConstantBuffer(cDevice,
DxsoProgramType::VertexShader,
DxsoConstantBuffers::VSConstantBuffer,
DefaultConstantBufferSize,
cUseDeviceLocalBuffers);
m_consts[DxsoProgramTypes::VertexShader].swvp.intBuffer = D3D9ConstantBuffer(this,
DxsoProgramType::VertexShader,
DxsoConstantBuffers::VSIntConstantBuffer,
SmallConstantBufferSize);
cCSVSConsts.swvp.intBuffer = D3D9CSConstantBuffer(cDevice,
DxsoProgramType::VertexShader,
DxsoConstantBuffers::VSIntConstantBuffer,
SmallConstantBufferSize,
cUseDeviceLocalBuffers);
m_consts[DxsoProgramTypes::VertexShader].swvp.boolBuffer = D3D9ConstantBuffer(this,
DxsoProgramType::VertexShader,
DxsoConstantBuffers::VSBoolConstantBuffer,
SmallConstantBufferSize);
cCSVSConsts.swvp.boolBuffer = D3D9CSConstantBuffer(cDevice,
DxsoProgramType::VertexShader,
DxsoConstantBuffers::VSBoolConstantBuffer,
SmallConstantBufferSize,
cUseDeviceLocalBuffers);
cCSPSConsts.buffer = D3D9CSConstantBuffer(cDevice,
DxsoProgramType::PixelShader,
DxsoConstantBuffers::PSConstantBuffer,
DefaultConstantBufferSize,
cUseDeviceLocalBuffers);
});
m_consts[DxsoProgramTypes::PixelShader].buffer = D3D9ConstantBuffer(this,
DxsoProgramType::PixelShader,
DxsoConstantBuffers::PSConstantBuffer,
DefaultConstantBufferSize);
m_vsClipPlanes = D3D9ConstantBuffer(this,
DxsoProgramType::VertexShader,
@ -5841,7 +5869,7 @@ namespace dxvk {
}
inline void D3D9DeviceEx::UploadSoftwareConstantSet(const D3D9ShaderConstantsVSSoftware& Src, const D3D9ConstantLayout& Layout) {
inline void D3D9DeviceEx::UploadSoftwareConstantSet(DxvkContext* ctx, D3D9CSShaderConstants<D3D9ShaderConstantsVSSoftware>& ShaderConsts) {
/*
* SWVP raises the amount of constants by a lot.
* To avoid copying huge amounts of data for every draw call,
@ -5849,42 +5877,37 @@ namespace dxvk {
* to fit that. We rely on robustness to return 0 for OOB reads.
*/
D3D9ConstantSets& constSet = m_consts[DxsoProgramType::VertexShader];
if (!constSet.dirty)
if (!ShaderConsts.dirty)
return;
constSet.dirty = false;
ShaderConsts.dirty = false;
uint32_t floatCount = m_vsFloatConstsCount;
if (constSet.meta.needsConstantCopies) {
uint32_t floatCount = ShaderConsts.floatConstsCount;
if (ShaderConsts.meta.needsConstantCopies) {
// If the shader requires us to preserve shader defined constants,
// we copy those over. We need to adjust the amount of used floats accordingly.
auto shader = GetCommonShader(m_state.vertexShader);
floatCount = std::max(floatCount, shader->GetMaxDefinedConstant() + 1);
floatCount = std::max(floatCount, ShaderConsts.meta.maxShaderDefinedFloatConstant + 1);
}
// If we statically know which is the last float constant accessed by the shader, we don't need to copy the rest.
floatCount = std::min(floatCount, constSet.meta.maxConstIndexF);
floatCount = std::min(floatCount, ShaderConsts.meta.maxConstIndexF);
// Calculate data sizes for each constant type.
const uint32_t floatDataSize = floatCount * sizeof(Vector4);
const uint32_t intDataSize = std::min(constSet.meta.maxConstIndexI, m_vsIntConstsCount) * sizeof(Vector4i);
const uint32_t boolDataSize = divCeil(std::min(constSet.meta.maxConstIndexB, m_vsBoolConstsCount), 32u) * uint32_t(sizeof(uint32_t));
const uint32_t intDataSize = std::min(ShaderConsts.meta.maxConstIndexI, ShaderConsts.intConstsCount) * sizeof(Vector4i);
const uint32_t boolDataSize = divCeil(std::min(ShaderConsts.meta.maxConstIndexB, ShaderConsts.boolConstsCount), 32u) * uint32_t(sizeof(uint32_t));
// Max copy source size is 8192 * 16 => always aligned to any plausible value
// => we won't copy out of bounds
if (likely(constSet.meta.maxConstIndexF != 0)) {
auto mapPtr = CopySoftwareConstants(constSet.buffer, Src.fConsts, floatDataSize);
if (likely(ShaderConsts.meta.maxConstIndexF != 0)) {
auto mapPtr = CopySoftwareConstants(ctx, ShaderConsts.buffer, ShaderConsts.constants.fConsts, floatDataSize);
if (constSet.meta.needsConstantCopies) {
if (ShaderConsts.meta.needsConstantCopies) {
// Copy shader defined constants over so they can be accessed
// with relative addressing.
Vector4* data = reinterpret_cast<Vector4*>(mapPtr);
auto& shaderConsts = GetCommonShader(m_state.vertexShader)->GetConstants();
for (const auto& constant : shaderConsts) {
if (constant.uboIdx < constSet.meta.maxConstIndexF)
for (const auto& constant : ShaderConsts.shaderDefinedConsts) {
if (constant.uboIdx < ShaderConsts.meta.maxConstIndexF)
data[constant.uboIdx] = *reinterpret_cast<const Vector4*>(constant.float32);
}
}
@ -5892,90 +5915,84 @@ namespace dxvk {
// Max copy source size is 2048 * 16 => always aligned to any plausible value
// => we won't copy out of bounds
if (likely(constSet.meta.maxConstIndexI != 0))
CopySoftwareConstants(constSet.swvp.intBuffer, Src.iConsts, intDataSize);
if (likely(ShaderConsts.meta.maxConstIndexI != 0))
CopySoftwareConstants(ctx, ShaderConsts.swvp.intBuffer, ShaderConsts.constants.iConsts, intDataSize);
if (likely(constSet.meta.maxConstIndexB != 0))
CopySoftwareConstants(constSet.swvp.boolBuffer, Src.bConsts, boolDataSize);
if (likely(ShaderConsts.meta.maxConstIndexB != 0))
CopySoftwareConstants(ctx, ShaderConsts.swvp.boolBuffer, ShaderConsts.constants.bConsts, boolDataSize);
}
inline void* D3D9DeviceEx::CopySoftwareConstants(D3D9ConstantBuffer& dstBuffer, const void* src, uint32_t size) {
inline void* D3D9DeviceEx::CopySoftwareConstants(DxvkContext* ctx, D3D9CSConstantBuffer& dstBuffer, const void* src, uint32_t size) {
uint32_t alignment = dstBuffer.GetAlignment();
size = std::max(size, alignment);
size = align(size, alignment);
auto mapPtr = dstBuffer.Alloc(size);
auto mapPtr = dstBuffer.Alloc(ctx, size);
std::memcpy(mapPtr, src, size);
return mapPtr;
}
template <DxsoProgramType ShaderStage, typename HardwareLayoutType, typename SoftwareLayoutType, typename ShaderType>
inline void D3D9DeviceEx::UploadConstantSet(const SoftwareLayoutType& Src, const D3D9ConstantLayout& Layout, const ShaderType& Shader) {
template <typename ShaderConstantsStorage, typename GPUShaderConstantsStorage>
inline void D3D9DeviceEx::UploadConstantSet(DxvkContext* ctx, D3D9CSShaderConstants<ShaderConstantsStorage>& ShaderConsts) {
/*
* We just copy the float constants that have been set by the application and rely on robustness
* to return 0 on OOB reads.
*/
D3D9ConstantSets& constSet = m_consts[ShaderStage];
if (!constSet.dirty)
if (!ShaderConsts.dirty)
return;
constSet.dirty = false;
ShaderConsts.dirty = false;
uint32_t floatCount = ShaderStage == DxsoProgramType::VertexShader ? m_vsFloatConstsCount : m_psFloatConstsCount;
if (constSet.meta.needsConstantCopies) {
uint32_t floatCount = ShaderConsts.floatConstsCount;
if (ShaderConsts.meta.needsConstantCopies) {
// If the shader requires us to preserve shader defined constants,
// we copy those over. We need to adjust the amount of used floats accordingly.
auto shader = GetCommonShader(Shader);
floatCount = std::max(floatCount, shader->GetMaxDefinedConstant() + 1);
floatCount = std::max(floatCount, ShaderConsts.meta.maxShaderDefinedFloatConstant + 1);
}
// If we statically know which is the last float constant accessed by the shader, we don't need to copy the rest.
floatCount = std::min(constSet.meta.maxConstIndexF, floatCount);
floatCount = std::min(floatCount, ShaderConsts.meta.maxConstIndexF);
// There are very few int constants, so we put those into the same buffer at the start.
// We always allocate memory for all possible int constants to make sure alignment works out.
const uint32_t intRange = caps::MaxOtherConstants * sizeof(Vector4i);
uint32_t floatDataSize = floatCount * sizeof(Vector4);
// Determine amount of floats and buffer size based on highest used float constant and alignment
const uint32_t alignment = constSet.buffer.GetAlignment();
const uint32_t alignment = ShaderConsts.buffer.GetAlignment();
const uint32_t bufferSize = align(std::max(floatDataSize + intRange, alignment), alignment);
floatDataSize = bufferSize - intRange;
void* mapPtr = constSet.buffer.Alloc(bufferSize);
auto* dst = reinterpret_cast<HardwareLayoutType*>(mapPtr);
void* mapPtr = ShaderConsts.buffer.Alloc(ctx, bufferSize);
auto* dst = reinterpret_cast<GPUShaderConstantsStorage*>(mapPtr);
const uint32_t intDataSize = constSet.meta.maxConstIndexI * sizeof(Vector4i);
if (constSet.meta.maxConstIndexI != 0)
std::memcpy(dst->iConsts, Src.iConsts, intDataSize);
if (constSet.meta.maxConstIndexF != 0)
std::memcpy(dst->fConsts, Src.fConsts, floatDataSize);
const uint32_t intDataSize = ShaderConsts.meta.maxConstIndexI * sizeof(Vector4i);
if (ShaderConsts.meta.maxConstIndexI != 0)
std::memcpy(dst->iConsts, ShaderConsts.constants.iConsts, intDataSize);
if (ShaderConsts.meta.maxConstIndexF != 0)
std::memcpy(dst->fConsts, ShaderConsts.constants.fConsts, floatDataSize);
if (constSet.meta.needsConstantCopies) {
if (ShaderConsts.meta.needsConstantCopies) {
// Copy shader defined constants over so they can be accessed
// with relative addressing.
Vector4* data = reinterpret_cast<Vector4*>(dst->fConsts);
auto& shaderConsts = GetCommonShader(Shader)->GetConstants();
for (const auto& constant : shaderConsts) {
if (constant.uboIdx < constSet.meta.maxConstIndexF)
for (const auto& constant : ShaderConsts.shaderDefinedConsts) {
if (constant.uboIdx < ShaderConsts.meta.maxConstIndexF)
data[constant.uboIdx] = *reinterpret_cast<const Vector4*>(constant.float32);
}
}
}
template <DxsoProgramType ShaderStage>
void D3D9DeviceEx::UploadConstants() {
if constexpr (ShaderStage == DxsoProgramTypes::VertexShader) {
if (CanSWVP())
return UploadSoftwareConstantSet(m_state.vsConsts.get(), m_vsLayout);
template <typename ShaderConstantsStorage>
void D3D9DeviceEx::UploadConstants(DxvkContext* ctx, D3D9CSShaderConstants<ShaderConstantsStorage>& ShaderConsts, bool canSWVP) {
if constexpr (std::is_same<ShaderConstantsStorage, D3D9ShaderConstantsVSSoftware>::value) {
if (canSWVP)
return UploadSoftwareConstantSet(ctx, ShaderConsts);
else
return UploadConstantSet<ShaderStage, D3D9ShaderConstantsVSHardware>(m_state.vsConsts.get(), m_vsLayout, m_state.vertexShader);
return UploadConstantSet<D3D9ShaderConstantsVSSoftware, D3D9ShaderConstantsVSHardware>(ctx, ShaderConsts);
} else {
return UploadConstantSet<ShaderStage, D3D9ShaderConstantsPS> (m_state.psConsts.get(), m_psLayout, m_state.pixelShader);
return UploadConstantSet<D3D9ShaderConstantsPS> (ctx, ShaderConsts);
}
}
@ -7313,12 +7330,18 @@ namespace dxvk {
BindShader<DxsoProgramType::VertexShader>(
GetCommonShader(m_state.vertexShader));
}
UploadConstants<DxsoProgramTypes::VertexShader>();
EmitCs([
&cShaderConsts = m_csVSConsts,
cCanSWVP = CanSWVP()
](DxvkContext* ctx) {
UploadConstants(ctx, cShaderConsts, cCanSWVP);
});
if (likely(!CanSWVP())) {
const D3D9CommonShader* shader = GetCommonShader(m_state.vertexShader);
UpdateVertexBoolSpec(
m_state.vsConsts->bConsts[0] &
m_consts[DxsoProgramType::VertexShader].meta.boolConstantMask);
shader->GetMeta().boolConstantMask);
} else
UpdateVertexBoolSpec(0);
}
@ -7331,13 +7354,18 @@ namespace dxvk {
BindInputLayout();
if (likely(UseProgrammablePS())) {
UploadConstants<DxsoProgramTypes::PixelShader>();
EmitCs([
&cShaderConsts = m_csPSConsts
](DxvkContext* ctx) {
UploadConstants(ctx, cShaderConsts, false);
});
const uint32_t psTextureMask = usedTextureMask & ((1u << caps::MaxTexturesPS) - 1u);
const uint32_t fetch4 = m_fetch4 & psTextureMask;
const uint32_t projected = m_projectionBitfield & psTextureMask;
const auto& programInfo = GetCommonShader(m_state.pixelShader)->GetInfo();
const D3D9CommonShader* shader = GetCommonShader(m_state.pixelShader);
const auto& programInfo = shader->GetInfo();
if (programInfo.majorVersion() >= 2)
UpdatePixelShaderSamplerSpec(m_d3d9Options.forceSamplerTypeSpecConstants ? m_textureTypes : 0u, 0u, fetch4);
@ -7346,7 +7374,7 @@ namespace dxvk {
UpdatePixelBoolSpec(
m_state.psConsts->bConsts[0] &
m_consts[DxsoProgramType::PixelShader].meta.boolConstantMask);
shader->GetMeta().boolConstantMask);
}
else {
UpdatePixelBoolSpec(0);
@ -7665,7 +7693,9 @@ namespace dxvk {
m_state.vsConsts->bConsts[idx] &= ~mask;
m_state.vsConsts->bConsts[idx] |= bits & mask;
m_consts[DxsoProgramTypes::VertexShader].dirty = true;
EmitCs([&cConsts = m_csVSConsts](DxvkContext* ctx) {
cConsts.dirty = true;
});
}
@ -7673,7 +7703,9 @@ namespace dxvk {
m_state.psConsts->bConsts[idx] &= ~mask;
m_state.psConsts->bConsts[idx] |= bits & mask;
m_consts[DxsoProgramTypes::PixelShader].dirty = true;
EmitCs([&cConsts = m_csPSConsts](DxvkContext* ctx) {
cConsts.dirty = true;
});
}
@ -7731,39 +7763,108 @@ namespace dxvk {
pConstantData,
Count);
constexpr uint32_t vectorElementsCount = ConstantType != D3D9ConstantType::Bool ? 4 : 1;
const size_t dataSize = Count * vectorElementsCount * sizeof(T);
if (ProgramType == DxsoProgramType::VertexShader && (likely(ConstantType != D3D9ConstantType::Bool) || unlikely(CanSWVP()))) {
DxvkCsDataBlock* csData = EmitCsWithData<T>(Count * vectorElementsCount, [
&cShaderConsts = m_csVSConsts,
cStartRegister = StartRegister,
cFloatEmulation = m_d3d9Options.d3d9FloatEmulation == D3D9FloatEmulation::Enabled
] (DxvkContext* ctx, const T* data, size_t count) {
uint32_t vectorsCount;
if constexpr (ConstantType == D3D9ConstantType::Float) {
vectorsCount = count / 4;
cShaderConsts.floatConstsCount = std::max(cShaderConsts.floatConstsCount, cStartRegister + uint32_t(vectorsCount));
} else if constexpr (ConstantType == D3D9ConstantType::Int) {
vectorsCount = count / 4;
cShaderConsts.intConstsCount = std::max(cShaderConsts.intConstsCount, cStartRegister + uint32_t(vectorsCount));
} else /* if constexpr (ConstantType == D3D9ConstantType::Bool) */ {
vectorsCount = count;
cShaderConsts.boolConstsCount = std::max(cShaderConsts.boolConstsCount, cStartRegister + uint32_t(vectorsCount));
}
if constexpr (ConstantType != D3D9ConstantType::Bool) {
uint32_t maxCount = ConstantType == D3D9ConstantType::Float
? cShaderConsts.meta.maxConstIndexF
: cShaderConsts.meta.maxConstIndexI;
cShaderConsts.dirty |= cStartRegister < maxCount;
} else /* if (CanSWVP()) */ {
cShaderConsts.dirty |= cStartRegister < cShaderConsts.meta.maxConstIndexB;
}
UpdateStateConstants<
D3D9ShaderConstantsVSSoftware*,
ConstantType,
T>(
&cShaderConsts.constants,
cStartRegister,
data,
vectorsCount,
cFloatEmulation);
});
auto dst = reinterpret_cast<T*>(csData->first());
std::memcpy(dst, pConstantData, dataSize);
} else if constexpr (ProgramType == DxsoProgramType::PixelShader && ConstantType != D3D9ConstantType::Bool) {
DxvkCsDataBlock* csData = EmitCsWithData<T>(Count * vectorElementsCount, [
&cShaderConsts = m_csPSConsts,
cStartRegister = StartRegister,
cFloatEmulation = m_d3d9Options.d3d9FloatEmulation == D3D9FloatEmulation::Enabled
] (DxvkContext* ctx, const T* data, size_t count) {
const uint32_t vectorsCount = count / 4;
if constexpr (ConstantType == D3D9ConstantType::Float) {
cShaderConsts.floatConstsCount = std::max(cShaderConsts.floatConstsCount, cStartRegister + uint32_t(vectorsCount));
}
uint32_t maxCount = ConstantType == D3D9ConstantType::Float
? cShaderConsts.meta.maxConstIndexF
: cShaderConsts.meta.maxConstIndexI;
cShaderConsts.dirty |= cStartRegister < maxCount;
UpdateStateConstants<
D3D9ShaderConstantsPS*,
ConstantType,
T>(
&cShaderConsts.constants,
cStartRegister,
data,
vectorsCount,
cFloatEmulation);
});
auto dst = reinterpret_cast<T*>(csData->first());
std::memcpy(dst, pConstantData, dataSize);
}
if constexpr (ProgramType == DxsoProgramType::VertexShader) {
if constexpr (ConstantType == D3D9ConstantType::Float) {
m_vsFloatConstsCount = std::max(m_vsFloatConstsCount, StartRegister + Count);
} else if constexpr (ConstantType == D3D9ConstantType::Int) {
m_vsIntConstsCount = std::max(m_vsIntConstsCount, StartRegister + Count);
} else /* if constexpr (ConstantType == D3D9ConstantType::Bool) */ {
m_vsBoolConstsCount = std::max(m_vsBoolConstsCount, StartRegister + Count);
}
UpdateStateConstants<
static_item<D3D9ShaderConstantsVSSoftware>&,
ConstantType,
T>(
m_state.vsConsts,
StartRegister,
pConstantData,
Count,
false);
} else {
if constexpr (ConstantType == D3D9ConstantType::Float) {
m_psFloatConstsCount = std::max(m_psFloatConstsCount, StartRegister + Count);
}
UpdateStateConstants<
static_item<D3D9ShaderConstantsPS>&,
ConstantType,
T>(
m_state.psConsts,
StartRegister,
pConstantData,
Count,
false);
}
if constexpr (ConstantType != D3D9ConstantType::Bool) {
uint32_t maxCount = ConstantType == D3D9ConstantType::Float
? m_consts[ProgramType].meta.maxConstIndexF
: m_consts[ProgramType].meta.maxConstIndexI;
m_consts[ProgramType].dirty |= StartRegister < maxCount;
} else if constexpr (ProgramType == DxsoProgramType::VertexShader) {
if (unlikely(CanSWVP())) {
m_consts[DxsoProgramType::VertexShader].dirty |= StartRegister < m_consts[ProgramType].meta.maxConstIndexB;
}
}
UpdateStateConstants<ProgramType, ConstantType, T>(
&m_state,
StartRegister,
pConstantData,
Count,
m_d3d9Options.d3d9FloatEmulation == D3D9FloatEmulation::Enabled);
return D3D_OK;
}

View File

@ -941,16 +941,16 @@ namespace dxvk {
void BindDepthBias();
inline void UploadSoftwareConstantSet(const D3D9ShaderConstantsVSSoftware& Src, const D3D9ConstantLayout& Layout);
inline static void UploadSoftwareConstantSet(DxvkContext* ctx, D3D9CSShaderConstants<D3D9ShaderConstantsVSSoftware>& ShaderConsts);
inline void* CopySoftwareConstants(D3D9ConstantBuffer& dstBuffer, const void* src, uint32_t size);
inline static void* CopySoftwareConstants(DxvkContext* ctx, D3D9CSConstantBuffer& dstBuffer, const void* src, uint32_t size);
template <typename ShaderConstantsStorage, typename GPUShaderConstantsStorage = ShaderConstantsStorage>
inline static void UploadConstantSet(DxvkContext* ctx, D3D9CSShaderConstants<ShaderConstantsStorage>& ShaderConsts);
template <typename ShaderConstantsStorage>
static void UploadConstants(DxvkContext* ctx, D3D9CSShaderConstants<ShaderConstantsStorage>& ShaderConsts, bool canSWVP);
template <DxsoProgramType ShaderStage, typename HardwareLayoutType, typename SoftwareLayoutType, typename ShaderType>
inline void UploadConstantSet(const SoftwareLayoutType& Src, const D3D9ConstantLayout& Layout, const ShaderType& Shader);
template <DxsoProgramType ShaderStage>
void UploadConstants();
void UpdateClipPlanes();
/**
@ -1192,6 +1192,24 @@ namespace dxvk {
}
}
/**
 * \brief Records a CS command together with a data block
 *
 * Tries to push the command into the current CS chunk. If that
 * attempt yields no data block, the current chunk is submitted,
 * a new one is allocated and the command is recorded there.
 * \tparam M Type passed to pushCmd for the data block
 * \tparam AllowFlush Whether a flush may be considered when
 *    switching to a new chunk
 * \param [in] count Count forwarded to pushCmd
 * \param [in] command Command to record
 * \returns Data block associated with the recorded command
 */
template<typename M, bool AllowFlush = true, typename Cmd>
DxvkCsDataBlock* EmitCsWithData(size_t count, Cmd&& command) {
  DxvkCsDataBlock* block = m_csChunk->pushCmd<M, Cmd>(command, count);

  if (unlikely(block == nullptr)) {
    // Submit the current chunk and continue in a fresh one
    EmitCsChunk(std::move(m_csChunk));
    m_csChunk = AllocCsChunk();

    if constexpr (AllowFlush) {
      ConsiderFlush(GpuFlushType::ImplicitWeakHint);
    }

    // Recording has to happen after the potential flush,
    // since the caller may still access the returned data
    block = m_csChunk->pushCmd<M, Cmd>(command, count);
  }

  return block;
}
void EmitCsChunk(DxvkCsChunkRef&& chunk);
void FlushCsChunk() {
@ -1582,16 +1600,8 @@ namespace dxvk {
uint32_t m_robustSSBOAlignment = 1;
uint32_t m_robustUBOAlignment = 1;
uint32_t m_vsFloatConstsCount = 0;
uint32_t m_vsIntConstsCount = 0;
uint32_t m_vsBoolConstsCount = 0;
uint32_t m_psFloatConstsCount = 0;
VkDeviceSize m_boundVSConstantsBufferSize = 0;
VkDeviceSize m_boundPSConstantsBufferSize = 0;
D3D9ConstantLayout m_vsLayout;
D3D9ConstantLayout m_psLayout;
D3D9ConstantSets m_consts[DxsoProgramTypes::Count];
D3D9UserDefinedAnnotation* m_annotation = nullptr;
@ -1641,6 +1651,10 @@ namespace dxvk {
// Written by CS thread
alignas(CACHE_LINE_SIZE)
std::atomic<uint64_t> m_lastSamplerStats = { 0u };
D3D9CSShaderConstants<D3D9ShaderConstantsVSSoftware> m_csVSConsts;
D3D9CSShaderConstants<D3D9ShaderConstantsPS> m_csPSConsts;
};
}

View File

@ -70,7 +70,6 @@ namespace dxvk {
m_info = pModule->info();
m_meta = pModule->meta();
m_constants = pModule->constants();
m_maxDefinedConst = pModule->maxDefinedConstant();
m_shader->setShaderKey(Key);

View File

@ -52,8 +52,6 @@ namespace dxvk {
const DxsoProgramInfo& GetInfo() const { return m_info; }
uint32_t GetMaxDefinedConstant() const { return m_maxDefinedConst; }
VkImageViewType GetImageViewType(uint32_t samplerSlot) const {
const uint32_t offset = samplerSlot * 2;
const uint32_t mask = 0b11;
@ -70,7 +68,6 @@ namespace dxvk {
DxsoProgramInfo m_info;
DxsoShaderMetaInfo m_meta;
DxsoDefinedConstants m_constants;
uint32_t m_maxDefinedConst;
Rc<DxvkShader> m_shader;

View File

@ -318,54 +318,46 @@ namespace dxvk {
using D3D9DeviceState = D3D9State<static_item>;
template <
DxsoProgramType ProgramType,
typename ShaderConstantsStorage,
D3D9ConstantType ConstantType,
typename T,
typename StateType>
typename T>
HRESULT UpdateStateConstants(
StateType* pState,
UINT StartRegister,
const T* pConstantData,
UINT Count,
bool FloatEmu) {
auto UpdateHelper = [&] (auto& set) {
if constexpr (ConstantType == D3D9ConstantType::Float) {
ShaderConstantsStorage ConstantSet,
UINT StartRegister,
const T* pConstantData,
UINT Count,
bool FloatEmu) {
if constexpr (ConstantType == D3D9ConstantType::Float) {
if (!FloatEmu) {
size_t size = Count * sizeof(Vector4);
if (!FloatEmu) {
size_t size = Count * sizeof(Vector4);
std::memcpy(set->fConsts[StartRegister].data, pConstantData, size);
}
else {
for (UINT i = 0; i < Count; i++)
set->fConsts[StartRegister + i] = replaceNaN(pConstantData + (i * 4));
}
}
else if constexpr (ConstantType == D3D9ConstantType::Int) {
size_t size = Count * sizeof(Vector4i);
std::memcpy(set->iConsts[StartRegister].data, pConstantData, size);
std::memcpy(ConstantSet->fConsts[StartRegister].data, pConstantData, size);
}
else {
for (uint32_t i = 0; i < Count; i++) {
const uint32_t constantIdx = StartRegister + i;
const uint32_t arrayIdx = constantIdx / 32;
const uint32_t bitIdx = constantIdx % 32;
const uint32_t bit = 1u << bitIdx;
set->bConsts[arrayIdx] &= ~bit;
if (pConstantData[i])
set->bConsts[arrayIdx] |= bit;
}
for (UINT i = 0; i < Count; i++)
ConstantSet->fConsts[StartRegister + i] = replaceNaN(pConstantData + (i * 4));
}
}
else if constexpr (ConstantType == D3D9ConstantType::Int) {
size_t size = Count * sizeof(Vector4i);
return D3D_OK;
};
std::memcpy(ConstantSet->iConsts[StartRegister].data, pConstantData, size);
}
else {
for (uint32_t i = 0; i < Count; i++) {
const uint32_t constantIdx = StartRegister + i;
const uint32_t arrayIdx = constantIdx / 32;
const uint32_t bitIdx = constantIdx % 32;
return ProgramType == DxsoProgramTypes::VertexShader
? UpdateHelper(pState->vsConsts)
: UpdateHelper(pState->psConsts);
const uint32_t bit = 1u << bitIdx;
ConstantSet->bConsts[arrayIdx] &= ~bit;
if (pConstantData[i])
ConstantSet->bConsts[arrayIdx] |= bit;
}
}
return D3D_OK;
}
struct Direct3DState9 : public D3D9DeviceState {

View File

@ -367,15 +367,27 @@ namespace dxvk {
setCaptures.bConsts.set(reg, true);
}
UpdateStateConstants<
ProgramType,
ConstantType,
T>(
&m_state,
StartRegister,
pConstantData,
Count,
false);
if constexpr (ProgramType == DxsoProgramType::VertexShader) {
UpdateStateConstants<
dynamic_item<D3D9ShaderConstantsVSSoftware>&,
ConstantType,
T>(
m_state.vsConsts,
StartRegister,
pConstantData,
Count,
false);
} else {
UpdateStateConstants<
dynamic_item<D3D9ShaderConstantsPS>&,
ConstantType,
T>(
m_state.psConsts,
StartRegister,
pConstantData,
Count,
false);
}
return D3D_OK;
};

View File

@ -1797,7 +1797,7 @@ namespace dxvk {
for (uint32_t i = 0; i < 4; i++)
constant.float32[i] = data[i];
m_constants.push_back(constant);
m_maxDefinedConstant = std::max(constant.uboIdx, m_maxDefinedConstant);
m_meta.maxShaderDefinedFloatConstant = std::max(constant.uboIdx, m_meta.maxShaderDefinedFloatConstant);
}
void DxsoCompiler::emitDefI(const DxsoInstructionContext& ctx) {

View File

@ -247,7 +247,6 @@ namespace dxvk {
const DxsoDefinedConstants& constants() { return m_constants; }
uint32_t usedSamplers() const { return m_usedSamplers; }
uint32_t usedRTs() const { return m_usedRTs; }
uint32_t maxDefinedConstant() const { return m_maxDefinedConstant; }
uint32_t textureTypes() const { return m_textureTypes; }
private:

View File

@ -31,6 +31,7 @@ namespace dxvk {
struct DxsoShaderMetaInfo {
bool needsConstantCopies = false;
uint32_t maxShaderDefinedFloatConstant = 0;
uint32_t maxConstIndexF = 0;
uint32_t maxConstIndexI = 0;
uint32_t maxConstIndexB = 0;

View File

@ -36,7 +36,6 @@ namespace dxvk {
m_meta = compiler->meta();
m_constants = compiler->constants();
m_maxDefinedConst = compiler->maxDefinedConstant();
m_usedSamplers = compiler->usedSamplers();
m_textureTypes = compiler->textureTypes();

View File

@ -59,8 +59,6 @@ namespace dxvk {
uint32_t usedRTs() { return m_usedRTs; }
uint32_t maxDefinedConstant() { return m_maxDefinedConst; }
uint32_t textureTypes() { return m_textureTypes; }
private:
@ -82,7 +80,6 @@ namespace dxvk {
uint32_t m_textureTypes;
DxsoShaderMetaInfo m_meta;
uint32_t m_maxDefinedConst;
DxsoDefinedConstants m_constants;
};