mirror of
https://github.com/doitsujin/dxvk.git
synced 2025-03-14 04:29:15 +01:00
[d3d9] Build shader constant UBOs on CS thread
This commit is contained in:
parent
35e2ee3518
commit
eb999a0194
@ -35,7 +35,6 @@ namespace dxvk {
|
||||
|
||||
}
|
||||
|
||||
|
||||
// The destructor performs no explicit work of its own.
D3D9ConstantBuffer::~D3D9ConstantBuffer() = default;
|
||||
@ -136,4 +135,116 @@ namespace dxvk {
|
||||
device->properties().extRobustness2.robustUniformBufferAccessSizeAlignment);
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Constant Buffer living on the CS thread
|
||||
|
||||
// Default constructor: every member keeps its in-class default value,
// so no device is attached and no buffer exists yet.
D3D9CSConstantBuffer::D3D9CSConstantBuffer() = default;
|
||||
|
||||
// Convenience constructor for shader constant buffers.
//
// Derives the Vulkan shader stage flags and the resource binding slot
// from the DXSO shader stage and constant buffer type, requests both
// UNIFORM and STORAGE buffer usage, and forwards everything to the
// general-purpose constructor.
D3D9CSConstantBuffer::D3D9CSConstantBuffer(
  const Rc<DxvkDevice>&       Device,
        DxsoProgramType       ShaderStage,
        DxsoConstantBuffers   BufferType,
        VkDeviceSize          Size,
        bool                  UseDeviceLocalBuffer)
: D3D9CSConstantBuffer(
    Device,
    VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
    GetShaderStage(ShaderStage),
    computeResourceSlotId(ShaderStage, DxsoBindingType::ConstantBuffer, BufferType),
    Size,
    UseDeviceLocalBuffer) { }
|
||||
|
||||
D3D9CSConstantBuffer::D3D9CSConstantBuffer(
|
||||
const Rc<DxvkDevice>& Device,
|
||||
VkBufferUsageFlags Usage,
|
||||
VkShaderStageFlags Stages,
|
||||
uint32_t ResourceSlot,
|
||||
VkDeviceSize Size,
|
||||
bool UseDeviceLocalBuffer)
|
||||
: m_device (Device)
|
||||
, m_binding (ResourceSlot)
|
||||
, m_usage (Usage)
|
||||
, m_stages (Stages)
|
||||
, m_size (Size)
|
||||
, m_align (getAlignment(Device))
|
||||
, m_useDeviceLocalBuffer(UseDeviceLocalBuffer) {
|
||||
|
||||
}
|
||||
|
||||
// The destructor performs no explicit work of its own.
D3D9CSConstantBuffer::~D3D9CSConstantBuffer() = default;
|
||||
|
||||
|
||||
// Sub-allocates `size` bytes (rounded up to the buffer alignment) from
// the current slice, binds that range to the buffer's binding slot on
// the given context, and returns a pointer into the mapped slice at
// the start of the range.
//
// When the request does not fit behind the current offset, new backing
// storage is allocated, the buffer is invalidated on the context, and
// allocation restarts at offset zero.
void* D3D9CSConstantBuffer::Alloc(DxvkContext* ctx, VkDeviceSize size) {
  // The buffer is created lazily on first use.
  if (unlikely(m_buffer == nullptr))
    m_slice = this->createBuffer(ctx);

  const VkDeviceSize alignedSize = align(size, m_align);

  if (unlikely(m_offset + alignedSize > m_size)) {
    m_slice  = m_buffer->allocateStorage();
    m_offset = 0;

    // Pass a separate reference so that m_slice stays usable below.
    ctx->invalidateBuffer(m_buffer, Rc<DxvkResourceAllocation>(m_slice));
  }

  const VkDeviceSize allocOffset = m_offset;
  ctx->bindUniformBufferRange(m_stages, m_binding, allocOffset, alignedSize);

  // Advance the write cursor past the range that was just handed out.
  m_offset = allocOffset + alignedSize;

  return reinterpret_cast<char*>(m_slice->mapPtr()) + allocOffset;
}
|
||||
|
||||
|
||||
// Replaces the buffer's entire backing storage and returns the map
// pointer of the new slice.
//
// On first use the buffer is created; afterwards a fresh storage
// allocation is requested from the existing buffer. In both cases the
// buffer is then invalidated on the given context with that slice.
void* D3D9CSConstantBuffer::AllocSlice(DxvkContext* ctx) {
  if (unlikely(m_buffer == nullptr))
    m_slice = this->createBuffer(ctx);
  else
    m_slice = m_buffer->allocateStorage();

  // invalidateBuffer receives an rvalue and may move from it, so give
  // it its own reference. Moving m_slice itself would leave the member
  // empty right before it is dereferenced for the return value.
  Rc<DxvkResourceAllocation> slice = m_slice;
  ctx->invalidateBuffer(m_buffer, std::move(slice));

  return m_slice->mapPtr();
}
|
||||
|
||||
|
||||
// Creates the backing buffer, binds it to this buffer's binding slot
// on the given context, and returns the buffer's initial storage.
Rc<DxvkResourceAllocation> D3D9CSConstantBuffer::createBuffer(DxvkContext* ctx) {
  DxvkBufferCreateInfo bufferInfo;
  bufferInfo.debugName = "Constant buffer";
  bufferInfo.size      = align(m_size, m_align);
  bufferInfo.usage     = m_usage;
  bufferInfo.stages    = util::pipelineStages(m_stages);

  // The access mask mirrors the requested usage: uniform read access
  // for UNIFORM usage, shader read access for STORAGE usage.
  bufferInfo.access =
      ((m_usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT) ? VK_ACCESS_UNIFORM_READ_BIT : 0)
    | ((m_usage & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT) ? VK_ACCESS_SHADER_READ_BIT  : 0);

  // Always host-visible and host-coherent; device-local is requested
  // in addition only when the buffer was configured for it.
  VkMemoryPropertyFlags memoryFlags
    = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
    | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT
    | (m_useDeviceLocalBuffer ? VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT : 0);

  m_buffer = m_device->createBuffer(bufferInfo, memoryFlags);

  ctx->bindUniformBuffer(m_stages, m_binding, DxvkBufferSlice(m_buffer));

  return m_buffer->storage();
}
|
||||
|
||||
|
||||
// Returns the largest of the device's minimum uniform buffer offset
// alignment, minimum storage buffer offset alignment, and robust
// uniform buffer access size alignment.
VkDeviceSize D3D9CSConstantBuffer::getAlignment(const Rc<DxvkDevice>& device) const {
  const auto& deviceProps = device->properties();
  const auto& limits      = deviceProps.core.properties.limits;

  const VkDeviceSize offsetAlignment = std::max(
    limits.minUniformBufferOffsetAlignment,
    limits.minStorageBufferOffsetAlignment);

  return std::max(offsetAlignment,
    deviceProps.extRobustness2.robustUniformBufferAccessSizeAlignment);
}
|
||||
|
||||
}
|
||||
|
@ -1,6 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include "../dxvk/dxvk_buffer.h"
|
||||
#include "../dxvk/dxvk_context.h"
|
||||
|
||||
#include "../dxso/dxso_util.h"
|
||||
|
||||
@ -82,4 +83,80 @@ namespace dxvk {
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* \brief Constant buffer living on the CS thread
|
||||
*/
|
||||
class D3D9CSConstantBuffer {
|
||||
|
||||
public:
|
||||
|
||||
D3D9CSConstantBuffer();
|
||||
|
||||
D3D9CSConstantBuffer(
|
||||
const Rc<DxvkDevice>& Device,
|
||||
DxsoProgramType ShaderStage,
|
||||
DxsoConstantBuffers BufferType,
|
||||
VkDeviceSize Size,
|
||||
bool UseDeviceLocalBuffer);
|
||||
|
||||
D3D9CSConstantBuffer(
|
||||
const Rc<DxvkDevice>& Device,
|
||||
VkBufferUsageFlags Usage,
|
||||
VkShaderStageFlags Stages,
|
||||
uint32_t ResourceSlot,
|
||||
VkDeviceSize Size,
|
||||
bool UseDeviceLocalBuffer);
|
||||
|
||||
~D3D9CSConstantBuffer();
|
||||
|
||||
/**
|
||||
* \brief Queries alignment
|
||||
*
|
||||
* Useful to pad copies with initialized data.
|
||||
* \returns Data alignment
|
||||
*/
|
||||
VkDeviceSize GetAlignment() const {
|
||||
return m_align;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Allocates a given amount of memory
|
||||
*
|
||||
* \param [in] size Number of bytes to allocate
|
||||
* \returns Map pointer of the allocated region
|
||||
*/
|
||||
void* Alloc(DxvkContext* ctx, VkDeviceSize size);
|
||||
|
||||
/**
|
||||
* \brief Allocates a full buffer slice
|
||||
*
|
||||
* This must not be called if \ref Alloc is used.
|
||||
* \returns Map pointer of the allocated region
|
||||
*/
|
||||
void* AllocSlice(DxvkContext* ctx);
|
||||
|
||||
private:
|
||||
|
||||
Rc<DxvkDevice> m_device;
|
||||
|
||||
uint32_t m_binding = 0u;
|
||||
VkBufferUsageFlags m_usage = 0u;
|
||||
VkShaderStageFlags m_stages = 0u;
|
||||
VkDeviceSize m_size = 0ull;
|
||||
VkDeviceSize m_align = 0ull;
|
||||
VkDeviceSize m_offset = 0ull;
|
||||
|
||||
bool m_useDeviceLocalBuffer = false;
|
||||
|
||||
Rc<DxvkBuffer> m_buffer = nullptr;
|
||||
Rc<DxvkResourceAllocation> m_slice = nullptr;
|
||||
|
||||
Rc<DxvkResourceAllocation> createBuffer(DxvkContext* ctx);
|
||||
|
||||
VkDeviceSize getAlignment(const Rc<DxvkDevice>& device) const;
|
||||
|
||||
};
|
||||
|
||||
}
|
@ -40,15 +40,30 @@ namespace dxvk {
|
||||
};
|
||||
|
||||
struct D3D9SwvpConstantBuffers {
|
||||
D3D9ConstantBuffer intBuffer;
|
||||
D3D9ConstantBuffer boolBuffer;
|
||||
D3D9CSConstantBuffer intBuffer;
|
||||
D3D9CSConstantBuffer boolBuffer;
|
||||
};
|
||||
|
||||
struct D3D9ConstantSets {
|
||||
D3D9SwvpConstantBuffers swvp;
|
||||
D3D9ConstantBuffer buffer;
|
||||
template<typename ShaderConstantsStorage>
|
||||
struct D3D9CSShaderConstants {
|
||||
ShaderConstantsStorage constants;
|
||||
|
||||
// Primary buffer (contains HWVP or pixel shaders: Ints + Floats, SWVP: Floats)
|
||||
D3D9CSConstantBuffer buffer;
|
||||
// Secondary buffers for SWVP (one for Ints, one for Bools)
|
||||
D3D9SwvpConstantBuffers swvp;
|
||||
|
||||
// Shader related
|
||||
DxsoShaderMetaInfo meta = {};
|
||||
DxsoDefinedConstants shaderDefinedConsts;
|
||||
|
||||
// Tracking
|
||||
bool dirty = true;
|
||||
uint32_t floatConstsCount = 0;
|
||||
// The highest changed int and bool constants are only tracked for SWVP.
|
||||
// For HWVP or pixel shaders, the maximum amount is only 16 anyway.
|
||||
uint32_t intConstsCount = 0;
|
||||
uint32_t boolConstsCount = 0;
|
||||
};
|
||||
|
||||
}
|
@ -115,10 +115,10 @@ namespace dxvk {
|
||||
|
||||
if (!useRobustConstantAccess) {
|
||||
// Disable optimized constant copies, we always have to copy all constants.
|
||||
m_vsFloatConstsCount = m_vsLayout.floatCount;
|
||||
m_vsIntConstsCount = m_vsLayout.intCount;
|
||||
m_vsBoolConstsCount = m_vsLayout.boolCount;
|
||||
m_psFloatConstsCount = m_psLayout.floatCount;
|
||||
m_csVSConsts.floatConstsCount = m_vsLayout.floatCount;
|
||||
m_csVSConsts.intConstsCount = m_vsLayout.intCount;
|
||||
m_csVSConsts.boolConstsCount = m_vsLayout.boolCount;
|
||||
m_csPSConsts.floatConstsCount = m_psLayout.floatCount;
|
||||
|
||||
if (supportsRobustness2) {
|
||||
Logger::warn("Disabling robust constant buffer access because of alignment.");
|
||||
@ -3379,15 +3379,23 @@ namespace dxvk {
|
||||
bool oldCopies = oldShader && oldShader->GetMeta().needsConstantCopies;
|
||||
bool newCopies = newShader && newShader->GetMeta().needsConstantCopies;
|
||||
|
||||
m_consts[DxsoProgramTypes::VertexShader].dirty |= oldCopies || newCopies || !oldShader;
|
||||
m_consts[DxsoProgramTypes::VertexShader].meta = newShader ? newShader->GetMeta() : DxsoShaderMetaInfo();
|
||||
|
||||
if (newShader && oldShader) {
|
||||
m_consts[DxsoProgramTypes::VertexShader].dirty
|
||||
|= newShader->GetMeta().maxConstIndexF > oldShader->GetMeta().maxConstIndexF
|
||||
bool dirty = oldCopies || newCopies || !oldShader;
|
||||
dirty |= newShader && oldShader && (
|
||||
newShader->GetMeta().maxConstIndexF > oldShader->GetMeta().maxConstIndexF
|
||||
|| newShader->GetMeta().maxConstIndexI > oldShader->GetMeta().maxConstIndexI
|
||||
|| newShader->GetMeta().maxConstIndexB > oldShader->GetMeta().maxConstIndexB;
|
||||
}
|
||||
|| newShader->GetMeta().maxConstIndexB > oldShader->GetMeta().maxConstIndexB
|
||||
);
|
||||
|
||||
EmitCs([
|
||||
&cShaderConsts = m_csVSConsts,
|
||||
cDirty = dirty,
|
||||
cMeta = newShader ? newShader->GetMeta() : DxsoShaderMetaInfo(),
|
||||
cShaderDefinedConsts = newShader ? newShader->GetConstants() : DxsoDefinedConstants()
|
||||
](DxvkContext* ctx) {
|
||||
cShaderConsts.meta = cMeta;
|
||||
cShaderConsts.dirty |= cDirty;
|
||||
cShaderConsts.shaderDefinedConsts = cShaderDefinedConsts;
|
||||
});
|
||||
|
||||
m_state.vertexShader = shader;
|
||||
|
||||
@ -3754,15 +3762,23 @@ namespace dxvk {
|
||||
bool oldCopies = oldShader && oldShader->GetMeta().needsConstantCopies;
|
||||
bool newCopies = newShader && newShader->GetMeta().needsConstantCopies;
|
||||
|
||||
m_consts[DxsoProgramTypes::PixelShader].dirty |= oldCopies || newCopies || !oldShader;
|
||||
m_consts[DxsoProgramTypes::PixelShader].meta = newShader ? newShader->GetMeta() : DxsoShaderMetaInfo();
|
||||
|
||||
if (newShader && oldShader) {
|
||||
m_consts[DxsoProgramTypes::PixelShader].dirty
|
||||
|= newShader->GetMeta().maxConstIndexF > oldShader->GetMeta().maxConstIndexF
|
||||
bool dirty = oldCopies || newCopies || !oldShader;
|
||||
dirty |= newShader && oldShader && (
|
||||
newShader->GetMeta().maxConstIndexF > oldShader->GetMeta().maxConstIndexF
|
||||
|| newShader->GetMeta().maxConstIndexI > oldShader->GetMeta().maxConstIndexI
|
||||
|| newShader->GetMeta().maxConstIndexB > oldShader->GetMeta().maxConstIndexB;
|
||||
}
|
||||
|| newShader->GetMeta().maxConstIndexB > oldShader->GetMeta().maxConstIndexB
|
||||
);
|
||||
|
||||
EmitCs([
|
||||
&cShaderConsts = m_csPSConsts,
|
||||
cDirty = dirty,
|
||||
cMeta = newShader ? newShader->GetMeta() : DxsoShaderMetaInfo(),
|
||||
cShaderDefinedConsts = newShader ? newShader->GetConstants() : DxsoDefinedConstants()
|
||||
](DxvkContext* ctx) {
|
||||
cShaderConsts.meta = cMeta;
|
||||
cShaderConsts.dirty |= cDirty;
|
||||
cShaderConsts.shaderDefinedConsts = cShaderDefinedConsts;
|
||||
});
|
||||
|
||||
m_state.pixelShader = shader;
|
||||
|
||||
@ -5783,25 +5799,37 @@ namespace dxvk {
|
||||
constexpr VkDeviceSize DefaultConstantBufferSize = 1024ull << 10;
|
||||
constexpr VkDeviceSize SmallConstantBufferSize = 64ull << 10;
|
||||
|
||||
m_consts[DxsoProgramTypes::VertexShader].buffer = D3D9ConstantBuffer(this,
|
||||
DxsoProgramType::VertexShader,
|
||||
DxsoConstantBuffers::VSConstantBuffer,
|
||||
DefaultConstantBufferSize);
|
||||
EmitCs([
|
||||
cDevice = m_dxvkDevice,
|
||||
&cCSVSConsts = m_csVSConsts,
|
||||
&cCSPSConsts = m_csPSConsts,
|
||||
cUseDeviceLocalBuffers = m_d3d9Options.deviceLocalConstantBuffers
|
||||
] (DxvkContext* ctx) {
|
||||
cCSVSConsts.buffer = D3D9CSConstantBuffer(cDevice,
|
||||
DxsoProgramType::VertexShader,
|
||||
DxsoConstantBuffers::VSConstantBuffer,
|
||||
DefaultConstantBufferSize,
|
||||
cUseDeviceLocalBuffers);
|
||||
|
||||
m_consts[DxsoProgramTypes::VertexShader].swvp.intBuffer = D3D9ConstantBuffer(this,
|
||||
DxsoProgramType::VertexShader,
|
||||
DxsoConstantBuffers::VSIntConstantBuffer,
|
||||
SmallConstantBufferSize);
|
||||
cCSVSConsts.swvp.intBuffer = D3D9CSConstantBuffer(cDevice,
|
||||
DxsoProgramType::VertexShader,
|
||||
DxsoConstantBuffers::VSIntConstantBuffer,
|
||||
SmallConstantBufferSize,
|
||||
cUseDeviceLocalBuffers);
|
||||
|
||||
m_consts[DxsoProgramTypes::VertexShader].swvp.boolBuffer = D3D9ConstantBuffer(this,
|
||||
DxsoProgramType::VertexShader,
|
||||
DxsoConstantBuffers::VSBoolConstantBuffer,
|
||||
SmallConstantBufferSize);
|
||||
cCSVSConsts.swvp.boolBuffer = D3D9CSConstantBuffer(cDevice,
|
||||
DxsoProgramType::VertexShader,
|
||||
DxsoConstantBuffers::VSBoolConstantBuffer,
|
||||
SmallConstantBufferSize,
|
||||
cUseDeviceLocalBuffers);
|
||||
|
||||
cCSPSConsts.buffer = D3D9CSConstantBuffer(cDevice,
|
||||
DxsoProgramType::PixelShader,
|
||||
DxsoConstantBuffers::PSConstantBuffer,
|
||||
DefaultConstantBufferSize,
|
||||
cUseDeviceLocalBuffers);
|
||||
});
|
||||
|
||||
m_consts[DxsoProgramTypes::PixelShader].buffer = D3D9ConstantBuffer(this,
|
||||
DxsoProgramType::PixelShader,
|
||||
DxsoConstantBuffers::PSConstantBuffer,
|
||||
DefaultConstantBufferSize);
|
||||
|
||||
m_vsClipPlanes = D3D9ConstantBuffer(this,
|
||||
DxsoProgramType::VertexShader,
|
||||
@ -5841,7 +5869,7 @@ namespace dxvk {
|
||||
}
|
||||
|
||||
|
||||
inline void D3D9DeviceEx::UploadSoftwareConstantSet(const D3D9ShaderConstantsVSSoftware& Src, const D3D9ConstantLayout& Layout) {
|
||||
inline void D3D9DeviceEx::UploadSoftwareConstantSet(DxvkContext* ctx, D3D9CSShaderConstants<D3D9ShaderConstantsVSSoftware>& ShaderConsts) {
|
||||
/*
|
||||
* SWVP raises the amount of constants by a lot.
|
||||
* To avoid copying huge amounts of data for every draw call,
|
||||
@ -5849,42 +5877,37 @@ namespace dxvk {
|
||||
* to fit that. We rely on robustness to return 0 for OOB reads.
|
||||
*/
|
||||
|
||||
D3D9ConstantSets& constSet = m_consts[DxsoProgramType::VertexShader];
|
||||
|
||||
if (!constSet.dirty)
|
||||
if (!ShaderConsts.dirty)
|
||||
return;
|
||||
|
||||
constSet.dirty = false;
|
||||
ShaderConsts.dirty = false;
|
||||
|
||||
uint32_t floatCount = m_vsFloatConstsCount;
|
||||
if (constSet.meta.needsConstantCopies) {
|
||||
uint32_t floatCount = ShaderConsts.floatConstsCount;
|
||||
if (ShaderConsts.meta.needsConstantCopies) {
|
||||
// If the shader requires us to preserve shader defined constants,
|
||||
// we copy those over. We need to adjust the amount of used floats accordingly.
|
||||
auto shader = GetCommonShader(m_state.vertexShader);
|
||||
floatCount = std::max(floatCount, shader->GetMaxDefinedConstant() + 1);
|
||||
floatCount = std::max(floatCount, ShaderConsts.meta.maxShaderDefinedFloatConstant + 1);
|
||||
}
|
||||
// If we statically know which is the last float constant accessed by the shader, we don't need to copy the rest.
|
||||
floatCount = std::min(floatCount, constSet.meta.maxConstIndexF);
|
||||
floatCount = std::min(floatCount, ShaderConsts.meta.maxConstIndexF);
|
||||
|
||||
// Calculate data sizes for each constant type.
|
||||
const uint32_t floatDataSize = floatCount * sizeof(Vector4);
|
||||
const uint32_t intDataSize = std::min(constSet.meta.maxConstIndexI, m_vsIntConstsCount) * sizeof(Vector4i);
|
||||
const uint32_t boolDataSize = divCeil(std::min(constSet.meta.maxConstIndexB, m_vsBoolConstsCount), 32u) * uint32_t(sizeof(uint32_t));
|
||||
const uint32_t intDataSize = std::min(ShaderConsts.meta.maxConstIndexI, ShaderConsts.intConstsCount) * sizeof(Vector4i);
|
||||
const uint32_t boolDataSize = divCeil(std::min(ShaderConsts.meta.maxConstIndexB, ShaderConsts.boolConstsCount), 32u) * uint32_t(sizeof(uint32_t));
|
||||
|
||||
// Max copy source size is 8192 * 16 => always aligned to any plausible value
|
||||
// => we won't copy out of bounds
|
||||
if (likely(constSet.meta.maxConstIndexF != 0)) {
|
||||
auto mapPtr = CopySoftwareConstants(constSet.buffer, Src.fConsts, floatDataSize);
|
||||
if (likely(ShaderConsts.meta.maxConstIndexF != 0)) {
|
||||
auto mapPtr = CopySoftwareConstants(ctx, ShaderConsts.buffer, ShaderConsts.constants.fConsts, floatDataSize);
|
||||
|
||||
if (constSet.meta.needsConstantCopies) {
|
||||
if (ShaderConsts.meta.needsConstantCopies) {
|
||||
// Copy shader defined constants over so they can be accessed
|
||||
// with relative addressing.
|
||||
Vector4* data = reinterpret_cast<Vector4*>(mapPtr);
|
||||
|
||||
auto& shaderConsts = GetCommonShader(m_state.vertexShader)->GetConstants();
|
||||
|
||||
for (const auto& constant : shaderConsts) {
|
||||
if (constant.uboIdx < constSet.meta.maxConstIndexF)
|
||||
for (const auto& constant : ShaderConsts.shaderDefinedConsts) {
|
||||
if (constant.uboIdx < ShaderConsts.meta.maxConstIndexF)
|
||||
data[constant.uboIdx] = *reinterpret_cast<const Vector4*>(constant.float32);
|
||||
}
|
||||
}
|
||||
@ -5892,90 +5915,84 @@ namespace dxvk {
|
||||
|
||||
// Max copy source size is 2048 * 16 => always aligned to any plausible value
|
||||
// => we won't copy out of bounds
|
||||
if (likely(constSet.meta.maxConstIndexI != 0))
|
||||
CopySoftwareConstants(constSet.swvp.intBuffer, Src.iConsts, intDataSize);
|
||||
if (likely(ShaderConsts.meta.maxConstIndexI != 0))
|
||||
CopySoftwareConstants(ctx, ShaderConsts.swvp.intBuffer, ShaderConsts.constants.iConsts, intDataSize);
|
||||
|
||||
if (likely(constSet.meta.maxConstIndexB != 0))
|
||||
CopySoftwareConstants(constSet.swvp.boolBuffer, Src.bConsts, boolDataSize);
|
||||
if (likely(ShaderConsts.meta.maxConstIndexB != 0))
|
||||
CopySoftwareConstants(ctx, ShaderConsts.swvp.boolBuffer, ShaderConsts.constants.bConsts, boolDataSize);
|
||||
}
|
||||
|
||||
|
||||
inline void* D3D9DeviceEx::CopySoftwareConstants(D3D9ConstantBuffer& dstBuffer, const void* src, uint32_t size) {
|
||||
inline void* D3D9DeviceEx::CopySoftwareConstants(DxvkContext* ctx, D3D9CSConstantBuffer& dstBuffer, const void* src, uint32_t size) {
|
||||
uint32_t alignment = dstBuffer.GetAlignment();
|
||||
size = std::max(size, alignment);
|
||||
size = align(size, alignment);
|
||||
|
||||
auto mapPtr = dstBuffer.Alloc(size);
|
||||
auto mapPtr = dstBuffer.Alloc(ctx, size);
|
||||
std::memcpy(mapPtr, src, size);
|
||||
return mapPtr;
|
||||
}
|
||||
|
||||
|
||||
template <DxsoProgramType ShaderStage, typename HardwareLayoutType, typename SoftwareLayoutType, typename ShaderType>
|
||||
inline void D3D9DeviceEx::UploadConstantSet(const SoftwareLayoutType& Src, const D3D9ConstantLayout& Layout, const ShaderType& Shader) {
|
||||
template <typename ShaderConstantsStorage, typename GPUShaderConstantsStorage>
|
||||
inline void D3D9DeviceEx::UploadConstantSet(DxvkContext* ctx, D3D9CSShaderConstants<ShaderConstantsStorage>& ShaderConsts) {
|
||||
/*
|
||||
* We just copy the float constants that have been set by the application and rely on robustness
|
||||
* to return 0 on OOB reads.
|
||||
*/
|
||||
D3D9ConstantSets& constSet = m_consts[ShaderStage];
|
||||
|
||||
if (!constSet.dirty)
|
||||
if (!ShaderConsts.dirty)
|
||||
return;
|
||||
|
||||
constSet.dirty = false;
|
||||
ShaderConsts.dirty = false;
|
||||
|
||||
uint32_t floatCount = ShaderStage == DxsoProgramType::VertexShader ? m_vsFloatConstsCount : m_psFloatConstsCount;
|
||||
if (constSet.meta.needsConstantCopies) {
|
||||
uint32_t floatCount = ShaderConsts.floatConstsCount;
|
||||
if (ShaderConsts.meta.needsConstantCopies) {
|
||||
// If the shader requires us to preserve shader defined constants,
|
||||
// we copy those over. We need to adjust the amount of used floats accordingly.
|
||||
auto shader = GetCommonShader(Shader);
|
||||
floatCount = std::max(floatCount, shader->GetMaxDefinedConstant() + 1);
|
||||
floatCount = std::max(floatCount, ShaderConsts.meta.maxShaderDefinedFloatConstant + 1);
|
||||
}
|
||||
// If we statically know which is the last float constant accessed by the shader, we don't need to copy the rest.
|
||||
floatCount = std::min(constSet.meta.maxConstIndexF, floatCount);
|
||||
floatCount = std::min(floatCount, ShaderConsts.meta.maxConstIndexF);
|
||||
|
||||
// There are very few int constants, so we put those into the same buffer at the start.
|
||||
// We always allocate memory for all possible int constants to make sure alignment works out.
|
||||
const uint32_t intRange = caps::MaxOtherConstants * sizeof(Vector4i);
|
||||
uint32_t floatDataSize = floatCount * sizeof(Vector4);
|
||||
// Determine amount of floats and buffer size based on highest used float constant and alignment
|
||||
const uint32_t alignment = constSet.buffer.GetAlignment();
|
||||
const uint32_t alignment = ShaderConsts.buffer.GetAlignment();
|
||||
const uint32_t bufferSize = align(std::max(floatDataSize + intRange, alignment), alignment);
|
||||
floatDataSize = bufferSize - intRange;
|
||||
|
||||
void* mapPtr = constSet.buffer.Alloc(bufferSize);
|
||||
auto* dst = reinterpret_cast<HardwareLayoutType*>(mapPtr);
|
||||
void* mapPtr = ShaderConsts.buffer.Alloc(ctx, bufferSize);
|
||||
auto* dst = reinterpret_cast<GPUShaderConstantsStorage*>(mapPtr);
|
||||
|
||||
const uint32_t intDataSize = constSet.meta.maxConstIndexI * sizeof(Vector4i);
|
||||
if (constSet.meta.maxConstIndexI != 0)
|
||||
std::memcpy(dst->iConsts, Src.iConsts, intDataSize);
|
||||
if (constSet.meta.maxConstIndexF != 0)
|
||||
std::memcpy(dst->fConsts, Src.fConsts, floatDataSize);
|
||||
const uint32_t intDataSize = ShaderConsts.meta.maxConstIndexI * sizeof(Vector4i);
|
||||
if (ShaderConsts.meta.maxConstIndexI != 0)
|
||||
std::memcpy(dst->iConsts, ShaderConsts.constants.iConsts, intDataSize);
|
||||
if (ShaderConsts.meta.maxConstIndexF != 0)
|
||||
std::memcpy(dst->fConsts, ShaderConsts.constants.fConsts, floatDataSize);
|
||||
|
||||
if (constSet.meta.needsConstantCopies) {
|
||||
if (ShaderConsts.meta.needsConstantCopies) {
|
||||
// Copy shader defined constants over so they can be accessed
|
||||
// with relative addressing.
|
||||
Vector4* data = reinterpret_cast<Vector4*>(dst->fConsts);
|
||||
|
||||
auto& shaderConsts = GetCommonShader(Shader)->GetConstants();
|
||||
|
||||
for (const auto& constant : shaderConsts) {
|
||||
if (constant.uboIdx < constSet.meta.maxConstIndexF)
|
||||
for (const auto& constant : ShaderConsts.shaderDefinedConsts) {
|
||||
if (constant.uboIdx < ShaderConsts.meta.maxConstIndexF)
|
||||
data[constant.uboIdx] = *reinterpret_cast<const Vector4*>(constant.float32);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <DxsoProgramType ShaderStage>
|
||||
void D3D9DeviceEx::UploadConstants() {
|
||||
if constexpr (ShaderStage == DxsoProgramTypes::VertexShader) {
|
||||
if (CanSWVP())
|
||||
return UploadSoftwareConstantSet(m_state.vsConsts.get(), m_vsLayout);
|
||||
template <typename ShaderConstantsStorage>
|
||||
void D3D9DeviceEx::UploadConstants(DxvkContext* ctx, D3D9CSShaderConstants<ShaderConstantsStorage>& ShaderConsts, bool canSWVP) {
|
||||
if constexpr (std::is_same<ShaderConstantsStorage, D3D9ShaderConstantsVSSoftware>::value) {
|
||||
if (canSWVP)
|
||||
return UploadSoftwareConstantSet(ctx, ShaderConsts);
|
||||
else
|
||||
return UploadConstantSet<ShaderStage, D3D9ShaderConstantsVSHardware>(m_state.vsConsts.get(), m_vsLayout, m_state.vertexShader);
|
||||
return UploadConstantSet<D3D9ShaderConstantsVSSoftware, D3D9ShaderConstantsVSHardware>(ctx, ShaderConsts);
|
||||
} else {
|
||||
return UploadConstantSet<ShaderStage, D3D9ShaderConstantsPS> (m_state.psConsts.get(), m_psLayout, m_state.pixelShader);
|
||||
return UploadConstantSet<D3D9ShaderConstantsPS> (ctx, ShaderConsts);
|
||||
}
|
||||
}
|
||||
|
||||
@ -7313,12 +7330,18 @@ namespace dxvk {
|
||||
BindShader<DxsoProgramType::VertexShader>(
|
||||
GetCommonShader(m_state.vertexShader));
|
||||
}
|
||||
UploadConstants<DxsoProgramTypes::VertexShader>();
|
||||
EmitCs([
|
||||
&cShaderConsts = m_csVSConsts,
|
||||
cCanSWVP = CanSWVP()
|
||||
](DxvkContext* ctx) {
|
||||
UploadConstants(ctx, cShaderConsts, cCanSWVP);
|
||||
});
|
||||
|
||||
if (likely(!CanSWVP())) {
|
||||
const D3D9CommonShader* shader = GetCommonShader(m_state.vertexShader);
|
||||
UpdateVertexBoolSpec(
|
||||
m_state.vsConsts->bConsts[0] &
|
||||
m_consts[DxsoProgramType::VertexShader].meta.boolConstantMask);
|
||||
shader->GetMeta().boolConstantMask);
|
||||
} else
|
||||
UpdateVertexBoolSpec(0);
|
||||
}
|
||||
@ -7331,13 +7354,18 @@ namespace dxvk {
|
||||
BindInputLayout();
|
||||
|
||||
if (likely(UseProgrammablePS())) {
|
||||
UploadConstants<DxsoProgramTypes::PixelShader>();
|
||||
EmitCs([
|
||||
&cShaderConsts = m_csPSConsts
|
||||
](DxvkContext* ctx) {
|
||||
UploadConstants(ctx, cShaderConsts, false);
|
||||
});
|
||||
|
||||
const uint32_t psTextureMask = usedTextureMask & ((1u << caps::MaxTexturesPS) - 1u);
|
||||
const uint32_t fetch4 = m_fetch4 & psTextureMask;
|
||||
const uint32_t projected = m_projectionBitfield & psTextureMask;
|
||||
|
||||
const auto& programInfo = GetCommonShader(m_state.pixelShader)->GetInfo();
|
||||
const D3D9CommonShader* shader = GetCommonShader(m_state.pixelShader);
|
||||
const auto& programInfo = shader->GetInfo();
|
||||
|
||||
if (programInfo.majorVersion() >= 2)
|
||||
UpdatePixelShaderSamplerSpec(m_d3d9Options.forceSamplerTypeSpecConstants ? m_textureTypes : 0u, 0u, fetch4);
|
||||
@ -7346,7 +7374,7 @@ namespace dxvk {
|
||||
|
||||
UpdatePixelBoolSpec(
|
||||
m_state.psConsts->bConsts[0] &
|
||||
m_consts[DxsoProgramType::PixelShader].meta.boolConstantMask);
|
||||
shader->GetMeta().boolConstantMask);
|
||||
}
|
||||
else {
|
||||
UpdatePixelBoolSpec(0);
|
||||
@ -7665,7 +7693,9 @@ namespace dxvk {
|
||||
m_state.vsConsts->bConsts[idx] &= ~mask;
|
||||
m_state.vsConsts->bConsts[idx] |= bits & mask;
|
||||
|
||||
m_consts[DxsoProgramTypes::VertexShader].dirty = true;
|
||||
EmitCs([&cConsts = m_csVSConsts](DxvkContext* ctx) {
|
||||
cConsts.dirty = true;
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@ -7673,7 +7703,9 @@ namespace dxvk {
|
||||
m_state.psConsts->bConsts[idx] &= ~mask;
|
||||
m_state.psConsts->bConsts[idx] |= bits & mask;
|
||||
|
||||
m_consts[DxsoProgramTypes::PixelShader].dirty = true;
|
||||
EmitCs([&cConsts = m_csPSConsts](DxvkContext* ctx) {
|
||||
cConsts.dirty = true;
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@ -7731,39 +7763,108 @@ namespace dxvk {
|
||||
pConstantData,
|
||||
Count);
|
||||
|
||||
constexpr uint32_t vectorElementsCount = ConstantType != D3D9ConstantType::Bool ? 4 : 1;
|
||||
const size_t dataSize = Count * vectorElementsCount * sizeof(T);
|
||||
|
||||
if (ProgramType == DxsoProgramType::VertexShader && (likely(ConstantType != D3D9ConstantType::Bool) || unlikely(CanSWVP()))) {
|
||||
|
||||
DxvkCsDataBlock* csData = EmitCsWithData<T>(Count * vectorElementsCount, [
|
||||
&cShaderConsts = m_csVSConsts,
|
||||
cStartRegister = StartRegister,
|
||||
cFloatEmulation = m_d3d9Options.d3d9FloatEmulation == D3D9FloatEmulation::Enabled
|
||||
] (DxvkContext* ctx, const T* data, size_t count) {
|
||||
uint32_t vectorsCount;
|
||||
if constexpr (ConstantType == D3D9ConstantType::Float) {
|
||||
vectorsCount = count / 4;
|
||||
cShaderConsts.floatConstsCount = std::max(cShaderConsts.floatConstsCount, cStartRegister + uint32_t(vectorsCount));
|
||||
} else if constexpr (ConstantType == D3D9ConstantType::Int) {
|
||||
vectorsCount = count / 4;
|
||||
cShaderConsts.intConstsCount = std::max(cShaderConsts.intConstsCount, cStartRegister + uint32_t(vectorsCount));
|
||||
} else /* if constexpr (ConstantType == D3D9ConstantType::Bool) */ {
|
||||
vectorsCount = count;
|
||||
cShaderConsts.boolConstsCount = std::max(cShaderConsts.boolConstsCount, cStartRegister + uint32_t(vectorsCount));
|
||||
}
|
||||
|
||||
if constexpr (ConstantType != D3D9ConstantType::Bool) {
|
||||
uint32_t maxCount = ConstantType == D3D9ConstantType::Float
|
||||
? cShaderConsts.meta.maxConstIndexF
|
||||
: cShaderConsts.meta.maxConstIndexI;
|
||||
|
||||
cShaderConsts.dirty |= cStartRegister < maxCount;
|
||||
} else /* if (CanSWVP()) */ {
|
||||
cShaderConsts.dirty |= cStartRegister < cShaderConsts.meta.maxConstIndexB;
|
||||
}
|
||||
|
||||
UpdateStateConstants<
|
||||
D3D9ShaderConstantsVSSoftware*,
|
||||
ConstantType,
|
||||
T>(
|
||||
&cShaderConsts.constants,
|
||||
cStartRegister,
|
||||
data,
|
||||
vectorsCount,
|
||||
cFloatEmulation);
|
||||
});
|
||||
|
||||
auto dst = reinterpret_cast<T*>(csData->first());
|
||||
std::memcpy(dst, pConstantData, dataSize);
|
||||
|
||||
} else if constexpr (ProgramType == DxsoProgramType::PixelShader && ConstantType != D3D9ConstantType::Bool) {
|
||||
|
||||
DxvkCsDataBlock* csData = EmitCsWithData<T>(Count * vectorElementsCount, [
|
||||
&cShaderConsts = m_csPSConsts,
|
||||
cStartRegister = StartRegister,
|
||||
cFloatEmulation = m_d3d9Options.d3d9FloatEmulation == D3D9FloatEmulation::Enabled
|
||||
] (DxvkContext* ctx, const T* data, size_t count) {
|
||||
const uint32_t vectorsCount = count / 4;
|
||||
if constexpr (ConstantType == D3D9ConstantType::Float) {
|
||||
cShaderConsts.floatConstsCount = std::max(cShaderConsts.floatConstsCount, cStartRegister + uint32_t(vectorsCount));
|
||||
}
|
||||
|
||||
uint32_t maxCount = ConstantType == D3D9ConstantType::Float
|
||||
? cShaderConsts.meta.maxConstIndexF
|
||||
: cShaderConsts.meta.maxConstIndexI;
|
||||
|
||||
cShaderConsts.dirty |= cStartRegister < maxCount;
|
||||
|
||||
UpdateStateConstants<
|
||||
D3D9ShaderConstantsPS*,
|
||||
ConstantType,
|
||||
T>(
|
||||
&cShaderConsts.constants,
|
||||
cStartRegister,
|
||||
data,
|
||||
vectorsCount,
|
||||
cFloatEmulation);
|
||||
});
|
||||
|
||||
auto dst = reinterpret_cast<T*>(csData->first());
|
||||
std::memcpy(dst, pConstantData, dataSize);
|
||||
|
||||
}
|
||||
|
||||
if constexpr (ProgramType == DxsoProgramType::VertexShader) {
|
||||
if constexpr (ConstantType == D3D9ConstantType::Float) {
|
||||
m_vsFloatConstsCount = std::max(m_vsFloatConstsCount, StartRegister + Count);
|
||||
} else if constexpr (ConstantType == D3D9ConstantType::Int) {
|
||||
m_vsIntConstsCount = std::max(m_vsIntConstsCount, StartRegister + Count);
|
||||
} else /* if constexpr (ConstantType == D3D9ConstantType::Bool) */ {
|
||||
m_vsBoolConstsCount = std::max(m_vsBoolConstsCount, StartRegister + Count);
|
||||
}
|
||||
UpdateStateConstants<
|
||||
static_item<D3D9ShaderConstantsVSSoftware>&,
|
||||
ConstantType,
|
||||
T>(
|
||||
m_state.vsConsts,
|
||||
StartRegister,
|
||||
pConstantData,
|
||||
Count,
|
||||
false);
|
||||
} else {
|
||||
if constexpr (ConstantType == D3D9ConstantType::Float) {
|
||||
m_psFloatConstsCount = std::max(m_psFloatConstsCount, StartRegister + Count);
|
||||
}
|
||||
UpdateStateConstants<
|
||||
static_item<D3D9ShaderConstantsPS>&,
|
||||
ConstantType,
|
||||
T>(
|
||||
m_state.psConsts,
|
||||
StartRegister,
|
||||
pConstantData,
|
||||
Count,
|
||||
false);
|
||||
}
|
||||
|
||||
if constexpr (ConstantType != D3D9ConstantType::Bool) {
|
||||
uint32_t maxCount = ConstantType == D3D9ConstantType::Float
|
||||
? m_consts[ProgramType].meta.maxConstIndexF
|
||||
: m_consts[ProgramType].meta.maxConstIndexI;
|
||||
|
||||
m_consts[ProgramType].dirty |= StartRegister < maxCount;
|
||||
} else if constexpr (ProgramType == DxsoProgramType::VertexShader) {
|
||||
if (unlikely(CanSWVP())) {
|
||||
m_consts[DxsoProgramType::VertexShader].dirty |= StartRegister < m_consts[ProgramType].meta.maxConstIndexB;
|
||||
}
|
||||
}
|
||||
|
||||
UpdateStateConstants<ProgramType, ConstantType, T>(
|
||||
&m_state,
|
||||
StartRegister,
|
||||
pConstantData,
|
||||
Count,
|
||||
m_d3d9Options.d3d9FloatEmulation == D3D9FloatEmulation::Enabled);
|
||||
|
||||
return D3D_OK;
|
||||
}
|
||||
|
||||
|
@ -941,16 +941,16 @@ namespace dxvk {
|
||||
|
||||
void BindDepthBias();
|
||||
|
||||
inline void UploadSoftwareConstantSet(const D3D9ShaderConstantsVSSoftware& Src, const D3D9ConstantLayout& Layout);
|
||||
inline static void UploadSoftwareConstantSet(DxvkContext* ctx, D3D9CSShaderConstants<D3D9ShaderConstantsVSSoftware>& ShaderConsts);
|
||||
|
||||
inline void* CopySoftwareConstants(D3D9ConstantBuffer& dstBuffer, const void* src, uint32_t size);
|
||||
inline static void* CopySoftwareConstants(DxvkContext* ctx, D3D9CSConstantBuffer& dstBuffer, const void* src, uint32_t size);
|
||||
|
||||
template <typename ShaderConstantsStorage, typename GPUShaderConstantsStorage = ShaderConstantsStorage>
|
||||
inline static void UploadConstantSet(DxvkContext* ctx, D3D9CSShaderConstants<ShaderConstantsStorage>& ShaderConsts);
|
||||
|
||||
template <typename ShaderConstantsStorage>
|
||||
static void UploadConstants(DxvkContext* ctx, D3D9CSShaderConstants<ShaderConstantsStorage>& ShaderConsts, bool canSWVP);
|
||||
|
||||
template <DxsoProgramType ShaderStage, typename HardwareLayoutType, typename SoftwareLayoutType, typename ShaderType>
|
||||
inline void UploadConstantSet(const SoftwareLayoutType& Src, const D3D9ConstantLayout& Layout, const ShaderType& Shader);
|
||||
|
||||
template <DxsoProgramType ShaderStage>
|
||||
void UploadConstants();
|
||||
|
||||
void UpdateClipPlanes();
|
||||
|
||||
/**
|
||||
@ -1192,6 +1192,24 @@ namespace dxvk {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * \brief Records a CS command that carries a data block
 *
 * Pushes \c command onto the current CS chunk through
 * \c pushCmd<M, Cmd>(command, count). If that push yields a
 * null pointer, the current chunk is handed to \c EmitCsChunk,
 * a new chunk is obtained from \c AllocCsChunk, and the push
 * is repeated on the new chunk.
 *
 * \c command is neither moved nor forwarded, and the same
 * object is passed to \c pushCmd again on the retry path.
 *
 * \tparam M Passed through as the first template argument of \c pushCmd
 * \tparam AllowFlush When true, \c ConsiderFlush is called with
 *    \c GpuFlushType::ImplicitWeakHint after a chunk was submitted
 * \param [in] count Passed through to \c pushCmd. Presumably the
 *    number of data elements to reserve; confirm against \c pushCmd.
 * \param [in] command Command object to record
 * \returns Pointer returned by \c pushCmd.
 *    NOTE(review): the result of the retry is returned without
 *    a check, so callers may see null if the retry fails as well.
 */
template<typename M, bool AllowFlush = true, typename Cmd>
|
||||
DxvkCsDataBlock* EmitCsWithData(size_t count, Cmd&& command) {
|
||||
DxvkCsDataBlock* data = m_csChunk->pushCmd<M, Cmd>(command, count);
|
||||
|
||||
if (unlikely(!data)) {
|
||||
EmitCsChunk(std::move(m_csChunk));
|
||||
m_csChunk = AllocCsChunk();
|
||||
|
||||
if constexpr (AllowFlush)
|
||||
ConsiderFlush(GpuFlushType::ImplicitWeakHint);
|
||||
|
||||
// We must record this command after the potential
|
||||
// flush since the caller may still access the data
|
||||
data = m_csChunk->pushCmd<M, Cmd>(command, count);
|
||||
}
|
||||
return data;
|
||||
}
|
||||
|
||||
void EmitCsChunk(DxvkCsChunkRef&& chunk);
|
||||
|
||||
void FlushCsChunk() {
|
||||
@ -1582,16 +1600,8 @@ namespace dxvk {
|
||||
uint32_t m_robustSSBOAlignment = 1;
|
||||
uint32_t m_robustUBOAlignment = 1;
|
||||
|
||||
uint32_t m_vsFloatConstsCount = 0;
|
||||
uint32_t m_vsIntConstsCount = 0;
|
||||
uint32_t m_vsBoolConstsCount = 0;
|
||||
uint32_t m_psFloatConstsCount = 0;
|
||||
VkDeviceSize m_boundVSConstantsBufferSize = 0;
|
||||
VkDeviceSize m_boundPSConstantsBufferSize = 0;
|
||||
|
||||
D3D9ConstantLayout m_vsLayout;
|
||||
D3D9ConstantLayout m_psLayout;
|
||||
D3D9ConstantSets m_consts[DxsoProgramTypes::Count];
|
||||
|
||||
D3D9UserDefinedAnnotation* m_annotation = nullptr;
|
||||
|
||||
@ -1641,6 +1651,10 @@ namespace dxvk {
|
||||
// Written by CS thread
|
||||
alignas(CACHE_LINE_SIZE)
|
||||
std::atomic<uint64_t> m_lastSamplerStats = { 0u };
|
||||
|
||||
D3D9CSShaderConstants<D3D9ShaderConstantsVSSoftware> m_csVSConsts;
|
||||
D3D9CSShaderConstants<D3D9ShaderConstantsPS> m_csPSConsts;
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -70,7 +70,6 @@ namespace dxvk {
|
||||
m_info = pModule->info();
|
||||
m_meta = pModule->meta();
|
||||
m_constants = pModule->constants();
|
||||
m_maxDefinedConst = pModule->maxDefinedConstant();
|
||||
|
||||
m_shader->setShaderKey(Key);
|
||||
|
||||
|
@ -52,8 +52,6 @@ namespace dxvk {
|
||||
|
||||
// Returns a const reference to the stored program info (m_info).
const DxsoProgramInfo& GetInfo() const { return m_info; }
|
||||
|
||||
// Returns the stored m_maxDefinedConst value by copy.
uint32_t GetMaxDefinedConstant() const { return m_maxDefinedConst; }
|
||||
|
||||
VkImageViewType GetImageViewType(uint32_t samplerSlot) const {
|
||||
const uint32_t offset = samplerSlot * 2;
|
||||
const uint32_t mask = 0b11;
|
||||
@ -70,7 +68,6 @@ namespace dxvk {
|
||||
DxsoProgramInfo m_info;
|
||||
DxsoShaderMetaInfo m_meta;
|
||||
DxsoDefinedConstants m_constants;
|
||||
uint32_t m_maxDefinedConst;
|
||||
|
||||
Rc<DxvkShader> m_shader;
|
||||
|
||||
|
@ -318,54 +318,46 @@ namespace dxvk {
|
||||
using D3D9DeviceState = D3D9State<static_item>;
|
||||
|
||||
/**
 * \brief Writes a range of shader constants into a constant set
 *
 * NOTE(review): this span comes from a rendered diff and appears
 * to interleave two versions of the function: one that takes
 * \c pState and dispatches through a local \c UpdateHelper lambda
 * operating on \c set, and one that writes to a \c ConstantSet
 * parameter directly. The description below is limited to what
 * both versions visibly do with the data.
 *
 * - Float constants: when \c FloatEmu is false, copies
 *   Count * sizeof(Vector4) bytes from \c pConstantData to
 *   \c fConsts[StartRegister]. Otherwise assigns
 *   \c replaceNaN(pConstantData + i * 4) to
 *   \c fConsts[StartRegister + i] for each i below \c Count.
 * - Int constants: copies Count * sizeof(Vector4i) bytes from
 *   \c pConstantData to \c iConsts[StartRegister].
 * - Remaining case (bool constants): for each register index,
 *   clears bit (index % 32) of \c bConsts[index / 32] and sets
 *   it again if the corresponding source value is non-zero.
 *
 * No bounds checks on \c StartRegister or \c Count are made here.
 *
 * \returns \c D3D_OK on every visible path
 */
template <
|
||||
DxsoProgramType ProgramType,
|
||||
typename ShaderConstantsStorage,
|
||||
D3D9ConstantType ConstantType,
|
||||
typename T,
|
||||
typename StateType>
|
||||
typename T>
|
||||
HRESULT UpdateStateConstants(
|
||||
StateType* pState,
|
||||
UINT StartRegister,
|
||||
const T* pConstantData,
|
||||
UINT Count,
|
||||
bool FloatEmu) {
|
||||
auto UpdateHelper = [&] (auto& set) {
|
||||
if constexpr (ConstantType == D3D9ConstantType::Float) {
|
||||
ShaderConstantsStorage ConstantSet,
|
||||
UINT StartRegister,
|
||||
const T* pConstantData,
|
||||
UINT Count,
|
||||
bool FloatEmu) {
|
||||
if constexpr (ConstantType == D3D9ConstantType::Float) {
|
||||
if (!FloatEmu) {
|
||||
size_t size = Count * sizeof(Vector4);
|
||||
|
||||
if (!FloatEmu) {
|
||||
size_t size = Count * sizeof(Vector4);
|
||||
|
||||
std::memcpy(set->fConsts[StartRegister].data, pConstantData, size);
|
||||
}
|
||||
else {
|
||||
for (UINT i = 0; i < Count; i++)
|
||||
set->fConsts[StartRegister + i] = replaceNaN(pConstantData + (i * 4));
|
||||
}
|
||||
}
|
||||
else if constexpr (ConstantType == D3D9ConstantType::Int) {
|
||||
size_t size = Count * sizeof(Vector4i);
|
||||
|
||||
std::memcpy(set->iConsts[StartRegister].data, pConstantData, size);
|
||||
std::memcpy(ConstantSet->fConsts[StartRegister].data, pConstantData, size);
|
||||
}
|
||||
else {
|
||||
for (uint32_t i = 0; i < Count; i++) {
|
||||
const uint32_t constantIdx = StartRegister + i;
|
||||
const uint32_t arrayIdx = constantIdx / 32;
|
||||
const uint32_t bitIdx = constantIdx % 32;
|
||||
|
||||
const uint32_t bit = 1u << bitIdx;
|
||||
|
||||
set->bConsts[arrayIdx] &= ~bit;
|
||||
if (pConstantData[i])
|
||||
set->bConsts[arrayIdx] |= bit;
|
||||
}
|
||||
for (UINT i = 0; i < Count; i++)
|
||||
ConstantSet->fConsts[StartRegister + i] = replaceNaN(pConstantData + (i * 4));
|
||||
}
|
||||
}
|
||||
else if constexpr (ConstantType == D3D9ConstantType::Int) {
|
||||
size_t size = Count * sizeof(Vector4i);
|
||||
|
||||
return D3D_OK;
|
||||
};
|
||||
std::memcpy(ConstantSet->iConsts[StartRegister].data, pConstantData, size);
|
||||
}
|
||||
else {
|
||||
for (uint32_t i = 0; i < Count; i++) {
|
||||
const uint32_t constantIdx = StartRegister + i;
|
||||
const uint32_t arrayIdx = constantIdx / 32;
|
||||
const uint32_t bitIdx = constantIdx % 32;
|
||||
|
||||
return ProgramType == DxsoProgramTypes::VertexShader
|
||||
? UpdateHelper(pState->vsConsts)
|
||||
: UpdateHelper(pState->psConsts);
|
||||
const uint32_t bit = 1u << bitIdx;
|
||||
|
||||
ConstantSet->bConsts[arrayIdx] &= ~bit;
|
||||
if (pConstantData[i])
|
||||
ConstantSet->bConsts[arrayIdx] |= bit;
|
||||
}
|
||||
}
|
||||
|
||||
return D3D_OK;
|
||||
}
|
||||
|
||||
struct Direct3DState9 : public D3D9DeviceState {
|
||||
|
@ -367,15 +367,27 @@ namespace dxvk {
|
||||
setCaptures.bConsts.set(reg, true);
|
||||
}
|
||||
|
||||
UpdateStateConstants<
|
||||
ProgramType,
|
||||
ConstantType,
|
||||
T>(
|
||||
&m_state,
|
||||
StartRegister,
|
||||
pConstantData,
|
||||
Count,
|
||||
false);
|
||||
if constexpr (ProgramType == DxsoProgramType::VertexShader) {
|
||||
UpdateStateConstants<
|
||||
dynamic_item<D3D9ShaderConstantsVSSoftware>&,
|
||||
ConstantType,
|
||||
T>(
|
||||
m_state.vsConsts,
|
||||
StartRegister,
|
||||
pConstantData,
|
||||
Count,
|
||||
false);
|
||||
} else {
|
||||
UpdateStateConstants<
|
||||
dynamic_item<D3D9ShaderConstantsPS>&,
|
||||
ConstantType,
|
||||
T>(
|
||||
m_state.psConsts,
|
||||
StartRegister,
|
||||
pConstantData,
|
||||
Count,
|
||||
false);
|
||||
}
|
||||
|
||||
return D3D_OK;
|
||||
};
|
||||
|
@ -1797,7 +1797,7 @@ namespace dxvk {
|
||||
for (uint32_t i = 0; i < 4; i++)
|
||||
constant.float32[i] = data[i];
|
||||
m_constants.push_back(constant);
|
||||
m_maxDefinedConstant = std::max(constant.uboIdx, m_maxDefinedConstant);
|
||||
m_meta.maxShaderDefinedFloatConstant = std::max(constant.uboIdx, m_meta.maxShaderDefinedFloatConstant);
|
||||
}
|
||||
|
||||
void DxsoCompiler::emitDefI(const DxsoInstructionContext& ctx) {
|
||||
|
@ -247,7 +247,6 @@ namespace dxvk {
|
||||
// Returns a const reference to the stored defined-constants list (m_constants).
const DxsoDefinedConstants& constants() { return m_constants; }
|
||||
// Returns the stored m_usedSamplers value; presumably a per-sampler bit mask (confirm where it is written).
uint32_t usedSamplers() const { return m_usedSamplers; }
|
||||
// Returns the stored m_usedRTs value; presumably a per-render-target bit mask (confirm where it is written).
uint32_t usedRTs() const { return m_usedRTs; }
|
||||
// Returns the stored m_maxDefinedConstant value by copy.
uint32_t maxDefinedConstant() const { return m_maxDefinedConstant; }
|
||||
// Returns the stored m_textureTypes value by copy.
uint32_t textureTypes() const { return m_textureTypes; }
|
||||
|
||||
private:
|
||||
|
@ -31,6 +31,7 @@ namespace dxvk {
|
||||
|
||||
struct DxsoShaderMetaInfo {
|
||||
bool needsConstantCopies = false;
|
||||
uint32_t maxShaderDefinedFloatConstant = 0;
|
||||
uint32_t maxConstIndexF = 0;
|
||||
uint32_t maxConstIndexI = 0;
|
||||
uint32_t maxConstIndexB = 0;
|
||||
|
@ -36,7 +36,6 @@ namespace dxvk {
|
||||
|
||||
m_meta = compiler->meta();
|
||||
m_constants = compiler->constants();
|
||||
m_maxDefinedConst = compiler->maxDefinedConstant();
|
||||
m_usedSamplers = compiler->usedSamplers();
|
||||
m_textureTypes = compiler->textureTypes();
|
||||
|
||||
|
@ -59,8 +59,6 @@ namespace dxvk {
|
||||
|
||||
// Returns the stored m_usedRTs value by copy.
uint32_t usedRTs() { return m_usedRTs; }
|
||||
|
||||
// Returns the stored m_maxDefinedConst value by copy.
uint32_t maxDefinedConstant() { return m_maxDefinedConst; }
|
||||
|
||||
// Returns the stored m_textureTypes value by copy.
uint32_t textureTypes() { return m_textureTypes; }
|
||||
|
||||
private:
|
||||
@ -82,7 +80,6 @@ namespace dxvk {
|
||||
uint32_t m_textureTypes;
|
||||
|
||||
DxsoShaderMetaInfo m_meta;
|
||||
uint32_t m_maxDefinedConst;
|
||||
DxsoDefinedConstants m_constants;
|
||||
|
||||
};
|
||||
|
Loading…
x
Reference in New Issue
Block a user