1
0
mirror of https://github.com/doitsujin/dxvk.git synced 2025-03-13 19:29:14 +01:00

[d3d9] Spec-constant out writes to clip distances when disabled

Add a new spec constant holding a mask of the enabled clip planes, so that clip distance writes for disabled planes can be optimized out to improve performance.

For GPL shaders, override what we return here so it's always true and don't bother putting the mask in the UBO.

Signed-off-by: Autumn Ashton <misyl@froggi.es>
This commit is contained in:
Autumn Ashton 2024-12-04 16:29:13 +00:00 committed by Autumn
parent 027fe5963a
commit 8c4c814fb7
5 changed files with 45 additions and 12 deletions

View File

@ -5783,7 +5783,7 @@ namespace dxvk {
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
getSpecConstantBufferSlot(),
sizeof(D3D9SpecializationInfo));
D3D9SpecializationInfo::UBOSize);
}
}
@ -5933,11 +5933,18 @@ namespace dxvk {
auto mapPtr = m_vsClipPlanes.AllocSlice();
auto dst = reinterpret_cast<D3D9ClipPlane*>(mapPtr);
uint32_t clipPlaneMask = 0u;
for (uint32_t i = 0; i < caps::MaxClipPlanes; i++) {
dst[i] = (m_state.renderStates[D3DRS_CLIPPLANEENABLE] & (1 << i))
? m_state.clipPlanes[i]
: D3D9ClipPlane();
if (dst[i] != D3D9ClipPlane())
clipPlaneMask |= 1u << i;
}
if (m_specInfo.set<SpecClipPlaneMask>(clipPlaneMask))
m_flags.set(D3D9DeviceFlag::DirtySpecializationEntries);
}
@ -8589,8 +8596,7 @@ namespace dxvk {
if (m_usingGraphicsPipelines) {
// TODO: Make uploading specialization information less naive.
auto mapPtr = m_specBuffer.AllocSlice();
auto dst = reinterpret_cast<D3D9SpecializationInfo*>(mapPtr);
*dst = m_specInfo;
memcpy(mapPtr, m_specInfo.data.data(), D3D9SpecializationInfo::UBOSize);
}
m_flags.clr(D3D9DeviceFlag::DirtySpecializationEntries);

View File

@ -2366,6 +2366,7 @@ namespace dxvk {
uint32_t floatType = m_module.defFloatType(32);
uint32_t vec4Type = m_module.defVectorType(floatType, 4);
uint32_t boolType = m_module.defBoolType();
// Declare uniform buffer containing clip planes
uint32_t clipPlaneArray = m_module.defArrayTypeUnique(vec4Type, clipPlaneCountId);
@ -2419,12 +2420,16 @@ namespace dxvk {
clipPlaneBlock, blockMembers.size(), blockMembers.data()));
uint32_t distId = m_module.opDot(floatType, worldPos, planeId);
// Always consider clip planes enabled when doing GPL by forcing a mask of 0xffffffff for the quick value.
uint32_t clipPlaneEnabledBit = m_spec.get(m_module, m_specUbo, SpecClipPlaneMask, i, 1, m_module.constu32(0xffffffff));
uint32_t clipPlaneEnabled = m_module.opINotEqual(boolType, clipPlaneEnabledBit, m_module.constu32(0));
uint32_t value = m_module.opSelect(floatType, clipPlaneEnabled, distId, m_module.constf32(0.0f));
m_module.opStore(
m_module.opAccessChain(
m_module.defPointerType(floatType, spv::StorageClassOutput),
clipDistArray, 1, &blockMembers[1]),
distId);
m_module.opStore(m_module.opAccessChain(
m_module.defPointerType(floatType, spv::StorageClassOutput),
clipDistArray, 1, &blockMembers[1]), value);
}
}

View File

@ -30,6 +30,8 @@ namespace dxvk {
SpecDrefClamp, // 1 bit for 16 PS samplers | Bits: 16
SpecFetch4, // 1 bit for 16 PS samplers | Bits: 16
SpecClipPlaneMask, // 6 bits for 6 clip planes | Bits : 6
SpecConstantCount,
};
@ -44,7 +46,10 @@ namespace dxvk {
};
struct D3D9SpecializationInfo {
static constexpr uint32_t MaxSpecDwords = 5;
static constexpr uint32_t MaxSpecDwords = 6;
static constexpr uint32_t MaxUBODwords = 5;
static constexpr size_t UBOSize = MaxUBODwords * sizeof(uint32_t);
static constexpr std::array<BitfieldPosition, SpecConstantCount> Layout{{
{ 0, 0, 32 }, // SamplerType
@ -65,6 +70,8 @@ namespace dxvk {
{ 4, 0, 16 }, // DrefClamp
{ 4, 16, 16 }, // Fetch4
{ 5, 0, 6 }, // ClipPlaneEnabled
}};
template <D3D9SpecConstantId Id, typename T>
@ -97,13 +104,13 @@ namespace dxvk {
return get(module, specUbo, id, 0, 32);
}
uint32_t get(SpirvModule &module, uint32_t specUbo, D3D9SpecConstantId id, uint32_t bitOffset, uint32_t bitCount) {
uint32_t get(SpirvModule &module, uint32_t specUbo, D3D9SpecConstantId id, uint32_t bitOffset, uint32_t bitCount, uint32_t uboOverride = 0) {
const auto &layout = D3D9SpecializationInfo::Layout[id];
uint32_t uintType = module.defIntType(32, 0);
uint32_t optimized = getOptimizedBool(module);
uint32_t quickValue = getSpecUBODword(module, specUbo, layout.dwordOffset);
uint32_t quickValue = uboOverride ? uboOverride : getSpecUBODword(module, specUbo, layout.dwordOffset);
uint32_t optimizedValue = getSpecConstDword(module, layout.dwordOffset);
uint32_t val = module.opSelect(uintType, optimized, optimizedValue, quickValue);

View File

@ -28,6 +28,14 @@ namespace dxvk {
/**
 * \brief Clip plane
 *
 * Stores four float coefficients, all of which
 * are zero-initialized by default.
 */
struct D3D9ClipPlane {
float coeff[4] = {};

/**
 * \brief Checks two clip planes for bitwise equality
 *
 * The comparison is a raw memory compare of the coefficients,
 * not a floating-point compare: \c 0.0f and \c -0.0f are treated
 * as different, and identical NaN bit patterns are treated as equal.
 *
 * Const-qualified so that const objects can be used as the left
 * operand, and so that the operator stays symmetric under the
 * C++20 rewritten-candidate rules.
 *
 * \param [in] other Clip plane to compare against
 * \returns \c true if all coefficient bytes match
 */
bool operator == (const D3D9ClipPlane& other) const noexcept {
return std::memcmp(this, &other, sizeof(D3D9ClipPlane)) == 0;
}

/**
 * \brief Checks two clip planes for bitwise inequality
 *
 * \param [in] other Clip plane to compare against
 * \returns \c true if any coefficient byte differs
 */
bool operator != (const D3D9ClipPlane& other) const noexcept {
return !this->operator == (other);
}
};
struct D3D9RenderStateInfo {

View File

@ -3482,6 +3482,7 @@ void DxsoCompiler::emitControlFlowGenericLoop(
uint32_t floatType = m_module.defFloatType(32);
uint32_t vec4Type = m_module.defVectorType(floatType, 4);
uint32_t boolType = m_module.defBoolType();
// Declare uniform buffer containing clip planes
uint32_t clipPlaneArray = m_module.defArrayTypeUnique(vec4Type, clipPlaneCountId);
@ -3551,9 +3552,15 @@ void DxsoCompiler::emitControlFlowGenericLoop(
DxsoRegisterValue dist = emitDot(position, plane);
// Always consider clip planes enabled when doing GPL by forcing a mask of 0xffffffff for the quick value.
uint32_t clipPlaneEnabledBit = m_spec.get(m_module, m_specUbo, SpecClipPlaneMask, i, 1, m_module.constu32(0xffffffff));
uint32_t clipPlaneEnabled = m_module.opINotEqual(boolType, clipPlaneEnabledBit, m_module.constu32(0));
uint32_t value = m_module.opSelect(floatType, clipPlaneEnabled, dist.id, m_module.constf32(0.0f));
m_module.opStore(m_module.opAccessChain(
m_module.defPointerType(floatType, spv::StorageClassOutput),
clipDistArray, 1, &blockMembers[1]), dist.id);
clipDistArray, 1, &blockMembers[1]), value);
}
}