1
0
mirror of https://github.com/doitsujin/dxvk.git synced 2024-12-01 16:24:12 +01:00

[d3d9] Add dirty texture tracking

Reduces overhead from redundant texture re-binding and from unnecessary binding on sRGB sampler state changes in L4D2.

I go from about 750 fps to 850-900 fps in c1m2_streets with this change.
This commit is contained in:
Joshua Ashton 2021-07-14 20:52:38 +01:00 committed by Joshie
parent 8eeff90e0a
commit d3112c320b
2 changed files with 29 additions and 15 deletions

View File

@ -3621,8 +3621,8 @@ namespace dxvk {
|| Type == D3DSAMP_MAXMIPLEVEL
|| Type == D3DSAMP_BORDERCOLOR)
m_dirtySamplerStates |= 1u << StateSampler;
else if (Type == D3DSAMP_SRGBTEXTURE)
BindTexture(StateSampler);
else if (Type == D3DSAMP_SRGBTEXTURE && m_state.textures[StateSampler] != nullptr)
m_dirtyTextures |= 1u << StateSampler;
constexpr DWORD Fetch4Enabled = MAKEFOURCC('G', 'E', 'T', '4');
constexpr DWORD Fetch4Disabled = MAKEFOURCC('G', 'E', 'T', '1');
@ -3676,7 +3676,7 @@ namespace dxvk {
TextureChangePrivate(m_state.textures[StateSampler], pTexture);
BindTexture(StateSampler);
m_dirtyTextures |= 1u << StateSampler;
UpdateActiveTextures(StateSampler, combinedUsage);
@ -5779,17 +5779,7 @@ namespace dxvk {
}
}
if (commonTex == nullptr) {
EmitCs([
cColorSlot = colorSlot,
cDepthSlot = depthSlot
](DxvkContext* ctx) {
ctx->bindResourceView(cColorSlot, nullptr, nullptr);
ctx->bindResourceView(cDepthSlot, nullptr, nullptr);
});
return;
}
if (commonTex != nullptr) {
EmitCs([
cColorSlot = colorSlot,
cDepthSlot = depthSlot,
@ -5799,6 +5789,15 @@ namespace dxvk {
ctx->bindResourceView(cColorSlot, !cDepth ? cImageView : nullptr, nullptr);
ctx->bindResourceView(cDepthSlot, cDepth ? cImageView : nullptr, nullptr);
});
} else {
EmitCs([
cColorSlot = colorSlot,
cDepthSlot = depthSlot
](DxvkContext* ctx) {
ctx->bindResourceView(cColorSlot, nullptr, nullptr);
ctx->bindResourceView(cDepthSlot, nullptr, nullptr);
});
}
}
@ -5810,6 +5809,13 @@ namespace dxvk {
}
// Calls BindTexture once for every bit set in m_dirtyTextures, walking
// the mask from the lowest set bit upwards, then resets the mask to zero.
void D3D9DeviceEx::UndirtyTextures() {
  // Work on a copy so the member is only written once, at the end.
  uint32_t pending = m_dirtyTextures;

  while (pending != 0) {
    // bit::tzcnt presumably yields the index of the lowest set bit,
    // i.e. the next flagged texture slot -- confirm against util_bit.
    BindTexture(bit::tzcnt(pending));

    // Clear the lowest set bit that was just handled.
    pending &= pending - 1;
  }

  m_dirtyTextures = 0;
}
// Flags the low 21 bits of m_dirtySamplerStates as dirty in one go.
// NOTE(review): presumably one bit per sampler slot -- confirm the slot count.
void D3D9DeviceEx::MarkSamplersDirty() {
  // 21 bits, equivalent to 0x001fffff.
  m_dirtySamplerStates = (1u << 21) - 1u;
}
@ -5881,6 +5887,9 @@ namespace dxvk {
if (m_dirtySamplerStates)
UndirtySamplers();
if (m_dirtyTextures)
UndirtyTextures();
if (m_flags.test(D3D9DeviceFlag::DirtyBlendState))
BindBlendState();
@ -6990,6 +6999,8 @@ namespace dxvk {
});
}
m_dirtyTextures = 0;
auto& ss = m_state.samplerStates;
for (uint32_t i = 0; i < ss.size(); i++) {
auto& state = ss[i];

View File

@ -838,6 +838,8 @@ namespace dxvk {
void UndirtySamplers();
void UndirtyTextures();
void MarkSamplersDirty();
D3D9DrawInfo GenerateDrawInfo(
@ -919,6 +921,7 @@ namespace dxvk {
D3D9DeviceFlags m_flags;
uint32_t m_dirtySamplerStates = 0;
uint32_t m_dirtyTextures = 0;
D3D9Adapter* m_adapter;
Rc<DxvkDevice> m_dxvkDevice;