1
0
mirror of https://github.com/doitsujin/dxvk.git synced 2025-02-21 22:54:16 +01:00

[d3d9] Optimize late buffer uploads

... similar to what we're doing for textures.
This commit is contained in:
Robin Kertels 2024-09-19 01:15:10 +02:00 committed by Philip Rebohle
parent 5a08b3c451
commit ef8bad33a5
5 changed files with 89 additions and 46 deletions

View File

@ -46,7 +46,7 @@ namespace dxvk {
}
inline bool IsDegenerate() { return min == max; }
// Returns true when the range is empty, i.e. min equals max.
inline bool IsDegenerate() const { return min == max; }
inline void Conjoin(D3D9Range range) {
if (IsDegenerate())
@ -180,7 +180,7 @@ namespace dxvk {
/**
* \brief Whether or not the staging buffer needs to be copied to the actual buffer
*/
inline bool NeedsUpload() { return m_desc.Pool != D3DPOOL_DEFAULT && !m_dirtyRange.IsDegenerate(); }
inline bool NeedsUpload() const { return m_desc.Pool != D3DPOOL_DEFAULT && !m_dirtyRange.IsDegenerate(); }
void PreLoad();

View File

@ -1669,7 +1669,7 @@ namespace dxvk {
});
}
for (uint32_t i = 0; i < caps::MaxStreams; i++) {
for (uint32_t i : bit::BitMask(~m_activeVertexBuffers & ((1 << 16) - 1))) {
if (m_state.vertexBuffers[i].vertexBuffer == nullptr) {
EmitCs([cIndex = i](DxvkContext* ctx) {
ctx->bindVertexBuffer(cIndex, DxvkBufferSlice(), 0);
@ -3342,12 +3342,25 @@ namespace dxvk {
if (needsUpdate)
vbo.vertexBuffer = buffer;
const uint32_t bit = 1u << StreamNumber;
m_activeVertexBuffers &= ~bit;
m_activeVertexBuffersToUploadPerDraw &= ~bit;
m_activeVertexBuffersToUpload &= ~bit;
if (buffer != nullptr) {
needsUpdate |= vbo.offset != OffsetInBytes
|| vbo.stride != Stride;
vbo.offset = OffsetInBytes;
vbo.stride = Stride;
const D3D9CommonBuffer* commonBuffer = GetCommonBuffer(buffer);
m_activeVertexBuffers |= bit;
if (commonBuffer->DoPerDrawUpload() || CanOnlySWVP())
m_activeVertexBuffersToUploadPerDraw |= bit;
if (commonBuffer->NeedsUpload()) {
m_activeVertexBuffersToUpload |= bit;
}
} else {
// D3D9 doesn't actually unbind any vertex buffer when passing null.
// Operation Flashpoint: Red River relies on this behavior.
@ -5100,9 +5113,18 @@ namespace dxvk {
uint32_t size = respectUserBounds ? std::min(SizeToLock, desc.Size - offset) : desc.Size;
D3D9Range lockRange = D3D9Range(offset, offset + size);
if ((desc.Pool == D3DPOOL_DEFAULT || !(Flags & D3DLOCK_NO_DIRTY_UPDATE)) && !(Flags & D3DLOCK_READONLY))
bool updateDirtyRange = (desc.Pool == D3DPOOL_DEFAULT || !(Flags & D3DLOCK_NO_DIRTY_UPDATE)) && !(Flags & D3DLOCK_READONLY);
if (updateDirtyRange) {
pResource->DirtyRange().Conjoin(lockRange);
for (uint32_t i : bit::BitMask(m_activeVertexBuffers)) {
auto commonBuffer = GetCommonBuffer(m_state.vertexBuffers[i].vertexBuffer);
if (commonBuffer == pResource) {
m_activeVertexBuffersToUpload |= 1 << i;
}
}
}
const bool directMapping = pResource->GetMapMode() == D3D9_COMMON_BUFFER_MAP_MODE_DIRECT;
const bool needsReadback = pResource->NeedsReadback();
@ -5237,11 +5259,9 @@ namespace dxvk {
bool* pDynamicVBOs,
bool* pDynamicIBO
) {
bool dynamicSysmemVBOs = true;
for (uint32_t i = 0; i < caps::MaxStreams && dynamicSysmemVBOs; i++) {
auto* vbo = GetCommonBuffer(m_state.vertexBuffers[i].vertexBuffer);
dynamicSysmemVBOs &= vbo == nullptr || (vbo->DoPerDrawUpload() || CanOnlySWVP());
}
const uint32_t usedBuffersMask = (m_state.vertexDecl != nullptr ? m_state.vertexDecl->GetStreamMask() : ~0u) & m_activeVertexBuffers;
bool dynamicSysmemVBOs = usedBuffersMask == m_activeVertexBuffersToUploadPerDraw;
D3D9CommonBuffer* ibo = GetCommonBuffer(m_state.indices);
bool dynamicSysmemIBO = NumIndices != 0 && ibo != nullptr && (ibo->DoPerDrawUpload() || CanOnlySWVP());
@ -5253,6 +5273,12 @@ namespace dxvk {
if (likely(!dynamicSysmemVBOs && !dynamicSysmemIBO))
return;
uint32_t vertexBuffersToUpload;
if (likely(dynamicSysmemVBOs))
vertexBuffersToUpload = m_activeVertexBuffersToUploadPerDraw & usedBuffersMask;
else
vertexBuffersToUpload = 0;
// The UP buffer allocator will invalidate,
// so we can only use 1 UP buffer slice per draw.
// First we calculate the size of that UP buffer slice
@ -5269,7 +5295,7 @@ namespace dxvk {
uint32_t totalUpBufferSize = 0;
std::array<VBOCopy, caps::MaxStreams> vboCopies = {};
for (uint32_t i = 0; i < caps::MaxStreams && dynamicSysmemVBOs; i++) {
for (uint32_t i : bit::BitMask(vertexBuffersToUpload)) {
auto* vbo = GetCommonBuffer(m_state.vertexBuffers[i].vertexBuffer);
if (likely(vbo == nullptr)) {
continue;
@ -5354,7 +5380,8 @@ namespace dxvk {
auto upSlice = AllocUPBuffer(totalUpBufferSize);
// Now copy the actual data and bind it.
for (uint32_t i = 0; i < caps::MaxStreams && dynamicSysmemVBOs; i++) {
if (dynamicSysmemVBOs) {
for (uint32_t i : bit::BitMask(vertexBuffersToUpload)) {
const VBOCopy& copy = vboCopies[i];
if (likely(copy.copyBufferLength != 0)) {
@ -5389,7 +5416,6 @@ namespace dxvk {
m_flags.set(D3D9DeviceFlag::DirtyVertexBuffers);
}
if (dynamicSysmemVBOs) {
// Change the draw call parameters to reflect the changed vertex buffers
if (NumIndices != 0) {
BaseVertexIndex = -FirstVertexIndex;
@ -6821,11 +6847,16 @@ namespace dxvk {
m_lastHazardsRT = m_activeHazardsRT;
}
for (uint32_t i = 0; i < caps::MaxStreams; i++) {
auto* vbo = GetCommonBuffer(m_state.vertexBuffers[i].vertexBuffer);
if (vbo != nullptr && vbo->NeedsUpload() && UploadVBOs)
if (likely(UploadVBOs)) {
const uint32_t usedBuffersMask = m_state.vertexDecl != nullptr ? m_state.vertexDecl->GetStreamMask() : ~0u;
const uint32_t buffersToUpload = m_activeVertexBuffersToUpload & usedBuffersMask;
for (uint32_t bufferIdx : bit::BitMask(buffersToUpload)) {
auto* vbo = GetCommonBuffer(m_state.vertexBuffers[bufferIdx].vertexBuffer);
if (vbo != nullptr && vbo->NeedsUpload())
FlushBuffer(vbo);
}
m_activeVertexBuffersToUpload &= ~buffersToUpload;
}
const uint32_t usedSamplerMask = m_psShaderMasks.samplerMask | m_vsShaderMasks.samplerMask;
const uint32_t usedTextureMask = m_activeTextures & usedSamplerMask;
@ -6839,7 +6870,7 @@ namespace dxvk {
GenerateTextureMips(texturesToGen);
auto* ibo = GetCommonBuffer(m_state.indices);
if (ibo != nullptr && ibo->NeedsUpload() && UploadIBO)
if (UploadIBO && ibo != nullptr && ibo->NeedsUpload())
FlushBuffer(ibo);
UpdateFog();

View File

@ -1385,6 +1385,10 @@ namespace dxvk {
uint32_t m_activeTexturesToUpload = 0;
uint32_t m_activeTexturesToGen = 0;
// Per-stream bitmasks (bit i <-> vertex stream i).
// Bit i is set when a non-null vertex buffer was bound to stream i,
// and cleared again when the stream is rebound.
uint32_t m_activeVertexBuffers = 0;
// Bit i is set when the buffer on stream i reports NeedsUpload() at
// bind time, or when a lock extends the dirty range of a buffer that is
// currently bound to stream i. The bits of the streams that were
// handled are cleared after FlushBuffer() has been called for them.
uint32_t m_activeVertexBuffersToUpload = 0;
// Bit i is set when the buffer on stream i reports DoPerDrawUpload()
// or the device reports CanOnlySWVP() at bind time.
uint32_t m_activeVertexBuffersToUploadPerDraw = 0;
// m_fetch4Enabled is whether fetch4 is currently enabled
// from the application.
//

View File

@ -374,6 +374,8 @@ namespace dxvk {
if (element.Usage == D3DDECLUSAGE_TEXCOORD)
m_texcoordMask |= GetDecltypeCount(D3DDECLTYPE(element.Type)) << (element.UsageIndex * 3);
m_streamMask |= 1 << element.Stream;
}
}

View File

@ -66,6 +66,10 @@ namespace dxvk {
return m_texcoordMask;
}
// Returns the stream bitmask of this declaration (m_streamMask).
// NOTE(review): bit N appears to be set for every stream N referenced
// by a declaration element - confirm against the code filling the mask.
uint32_t GetStreamMask() const {
return m_streamMask;
}
private:
bool MapD3DDeclToFvf(
@ -94,6 +98,8 @@ namespace dxvk {
uint32_t m_texcoordMask = 0;
// Bitmask of streams; bit (1 << element.Stream) is OR-ed in for
// declaration elements while the declaration is processed.
uint32_t m_streamMask = 0;
std::array<uint32_t, caps::MaxStreams> m_sizes = {};
};