mirror of
https://github.com/doitsujin/dxvk.git
synced 2025-02-21 13:54:18 +01:00
[d3d9] Optimize late buffer uploads
... similar to what we're doing for textures.
This commit is contained in:
parent
5a08b3c451
commit
ef8bad33a5
@ -46,7 +46,7 @@ namespace dxvk {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
inline bool IsDegenerate() { return min == max; }
|
inline bool IsDegenerate() const { return min == max; }
|
||||||
|
|
||||||
inline void Conjoin(D3D9Range range) {
|
inline void Conjoin(D3D9Range range) {
|
||||||
if (IsDegenerate())
|
if (IsDegenerate())
|
||||||
@ -180,7 +180,7 @@ namespace dxvk {
|
|||||||
/**
|
/**
|
||||||
* \brief Whether or not the staging buffer needs to be copied to the actual buffer
|
* \brief Whether or not the staging buffer needs to be copied to the actual buffer
|
||||||
*/
|
*/
|
||||||
inline bool NeedsUpload() { return m_desc.Pool != D3DPOOL_DEFAULT && !m_dirtyRange.IsDegenerate(); }
|
inline bool NeedsUpload() const { return m_desc.Pool != D3DPOOL_DEFAULT && !m_dirtyRange.IsDegenerate(); }
|
||||||
|
|
||||||
void PreLoad();
|
void PreLoad();
|
||||||
|
|
||||||
|
@ -1668,8 +1668,8 @@ namespace dxvk {
|
|||||||
ctx->bindIndexBuffer(DxvkBufferSlice(), VK_INDEX_TYPE_UINT32);
|
ctx->bindIndexBuffer(DxvkBufferSlice(), VK_INDEX_TYPE_UINT32);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
for (uint32_t i = 0; i < caps::MaxStreams; i++) {
|
for (uint32_t i : bit::BitMask(~m_activeVertexBuffers & ((1 << 16) - 1))) {
|
||||||
if (m_state.vertexBuffers[i].vertexBuffer == nullptr) {
|
if (m_state.vertexBuffers[i].vertexBuffer == nullptr) {
|
||||||
EmitCs([cIndex = i](DxvkContext* ctx) {
|
EmitCs([cIndex = i](DxvkContext* ctx) {
|
||||||
ctx->bindVertexBuffer(cIndex, DxvkBufferSlice(), 0);
|
ctx->bindVertexBuffer(cIndex, DxvkBufferSlice(), 0);
|
||||||
@ -3342,12 +3342,25 @@ namespace dxvk {
|
|||||||
if (needsUpdate)
|
if (needsUpdate)
|
||||||
vbo.vertexBuffer = buffer;
|
vbo.vertexBuffer = buffer;
|
||||||
|
|
||||||
|
const uint32_t bit = 1u << StreamNumber;
|
||||||
|
m_activeVertexBuffers &= ~bit;
|
||||||
|
m_activeVertexBuffersToUploadPerDraw &= ~bit;
|
||||||
|
m_activeVertexBuffersToUpload &= ~bit;
|
||||||
|
|
||||||
if (buffer != nullptr) {
|
if (buffer != nullptr) {
|
||||||
needsUpdate |= vbo.offset != OffsetInBytes
|
needsUpdate |= vbo.offset != OffsetInBytes
|
||||||
|| vbo.stride != Stride;
|
|| vbo.stride != Stride;
|
||||||
|
|
||||||
vbo.offset = OffsetInBytes;
|
vbo.offset = OffsetInBytes;
|
||||||
vbo.stride = Stride;
|
vbo.stride = Stride;
|
||||||
|
|
||||||
|
const D3D9CommonBuffer* commonBuffer = GetCommonBuffer(buffer);
|
||||||
|
m_activeVertexBuffers |= bit;
|
||||||
|
if (commonBuffer->DoPerDrawUpload() || CanOnlySWVP())
|
||||||
|
m_activeVertexBuffersToUploadPerDraw |= bit;
|
||||||
|
if (commonBuffer->NeedsUpload()) {
|
||||||
|
m_activeVertexBuffersToUpload |= bit;
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
// D3D9 doesn't actually unbind any vertex buffer when passing null.
|
// D3D9 doesn't actually unbind any vertex buffer when passing null.
|
||||||
// Operation Flashpoint: Red River relies on this behavior.
|
// Operation Flashpoint: Red River relies on this behavior.
|
||||||
@ -5100,9 +5113,18 @@ namespace dxvk {
|
|||||||
uint32_t size = respectUserBounds ? std::min(SizeToLock, desc.Size - offset) : desc.Size;
|
uint32_t size = respectUserBounds ? std::min(SizeToLock, desc.Size - offset) : desc.Size;
|
||||||
D3D9Range lockRange = D3D9Range(offset, offset + size);
|
D3D9Range lockRange = D3D9Range(offset, offset + size);
|
||||||
|
|
||||||
if ((desc.Pool == D3DPOOL_DEFAULT || !(Flags & D3DLOCK_NO_DIRTY_UPDATE)) && !(Flags & D3DLOCK_READONLY))
|
bool updateDirtyRange = (desc.Pool == D3DPOOL_DEFAULT || !(Flags & D3DLOCK_NO_DIRTY_UPDATE)) && !(Flags & D3DLOCK_READONLY);
|
||||||
|
if (updateDirtyRange) {
|
||||||
pResource->DirtyRange().Conjoin(lockRange);
|
pResource->DirtyRange().Conjoin(lockRange);
|
||||||
|
|
||||||
|
for (uint32_t i : bit::BitMask(m_activeVertexBuffers)) {
|
||||||
|
auto commonBuffer = GetCommonBuffer(m_state.vertexBuffers[i].vertexBuffer);
|
||||||
|
if (commonBuffer == pResource) {
|
||||||
|
m_activeVertexBuffersToUpload |= 1 << i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const bool directMapping = pResource->GetMapMode() == D3D9_COMMON_BUFFER_MAP_MODE_DIRECT;
|
const bool directMapping = pResource->GetMapMode() == D3D9_COMMON_BUFFER_MAP_MODE_DIRECT;
|
||||||
const bool needsReadback = pResource->NeedsReadback();
|
const bool needsReadback = pResource->NeedsReadback();
|
||||||
|
|
||||||
@ -5237,11 +5259,9 @@ namespace dxvk {
|
|||||||
bool* pDynamicVBOs,
|
bool* pDynamicVBOs,
|
||||||
bool* pDynamicIBO
|
bool* pDynamicIBO
|
||||||
) {
|
) {
|
||||||
bool dynamicSysmemVBOs = true;
|
const uint32_t usedBuffersMask = (m_state.vertexDecl != nullptr ? m_state.vertexDecl->GetStreamMask() : ~0u) & m_activeVertexBuffers;
|
||||||
for (uint32_t i = 0; i < caps::MaxStreams && dynamicSysmemVBOs; i++) {
|
bool dynamicSysmemVBOs = usedBuffersMask == m_activeVertexBuffersToUploadPerDraw;
|
||||||
auto* vbo = GetCommonBuffer(m_state.vertexBuffers[i].vertexBuffer);
|
|
||||||
dynamicSysmemVBOs &= vbo == nullptr || (vbo->DoPerDrawUpload() || CanOnlySWVP());
|
|
||||||
}
|
|
||||||
D3D9CommonBuffer* ibo = GetCommonBuffer(m_state.indices);
|
D3D9CommonBuffer* ibo = GetCommonBuffer(m_state.indices);
|
||||||
bool dynamicSysmemIBO = NumIndices != 0 && ibo != nullptr && (ibo->DoPerDrawUpload() || CanOnlySWVP());
|
bool dynamicSysmemIBO = NumIndices != 0 && ibo != nullptr && (ibo->DoPerDrawUpload() || CanOnlySWVP());
|
||||||
|
|
||||||
@ -5253,6 +5273,12 @@ namespace dxvk {
|
|||||||
if (likely(!dynamicSysmemVBOs && !dynamicSysmemIBO))
|
if (likely(!dynamicSysmemVBOs && !dynamicSysmemIBO))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
uint32_t vertexBuffersToUpload;
|
||||||
|
if (likely(dynamicSysmemVBOs))
|
||||||
|
vertexBuffersToUpload = m_activeVertexBuffersToUploadPerDraw & usedBuffersMask;
|
||||||
|
else
|
||||||
|
vertexBuffersToUpload = 0;
|
||||||
|
|
||||||
// The UP buffer allocator will invalidate,
|
// The UP buffer allocator will invalidate,
|
||||||
// so we can only use 1 UP buffer slice per draw.
|
// so we can only use 1 UP buffer slice per draw.
|
||||||
// First we calculate the size of that UP buffer slice
|
// First we calculate the size of that UP buffer slice
|
||||||
@ -5269,7 +5295,7 @@ namespace dxvk {
|
|||||||
uint32_t totalUpBufferSize = 0;
|
uint32_t totalUpBufferSize = 0;
|
||||||
std::array<VBOCopy, caps::MaxStreams> vboCopies = {};
|
std::array<VBOCopy, caps::MaxStreams> vboCopies = {};
|
||||||
|
|
||||||
for (uint32_t i = 0; i < caps::MaxStreams && dynamicSysmemVBOs; i++) {
|
for (uint32_t i : bit::BitMask(vertexBuffersToUpload)) {
|
||||||
auto* vbo = GetCommonBuffer(m_state.vertexBuffers[i].vertexBuffer);
|
auto* vbo = GetCommonBuffer(m_state.vertexBuffers[i].vertexBuffer);
|
||||||
if (likely(vbo == nullptr)) {
|
if (likely(vbo == nullptr)) {
|
||||||
continue;
|
continue;
|
||||||
@ -5354,42 +5380,42 @@ namespace dxvk {
|
|||||||
auto upSlice = AllocUPBuffer(totalUpBufferSize);
|
auto upSlice = AllocUPBuffer(totalUpBufferSize);
|
||||||
|
|
||||||
// Now copy the actual data and bind it.
|
// Now copy the actual data and bind it.
|
||||||
for (uint32_t i = 0; i < caps::MaxStreams && dynamicSysmemVBOs; i++) {
|
if (dynamicSysmemVBOs) {
|
||||||
const VBOCopy& copy = vboCopies[i];
|
for (uint32_t i : bit::BitMask(vertexBuffersToUpload)) {
|
||||||
|
const VBOCopy& copy = vboCopies[i];
|
||||||
|
|
||||||
if (likely(copy.copyBufferLength != 0)) {
|
if (likely(copy.copyBufferLength != 0)) {
|
||||||
const auto* vbo = GetCommonBuffer(m_state.vertexBuffers[i].vertexBuffer);
|
const auto* vbo = GetCommonBuffer(m_state.vertexBuffers[i].vertexBuffer);
|
||||||
uint8_t* data = reinterpret_cast<uint8_t*>(upSlice.mapPtr) + copy.dstOffset;
|
uint8_t* data = reinterpret_cast<uint8_t*>(upSlice.mapPtr) + copy.dstOffset;
|
||||||
const uint8_t* src = reinterpret_cast<uint8_t*>(vbo->GetMappedSlice().mapPtr) + copy.srcOffset;
|
const uint8_t* src = reinterpret_cast<uint8_t*>(vbo->GetMappedSlice().mapPtr) + copy.srcOffset;
|
||||||
|
|
||||||
if (likely(copy.copyElementStride == copy.copyElementSize)) {
|
if (likely(copy.copyElementStride == copy.copyElementSize)) {
|
||||||
std::memcpy(data, src, copy.copyBufferLength);
|
std::memcpy(data, src, copy.copyBufferLength);
|
||||||
} else {
|
} else {
|
||||||
for (uint32_t j = 0; j < copy.copyElementCount; j++) {
|
for (uint32_t j = 0; j < copy.copyElementCount; j++) {
|
||||||
std::memcpy(data + j * copy.copyElementSize, src + j * copy.copyElementStride, copy.copyElementSize);
|
std::memcpy(data + j * copy.copyElementSize, src + j * copy.copyElementStride, copy.copyElementSize);
|
||||||
}
|
}
|
||||||
if (unlikely(copy.copyBufferLength > copy.copyElementCount * copy.copyElementSize)) {
|
if (unlikely(copy.copyBufferLength > copy.copyElementCount * copy.copyElementSize)) {
|
||||||
// Partial vertex at the end
|
// Partial vertex at the end
|
||||||
std::memcpy(
|
std::memcpy(
|
||||||
data + copy.copyElementCount * copy.copyElementSize,
|
data + copy.copyElementCount * copy.copyElementSize,
|
||||||
src + copy.copyElementCount * copy.copyElementStride,
|
src + copy.copyElementCount * copy.copyElementStride,
|
||||||
copy.copyBufferLength - copy.copyElementCount * copy.copyElementSize);
|
copy.copyBufferLength - copy.copyElementCount * copy.copyElementSize);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
auto vboSlice = upSlice.slice.subSlice(copy.dstOffset, copy.copyBufferLength);
|
||||||
|
EmitCs([
|
||||||
|
cStream = i,
|
||||||
|
cBufferSlice = std::move(vboSlice),
|
||||||
|
cStride = copy.copyElementSize
|
||||||
|
](DxvkContext* ctx) mutable {
|
||||||
|
ctx->bindVertexBuffer(cStream, std::move(cBufferSlice), cStride);
|
||||||
|
});
|
||||||
|
m_flags.set(D3D9DeviceFlag::DirtyVertexBuffers);
|
||||||
}
|
}
|
||||||
|
|
||||||
auto vboSlice = upSlice.slice.subSlice(copy.dstOffset, copy.copyBufferLength);
|
|
||||||
EmitCs([
|
|
||||||
cStream = i,
|
|
||||||
cBufferSlice = std::move(vboSlice),
|
|
||||||
cStride = copy.copyElementSize
|
|
||||||
](DxvkContext* ctx) mutable {
|
|
||||||
ctx->bindVertexBuffer(cStream, std::move(cBufferSlice), cStride);
|
|
||||||
});
|
|
||||||
m_flags.set(D3D9DeviceFlag::DirtyVertexBuffers);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (dynamicSysmemVBOs) {
|
|
||||||
// Change the draw call parameters to reflect the changed vertex buffers
|
// Change the draw call parameters to reflect the changed vertex buffers
|
||||||
if (NumIndices != 0) {
|
if (NumIndices != 0) {
|
||||||
BaseVertexIndex = -FirstVertexIndex;
|
BaseVertexIndex = -FirstVertexIndex;
|
||||||
@ -6821,10 +6847,15 @@ namespace dxvk {
|
|||||||
m_lastHazardsRT = m_activeHazardsRT;
|
m_lastHazardsRT = m_activeHazardsRT;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (uint32_t i = 0; i < caps::MaxStreams; i++) {
|
if (likely(UploadVBOs)) {
|
||||||
auto* vbo = GetCommonBuffer(m_state.vertexBuffers[i].vertexBuffer);
|
const uint32_t usedBuffersMask = m_state.vertexDecl != nullptr ? m_state.vertexDecl->GetStreamMask() : ~0u;
|
||||||
if (vbo != nullptr && vbo->NeedsUpload() && UploadVBOs)
|
const uint32_t buffersToUpload = m_activeVertexBuffersToUpload & usedBuffersMask;
|
||||||
FlushBuffer(vbo);
|
for (uint32_t bufferIdx : bit::BitMask(buffersToUpload)) {
|
||||||
|
auto* vbo = GetCommonBuffer(m_state.vertexBuffers[bufferIdx].vertexBuffer);
|
||||||
|
if (vbo != nullptr && vbo->NeedsUpload())
|
||||||
|
FlushBuffer(vbo);
|
||||||
|
}
|
||||||
|
m_activeVertexBuffersToUpload &= ~buffersToUpload;
|
||||||
}
|
}
|
||||||
|
|
||||||
const uint32_t usedSamplerMask = m_psShaderMasks.samplerMask | m_vsShaderMasks.samplerMask;
|
const uint32_t usedSamplerMask = m_psShaderMasks.samplerMask | m_vsShaderMasks.samplerMask;
|
||||||
@ -6839,7 +6870,7 @@ namespace dxvk {
|
|||||||
GenerateTextureMips(texturesToGen);
|
GenerateTextureMips(texturesToGen);
|
||||||
|
|
||||||
auto* ibo = GetCommonBuffer(m_state.indices);
|
auto* ibo = GetCommonBuffer(m_state.indices);
|
||||||
if (ibo != nullptr && ibo->NeedsUpload() && UploadIBO)
|
if (UploadIBO && ibo != nullptr && ibo->NeedsUpload())
|
||||||
FlushBuffer(ibo);
|
FlushBuffer(ibo);
|
||||||
|
|
||||||
UpdateFog();
|
UpdateFog();
|
||||||
|
@ -1385,6 +1385,10 @@ namespace dxvk {
|
|||||||
uint32_t m_activeTexturesToUpload = 0;
|
uint32_t m_activeTexturesToUpload = 0;
|
||||||
uint32_t m_activeTexturesToGen = 0;
|
uint32_t m_activeTexturesToGen = 0;
|
||||||
|
|
||||||
|
uint32_t m_activeVertexBuffers = 0;
|
||||||
|
uint32_t m_activeVertexBuffersToUpload = 0;
|
||||||
|
uint32_t m_activeVertexBuffersToUploadPerDraw = 0;
|
||||||
|
|
||||||
// m_fetch4Enabled is whether fetch4 is currently enabled
|
// m_fetch4Enabled is whether fetch4 is currently enabled
|
||||||
// from the application.
|
// from the application.
|
||||||
//
|
//
|
||||||
|
@ -374,6 +374,8 @@ namespace dxvk {
|
|||||||
|
|
||||||
if (element.Usage == D3DDECLUSAGE_TEXCOORD)
|
if (element.Usage == D3DDECLUSAGE_TEXCOORD)
|
||||||
m_texcoordMask |= GetDecltypeCount(D3DDECLTYPE(element.Type)) << (element.UsageIndex * 3);
|
m_texcoordMask |= GetDecltypeCount(D3DDECLTYPE(element.Type)) << (element.UsageIndex * 3);
|
||||||
|
|
||||||
|
m_streamMask |= 1 << element.Stream;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -66,6 +66,10 @@ namespace dxvk {
|
|||||||
return m_texcoordMask;
|
return m_texcoordMask;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
uint32_t GetStreamMask() const {
|
||||||
|
return m_streamMask;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
bool MapD3DDeclToFvf(
|
bool MapD3DDeclToFvf(
|
||||||
@ -94,6 +98,8 @@ namespace dxvk {
|
|||||||
|
|
||||||
uint32_t m_texcoordMask = 0;
|
uint32_t m_texcoordMask = 0;
|
||||||
|
|
||||||
|
uint32_t m_streamMask = 0;
|
||||||
|
|
||||||
std::array<uint32_t, caps::MaxStreams> m_sizes = {};
|
std::array<uint32_t, caps::MaxStreams> m_sizes = {};
|
||||||
|
|
||||||
};
|
};
|
||||||
|
Loading…
x
Reference in New Issue
Block a user