1
0
mirror of https://github.com/doitsujin/dxvk.git synced 2025-01-19 05:52:11 +01:00

[d3d9] Always use per-draw buffer uploads on pure SWVP devices

This commit is contained in:
Robin Kertels 2024-09-18 23:57:05 +02:00 committed by Philip Rebohle
parent 97fb6e4f6d
commit 5bb8d09a96
4 changed files with 52 additions and 26 deletions

View File

@ -74,14 +74,6 @@ namespace dxvk {
if (!(m_desc.Usage & (D3DUSAGE_DYNAMIC | D3DUSAGE_WRITEONLY)))
return D3D9_COMMON_BUFFER_MAP_MODE_BUFFER;
// Tests show that DISCARD does not work for pure SWVP devices.
// So force staging buffer path to avoid stalls.
// Dark Romance: Vampire in Love also expects draws to be synchronous
// and breaks if we respect NOOVERWRITE.
// D&D Temple of Elemental Evil breaks if we respect DISCARD.
if (m_parent->CanOnlySWVP())
return D3D9_COMMON_BUFFER_MAP_MODE_BUFFER;
if (!options->allowDirectBufferMapping)
return D3D9_COMMON_BUFFER_MAP_MODE_BUFFER;
@ -134,7 +126,8 @@ namespace dxvk {
memoryFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
}
if ((memoryFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) && m_parent->GetOptions()->cachedDynamicBuffers) {
if ((memoryFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) && (m_parent->GetOptions()->cachedDynamicBuffers || m_parent->CanOnlySWVP())) {
// Never use uncached memory on pure SWVP devices (or when cachedDynamicBuffers is enabled) because we might end up reading from it.
memoryFlags &= ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
memoryFlags |= VK_MEMORY_PROPERTY_HOST_COHERENT_BIT
| VK_MEMORY_PROPERTY_HOST_CACHED_BIT;

View File

@ -200,18 +200,17 @@ namespace dxvk {
/**
* \brief Queries sequence number for a given subresource
* \brief Queries sequence number
*
* Returns which CS chunk the resource was last used on.
* \param [in] Subresource Subresource index
* \returns Sequence number for the given subresource
* \returns Sequence number
*/
uint64_t GetMappingBufferSequenceNumber() const {
  // A tracked sequence number is reported as-is.
  if (HasSequenceNumber())
    return m_seq;

  // Otherwise fall back to the SynchronizeAll sentinel.
  return DxvkCsThread::SynchronizeAll;
}
bool IsSysmemDynamic() const {
bool DoPerDrawUpload() const {
  // True only for buffers that live in D3DPOOL_SYSTEMMEM
  // and were created with D3DUSAGE_DYNAMIC.
  const bool inSystemMemory = m_desc.Pool == D3DPOOL_SYSTEMMEM;
  const bool hasDynamicUsage = (m_desc.Usage & D3DUSAGE_DYNAMIC) != 0;
  return inSystemMemory && hasDynamicUsage;
}

View File

@ -2698,7 +2698,7 @@ namespace dxvk {
uint32_t firstIndex = 0;
int32_t baseVertexIndex = 0;
uint32_t vertexCount = GetVertexCount(PrimitiveType, PrimitiveCount);
UploadDynamicSysmemBuffers(
UploadPerDrawData(
StartVertex,
vertexCount,
firstIndex,
@ -2747,7 +2747,7 @@ namespace dxvk {
bool dynamicSysmemVBOs;
bool dynamicSysmemIBO;
uint32_t indexCount = GetVertexCount(PrimitiveType, PrimitiveCount);
UploadDynamicSysmemBuffers(
UploadPerDrawData(
MinVertexIndex,
NumVertices,
StartIndex,
@ -2932,7 +2932,20 @@ namespace dxvk {
D3D9CommonBuffer* dst = static_cast<D3D9VertexBuffer*>(pDestBuffer)->GetCommonBuffer();
D3D9VertexDecl* decl = static_cast<D3D9VertexDecl*> (pVertexDecl);
PrepareDraw(D3DPT_FORCE_DWORD, true, true);
bool dynamicSysmemVBOs;
uint32_t firstIndex = 0;
int32_t baseVertexIndex = 0;
UploadPerDrawData(
SrcStartIndex,
VertexCount,
firstIndex,
0,
baseVertexIndex,
&dynamicSysmemVBOs,
nullptr
);
PrepareDraw(D3DPT_FORCE_DWORD, !dynamicSysmemVBOs, false);
if (decl == nullptr) {
DWORD FVF = dst->Desc()->FVF;
@ -5057,7 +5070,7 @@ namespace dxvk {
// Ignore DISCARD and NOOVERWRITE if the buffer is not DEFAULT pool (tests + Halo 2)
// The docs say DISCARD and NOOVERWRITE are ignored if the buffer is not DYNAMIC
// but tests say otherwise!
if (desc.Pool != D3DPOOL_DEFAULT)
if (desc.Pool != D3DPOOL_DEFAULT || CanOnlySWVP())
Flags &= ~(D3DLOCK_DISCARD | D3DLOCK_NOOVERWRITE);
// Ignore DONOTWAIT if we are DYNAMIC
@ -5069,6 +5082,12 @@ namespace dxvk {
if (unlikely(m_deviceLostState != D3D9DeviceLostState::Ok))
Flags &= ~D3DLOCK_DISCARD;
// In SWVP mode, we always use the per-draw upload path.
// So the buffer will never be in use on the device.
// FVF Buffers are the exception. Those can be used as a destination for ProcessVertices.
if (unlikely(CanOnlySWVP() && !pResource->NeedsReadback()))
Flags |= D3DLOCK_NOOVERWRITE;
// We only bounds check for MANAGED.
// (TODO: Apparently this is meant to happen for DYNAMIC too but I am not sure
// how that works given it is meant to be a DIRECT access..?)
@ -5209,7 +5228,7 @@ namespace dxvk {
void D3D9DeviceEx::UploadDynamicSysmemBuffers(
void D3D9DeviceEx::UploadPerDrawData(
UINT& FirstVertexIndex,
UINT NumVertices,
UINT& FirstIndex,
@ -5221,10 +5240,10 @@ namespace dxvk {
bool dynamicSysmemVBOs = true;
for (uint32_t i = 0; i < caps::MaxStreams && dynamicSysmemVBOs; i++) {
auto* vbo = GetCommonBuffer(m_state.vertexBuffers[i].vertexBuffer);
dynamicSysmemVBOs &= vbo == nullptr || vbo->IsSysmemDynamic();
dynamicSysmemVBOs &= vbo == nullptr || (vbo->DoPerDrawUpload() || CanOnlySWVP());
}
D3D9CommonBuffer* ibo = GetCommonBuffer(m_state.indices);
bool dynamicSysmemIBO = NumIndices != 0 && ibo != nullptr && ibo->IsSysmemDynamic();
bool dynamicSysmemIBO = NumIndices != 0 && ibo != nullptr && (ibo->DoPerDrawUpload() || CanOnlySWVP());
*pDynamicVBOs = dynamicSysmemVBOs;
@ -5255,6 +5274,21 @@ namespace dxvk {
if (likely(vbo == nullptr)) {
continue;
}
if (unlikely(vbo->NeedsReadback())) {
// There are three ways data can be written to buffers in D3D9:
// - Copy data from a staging buffer to the primary one either on Unlock or at draw time depending on the D3DPOOL
// for buffers with MAP_MODE_STAGING.
// The backend handles inserting the required barriers.
// - Write data between Lock and Unlock to the buffer directly for buffers with MAP_MODE_DIRECT.
// - Write to the primary buffer using ProcessVertices. That is why we need to ensure the resource is idle.
// Even when using MAP_MODE_BUFFER, ProcessVertices copies the data over from the primary buffer to the staging buffer
// at the end. So it could end up writing to the buffer on the GPU while the same buffer gets read here on the CPU.
// ProcessVertices is also exceptionally rare though which is why we're using a second sequence number
// to avoid unnecessary CS thread synchronization.
WaitForResource(vbo->GetBuffer<D3D9_COMMON_BUFFER_TYPE_STAGING>(), vbo->GetMappingBufferSequenceNumber(), D3DLOCK_READONLY);
}
const uint32_t vertexSize = m_state.vertexDecl->GetSize(i);
const uint32_t vertexStride = m_state.vertexBuffers[i].stride;
const uint32_t srcStride = vertexStride;

View File

@ -774,7 +774,7 @@ namespace dxvk {
* @param FirstIndex The first index
* @param NumIndices The number of indices that will be drawn. If this is 0, the index buffer binding will not be modified.
*/
void UploadDynamicSysmemBuffers(
void UploadPerDrawData(
UINT& FirstVertexIndex,
UINT NumVertices,
UINT& FirstIndex,
@ -782,7 +782,7 @@ namespace dxvk {
INT& BaseVertexIndex,
bool* pDynamicVBOs,
bool* pDynamicIBO);
void SetupFPU();
@ -1022,6 +1022,10 @@ namespace dxvk {
return m_behaviorFlags & D3DCREATE_SOFTWARE_VERTEXPROCESSING;
}
bool CanSWVP() const {
  // Either the MIXED or the SOFTWARE vertex processing bit
  // in the behavior flags is enough.
  const auto swvpCapableFlags = D3DCREATE_MIXED_VERTEXPROCESSING
                              | D3DCREATE_SOFTWARE_VERTEXPROCESSING;
  return (m_behaviorFlags & swvpCapableFlags) != 0;
}
UINT GetFixedFunctionVSCount() const {
  // Forwards the vertex shader count reported by m_ffModules.
  const UINT vsCount = m_ffModules.GetVSCount();
  return vsCount;
}
@ -1063,10 +1067,6 @@ namespace dxvk {
}
}
bool CanSWVP() const {
  // Either the MIXED or the SOFTWARE vertex processing bit
  // in the behavior flags is enough.
  const auto swvpCapableFlags = D3DCREATE_MIXED_VERTEXPROCESSING
                              | D3DCREATE_SOFTWARE_VERTEXPROCESSING;
  return (m_behaviorFlags & swvpCapableFlags) != 0;
}
// Device Reset detection for D3D9SwapChainEx::Present
bool IsDeviceReset() {
return std::exchange(m_deviceHasBeenReset, false);