Review notes (free-form text ahead of the first "diff --git" header is ignored by patch tools).

  * UploadDynamicSysmemBuffers: GetCommonBuffer(m_state.indices) can yield nullptr (the sizing
    pass of the same function already checks for that), so the index buffer is now null-checked
    before IsSysmemDynamic() is called on it.
  * UploadDynamicSysmemBuffers: FirstVertexIndex is cast to INT before it is negated, instead of
    negating an unsigned value.
  * D3D9VertexDecl::Classify: elements whose stream index does not fit into m_sizes are skipped
    instead of writing past the end of the array.
  * PrepareDraw: the declaration now names its last parameter UploadIBO, like the definition.
  * Added lines that had lost their template arguments were repaired
    (std::array<uint32_t, caps::MaxStreams>, reinterpret_cast<uint8_t*>). The extent of m_sizes
    is assumed to be caps::MaxStreams; confirm that name is visible in d3d9_vertex_declaration.h.
  * No line was added to or removed from any hunk, so all hunk headers are unchanged.
    Context lines are reproduced as found.

diff --git a/src/d3d9/d3d9_common_buffer.h b/src/d3d9/d3d9_common_buffer.h
index 745961980..00bea9530 100644
--- a/src/d3d9/d3d9_common_buffer.h
+++ b/src/d3d9/d3d9_common_buffer.h
@@ -211,6 +211,10 @@ namespace dxvk {
         : DxvkCsThread::SynchronizeAll;
     }
 
+    bool IsSysmemDynamic() const {
+      return m_desc.Pool == D3DPOOL_SYSTEMMEM && (m_desc.Usage & D3DUSAGE_DYNAMIC) != 0;
+    }
+
   private:
 
     Rc CreateBuffer() const;
diff --git a/src/d3d9/d3d9_device.cpp b/src/d3d9/d3d9_device.cpp
index 2716f7382..d4330cc59 100644
--- a/src/d3d9/d3d9_device.cpp
+++ b/src/d3d9/d3d9_device.cpp
@@ -2615,7 +2615,21 @@ namespace dxvk {
     if (unlikely(!PrimitiveCount))
       return S_OK;
 
-    PrepareDraw(PrimitiveType, true);
+    bool dynamicSysmemVBOs;
+    uint32_t firstIndex = 0;
+    int32_t baseVertexIndex = 0;
+    uint32_t vertexCount = GetVertexCount(PrimitiveType, PrimitiveCount);
+    UploadDynamicSysmemBuffers(
+      StartVertex,
+      vertexCount,
+      firstIndex,
+      0,
+      baseVertexIndex,
+      &dynamicSysmemVBOs,
+      nullptr
+    );
+
+    PrepareDraw(PrimitiveType, !dynamicSysmemVBOs, false);
 
     EmitCs([this,
       cPrimType    = PrimitiveType,
@@ -2636,7 +2650,6 @@ namespace dxvk {
     return D3D_OK;
   }
 
-
   HRESULT STDMETHODCALLTYPE D3D9DeviceEx::DrawIndexedPrimitive(
           D3DPRIMITIVETYPE PrimitiveType,
           INT              BaseVertexIndex,
@@ -2651,8 +2664,21 @@ namespace dxvk {
 
     if (unlikely(!PrimitiveCount))
       return S_OK;
+
+    bool dynamicSysmemVBOs;
+    bool dynamicSysmemIBO;
+    uint32_t indexCount = GetVertexCount(PrimitiveType, PrimitiveCount);
+    UploadDynamicSysmemBuffers(
+      MinVertexIndex,
+      NumVertices,
+      StartIndex,
+      indexCount,
+      BaseVertexIndex,
+      &dynamicSysmemVBOs,
+      &dynamicSysmemIBO
+    );
 
-    PrepareDraw(PrimitiveType, true);
+    PrepareDraw(PrimitiveType, !dynamicSysmemVBOs, !dynamicSysmemIBO);
 
     EmitCs([this,
       cPrimType        = PrimitiveType,
@@ -2688,7 +2714,7 @@ namespace dxvk {
     if (unlikely(!PrimitiveCount))
       return S_OK;
 
-    PrepareDraw(PrimitiveType, false);
+    PrepareDraw(PrimitiveType, false, false);
 
     uint32_t vertexCount = GetVertexCount(PrimitiveType, PrimitiveCount);
 
@@ -2740,7 +2766,7 @@ namespace dxvk {
     if (unlikely(!PrimitiveCount))
       return S_OK;
 
-    PrepareDraw(PrimitiveType, false);
+    PrepareDraw(PrimitiveType, false, false);
 
     uint32_t vertexCount = GetVertexCount(PrimitiveType, PrimitiveCount);
 
@@ -2827,7 +2853,7 @@ namespace dxvk {
     D3D9CommonBuffer* dst  = static_cast(pDestBuffer)->GetCommonBuffer();
     D3D9VertexDecl*   decl = static_cast (pVertexDecl);
 
-    PrepareDraw(D3DPT_FORCE_DWORD, true);
+    PrepareDraw(D3DPT_FORCE_DWORD, true, true);
 
     if (decl == nullptr) {
       DWORD FVF = dst->Desc()->FVF;
@@ -2842,7 +2868,7 @@ namespace dxvk {
       decl = iter->second.ptr();
     }
 
-    uint32_t offset = DestIndex * decl->GetSize();
+    uint32_t offset = DestIndex * decl->GetSize(0);
 
     auto slice = dst->GetBufferSlice();
          slice = slice.subSlice(offset, slice.length() - offset);
@@ -2889,7 +2915,7 @@ namespace dxvk {
     }
 
     if (dst->GetMapMode() == D3D9_COMMON_BUFFER_MAP_MODE_BUFFER) {
-      uint32_t copySize = VertexCount * decl->GetSize();
+      uint32_t copySize = VertexCount * decl->GetSize(0);
 
       EmitCs([
         cSrcBuffer = dst->GetBuffer(),
@@ -5091,6 +5117,157 @@ namespace dxvk {
 
     return D3D_OK;
   }
+
+
+  void D3D9DeviceEx::UploadDynamicSysmemBuffers(
+          UINT&                   FirstVertexIndex,
+          UINT                    NumVertices,
+          UINT&                   FirstIndex,
+          UINT                    NumIndices,
+          INT&                    BaseVertexIndex,
+          bool*                   pDynamicVBOs,
+          bool*                   pDynamicIBO
+  ) {
+    bool dynamicSysmemVBOs = true;
+    for (uint32_t i = 0; i < caps::MaxStreams && dynamicSysmemVBOs; i++) {
+      auto* vbo = GetCommonBuffer(m_state.vertexBuffers[i].vertexBuffer);
+      dynamicSysmemVBOs &= vbo == nullptr || vbo->IsSysmemDynamic();
+    }
+    D3D9CommonBuffer* ibo = GetCommonBuffer(m_state.indices);
+    bool dynamicSysmemIBO = NumIndices != 0 && ibo != nullptr && ibo->IsSysmemDynamic();
+
+    *pDynamicVBOs = dynamicSysmemVBOs;
+
+    if (pDynamicIBO)
+      *pDynamicIBO = dynamicSysmemIBO;
+
+    if (likely(!dynamicSysmemVBOs && !dynamicSysmemIBO))
+      return;
+
+    // The UP buffer allocator will invalidate,
+    // so we can only use 1 UP buffer slice per draw.
+    // First we calculate the size of that UP buffer slice
+    // and store all sizes and offsets into it.
+
+    uint32_t upBufferSize = 0;
+    std::array<uint32_t, caps::MaxStreams> vboUPBufferOffsets = {};
+    std::array<uint32_t, caps::MaxStreams> vboUPBufferSizes = {};
+    for (uint32_t i = 0; i < caps::MaxStreams && dynamicSysmemVBOs; i++) {
+      vboUPBufferOffsets[i] = upBufferSize;
+
+      auto* vbo = GetCommonBuffer(m_state.vertexBuffers[i].vertexBuffer);
+      if (likely(vbo == nullptr)) {
+        vboUPBufferSizes[i] = 0;
+        continue;
+      }
+      const uint32_t vertexStride = m_state.vertexDecl->GetSize(i);
+      uint32_t offset = (FirstVertexIndex + BaseVertexIndex) * vertexStride;
+      const uint32_t vertexBufferSize = vbo->Desc()->Size;
+      if (offset < vertexBufferSize) {
+        const uint32_t vertexDataSize = std::min(NumVertices * vertexStride, vertexBufferSize - offset);
+        vboUPBufferSizes[i] = vertexDataSize;
+        upBufferSize += vertexDataSize;
+      }
+    }
+
+    uint32_t iboUPBufferSize = 0;
+    uint32_t iboUPBufferOffset = 0;
+    if (dynamicSysmemIBO) {
+      auto* ibo = GetCommonBuffer(m_state.indices);
+      if (likely(ibo != nullptr)) {
+        uint32_t indexStride = ibo->Desc()->Format == D3D9Format::INDEX16 ? 2 : 4;
+        VkIndexType indexType = DecodeIndexType(ibo->Desc()->Format);
+        uint32_t offset = indexStride * FirstIndex;
+        uint32_t indexBufferSize = ibo->Desc()->Size;
+        if (offset < indexBufferSize) {
+          iboUPBufferSize = std::min(NumIndices * indexStride, indexBufferSize - offset);
+          iboUPBufferOffset = upBufferSize;
+          upBufferSize += iboUPBufferSize;
+        }
+      }
+    }
+
+    if (unlikely(upBufferSize == 0)) {
+      *pDynamicVBOs = false;
+      if (pDynamicIBO)
+        *pDynamicIBO = false;
+
+      return;
+    }
+
+    auto upSlice = AllocUPBuffer(upBufferSize);
+
+    // Now copy the actual data and bind it.
+    if (dynamicSysmemVBOs) {
+      for (uint32_t i = 0; i < caps::MaxStreams; i++) {
+        if (unlikely(vboUPBufferSizes[i] == 0)) {
+          EmitCs([
+            cStream = i
+          ](DxvkContext* ctx) {
+            ctx->bindVertexBuffer(cStream, DxvkBufferSlice(), 0);
+          });
+          m_flags.set(D3D9DeviceFlag::DirtyVertexBuffers);
+          continue;
+        }
+
+        auto* vbo = GetCommonBuffer(m_state.vertexBuffers[i].vertexBuffer);
+
+        const uint32_t vertexStride = m_state.vertexDecl->GetSize(i);
+        uint32_t offset = (BaseVertexIndex + FirstVertexIndex) * vertexStride;
+
+        uint8_t* data = reinterpret_cast<uint8_t*>(upSlice.mapPtr) + vboUPBufferOffsets[i];
+        uint8_t* src = reinterpret_cast<uint8_t*>(vbo->GetMappedSlice().mapPtr) + offset;
+        std::memcpy(data, src, vboUPBufferSizes[i]);
+
+        auto vboSlice = upSlice.slice.subSlice(vboUPBufferOffsets[i], vboUPBufferSizes[i]);
+        EmitCs([
+          cStream      = i,
+          cBufferSlice = std::move(vboSlice),
+          cStride      = vertexStride
+        ](DxvkContext* ctx) mutable {
+          ctx->bindVertexBuffer(cStream, std::move(cBufferSlice), cStride);
+        });
+        m_flags.set(D3D9DeviceFlag::DirtyVertexBuffers);
+      }
+
+      // Change the draw call parameters to reflect the changed vertex buffers
+      if (NumIndices != 0) {
+        BaseVertexIndex = -static_cast<INT>(FirstVertexIndex);
+      } else {
+        FirstVertexIndex = 0;
+      }
+    }
+
+    if (dynamicSysmemIBO) {
+      if (unlikely(iboUPBufferSize == 0)) {
+        EmitCs([](DxvkContext* ctx) {
+          ctx->bindIndexBuffer(DxvkBufferSlice(), VK_INDEX_TYPE_UINT32);
+        });
+        m_flags.set(D3D9DeviceFlag::DirtyIndexBuffer);
+      } else {
+        auto* ibo = GetCommonBuffer(m_state.indices);
+        uint32_t indexStride = ibo->Desc()->Format == D3D9Format::INDEX16 ? 2 : 4;
+        VkIndexType indexType = DecodeIndexType(ibo->Desc()->Format);
+        uint32_t offset = indexStride * FirstIndex;
+        uint8_t* data = reinterpret_cast<uint8_t*>(upSlice.mapPtr) + iboUPBufferOffset;
+        uint8_t* src = reinterpret_cast<uint8_t*>(ibo->GetMappedSlice().mapPtr) + offset;
+        std::memcpy(data, src, iboUPBufferSize);
+
+        auto iboSlice = upSlice.slice.subSlice(iboUPBufferOffset, iboUPBufferSize);
+        EmitCs([
+          cBufferSlice = std::move(iboSlice),
+          cIndexType   = indexType
+        ](DxvkContext* ctx) mutable {
+          ctx->bindIndexBuffer(std::move(cBufferSlice), cIndexType);
+        });
+        m_flags.set(D3D9DeviceFlag::DirtyIndexBuffer);
+      }
+
+      // Change the draw call parameters to reflect the changed index buffer
+      FirstIndex = 0;
+    }
+  }
+
 
   void D3D9DeviceEx::EmitCsChunk(DxvkCsChunkRef&& chunk) {
     m_csSeqNum = m_csThread.dispatchChunk(std::move(chunk));
@@ -6454,7 +6631,7 @@ namespace dxvk {
   }
 
 
-  void D3D9DeviceEx::PrepareDraw(D3DPRIMITIVETYPE PrimitiveType, bool UploadBuffers) {
+  void D3D9DeviceEx::PrepareDraw(D3DPRIMITIVETYPE PrimitiveType, bool UploadVBOs, bool UploadIBO) {
     if (unlikely(m_activeHazardsRT != 0 || m_activeHazardsDS != 0))
       MarkRenderHazards();
 
@@ -6467,7 +6644,7 @@ namespace dxvk {
 
     for (uint32_t i = 0; i < caps::MaxStreams; i++) {
       auto* vbo = GetCommonBuffer(m_state.vertexBuffers[i].vertexBuffer);
-      if (vbo != nullptr && vbo->NeedsUpload() && UploadBuffers)
+      if (vbo != nullptr && vbo->NeedsUpload() && UploadVBOs)
         FlushBuffer(vbo);
     }
 
@@ -6483,7 +6660,7 @@ namespace dxvk {
       GenerateTextureMips(texturesToGen);
 
     auto* ibo = GetCommonBuffer(m_state.indices);
-    if (ibo != nullptr && ibo->NeedsUpload() && UploadBuffers)
+    if (ibo != nullptr && ibo->NeedsUpload() && UploadIBO)
       FlushBuffer(ibo);
 
     UpdateFog();
@@ -6616,6 +6793,19 @@ namespace dxvk {
     }
 
     BindSpecConstants();
+
+    if (unlikely(m_flags.test(D3D9DeviceFlag::DirtyVertexBuffers) && UploadVBOs)) {
+      for (uint32_t i = 0; i < caps::MaxStreams; i++) {
+        const D3D9VBO& vbo = m_state.vertexBuffers[i];
+        BindVertexBuffer(i, vbo.vertexBuffer.ptr(), vbo.offset, vbo.stride);
+      }
+      m_flags.clr(D3D9DeviceFlag::DirtyVertexBuffers);
+    }
+
+    if (unlikely(m_flags.test(D3D9DeviceFlag::DirtyIndexBuffer) && UploadIBO)) {
+      BindIndices();
+      m_flags.clr(D3D9DeviceFlag::DirtyIndexBuffer);
+    }
   }
 
 
diff --git a/src/d3d9/d3d9_device.h b/src/d3d9/d3d9_device.h
index b58d2910e..2fc74c836 100644
--- a/src/d3d9/d3d9_device.h
+++ b/src/d3d9/d3d9_device.h
@@ -66,6 +66,8 @@ namespace dxvk {
     DirtyInputLayout,
     DirtyViewportScissor,
     DirtyMultiSampleState,
+    DirtyVertexBuffers,
+    DirtyIndexBuffer,
 
     DirtyFogState,
     DirtyFogColor,
@@ -764,6 +766,24 @@ namespace dxvk {
     HRESULT UnlockBuffer(
             D3D9CommonBuffer*       pResource);
 
+    /**
+     * @brief Uploads data from D3DPOOL_SYSTEMMEM + D3DUSAGE_DYNAMIC buffers and binds the temporary buffers.
+     *
+     * @param FirstVertexIndex The first vertex
+     * @param NumVertices The number of vertices that are accessed. If this is 0, the vertex buffer binding will not be modified.
+     * @param FirstIndex The first index
+     * @param NumIndices The number of indices that will be drawn. If this is 0, the index buffer binding will not be modified.
+     */
+    void UploadDynamicSysmemBuffers(
+            UINT&                   FirstVertexIndex,
+            UINT                    NumVertices,
+            UINT&                   FirstIndex,
+            UINT                    NumIndices,
+            INT&                    BaseVertexIndex,
+            bool*                   pDynamicVBOs,
+            bool*                   pDynamicIBO);
+
+
     void SetupFPU();
 
     int64_t DetermineInitialTextureMemory();
@@ -895,7 +915,7 @@ namespace dxvk {
 
     uint32_t GetInstanceCount() const;
 
-    void PrepareDraw(D3DPRIMITIVETYPE PrimitiveType, bool UploadBuffers);
+    void PrepareDraw(D3DPRIMITIVETYPE PrimitiveType, bool UploadVBOs, bool UploadIBO);
 
     template
     void BindShader(
@@ -1063,7 +1083,7 @@ namespace dxvk {
     }
 
     inline uint32_t GetUPBufferSize(uint32_t vertexCount, uint32_t stride) {
-      return (vertexCount - 1) * stride + std::max(m_state.vertexDecl->GetSize(), stride);
+      return (vertexCount - 1) * stride + std::max(m_state.vertexDecl->GetSize(0), stride);
     }
 
     inline void FillUPVertexBuffer(void* buffer, const void* userData, uint32_t dataSize, uint32_t bufferSize) {
diff --git a/src/d3d9/d3d9_swvp_emu.cpp b/src/d3d9/d3d9_swvp_emu.cpp
index 210edf01f..213bd2790 100644
--- a/src/d3d9/d3d9_swvp_emu.cpp
+++ b/src/d3d9/d3d9_swvp_emu.cpp
@@ -144,7 +144,7 @@ namespace dxvk {
       uint32_t primitiveId = m_module.opLoad(uint_t, primitiveIdPtr);
 
       // The size of any given vertex
-      uint32_t vertexSize = m_module.constu32(pDecl->GetSize() / sizeof(uint32_t));
+      uint32_t vertexSize = m_module.constu32(pDecl->GetSize(0) / sizeof(uint32_t));
 
       //The offset of this vertex from the beginning of the buffer
       uint32_t thisVertexOffset = m_module.opIMul(uint_t, vertexSize, primitiveId);
diff --git a/src/d3d9/d3d9_vertex_declaration.cpp b/src/d3d9/d3d9_vertex_declaration.cpp
index 58c43be43..be9cd25e8 100644
--- a/src/d3d9/d3d9_vertex_declaration.cpp
+++ b/src/d3d9/d3d9_vertex_declaration.cpp
@@ -354,8 +354,8 @@ namespace dxvk {
 
   void D3D9VertexDecl::Classify() {
     for (const auto& element : m_elements) {
-      if (element.Stream == 0 && element.Type != D3DDECLTYPE_UNUSED)
-        m_size = std::max(m_size, element.Offset + GetDecltypeSize(D3DDECLTYPE(element.Type)));
+      if (element.Stream < m_sizes.size() && element.Type != D3DDECLTYPE_UNUSED)
+        m_sizes[element.Stream] = std::max(m_sizes[element.Stream], element.Offset + GetDecltypeSize(D3DDECLTYPE(element.Type)));
 
       if (element.Usage == D3DDECLUSAGE_COLOR && element.UsageIndex == 0)
         m_flags.set(D3D9VertexDeclFlag::HasColor0);
diff --git a/src/d3d9/d3d9_vertex_declaration.h b/src/d3d9/d3d9_vertex_declaration.h
index 82b182756..404555cc8 100644
--- a/src/d3d9/d3d9_vertex_declaration.h
+++ b/src/d3d9/d3d9_vertex_declaration.h
@@ -50,8 +50,8 @@ namespace dxvk {
       return m_elements;
     }
 
-    UINT GetSize() const {
-      return m_size;
+    UINT GetSize(UINT Stream) const {
+      return m_sizes[Stream];
     }
 
     bool TestFlag(D3D9VertexDeclFlag flag) const {
@@ -94,8 +94,7 @@ namespace dxvk {
 
     uint32_t                       m_texcoordMask = 0;
 
-    // The size of Stream 0. That's all we care about.
-    uint32_t                       m_size = 0;
+    std::array<uint32_t, caps::MaxStreams> m_sizes = {};
 
   };
 