1
0
mirror of https://github.com/doitsujin/dxvk.git synced 2025-01-30 11:52:11 +01:00

[d3d9] Only use staging buffers for uploads once we've stalled on the resource

This commit is contained in:
Robin Kertels 2021-06-19 02:49:24 +02:00 committed by Joshie
parent b83261b759
commit 8eeff90e0a
3 changed files with 59 additions and 16 deletions

View File

@ -173,6 +173,15 @@ namespace dxvk {
*/
bool NeedsUpload() {
  // Buffers in D3DPOOL_DEFAULT never report a pending upload here.
  if (m_desc.Pool == D3DPOOL_DEFAULT)
    return false;

  // For every other pool, an upload is pending while the
  // tracked dirty range is non-degenerate.
  return !m_dirtyRange.IsDegenerate();
}
/**
 * \brief Queries whether staging buffer uploads are enabled
 * \returns Current value of the \c m_uploadUsingStaging flag
 */
bool DoesStagingBufferUploads() const {
  return m_uploadUsingStaging;
}
/**
 * \brief Turns on staging buffer uploads for this buffer
 *
 * Only takes effect when the map mode is
 * \c D3D9_COMMON_BUFFER_MAP_MODE_BUFFER. For any
 * other map mode the flag is left untouched.
 */
void EnableStagingBufferUploads() {
  const bool mapsToBuffer = GetMapMode() == D3D9_COMMON_BUFFER_MAP_MODE_BUFFER;

  if (mapsToBuffer)
    m_uploadUsingStaging = true;
}
void PreLoad();
private:
@ -196,6 +205,7 @@ namespace dxvk {
const D3D9_BUFFER_DESC m_desc;
DWORD m_mapFlags;
bool m_wasWrittenByGPU = false;
bool m_uploadUsingStaging = false;
Rc<DxvkBuffer> m_buffer;
Rc<DxvkBuffer> m_stagingBuffer;

View File

@ -358,6 +358,11 @@ namespace dxvk {
// Reads the per-subresource bit of m_needsUpload.
bool NeedsUpload(UINT Subresource) const {
  return m_needsUpload.get(Subresource);
}
// True if at least one bit of m_needsUpload is set.
bool NeedsAnyUpload() {
  return m_needsUpload.any();
}
// Resets every bit of m_needsUpload.
void ClearNeedsUpload() {
  m_needsUpload.clearAll();
}
/**
 * \brief Queries whether a subresource uploads through a staging buffer
 *
 * \param [in] Subresource Subresource index
 * \returns The bit stored for \c Subresource in \c m_uploadUsingStaging
 */
bool DoesStagingBufferUploads(UINT Subresource) const {
  return m_uploadUsingStaging.get(Subresource);
}
/**
 * \brief Turns on staging buffer uploads for one subresource
 *
 * Sets the bit for \c Subresource in \c m_uploadUsingStaging.
 * \param [in] Subresource Subresource index
 */
void EnableStagingBufferUploads(UINT Subresource) {
  constexpr bool enabled = true;
  m_uploadUsingStaging.set(Subresource, enabled);
}
// Stores the given value in the m_needsMipGen flag.
void SetNeedsMipGen(bool value) {
  m_needsMipGen = value;
}
// Returns the current value of the m_needsMipGen flag.
bool NeedsMipGen() const {
  return m_needsMipGen;
}
@ -442,6 +447,8 @@ namespace dxvk {
D3D9SubresourceBitset m_needsUpload = { };
D3D9SubresourceBitset m_uploadUsingStaging = { };
DWORD m_exposedMipLevels = 0;
bool m_needsMipGen = false;

View File

@ -4114,12 +4114,17 @@ namespace dxvk {
// calling app promises not to overwrite data that is in use
// or is reading. Remember! This will only trigger for MANAGED resources
// that cannot get affected by GPU, therefore readonly is A-OK for NOT waiting.
const bool skipWait = scratch || managed || (systemmem && !wasWrittenByGPU);
const bool usesStagingBuffer = pResource->DoesStagingBufferUploads(Subresource);
const bool skipWait = (scratch || managed || (systemmem && !wasWrittenByGPU))
&& (usesStagingBuffer || readOnly);
if (alloced) {
std::memset(physSlice.mapPtr, 0, physSlice.length);
}
else if (!skipWait) {
if (!(Flags & D3DLOCK_DONOTWAIT) && !WaitForResource(mappedBuffer, D3DLOCK_DONOTWAIT))
pResource->EnableStagingBufferUploads(Subresource);
if (!WaitForResource(mappedBuffer, Flags))
return D3DERR_WASSTILLDRAWING;
}
@ -4354,31 +4359,41 @@ namespace dxvk {
scaledAlignedBoxExtent.height = std::min<uint32_t>(texLevelExtent.height - scaledBoxOffset.y, scaledAlignedBoxExtent.height);
scaledAlignedBoxExtent.depth = std::min<uint32_t>(texLevelExtent.depth - scaledBoxOffset.z, scaledAlignedBoxExtent.depth);
VkDeviceSize dirtySize = scaledBoxExtentBlockCount.width * scaledBoxExtentBlockCount.height * scaledBoxExtentBlockCount.depth * formatInfo->elementSize;
D3D9BufferSlice slice = AllocTempBuffer<false>(dirtySize);
VkOffset3D boxOffsetBlockCount = util::computeBlockOffset(scaledBoxOffset, formatInfo->blockSize);
VkDeviceSize copySrcOffset = (boxOffsetBlockCount.z * texLevelExtentBlockCount.height * texLevelExtentBlockCount.width
+ boxOffsetBlockCount.y * texLevelExtentBlockCount.width
+ boxOffsetBlockCount.x)
* formatInfo->elementSize;
VkDeviceSize pitch = align(texLevelExtentBlockCount.width * formatInfo->elementSize, 4);
void* srcData = reinterpret_cast<uint8_t*>(srcSlice.mapPtr) + copySrcOffset;
util::packImageData(
slice.mapPtr, srcData, scaledBoxExtentBlockCount, formatInfo->elementSize,
pitch, pitch * texLevelExtentBlockCount.height);
VkDeviceSize rowAlignment = 0;
DxvkBufferSlice copySrcSlice;
if (pResource->DoesStagingBufferUploads(Subresource)) {
VkDeviceSize dirtySize = scaledBoxExtentBlockCount.width * scaledBoxExtentBlockCount.height * scaledBoxExtentBlockCount.depth * formatInfo->elementSize;
VkDeviceSize pitch = align(texLevelExtentBlockCount.width * formatInfo->elementSize, 4);
D3D9BufferSlice slice = AllocTempBuffer<false>(dirtySize);
copySrcSlice = slice.slice;
void* srcData = reinterpret_cast<uint8_t*>(srcSlice.mapPtr) + copySrcOffset;
util::packImageData(
slice.mapPtr, srcData, scaledBoxExtentBlockCount, formatInfo->elementSize,
pitch, pitch * texLevelExtentBlockCount.height);
} else {
copySrcSlice = DxvkBufferSlice(pResource->GetBuffer(Subresource), copySrcOffset, srcSlice.length);
rowAlignment = 4;
}
EmitCs([
cSrcSlice = slice.slice,
cSrcSlice = std::move(copySrcSlice),
cDstImage = image,
cDstLayers = dstLayers,
cDstLevelExtent = scaledAlignedBoxExtent,
cOffset = scaledBoxOffset
cOffset = scaledBoxOffset,
cRowAlignment = rowAlignment
] (DxvkContext* ctx) {
ctx->copyBufferToImage(
cDstImage, cDstLayers,
cOffset, cDstLevelExtent,
cSrcSlice.buffer(), cSrcSlice.offset(), 0, 0);
cSrcSlice.buffer(), cSrcSlice.offset(),
cRowAlignment, 0);
});
}
else {
@ -4389,6 +4404,7 @@ namespace dxvk {
// TODO: PLEASE CLEAN ME
texLevelExtentBlockCount.height *= std::min(convertFormat.PlaneCount, 2u);
// The converter cannot handle the 4-aligned pitch, so we always repack into a staging buffer.
D3D9BufferSlice slice = AllocTempBuffer<false>(srcSlice.length);
VkDeviceSize pitch = align(texLevelExtentBlockCount.width * formatInfo->elementSize, 4);
@ -4508,8 +4524,12 @@ namespace dxvk {
const bool readOnly = Flags & D3DLOCK_READONLY;
const bool noOverlap = !pResource->GPUReadingRange().Overlaps(lockRange);
const bool noOverwrite = Flags & D3DLOCK_NOOVERWRITE;
const bool skipWait = (!wasWrittenByGPU && (readOnly || noOverlap)) || noOverwrite;
const bool usesStagingBuffer = pResource->DoesStagingBufferUploads();
const bool skipWait = (!wasWrittenByGPU && (usesStagingBuffer || readOnly || noOverlap)) || noOverwrite;
if (!skipWait) {
if (!(Flags & D3DLOCK_DONOTWAIT) && !WaitForResource(mappingBuffer, D3DLOCK_DONOTWAIT))
pResource->EnableStagingBufferUploads();
if (!WaitForResource(mappingBuffer, Flags))
return D3DERR_WASSTILLDRAWING;
@ -4545,13 +4565,19 @@ namespace dxvk {
D3D9Range& range = pResource->DirtyRange();
D3D9BufferSlice slice = AllocTempBuffer<false>(range.max - range.min);
void* srcData = reinterpret_cast<uint8_t*>(srcSlice.mapPtr) + range.min;
memcpy(slice.mapPtr, srcData, range.max - range.min);
DxvkBufferSlice copySrcSlice;
if (pResource->DoesStagingBufferUploads()) {
D3D9BufferSlice slice = AllocTempBuffer<false>(range.max - range.min);
copySrcSlice = slice.slice;
void* srcData = reinterpret_cast<uint8_t*>(srcSlice.mapPtr) + range.min;
memcpy(slice.mapPtr, srcData, range.max - range.min);
} else {
copySrcSlice = DxvkBufferSlice(pResource->GetBuffer<D3D9_COMMON_BUFFER_TYPE_MAPPING>(), range.min, range.max - range.min);
}
EmitCs([
cDstSlice = dstBuffer,
cSrcSlice = slice.slice,
cSrcSlice = copySrcSlice,
cDstOffset = range.min,
cLength = range.max - range.min
] (DxvkContext* ctx) {