#include "d3d11_cmdlist.h"
#include "d3d11_context_imm.h"
#include "d3d11_device.h"
#include "d3d11_fence.h"
#include "d3d11_texture.h"

#include "../util/util_win32_compat.h"

// Flush tuning constants. NOTE(review): none of these are referenced
// in this translation unit as shown; confirm whether they are still needed.
constexpr static uint32_t MinFlushIntervalUs = 750;
constexpr static uint32_t IncFlushIntervalUs = 250;
constexpr static uint32_t MaxPendingSubmits  = 6;

namespace dxvk {

  // Sets up the CS thread, fences and helper interfaces, then begins
  // command recording on the CS thread and applies the default state.
  D3D11ImmediateContext::D3D11ImmediateContext(
          D3D11Device*    pParent,
    const Rc<DxvkDevice>& Device)
  : D3D11CommonContext<D3D11ImmediateContext>(pParent, Device, 0, DxvkCsChunkFlag::SingleUse),
    m_csThread(Device, Device->createContext()),
    m_submissionFence(new sync::CallbackFence()),
    m_flushTracker(GetMaxFlushType(pParent, Device)),
    m_stagingBufferFence(new sync::Fence(0)),
    m_multithread(this, false, pParent->GetOptions()->enableContextLock),
    m_videoContext(this, Device) {
    EmitCs([
      cDevice               = m_device,
      cBarrierControlFlags  = pParent->GetOptionsBarrierControlFlags()
    ] (DxvkContext* ctx) {
      ctx->beginRecording(cDevice->createCommandList());
      ctx->setBarrierControl(cBarrierControlFlags);
    });

    // Stall here so that external submissions to the
    // CS thread can actually access the command list
    SynchronizeCsThread(DxvkCsThread::SynchronizeAll);

    ClearState();
  }


  // Flushes all pending work and waits for the CS thread and the
  // device, unless the module is currently being detached.
  D3D11ImmediateContext::~D3D11ImmediateContext() {
    // Avoids hanging when in this state, see comment
    // in DxvkDevice::~DxvkDevice.
    if (this_thread::isInModuleDetachment())
      return;

    ExecuteFlush(GpuFlushType::ExplicitFlush, nullptr, true);
    SynchronizeCsThread(DxvkCsThread::SynchronizeAll);
    SynchronizeDevice();
  }


  // Exposes the multithread and video context helper objects and
  // forwards every other interface request to the common context.
  HRESULT STDMETHODCALLTYPE D3D11ImmediateContext::QueryInterface(REFIID riid, void** ppvObject) {
    if (riid == __uuidof(ID3D10Multithread)) {
      *ppvObject = ref(&m_multithread);
      return S_OK;
    }

    if (riid == __uuidof(ID3D11VideoContext)) {
      *ppvObject = ref(&m_videoContext);
      return S_OK;
    }

    return D3D11CommonContext<D3D11ImmediateContext>::QueryInterface(riid, ppvObject);
  }


  // Reads back query data. Returns E_INVALIDARG for a null query, a
  // null data pointer with non-zero size, or a mismatching data size;
  // otherwise returns whatever the query object reports.
  HRESULT STDMETHODCALLTYPE D3D11ImmediateContext::GetData(
          ID3D11Asynchronous*               pAsync,
          void*                             pData,
          UINT                              DataSize,
          UINT                              GetDataFlags) {
    if (!pAsync || (DataSize && !pData))
      return E_INVALIDARG;

    // Check whether the data size is actually correct
    if (DataSize && DataSize != pAsync->GetDataSize())
      return E_INVALIDARG;

    // Passing a non-null pData is actually allowed if
    // DataSize is 0, but we should ignore that pointer
    pData = DataSize ? pData : nullptr;

    // Get query status directly from the query object
    auto query = static_cast<D3D11Query*>(pAsync);
    HRESULT hr = query->GetData(pData, GetDataFlags);

    // If we're likely going to spin on the asynchronous object,
    // flush the context so that we're keeping the GPU busy.
    if (hr == S_FALSE) {
      // Don't mark the event query as stalling if the app does
      // not intend to spin on it. This reduces flushes on End.
      if (!(GetDataFlags & D3D11_ASYNC_GETDATA_DONOTFLUSH))
        query->NotifyStall();

      // Ignore the DONOTFLUSH flag here as some games will spin
      // on queries without ever flushing the context otherwise.
      D3D10DeviceLock lock = LockContext();
      ConsiderFlush(GpuFlushType::ImplicitSynchronization);
    }

    return hr;
  }


  // Begins a query. Does nothing for a null query or
  // if the query object rejects the begin operation.
  void STDMETHODCALLTYPE D3D11ImmediateContext::Begin(ID3D11Asynchronous* pAsync) {
    D3D10DeviceLock lock = LockContext();

    if (unlikely(!pAsync))
      return;

    auto query = static_cast<D3D11Query*>(pAsync);

    if (unlikely(!query->DoBegin()))
      return;

    EmitCs([cQuery = Com<D3D11Query, false>(query)]
    (DxvkContext* ctx) {
      cQuery->Begin(ctx);
    });
  }


  // Ends a query and, for queries that track stalls,
  // flushes or considers flushing the context.
  void STDMETHODCALLTYPE D3D11ImmediateContext::End(ID3D11Asynchronous* pAsync) {
    D3D10DeviceLock lock = LockContext();

    if (unlikely(!pAsync))
      return;

    auto query = static_cast<D3D11Query*>(pAsync);

    // If DoEnd reports failure, emit a Begin first so
    // that the End below has a matching Begin.
    if (unlikely(!query->DoEnd())) {
      EmitCs([cQuery = Com<D3D11Query, false>(query)]
      (DxvkContext* ctx) {
        cQuery->Begin(ctx);
      });
    }

    EmitCs([cQuery = Com<D3D11Query, false>(query)]
    (DxvkContext* ctx) {
      cQuery->End(ctx);
    });

    if (unlikely(query->TrackStalls())) {
      query->NotifyEnd();

      if (query->IsStalling())
        ExecuteFlush(GpuFlushType::ImplicitSynchronization, nullptr, false);
      else if (query->IsEvent())
        ConsiderFlush(GpuFlushType::ImplicitStrongHint);
    }
  }


  // Performs an explicit flush of all pending commands.
  void STDMETHODCALLTYPE D3D11ImmediateContext::Flush() {
    D3D10DeviceLock lock = LockContext();

    ExecuteFlush(GpuFlushType::ExplicitFlush, nullptr, true);
  }


  // Performs an explicit flush and registers hEvent with the flush.
  // ContextType is not used by this implementation.
  void STDMETHODCALLTYPE D3D11ImmediateContext::Flush1(
          D3D11_CONTEXT_TYPE          ContextType,
          HANDLE                      hEvent) {
    D3D10DeviceLock lock = LockContext();

    ExecuteFlush(GpuFlushType::ExplicitFlush, hEvent, true);
  }


  // Records a fence signal operation and flushes afterwards.
  // Returns E_INVALIDARG if pFence is null.
  HRESULT STDMETHODCALLTYPE D3D11ImmediateContext::Signal(
          ID3D11Fence*                pFence,
          UINT64                      Value) {
    D3D10DeviceLock lock = LockContext();
    auto fence = static_cast<D3D11Fence*>(pFence);

    if (!fence)
      return E_INVALIDARG;

    EmitCs([
      cFence = fence->GetFence(),
      cValue = Value
    ] (DxvkContext* ctx) {
      ctx->signalFence(cFence, cValue);
    });

    ExecuteFlush(GpuFlushType::ExplicitFlush, nullptr, true);
    return S_OK;
  }


  // Flushes pending commands, then records a fence wait operation.
  // Returns E_INVALIDARG if pFence is null.
  HRESULT STDMETHODCALLTYPE D3D11ImmediateContext::Wait(
          ID3D11Fence*                pFence,
          UINT64                      Value) {
    D3D10DeviceLock lock = LockContext();
    auto fence = static_cast<D3D11Fence*>(pFence);

    if (!fence)
      return E_INVALIDARG;

    ExecuteFlush(GpuFlushType::ExplicitFlush, nullptr, true);

    EmitCs([
      cFence = fence->GetFence(),
      cValue = Value
    ] (DxvkContext* ctx) {
      ctx->waitFence(cFence, cValue);
    });

    return S_OK;
  }


  // Submits the chunks of a recorded command list to the CS thread and
  // afterwards either restores or resets the immediate context state.
  void STDMETHODCALLTYPE D3D11ImmediateContext::ExecuteCommandList(
          ID3D11CommandList*  pCommandList,
          BOOL                RestoreContextState) {
    D3D10DeviceLock lock = LockContext();

    auto commandList = static_cast<D3D11CommandList*>(pCommandList);

    // Reset dirty binding tracking before submitting any CS chunks.
    // This is needed so that any submission that might occur during
    // this call does not disrupt bindings set by the deferred context.
    ResetDirtyTracking();

    // Clear state so that the command list can't observe any
    // current context state. The command list itself will clean
    // up after execution to ensure that no state changes done
    // by the command list are visible to the immediate context.
    ResetCommandListState();

    // Flush any outstanding commands so that
    // we don't mess up the execution order
    FlushCsChunk();

    // As an optimization, flush everything if the
    // number of pending draw calls is high enough.
    ConsiderFlush(GpuFlushType::ImplicitWeakHint);

    // Dispatch command list to the CS thread
    commandList->EmitToCsThread([this] (DxvkCsChunkRef&& chunk, GpuFlushType flushType) {
      EmitCsChunk(std::move(chunk));

      // Return the sequence number from before the flush since
      // that is actually going to be needed for resource tracking
      uint64_t csSeqNum = m_csSeqNum;

      // Consider a flush after every chunk in case the app
      // submits a very large command list or the GPU is idle
      ConsiderFlush(flushType);
      return csSeqNum;
    });

    // Restore the immediate context's state
    if (RestoreContextState)
      RestoreCommandListState();
    else
      ResetContextState();
  }


  // Not supported on the immediate context: clears the
  // output pointer, logs an error and fails the call.
  HRESULT STDMETHODCALLTYPE D3D11ImmediateContext::FinishCommandList(
          BOOL                RestoreDeferredContextState,
          ID3D11CommandList   **ppCommandList) {
    InitReturnPtr(ppCommandList);

    Logger::err("D3D11: FinishCommandList called on immediate context");
    return DXGI_ERROR_INVALID_CALL;
  }


  // Maps a resource for CPU access, dispatching to MapBuffer or
  // MapImage based on the resource dimension. Returns E_INVALIDARG
  // if pResource is null.
  HRESULT STDMETHODCALLTYPE D3D11ImmediateContext::Map(
          ID3D11Resource*             pResource,
          UINT                        Subresource,
          D3D11_MAP                   MapType,
          UINT                        MapFlags,
          D3D11_MAPPED_SUBRESOURCE*   pMappedResource) {
    D3D10DeviceLock lock = LockContext();

    if (unlikely(!pResource))
      return E_INVALIDARG;

    D3D11_RESOURCE_DIMENSION resourceDim = D3D11_RESOURCE_DIMENSION_UNKNOWN;
    pResource->GetType(&resourceDim);

    if (likely(resourceDim == D3D11_RESOURCE_DIMENSION_BUFFER)) {
      return MapBuffer(
        static_cast<D3D11Buffer*>(pResource),
        MapType, MapFlags, pMappedResource);
    } else {
      return MapImage(GetCommonTexture(pResource),
        Subresource, MapType, MapFlags, pMappedResource);
    }
  }


  // Unmaps a resource. Only image resources need any work here.
  void STDMETHODCALLTYPE D3D11ImmediateContext::Unmap(
          ID3D11Resource*             pResource,
          UINT                        Subresource) {
    // Since it is very uncommon for images to be mapped compared
    // to buffers, we count the currently mapped images in order
    // to avoid a virtual method call in the common case.
    if (unlikely(m_mappedImageCount > 0)) {
      D3D11_RESOURCE_DIMENSION resourceDim = D3D11_RESOURCE_DIMENSION_UNKNOWN;
      pResource->GetType(&resourceDim);

      if (resourceDim != D3D11_RESOURCE_DIMENSION_BUFFER) {
        D3D10DeviceLock lock = LockContext();
        UnmapImage(GetCommonTexture(pResource), Subresource);
      }
    }
  }


  // Maps a buffer. Returns E_INVALIDARG for a null pMappedResource or
  // an unmappable buffer, DXGI_ERROR_WAS_STILL_DRAWING if waiting for
  // the resource was refused, and S_OK with the pointer and pitches
  // filled in otherwise.
  HRESULT D3D11ImmediateContext::MapBuffer(
          D3D11Buffer*                pResource,
          D3D11_MAP                   MapType,
          UINT                        MapFlags,
          D3D11_MAPPED_SUBRESOURCE*   pMappedResource) {
    if (unlikely(!pMappedResource))
      return E_INVALIDARG;

    if (unlikely(pResource->GetMapMode() == D3D11_COMMON_BUFFER_MAP_MODE_NONE)) {
      Logger::err("D3D11: Cannot map a device-local buffer");
      pMappedResource->pData = nullptr;
      return E_INVALIDARG;
    }

    VkDeviceSize bufferSize = pResource->Desc()->ByteWidth;

    if (likely(MapType == D3D11_MAP_WRITE_DISCARD)) {
      // Allocate a new backing slice for the buffer and set
      // it as the 'new' mapped slice. This assumes that the
      // only way to invalidate a buffer is by mapping it.
      auto bufferSlice = pResource->DiscardSlice(&m_allocationCache);
      pMappedResource->pData      = bufferSlice->mapPtr();
      pMappedResource->RowPitch   = bufferSize;
      pMappedResource->DepthPitch = bufferSize;

      EmitCs([
        cBuffer      = pResource->GetBuffer(),
        cBufferSlice = std::move(bufferSlice)
      ] (DxvkContext* ctx) mutable {
        ctx->invalidateBuffer(cBuffer, std::move(cBufferSlice));
      });

      // Ignore small buffers here. These are often updated per
      // draw and won't contribute much to memory waste anyway.
      if (unlikely(bufferSize > DxvkPageAllocator::PageSize))
        ThrottleDiscard(bufferSize);

      return S_OK;
    } else if (likely(MapType == D3D11_MAP_WRITE_NO_OVERWRITE)) {
      // Put this on a fast path without any extra checks since it's
      // a somewhat desired method to partially update large buffers
      pMappedResource->pData      = pResource->GetMapPtr();
      pMappedResource->RowPitch   = bufferSize;
      pMappedResource->DepthPitch = bufferSize;
      return S_OK;
    } else {
      // Quantum Break likes using MAP_WRITE on resources which would force
      // us to synchronize with the GPU multiple times per frame. In those
      // situations, if there are no pending GPU writes to the resource, we
      // can promote it to MAP_WRITE_DISCARD, but preserve the data by doing
      // a CPU copy from the previous buffer slice, to avoid the sync point.
      bool doInvalidatePreserve = false;

      auto buffer = pResource->GetBuffer();
      auto sequenceNumber = pResource->GetSequenceNumber();

      if (MapType != D3D11_MAP_READ && !MapFlags && bufferSize <= D3D11Initializer::MaxMemoryPerSubmission) {
        SynchronizeCsThread(sequenceNumber);

        bool hasWoAccess = buffer->isInUse(DxvkAccess::Write);
        bool hasRwAccess = buffer->isInUse(DxvkAccess::Read);

        if (hasRwAccess && !hasWoAccess) {
          // Uncached reads can be so slow that a GPU sync may actually be faster
          doInvalidatePreserve = buffer->memFlags() & VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
        }
      }

      if (doInvalidatePreserve) {
        auto srcPtr = pResource->GetMapPtr();

        auto dstSlice = pResource->DiscardSlice(nullptr);
        auto dstPtr = dstSlice->mapPtr();

        EmitCs([
          cBuffer      = std::move(buffer),
          cBufferSlice = std::move(dstSlice)
        ] (DxvkContext* ctx) mutable {
          ctx->invalidateBuffer(cBuffer, std::move(cBufferSlice));
        });

        std::memcpy(dstPtr, srcPtr, bufferSize);

        pMappedResource->pData      = dstPtr;
        pMappedResource->RowPitch   = bufferSize;
        pMappedResource->DepthPitch = bufferSize;

        ThrottleDiscard(bufferSize);
        return S_OK;
      } else {
        if (!WaitForResource(*buffer, sequenceNumber, MapType, MapFlags)) {
          pMappedResource->pData = nullptr;
          return DXGI_ERROR_WAS_STILL_DRAWING;
        }

        pMappedResource->pData      = pResource->GetMapPtr();
        pMappedResource->RowPitch   = bufferSize;
        pMappedResource->DepthPitch = bufferSize;
        return S_OK;
      }
    }
  }


  // Maps a texture subresource. Validates the subresource index and
  // the map type against the texture's usage and CPU access flags,
  // synchronizes with or discards the backing storage depending on
  // the map mode, and fills in pMappedResource if it is non-null.
  HRESULT D3D11ImmediateContext::MapImage(
          D3D11CommonTexture*         pResource,
          UINT                        Subresource,
          D3D11_MAP                   MapType,
          UINT                        MapFlags,
          D3D11_MAPPED_SUBRESOURCE*   pMappedResource) {
    auto mapMode = pResource->GetMapMode();

    if (pMappedResource)
      pMappedResource->pData = nullptr;

    if (unlikely(Subresource >= pResource->CountSubresources()))
      return E_INVALIDARG;

    switch (MapType) {
      case D3D11_MAP_READ: {
        if (!(pResource->Desc()->CPUAccessFlags & D3D11_CPU_ACCESS_READ))
          return E_INVALIDARG;
      } break;

      case D3D11_MAP_READ_WRITE: {
        if (!(pResource->Desc()->CPUAccessFlags & D3D11_CPU_ACCESS_READ)
         || !(pResource->Desc()->CPUAccessFlags & D3D11_CPU_ACCESS_WRITE))
          return E_INVALIDARG;
      } break;

      case D3D11_MAP_WRITE: {
        if (!(pResource->Desc()->CPUAccessFlags & D3D11_CPU_ACCESS_WRITE)
         || (pResource->Desc()->Usage == D3D11_USAGE_DYNAMIC))
          return E_INVALIDARG;
      } break;

      case D3D11_MAP_WRITE_DISCARD: {
        if (!(pResource->Desc()->CPUAccessFlags & D3D11_CPU_ACCESS_WRITE)
         || pResource->Desc()->Usage != D3D11_USAGE_DYNAMIC)
          return E_INVALIDARG;
      } break;

      case D3D11_MAP_WRITE_NO_OVERWRITE: {
        // NO_OVERWRITE is explicitly banned for dynamic images
        if (!(pResource->Desc()->CPUAccessFlags & D3D11_CPU_ACCESS_WRITE)
         || (pResource->Desc()->Usage != D3D11_USAGE_DEFAULT))
          return E_INVALIDARG;
      } break;
    }

    if (likely(pMappedResource != nullptr)) {
      // Resources with an unknown memory layout cannot return a pointer
      if (pResource->Desc()->Usage         == D3D11_USAGE_DEFAULT
       && pResource->Desc()->TextureLayout == D3D11_TEXTURE_LAYOUT_UNDEFINED)
        return E_INVALIDARG;
    } else {
      if (pResource->Desc()->Usage != D3D11_USAGE_DEFAULT)
        return E_INVALIDARG;
    }

    VkFormat packedFormat = m_parent->LookupPackedFormat(
      pResource->Desc()->Format, pResource->GetFormatMode()).Format;

    uint64_t sequenceNumber = pResource->GetSequenceNumber(Subresource);

    auto formatInfo = lookupFormatInfo(packedFormat);
    auto layout = pResource->GetSubresourceLayout(formatInfo->aspectMask, Subresource);

    if (mapMode == D3D11_COMMON_TEXTURE_MAP_MODE_DIRECT) {
      Rc<DxvkImage> mappedImage = pResource->GetImage();

      if (MapType == D3D11_MAP_WRITE_DISCARD) {
        EmitCs([
          cImage    = std::move(mappedImage),
          cStorage  = pResource->DiscardStorage()
        ] (DxvkContext* ctx) {
          ctx->invalidateImage(cImage, Rc<DxvkResourceAllocation>(cStorage));
          ctx->initImage(cImage, cImage->getAvailableSubresources(), VK_IMAGE_LAYOUT_PREINITIALIZED);
        });

        ThrottleDiscard(layout.Size);
      } else if (MapType != D3D11_MAP_WRITE_NO_OVERWRITE) {
        if (!WaitForResource(*mappedImage, sequenceNumber, MapType, MapFlags))
          return DXGI_ERROR_WAS_STILL_DRAWING;
      }
    } else if (mapMode == D3D11_COMMON_TEXTURE_MAP_MODE_DYNAMIC) {
      // Nothing else to really do here, NotifyMap will ensure that we
      // actually get a staging buffer that isn't currently in use.
      ThrottleDiscard(layout.Size);
    } else {
      Rc<DxvkBuffer> mappedBuffer = pResource->GetMappedBuffer(Subresource);

      constexpr uint32_t DoInvalidate = (1u << 0);
      constexpr uint32_t DoPreserve   = (1u << 1);
      constexpr uint32_t DoWait       = (1u << 2);
      uint32_t doFlags;

      if (mapMode == D3D11_COMMON_TEXTURE_MAP_MODE_BUFFER) {
        // If the image can be written by the GPU, we need to update the
        // mapped staging buffer to reflect the current image contents.
        if (pResource->Desc()->Usage == D3D11_USAGE_DEFAULT) {
          bool needsReadback = !pResource->NeedsDirtyRegionTracking();

          needsReadback |= MapType == D3D11_MAP_READ
                        || MapType == D3D11_MAP_READ_WRITE;

          if (needsReadback)
            ReadbackImageBuffer(pResource, Subresource);
        }
      }

      if (MapType == D3D11_MAP_READ) {
        // Reads will not change the image content, so we only need
        // to wait for the GPU to finish writing to the mapped buffer.
        doFlags = DoWait;
      } else if (MapType == D3D11_MAP_WRITE_DISCARD) {
        doFlags = DoInvalidate;

        // If we know for sure that the mapped buffer is currently not
        // in use by the GPU, we don't have to allocate a new slice.
        if (m_csThread.lastSequenceNumber() >= sequenceNumber && !mappedBuffer->isInUse(DxvkAccess::Read))
          doFlags = 0;
      } else if (mapMode == D3D11_COMMON_TEXTURE_MAP_MODE_STAGING && (MapFlags & D3D11_MAP_FLAG_DO_NOT_WAIT)) {
        // Always respect DO_NOT_WAIT for mapped staging images
        doFlags = DoWait;
      } else if (MapType != D3D11_MAP_WRITE_NO_OVERWRITE || mapMode == D3D11_COMMON_TEXTURE_MAP_MODE_BUFFER) {
        // Need to synchronize thread to determine pending GPU accesses
        SynchronizeCsThread(sequenceNumber);

        // Don't implicitly discard very large resources
        // since that might lead to memory issues.
        VkDeviceSize bufferSize = mappedBuffer->info().size;

        if (bufferSize > D3D11Initializer::MaxMemoryPerSubmission) {
          // Don't check access flags, WaitForResource will return
          // early anyway if the resource is currently in use
          doFlags = DoWait;
        } else if (mappedBuffer->isInUse(DxvkAccess::Write)) {
          // There are pending GPU writes, need to wait for those
          doFlags = DoWait;
        } else if (mappedBuffer->isInUse(DxvkAccess::Read)) {
          // All pending GPU accesses are reads, so the buffer data
          // is still current, and we can prevent GPU synchronization
          // by creating a new slice with an exact copy of the data.
          doFlags = DoInvalidate | DoPreserve;
        } else {
          // There are no pending accesses, so we don't need to wait
          doFlags = 0;
        }
      } else {
        // No need to synchronize staging resources with NO_OVERWRITE
        // since the buffer will be used directly.
        doFlags = 0;
      }

      if (doFlags & DoInvalidate) {
        VkDeviceSize bufferSize = mappedBuffer->info().size;

        auto srcSlice = pResource->GetMappedSlice(Subresource);
        auto dstSlice = pResource->DiscardSlice(Subresource);

        auto srcPtr = srcSlice->mapPtr();
        auto dstPtr = dstSlice->mapPtr();

        EmitCs([
          cImageBuffer      = std::move(mappedBuffer),
          cImageBufferSlice = std::move(dstSlice)
        ] (DxvkContext* ctx) mutable {
          ctx->invalidateBuffer(cImageBuffer, std::move(cImageBufferSlice));
        });

        if (doFlags & DoPreserve)
          std::memcpy(dstPtr, srcPtr, bufferSize);

        ThrottleDiscard(bufferSize);
      } else {
        if (doFlags & DoWait) {
          // We cannot respect DO_NOT_WAIT for buffer-mapped resources since
          // our internal copies need to be transparent to the application.
          if (mapMode == D3D11_COMMON_TEXTURE_MAP_MODE_BUFFER)
            MapFlags &= ~D3D11_MAP_FLAG_DO_NOT_WAIT;

          // Wait for mapped buffer to become available
          if (!WaitForResource(*mappedBuffer, sequenceNumber, MapType, MapFlags))
            return DXGI_ERROR_WAS_STILL_DRAWING;
        }
      }
    }

    // Mark the subresource as successfully mapped
    pResource->NotifyMap(Subresource, MapType);

    if (pMappedResource) {
      pMappedResource->pData      = pResource->GetMapPtr(Subresource, layout.Offset);
      pMappedResource->RowPitch   = layout.RowPitch;
      pMappedResource->DepthPitch = layout.DepthPitch;
    }

    m_mappedImageCount += 1;
    return S_OK;
  }


  // Unmaps a texture subresource and uploads written
  // staging data to the image where necessary.
  void D3D11ImmediateContext::UnmapImage(
          D3D11CommonTexture*         pResource,
          UINT                        Subresource) {
    auto mapType = pResource->GetMapType(Subresource);
    auto mapMode = pResource->GetMapMode();

    if (mapType == D3D11CommonTexture::UnmappedSubresource)
      return;

    // Decrement mapped image counter only after making sure
    // the given subresource is actually mapped right now
    m_mappedImageCount -= 1;

    // If the texture has an image as well as a staging buffer,
    // upload the written buffer data to the image
    bool needsUpload = mapType != uint32_t(D3D11_MAP_READ)
      && (mapMode == D3D11_COMMON_TEXTURE_MAP_MODE_BUFFER || mapMode == D3D11_COMMON_TEXTURE_MAP_MODE_DYNAMIC);

    if (needsUpload) {
      if (pResource->NeedsDirtyRegionTracking()) {
        for (uint32_t i = 0; i < pResource->GetDirtyRegionCount(Subresource); i++) {
          D3D11_COMMON_TEXTURE_REGION region = pResource->GetDirtyRegion(Subresource, i);
          UpdateDirtyImageRegion(pResource, Subresource, &region);
        }
      } else {
        UpdateDirtyImageRegion(pResource, Subresource, nullptr);
      }
    }

    // Unmap the subresource. This will implicitly destroy the
    // staging buffer for dynamically mapped images.
    pResource->NotifyUnmap(Subresource);
  }


  // Records a copy of the full image subresource
  // into the mapped buffer of that subresource.
  void D3D11ImmediateContext::ReadbackImageBuffer(
          D3D11CommonTexture*         pResource,
          UINT                        Subresource) {
    VkImageAspectFlags aspectMask = lookupFormatInfo(pResource->GetPackedFormat())->aspectMask;
    VkImageSubresource subresource = pResource->GetSubresourceFromIndex(aspectMask, Subresource);

    EmitCs([
      cSrcImage           = pResource->GetImage(),
      cSrcSubresource     = vk::makeSubresourceLayers(subresource),
      cDstBuffer          = pResource->GetMappedBuffer(Subresource),
      cPackedFormat       = pResource->GetPackedFormat()
    ] (DxvkContext* ctx) {
      VkOffset3D offset = { 0, 0, 0 };
      VkExtent3D extent = cSrcImage->mipLevelExtent(cSrcSubresource.mipLevel);

      ctx->copyImageToBuffer(cDstBuffer, 0, 0, 0,
        cPackedFormat, cSrcImage, cSrcSubresource,
        offset, extent);
    });

    if (pResource->HasSequenceNumber())
      TrackTextureSequenceNumber(pResource, Subresource);
  }


  // Records a copy from the mapped buffer to the image for the given
  // region, or for the entire mip level if pRegion is null.
  void D3D11ImmediateContext::UpdateDirtyImageRegion(
          D3D11CommonTexture*         pResource,
          UINT                        Subresource,
    const D3D11_COMMON_TEXTURE_REGION* pRegion) {
    auto formatInfo = lookupFormatInfo(pResource->GetPackedFormat());
    auto subresource = vk::makeSubresourceLayers(
      pResource->GetSubresourceFromIndex(formatInfo->aspectMask, Subresource));

    // Update the entire image if no dirty region was specified
    D3D11_COMMON_TEXTURE_REGION region;

    if (pRegion) {
      region = *pRegion;
    } else {
      region.Offset = VkOffset3D { 0, 0, 0 };
      region.Extent = pResource->MipLevelExtent(subresource.mipLevel);
    }

    auto subresourceLayout = pResource->GetSubresourceLayout(formatInfo->aspectMask, Subresource);

    // Update dirty region one aspect at a time, due to
    // how the data is laid out in the staging buffer.
    for (uint32_t i = 0; i < pResource->GetPlaneCount(); i++) {
      subresource.aspectMask = formatInfo->aspectMask;

      if (formatInfo->flags.test(DxvkFormatFlag::MultiPlane))
        subresource.aspectMask = vk::getPlaneAspect(i);

      EmitCs([
        cDstImage       = pResource->GetImage(),
        cDstSubresource = subresource,
        cDstOffset      = region.Offset,
        cDstExtent      = region.Extent,
        cSrcBuffer      = pResource->GetMappedBuffer(Subresource),
        cSrcOffset      = pResource->ComputeMappedOffset(Subresource, i, region.Offset),
        cSrcRowPitch    = subresourceLayout.RowPitch,
        cSrcDepthPitch  = subresourceLayout.DepthPitch,
        cPackedFormat   = pResource->GetPackedFormat()
      ] (DxvkContext* ctx) {
        ctx->copyBufferToImage(
          cDstImage, cDstSubresource, cDstOffset, cDstExtent,
          cSrcBuffer, cSrcOffset, cSrcRowPitch, cSrcDepthPitch,
          cPackedFormat);
      });
    }

    if (pResource->HasSequenceNumber())
      TrackTextureSequenceNumber(pResource, Subresource);
  }


  // Copies Length bytes from pSrcData into the mapped buffer at Offset.
  // Unless CopyFlags equals D3D11_COPY_NO_OVERWRITE, the buffer is
  // given a fresh backing slice first.
  void D3D11ImmediateContext::UpdateMappedBuffer(
          D3D11Buffer*                pDstBuffer,
          UINT                        Offset,
          UINT                        Length,
    const void*                       pSrcData,
          UINT                        CopyFlags) {
    void* mapPtr = nullptr;

    if (likely(CopyFlags != D3D11_COPY_NO_OVERWRITE)) {
      auto bufferSlice = pDstBuffer->DiscardSlice(&m_allocationCache);
      mapPtr = bufferSlice->mapPtr();

      EmitCs([
        cBuffer      = pDstBuffer->GetBuffer(),
        cBufferSlice = std::move(bufferSlice)
      ] (DxvkContext* ctx) mutable {
        ctx->invalidateBuffer(cBuffer, std::move(cBufferSlice));
      });
    } else {
      mapPtr = pDstBuffer->GetMapPtr();
    }

    std::memcpy(reinterpret_cast<char*>(mapPtr) + Offset, pSrcData, Length);
  }


  // Swaps the active context state object. The previous state object
  // is returned through ppPreviousState if requested; a null pState
  // leaves the context untouched.
  void STDMETHODCALLTYPE D3D11ImmediateContext::SwapDeviceContextState(
          ID3DDeviceContextState*           pState,
          ID3DDeviceContextState**          ppPreviousState) {
    InitReturnPtr(ppPreviousState);

    if (!pState)
      return;

    // Clear dirty tracking here since all context state will be
    // re-applied anyway when the context state is swapped in again.
    ResetDirtyTracking();

    // Reset all state affected by the current context state.
    ResetCommandListState();

    Com<D3D11DeviceContextState, false> oldState = std::move(m_stateObject);
    Com<D3D11DeviceContextState, false> newState = static_cast<D3D11DeviceContextState*>(pState);

    // No state object was active yet, so create
    // one to hold the current context state
    if (oldState == nullptr)
      oldState = new D3D11DeviceContextState(m_parent);

    if (ppPreviousState)
      *ppPreviousState = oldState.ref();

    m_stateObject = newState;

    oldState->SetState(m_state);
    newState->GetState(m_state);

    // Restore all state affected by the new context state
    RestoreCommandListState();
  }


  // Records a barrier that makes a shared 11on12 resource available to
  // this context. SrcLayout is the source layout for image barriers.
  void D3D11ImmediateContext::Acquire11on12Resource(
          ID3D11Resource*             pResource,
          VkImageLayout               SrcLayout) {
    D3D10DeviceLock lock = LockContext();

    auto texture = GetCommonTexture(pResource);
    auto buffer = GetCommonBuffer(pResource);

    if (buffer) {
      EmitCs([
        cBuffer   = buffer->GetBuffer()
      ] (DxvkContext* ctx) {
        ctx->emitBufferBarrier(cBuffer,
          VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
          VK_ACCESS_MEMORY_WRITE_BIT | VK_ACCESS_MEMORY_READ_BIT,
          cBuffer->info().stages,
          cBuffer->info().access);
      });
    } else if (texture) {
      EmitCs([
        cImage    = texture->GetImage(),
        cLayout   = SrcLayout
      ] (DxvkContext* ctx) {
        ctx->emitImageBarrier(cImage, cLayout,
          VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
          VK_ACCESS_MEMORY_WRITE_BIT | VK_ACCESS_MEMORY_READ_BIT,
          cImage->info().layout,
          cImage->info().stages,
          cImage->info().access);
      });
    }
  }


  // Records a barrier that hands a shared 11on12 resource back.
  // DstLayout is the destination layout for image barriers.
  void D3D11ImmediateContext::Release11on12Resource(
          ID3D11Resource*             pResource,
          VkImageLayout               DstLayout) {
    D3D10DeviceLock lock = LockContext();

    auto texture = GetCommonTexture(pResource);
    auto buffer = GetCommonBuffer(pResource);

    if (buffer) {
      EmitCs([
        cBuffer   = buffer->GetBuffer()
      ] (DxvkContext* ctx) {
        ctx->emitBufferBarrier(cBuffer,
          cBuffer->info().stages,
          cBuffer->info().access,
          VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
          VK_ACCESS_MEMORY_WRITE_BIT | VK_ACCESS_MEMORY_READ_BIT);
      });
    } else if (texture) {
      EmitCs([
        cImage    = texture->GetImage(),
        cLayout   = DstLayout
      ] (DxvkContext* ctx) {
        ctx->emitImageBarrier(cImage,
          cImage->info().layout,
          cImage->info().stages,
          cImage->info().access,
          cLayout, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
          VK_ACCESS_MEMORY_WRITE_BIT | VK_ACCESS_MEMORY_READ_BIT);
      });
    }
  }


  // Waits for the CS thread to reach the given sequence number,
  // dispatching the current chunk first if it is required for that.
  void D3D11ImmediateContext::SynchronizeCsThread(uint64_t SequenceNumber) {
    D3D10DeviceLock lock = LockContext();

    // Dispatch current chunk so that all commands
    // recorded prior to this function will be run
    if (SequenceNumber > m_csSeqNum)
      FlushCsChunk();

    m_csThread.synchronize(SequenceNumber);
  }


  // Blocks until the device is idle.
  void D3D11ImmediateContext::SynchronizeDevice() {
    m_device->waitForIdle();
  }


  // Marks the end of a frame on the CS thread and ends latency
  // tracking if the tracker asks for automatic markers.
  void D3D11ImmediateContext::EndFrame(
          Rc<DxvkLatencyTracker>      LatencyTracker) {
    D3D10DeviceLock lock = LockContext();

    // Don't keep draw buffers alive indefinitely. This cannot be
    // done in ExecuteFlush because command recording itself might
    // flush, so no state changes are allowed to happen there.
    SetDrawBuffers(nullptr, nullptr);

    EmitCs([
      cTracker = std::move(LatencyTracker)
    ] (DxvkContext* ctx) {
      ctx->endFrame();

      if (cTracker && cTracker->needsAutoMarkers())
        ctx->endLatencyTracking(cTracker);
    });
  }


  // Waits until the resource can be accessed by the CPU for the given
  // map type. Returns false only if the resource is still in use and
  // D3D11_MAP_FLAG_DO_NOT_WAIT was specified.
  bool D3D11ImmediateContext::WaitForResource(
    const DxvkPagedResource&                Resource,
          uint64_t                          SequenceNumber,
          D3D11_MAP                         MapType,
          UINT                              MapFlags) {
    // Determine access type to wait for based on map mode
    DxvkAccess access = MapType == D3D11_MAP_READ
      ? DxvkAccess::Write
      : DxvkAccess::Read;

    // Wait for any CS chunk using the resource to execute, since
    // otherwise we cannot accurately determine if the resource is
    // actually being used by the GPU right now.
    bool isInUse = Resource.isInUse(access);

    if (!isInUse) {
      SynchronizeCsThread(SequenceNumber);
      isInUse = Resource.isInUse(access);
    }

    if (MapFlags & D3D11_MAP_FLAG_DO_NOT_WAIT) {
      if (isInUse) {
        // We don't have to wait, but misbehaving games may
        // still try to spin on `Map` until the resource is
        // idle, so we should flush pending commands
        ConsiderFlush(GpuFlushType::ImplicitSynchronization);
        return false;
      }
    } else {
      if (isInUse) {
        // Make sure pending commands using the resource get
        // executed on the GPU if we have to wait for it
        ExecuteFlush(GpuFlushType::ImplicitSynchronization, nullptr, false);
        SynchronizeCsThread(SequenceNumber);

        m_device->waitForResource(Resource, access);
      }
    }

    return true;
  }


  // Forwards an externally recorded chunk to the CS thread.
  void D3D11ImmediateContext::InjectCsChunk(
          DxvkCsQueue                 Queue,
          DxvkCsChunkRef&&            Chunk,
          bool                        Synchronize) {
    // Do not update the sequence number when emitting a chunk
    // from an external source since that would break tracking
    m_csThread.injectChunk(Queue, std::move(Chunk), Synchronize);
  }


  // Dispatches a chunk to the CS thread and
  // records the resulting sequence number.
  void D3D11ImmediateContext::EmitCsChunk(DxvkCsChunkRef&& chunk) {
    // Flush init commands so that the CS thread
    // can process them before the first use.
    m_parent->FlushInitCommands();
    m_csSeqNum = m_csThread.dispatchChunk(std::move(chunk));
  }


  // Tags a texture subresource with the current sequence
  // number and considers flushing the context.
  void D3D11ImmediateContext::TrackTextureSequenceNumber(
          D3D11CommonTexture*         pResource,
          UINT                        Subresource) {
    uint64_t sequenceNumber = GetCurrentSequenceNumber();
    pResource->TrackSequenceNumber(Subresource, sequenceNumber);

    ConsiderFlush(GpuFlushType::ImplicitStrongHint);
  }


  // Tags a buffer with the current sequence number
  // and considers flushing the context.
  void D3D11ImmediateContext::TrackBufferSequenceNumber(
          D3D11Buffer*                pResource) {
    uint64_t sequenceNumber = GetCurrentSequenceNumber();
    pResource->TrackSequenceNumber(sequenceNumber);

    ConsiderFlush(GpuFlushType::ImplicitStrongHint);
  }


  // Returns the sequence number that applies
  // to commands recorded at this point.
  uint64_t D3D11ImmediateContext::GetCurrentSequenceNumber() {
    // We do not flush empty chunks, so if we are tracking a resource
    // immediately after a flush, we need to use the sequence number
    // of the previously submitted chunk to prevent deadlocks.
    return m_csChunk->empty() ? m_csSeqNum : m_csSeqNum + 1;
  }


  // Returns the difference between the current sequence number
  // and the sequence number recorded at the last flush.
  uint64_t D3D11ImmediateContext::GetPendingCsChunks() {
    return GetCurrentSequenceNumber() - m_flushSeqNum;
  }


  void D3D11ImmediateContext::ApplyDirtyNullBindings() {
    // At the end of a submission, set all bindings that have not been applied yet
    // to null on the DXVK context. This way, we avoid keeping resources alive that
    // are bound to the DXVK context but not to the immediate context.
    //
    // Note: This requires that all methods that may modify dirty bindings on the
    // DXVK context also reset the corresponding dirty bits *before* performing the
    // bind operation, or otherwise an implicit flush can potentially override them.
    auto& dirtyState = m_state.lazy.bindingsDirty;

    // The lambda captures a copy of the dirty masks, so clearing
    // bits on the context side below does not affect it.
    EmitCs([
      cDirtyState = dirtyState
    ] (DxvkContext* ctx) {
      for (uint32_t i = 0; i < uint32_t(DxbcProgramType::Count); i++) {
        auto dxStage = DxbcProgramType(i);
        auto vkStage = GetShaderStage(dxStage);

        // Unbind all dirty constant buffers
        auto cbvSlot = computeConstantBufferBinding(dxStage, 0);

        for (uint32_t index : bit::BitMask(cDirtyState[dxStage].cbvMask))
          ctx->bindUniformBuffer(vkStage, cbvSlot + index, DxvkBufferSlice());

        // Unbind all dirty samplers
        auto samplerSlot = computeSamplerBinding(dxStage, 0);

        for (uint32_t index : bit::BitMask(cDirtyState[dxStage].samplerMask))
          ctx->bindResourceSampler(vkStage, samplerSlot + index, nullptr);

        // Unbind all dirty shader resource views
        auto srvSlot = computeSrvBinding(dxStage, 0);

        for (uint32_t m = 0; m < cDirtyState[dxStage].srvMask.size(); m++) {
          for (uint32_t index : bit::BitMask(cDirtyState[dxStage].srvMask[m]))
            ctx->bindResourceImageView(vkStage, srvSlot + index + m * 64u, nullptr);
        }
      }
    });

    // Since we set the DXVK context bindings to null, any bindings that are null
    // on the D3D context are no longer dirty, so we can clear the respective bits.
    for (uint32_t i = 0; i < uint32_t(DxbcProgramType::Count); i++) {
      auto stage = DxbcProgramType(i);

      for (uint32_t index : bit::BitMask(dirtyState[stage].cbvMask)) {
        if (!m_state.cbv[stage].buffers[index].buffer.ptr())
          dirtyState[stage].cbvMask &= ~(1u << index);
      }

      for (uint32_t index : bit::BitMask(dirtyState[stage].samplerMask)) {
        if (!m_state.samplers[stage].samplers[index])
          dirtyState[stage].samplerMask &= ~(1u << index);
      }

      for (uint32_t m = 0; m < dirtyState[stage].srvMask.size(); m++) {
        for (uint32_t index : bit::BitMask(dirtyState[stage].srvMask[m])) {
          if (!m_state.srv[stage].views[index + m * 64u].ptr())
            dirtyState[stage].srvMask[m] &= ~(uint64_t(1u) << index);
        }
      }

      if (dirtyState[stage].empty())
        m_state.lazy.shadersDirty.clr(stage);
    }
  }


  // Asks the flush tracker whether a flush of the given
  // type is due and executes it if so.
  void D3D11ImmediateContext::ConsiderFlush(
          GpuFlushType                FlushType) {
    uint64_t chunkId = GetCurrentSequenceNumber();
    uint64_t submissionId = m_submissionFence->value();

    if (m_flushTracker.considerFlush(FlushType, chunkId, submissionId))
      ExecuteFlush(FlushType, nullptr, false);
  }


  // Flushes pending commands. hEvent, if non-null, is set once the
  // submission fence reaches this submission. With Synchronize set on
  // an 11on12 device, the call also waits for the submission itself.
  void D3D11ImmediateContext::ExecuteFlush(
          GpuFlushType                FlushType,
          HANDLE                      hEvent,
          BOOL                        Synchronize) {
    bool synchronizeSubmission = Synchronize && m_parent->Is11on12Device();

    if (synchronizeSubmission)
      m_submitStatus.result = VK_NOT_READY;

    // Exit early if there's nothing to do
    if (!GetPendingCsChunks() && !hEvent)
      return;

    // Unbind unused resources
    ApplyDirtyNullBindings();

    // Signal the submission fence and flush the command list
    uint64_t submissionId = ++m_submissionId;

    if (hEvent) {
      m_submissionFence->setCallback(submissionId, [hEvent] {
        SetEvent(hEvent);
      });
    }

    EmitCs([
      cSubmissionFence  = m_submissionFence,
      cSubmissionId     = submissionId,
      cSubmissionStatus = synchronizeSubmission ? &m_submitStatus : nullptr,
      cStagingFence     = m_stagingBufferFence,
      cStagingMemory    = GetStagingMemoryStatistics().allocatedTotal
    ] (DxvkContext* ctx) {
      ctx->signal(cSubmissionFence, cSubmissionId);
      ctx->signal(cStagingFence, cStagingMemory);
      ctx->flushCommandList(cSubmissionStatus);
    });

    FlushCsChunk();

    // Notify flush tracker about the flush
    m_flushSeqNum = m_csSeqNum;
    m_flushTracker.notifyFlush(m_flushSeqNum, submissionId);

    // If necessary, block calling thread until the
    // Vulkan queue submission is performed.
    if (synchronizeSubmission)
      m_device->waitForSubmission(&m_submitStatus);

    // Free local staging buffer so that we don't
    // end up with a persistent allocation
    ResetStagingBuffer();

    // Reset counter for discarded memory in flight
    m_discardMemoryOnFlush = m_discardMemoryCounter;

    // Notify the device that the context has been flushed,
    // this resets some resource initialization heuristics.
    m_parent->NotifyContextFlush();

    // No point in tracking this across submissions
    m_hasPendingMsaaResolve = false;
  }


  // Flushes, and possibly stalls the calling thread,
  // based on the amount of staging memory in flight.
  void D3D11ImmediateContext::ThrottleAllocation() {
    DxvkStagingBufferStats stats = GetStagingMemoryStatistics();

    VkDeviceSize stagingMemoryInFlight = stats.allocatedTotal - m_stagingBufferFence->value();

    if (stagingMemoryInFlight > stats.allocatedSinceLastReset + D3D11Initializer::MaxMemoryInFlight) {
      // Stall calling thread to avoid situation where we keep growing the staging
      // buffer indefinitely, but ignore the newly allocated amount so that we don't
      // wait for the GPU to go fully idle in case of a large allocation.
      ExecuteFlush(GpuFlushType::ExplicitFlush, nullptr, false);

      m_device->waitForFence(*m_stagingBufferFence, stats.allocatedTotal -
        stats.allocatedSinceLastReset - D3D11Initializer::MaxMemoryInFlight);
    } else if (stats.allocatedSinceLastReset >= D3D11Initializer::MaxMemoryPerSubmission) {
      // Flush somewhat aggressively if there's a lot of memory in flight
      ExecuteFlush(GpuFlushType::ExplicitFlush, nullptr, false);
    }
  }


  // Accounts for Size bytes of discarded memory and throttles once the
  // amount discarded since the last flush reaches the per-submission limit.
  void D3D11ImmediateContext::ThrottleDiscard(
          VkDeviceSize                Size) {
    m_discardMemoryCounter += Size;

    if (m_discardMemoryCounter - m_discardMemoryOnFlush >= D3D11Initializer::MaxMemoryPerSubmission)
      ThrottleAllocation();
  }


  // Returns the staging buffer statistics with the
  // discarded memory counters added on top.
  DxvkStagingBufferStats D3D11ImmediateContext::GetStagingMemoryStatistics() {
    DxvkStagingBufferStats stats = m_staging.getStatistics();
    stats.allocatedTotal += m_discardMemoryCounter;
    stats.allocatedSinceLastReset += m_discardMemoryCounter - m_discardMemoryOnFlush;
    return stats;
  }


  // Selects the flush type passed to the flush tracker, based on
  // the device options and the device's performance hints.
  GpuFlushType D3D11ImmediateContext::GetMaxFlushType(
          D3D11Device*    pParent,
    const Rc<DxvkDevice>& Device) {
    if (pParent->GetOptions()->reproducibleCommandStream)
      return GpuFlushType::ExplicitFlush;
    else if (Device->perfHints().preferRenderPassOps)
      return GpuFlushType::ImplicitMediumHint;
    else
      return GpuFlushType::ImplicitWeakHint;
  }

}