#include // NOTE(review): the header name of this first directive is not visible in this view - confirm against the repository
#include "d3d11_context.h"
#include "d3d11_device.h"
#include "d3d11_query.h"
#include "d3d11_texture.h"
#include "d3d11_video.h"

#include "../dxbc/dxbc_util.h"

namespace dxvk {

  // Constructs the context. Passes pParent to the D3D11DeviceChild base,
  // stores the device and the CS chunk flags, sets up the staging
  // allocator with StagingBufferSize, allocates the first CS chunk and
  // starts out with no pending command data (m_cmdData is nullptr).
  D3D11DeviceContext::D3D11DeviceContext(
          D3D11Device* pParent,
    const Rc& Device,
          DxvkCsChunkFlags CsFlags)
  : D3D11DeviceChild(pParent),
    m_multithread(this, false),
    m_device (Device),
    m_staging (Device, StagingBufferSize),
    m_csFlags (CsFlags),
    // AllocCsChunk() reads m_parent and m_csFlags.
    // NOTE(review): this relies on those members being initialized
    // before m_csChunk - confirm the declaration order in the header.
    m_csChunk (AllocCsChunk()),
    m_cmdData (nullptr) {

  }


  // Nothing to release explicitly here.
  D3D11DeviceContext::~D3D11DeviceContext() {

  }


  // Not implemented. Logs an error the first time it is called (guarded
  // by a function-local static flag) and does nothing else.
  void STDMETHODCALLTYPE D3D11DeviceContext::CopyTiles(
          ID3D11Resource* pTiledResource,
    const D3D11_TILED_RESOURCE_COORDINATE* pTileRegionStartCoordinate,
    const D3D11_TILE_REGION_SIZE* pTileRegionSize,
          ID3D11Buffer* pBuffer,
          UINT64 BufferStartOffsetInBytes,
          UINT Flags) {
    static bool s_errorShown = false;

    // std::exchange returns the previous value, so this is only
    // true on the first call
    if (!std::exchange(s_errorShown, true))
      Logger::err("D3D11DeviceContext::CopyTiles: Not implemented");
  }


  // Not implemented. Logs an error the first time it is called
  // and always returns DXGI_ERROR_INVALID_CALL.
  HRESULT STDMETHODCALLTYPE D3D11DeviceContext::CopyTileMappings(
          ID3D11Resource* pDestTiledResource,
    const D3D11_TILED_RESOURCE_COORDINATE* pDestRegionStartCoordinate,
          ID3D11Resource* pSourceTiledResource,
    const D3D11_TILED_RESOURCE_COORDINATE* pSourceRegionStartCoordinate,
    const D3D11_TILE_REGION_SIZE* pTileRegionSize,
          UINT Flags) {
    static bool s_errorShown = false;

    if (!std::exchange(s_errorShown, true))
      Logger::err("D3D11DeviceContext::CopyTileMappings: Not implemented");

    return DXGI_ERROR_INVALID_CALL;
  }


  // Not implemented. Logs an error the first time it is called
  // and always returns DXGI_ERROR_INVALID_CALL.
  HRESULT STDMETHODCALLTYPE D3D11DeviceContext::ResizeTilePool(
          ID3D11Buffer* pTilePool,
          UINT64 NewSizeInBytes) {
    static bool s_errorShown = false;

    if (!std::exchange(s_errorShown, true))
      Logger::err("D3D11DeviceContext::ResizeTilePool: Not implemented");

    return DXGI_ERROR_INVALID_CALL;
  }


  // Intentionally empty: both arguments are ignored.
  void STDMETHODCALLTYPE D3D11DeviceContext::TiledResourceBarrier(
          ID3D11DeviceChild* pTiledResourceOrViewAccessBeforeBarrier,
          ID3D11DeviceChild* pTiledResourceOrViewAccessAfterBarrier) {

  }


  // (return type of the definition that continues on the next line)
  void STDMETHODCALLTYPE
D3D11DeviceContext::GenerateMips(ID3D11ShaderResourceView* pShaderResourceView) { D3D10DeviceLock lock = LockContext(); auto view = static_cast(pShaderResourceView); if (!view || view->GetResourceType() == D3D11_RESOURCE_DIMENSION_BUFFER) return; D3D11_COMMON_RESOURCE_DESC resourceDesc = view->GetResourceDesc(); if (!(resourceDesc.MiscFlags & D3D11_RESOURCE_MISC_GENERATE_MIPS)) return; EmitCs([cDstImageView = view->GetImageView()] (DxvkContext* ctx) { ctx->generateMipmaps(cDstImageView, VK_FILTER_LINEAR); }); } HRESULT STDMETHODCALLTYPE D3D11DeviceContext::UpdateTileMappings( ID3D11Resource* pTiledResource, UINT NumTiledResourceRegions, const D3D11_TILED_RESOURCE_COORDINATE* pTiledResourceRegionStartCoordinates, const D3D11_TILE_REGION_SIZE* pTiledResourceRegionSizes, ID3D11Buffer* pTilePool, UINT NumRanges, const UINT* pRangeFlags, const UINT* pTilePoolStartOffsets, const UINT* pRangeTileCounts, UINT Flags) { bool s_errorShown = false; if (std::exchange(s_errorShown, true)) Logger::err("D3D11DeviceContext::UpdateTileMappings: Not implemented"); return DXGI_ERROR_INVALID_CALL; } void STDMETHODCALLTYPE D3D11DeviceContext::UpdateTiles( ID3D11Resource* pDestTiledResource, const D3D11_TILED_RESOURCE_COORDINATE* pDestTileRegionStartCoordinate, const D3D11_TILE_REGION_SIZE* pDestTileRegionSize, const void* pSourceTileData, UINT Flags) { bool s_errorShown = false; if (std::exchange(s_errorShown, true)) Logger::err("D3D11DeviceContext::UpdateTiles: Not implemented"); } void STDMETHODCALLTYPE D3D11DeviceContext::SetResourceMinLOD( ID3D11Resource* pResource, FLOAT MinLOD) { bool s_errorShown = false; if (std::exchange(s_errorShown, true)) Logger::err("D3D11DeviceContext::SetResourceMinLOD: Not implemented"); } FLOAT STDMETHODCALLTYPE D3D11DeviceContext::GetResourceMinLOD(ID3D11Resource* pResource) { bool s_errorShown = false; if (std::exchange(s_errorShown, true)) Logger::err("D3D11DeviceContext::GetResourceMinLOD: Not implemented"); return 0.0f; } void STDMETHODCALLTYPE 
D3D11DeviceContext::ResolveSubresource(
          ID3D11Resource* pDstResource,
          UINT DstSubresource,
          ID3D11Resource* pSrcResource,
          UINT SrcSubresource,
          DXGI_FORMAT Format) {
    // Resolves (or, for a single-sampled source, copies) one 2D texture
    // subresource into another. Invalid calls return without recording
    // anything; some of them additionally log an error.
    D3D10DeviceLock lock = LockContext();

    bool isSameSubresource = pDstResource == pSrcResource
                          && DstSubresource == SrcSubresource;

    // Null resources and resolving a subresource onto itself are ignored
    if (!pDstResource || !pSrcResource || isSameSubresource)
      return;

    D3D11_RESOURCE_DIMENSION dstResourceType;
    D3D11_RESOURCE_DIMENSION srcResourceType;

    pDstResource->GetType(&dstResourceType);
    pSrcResource->GetType(&srcResourceType);

    // Both resources must be 2D textures
    if (dstResourceType != D3D11_RESOURCE_DIMENSION_TEXTURE2D
     || srcResourceType != D3D11_RESOURCE_DIMENSION_TEXTURE2D) {
      Logger::err(str::format(
        "D3D11: ResolveSubresource: Incompatible resources",
        "\n Dst resource type: ", dstResourceType,
        "\n Src resource type: ", srcResourceType));
      return;
    }

    auto dstTexture = static_cast(pDstResource);
    auto srcTexture = static_cast(pSrcResource);

    D3D11_TEXTURE2D_DESC dstDesc;
    D3D11_TEXTURE2D_DESC srcDesc;

    dstTexture->GetDesc(&dstDesc);
    srcTexture->GetDesc(&srcDesc);

    // The destination must be single-sampled
    if (dstDesc.SampleDesc.Count != 1) {
      Logger::err(str::format(
        "D3D11: ResolveSubresource: Invalid sample counts",
        "\n Dst sample count: ", dstDesc.SampleDesc.Count,
        "\n Src sample count: ", srcDesc.SampleDesc.Count));
      return;
    }

    D3D11CommonTexture* dstTextureInfo = GetCommonTexture(pDstResource);
    D3D11CommonTexture* srcTextureInfo = GetCommonTexture(pSrcResource);

    const DXGI_VK_FORMAT_INFO dstFormatInfo = m_parent->LookupFormat(dstDesc.Format, DXGI_VK_FORMAT_MODE_ANY);
    const DXGI_VK_FORMAT_INFO srcFormatInfo = m_parent->LookupFormat(srcDesc.Format, DXGI_VK_FORMAT_MODE_ANY);

    auto dstVulkanFormatInfo = lookupFormatInfo(dstFormatInfo.Format);
    auto srcVulkanFormatInfo = lookupFormatInfo(srcFormatInfo.Format);

    // Out-of-range subresource indices are silently ignored
    if (DstSubresource >= dstTextureInfo->CountSubresources()
     || SrcSubresource >= srcTextureInfo->CountSubresources())
      return;

    const VkImageSubresource dstSubresource =
      dstTextureInfo->GetSubresourceFromIndex(
        dstVulkanFormatInfo->aspectMask,
        DstSubresource);

    const VkImageSubresource srcSubresource =
      srcTextureInfo->GetSubresourceFromIndex(
        srcVulkanFormatInfo->aspectMask,
        SrcSubresource);

    // Exactly one array layer of one mip level on each side
    const VkImageSubresourceLayers dstSubresourceLayers = {
      dstSubresource.aspectMask,
      dstSubresource.mipLevel,
      dstSubresource.arrayLayer, 1 };

    const VkImageSubresourceLayers srcSubresourceLayers = {
      srcSubresource.aspectMask,
      srcSubresource.mipLevel,
      srcSubresource.arrayLayer, 1 };

    if (srcDesc.SampleDesc.Count == 1 || m_parent->GetOptions()->disableMsaa) {
      // Source is single-sampled (or the disableMsaa option is set):
      // record a plain image copy of the full destination mip level
      EmitCs([
        cDstImage = dstTextureInfo->GetImage(),
        cSrcImage = srcTextureInfo->GetImage(),
        cDstLayers = dstSubresourceLayers,
        cSrcLayers = srcSubresourceLayers
      ] (DxvkContext* ctx) {
        ctx->copyImage(
          cDstImage, cDstLayers, VkOffset3D { 0, 0, 0 },
          cSrcImage, cSrcLayers, VkOffset3D { 0, 0, 0 },
          cDstImage->mipLevelExtent(cDstLayers.mipLevel));
      });
    } else {
      // Otherwise record a resolve, using the format that the
      // application passed in rather than the resource formats
      const VkFormat format = m_parent->LookupFormat(
        Format, DXGI_VK_FORMAT_MODE_ANY).Format;

      EmitCs([
        cDstImage = dstTextureInfo->GetImage(),
        cSrcImage = srcTextureInfo->GetImage(),
        cDstSubres = dstSubresourceLayers,
        cSrcSubres = srcSubresourceLayers,
        cFormat = format
      ] (DxvkContext* ctx) {
        VkImageResolve region;
        region.srcSubresource = cSrcSubres;
        region.srcOffset = VkOffset3D { 0, 0, 0 };
        region.dstSubresource = cDstSubres;
        region.dstOffset = VkOffset3D { 0, 0, 0 };
        region.extent = cDstImage->mipLevelExtent(cDstSubres.mipLevel);

        ctx->resolveImage(cDstImage, cSrcImage, region, cFormat);
      });
    }

    if (dstTextureInfo->HasSequenceNumber())
      TrackTextureSequenceNumber(dstTextureInfo, DstSubresource);
  }


  // Records an indirect transform feedback draw. The vertex buffer
  // bound to input assembler slot 0 supplies the counter buffer, the
  // vertex stride and the offset. Does nothing if no buffer is bound
  // to that slot or if the buffer has no defined counter slice.
  void STDMETHODCALLTYPE D3D11DeviceContext::DrawAuto() {
    D3D10DeviceLock lock = LockContext();

    D3D11Buffer* buffer = m_state.ia.vertexBuffers[0].buffer.ptr();

    if (buffer == nullptr)
      return;

    DxvkBufferSlice vtxBuf = buffer->GetBufferSlice();
    DxvkBufferSlice ctrBuf = buffer->GetSOCounter();

    if (!ctrBuf.defined())
      return;

    EmitCs([=] (DxvkContext* ctx) {
      ctx->drawIndirectXfb(ctrBuf,
        vtxBuf.buffer()->getXfbVertexStride(),
        vtxBuf.offset());
    });
  }


  // Records a non-indexed draw with an instance
  // count of 1 and a first instance of 0.
  void STDMETHODCALLTYPE D3D11DeviceContext::Draw(
          UINT VertexCount,
          UINT StartVertexLocation) {
    D3D10DeviceLock lock = LockContext();

    EmitCs([=] (DxvkContext* ctx) {
      ctx->draw(
        VertexCount, 1,
        StartVertexLocation, 0);
    });
  }


  // Records an indexed draw with an instance
  // count of 1 and a first instance of 0.
  void STDMETHODCALLTYPE D3D11DeviceContext::DrawIndexed(
          UINT IndexCount,
          UINT StartIndexLocation,
          INT BaseVertexLocation) {
    D3D10DeviceLock lock = LockContext();

    EmitCs([=] (DxvkContext* ctx) {
      ctx->drawIndexed(
        IndexCount, 1,
        StartIndexLocation,
        BaseVertexLocation, 0);
    });
  }


  // Records an instanced, non-indexed draw. All
  // four arguments are forwarded unchanged.
  void STDMETHODCALLTYPE D3D11DeviceContext::DrawInstanced(
          UINT VertexCountPerInstance,
          UINT InstanceCount,
          UINT StartVertexLocation,
          UINT StartInstanceLocation) {
    D3D10DeviceLock lock = LockContext();

    EmitCs([=] (DxvkContext* ctx) {
      ctx->draw(
        VertexCountPerInstance,
        InstanceCount,
        StartVertexLocation,
        StartInstanceLocation);
    });
  }


  // Records an instanced, indexed draw. All
  // five arguments are forwarded unchanged.
  void STDMETHODCALLTYPE D3D11DeviceContext::DrawIndexedInstanced(
          UINT IndexCountPerInstance,
          UINT InstanceCount,
          UINT StartIndexLocation,
          INT BaseVertexLocation,
          UINT StartInstanceLocation) {
    D3D10DeviceLock lock = LockContext();

    EmitCs([=] (DxvkContext* ctx) {
      ctx->drawIndexed(
        IndexCountPerInstance,
        InstanceCount,
        StartIndexLocation,
        BaseVertexLocation,
        StartInstanceLocation);
    });
  }


  // Records an indexed indirect draw that reads its arguments from
  // pBufferForArgs at AlignedByteOffsetForArgs. The call is dropped
  // if ValidateDrawBufferSize rejects the buffer / offset pair.
  void STDMETHODCALLTYPE D3D11DeviceContext::DrawIndexedInstancedIndirect(
          ID3D11Buffer* pBufferForArgs,
          UINT AlignedByteOffsetForArgs) {
    D3D10DeviceLock lock = LockContext();
    SetDrawBuffers(pBufferForArgs, nullptr);

    if (!ValidateDrawBufferSize(pBufferForArgs, AlignedByteOffsetForArgs, sizeof(VkDrawIndexedIndirectCommand)))
      return;

    // If possible, batch up multiple indirect draw calls of
    // the same type into one single multiDrawIndirect call
    auto cmdData = static_cast(m_cmdData);
    auto stride = 0u;

    if (cmdData && cmdData->type == D3D11CmdType::DrawIndirectIndexed)
      stride = GetIndirectCommandStride(cmdData, AlignedByteOffsetForArgs, sizeof(VkDrawIndexedIndirectCommand));

    if (stride) {
      // A non-zero stride means this draw can be appended
      // to the previously recorded command
      cmdData->count += 1;
      cmdData->stride = stride;
    } else {
      // Otherwise start a new command with a single draw
      cmdData = EmitCsCmd(
        [] (DxvkContext* ctx, const D3D11CmdDrawIndirectData* data) {
          ctx->drawIndexedIndirect(data->offset, data->count, data->stride);
        });

      cmdData->type = D3D11CmdType::DrawIndirectIndexed;
      cmdData->offset = AlignedByteOffsetForArgs;
      cmdData->count = 1;
      cmdData->stride = 0;
    }
  }


  // Non-indexed counterpart of DrawIndexedInstancedIndirect:
  // same batching logic, but with VkDrawIndirectCommand-sized
  // arguments and the DrawIndirect command type.
  void STDMETHODCALLTYPE D3D11DeviceContext::DrawInstancedIndirect(
          ID3D11Buffer* pBufferForArgs,
          UINT AlignedByteOffsetForArgs) {
    D3D10DeviceLock lock = LockContext();
    SetDrawBuffers(pBufferForArgs, nullptr);

    if (!ValidateDrawBufferSize(pBufferForArgs, AlignedByteOffsetForArgs, sizeof(VkDrawIndirectCommand)))
      return;

    // If possible, batch up multiple indirect draw calls of
    // the same type into one single multiDrawIndirect call
    auto cmdData = static_cast(m_cmdData);
    auto stride = 0u;

    if (cmdData && cmdData->type == D3D11CmdType::DrawIndirect)
      stride = GetIndirectCommandStride(cmdData, AlignedByteOffsetForArgs, sizeof(VkDrawIndirectCommand));

    if (stride) {
      cmdData->count += 1;
      cmdData->stride = stride;
    } else {
      cmdData = EmitCsCmd(
        [] (DxvkContext* ctx, const D3D11CmdDrawIndirectData* data) {
          ctx->drawIndirect(data->offset, data->count, data->stride);
        });

      cmdData->type = D3D11CmdType::DrawIndirect;
      cmdData->offset = AlignedByteOffsetForArgs;
      cmdData->count = 1;
      cmdData->stride = 0;
    }
  }


  // Records a compute dispatch with the given thread group counts.
  void STDMETHODCALLTYPE D3D11DeviceContext::Dispatch(
          UINT ThreadGroupCountX,
          UINT ThreadGroupCountY,
          UINT ThreadGroupCountZ) {
    D3D10DeviceLock lock = LockContext();

    EmitCs([=] (DxvkContext* ctx) {
      ctx->dispatch(
        ThreadGroupCountX,
        ThreadGroupCountY,
        ThreadGroupCountZ);
    });
  }


  // Records an indirect compute dispatch that reads its arguments
  // from pBufferForArgs at AlignedByteOffsetForArgs. The call is
  // dropped if ValidateDrawBufferSize rejects the buffer / offset pair.
  void STDMETHODCALLTYPE D3D11DeviceContext::DispatchIndirect(
          ID3D11Buffer* pBufferForArgs,
          UINT AlignedByteOffsetForArgs) {
    D3D10DeviceLock lock = LockContext();
    SetDrawBuffers(pBufferForArgs, nullptr);

    if (!ValidateDrawBufferSize(pBufferForArgs, AlignedByteOffsetForArgs, sizeof(VkDispatchIndirectCommand)))
      return;

    EmitCs([cOffset = AlignedByteOffsetForArgs]
    (DxvkContext* ctx) {
      ctx->dispatchIndirect(cOffset);
    });
  }


  // (return type of the definition that continues on the next line)
  void STDMETHODCALLTYPE
D3D11DeviceContext::SetMarkerInt(
          LPCWSTR pLabel,
          INT Data) {
    // Not implemented in the backend, ignore
  }


  // Intentionally empty, both arguments are ignored.
  void STDMETHODCALLTYPE D3D11DeviceContext::BeginEventInt(
          LPCWSTR pLabel,
          INT Data) {
    // Not implemented in the backend, ignore
  }


  // Intentionally empty.
  void STDMETHODCALLTYPE D3D11DeviceContext::EndEvent() {
    // Not implemented in the backend, ignore
  }


  // Not implemented. Logs an error the first time it is called and
  // reports FALSE through pHwProtectionEnable if that pointer is set.
  void STDMETHODCALLTYPE D3D11DeviceContext::GetHardwareProtectionState(
          BOOL* pHwProtectionEnable) {
    static bool s_errorShown = false;

    if (!std::exchange(s_errorShown, true))
      Logger::err("D3D11DeviceContext::GetHardwareProtectionState: Not implemented");

    if (pHwProtectionEnable)
      *pHwProtectionEnable = FALSE;
  }


  // Not implemented. Logs an error the first time
  // it is called; the requested state is ignored.
  void STDMETHODCALLTYPE D3D11DeviceContext::SetHardwareProtectionState(
          BOOL HwProtectionEnable) {
    static bool s_errorShown = false;

    if (!std::exchange(s_errorShown, true))
      Logger::err("D3D11DeviceContext::SetHardwareProtectionState: Not implemented");
  }


  // Records a layout transition from OldLayout to NewLayout for the
  // given subresource range of the image behind an interop surface.
  // NOTE(review): the QueryInterface result is not checked and both
  // pSurface and pSubresources are dereferenced unconditionally, so
  // callers presumably have to pass valid pointers - confirm.
  void STDMETHODCALLTYPE D3D11DeviceContext::TransitionSurfaceLayout(
          IDXGIVkInteropSurface* pSurface,
    const VkImageSubresourceRange* pSubresources,
          VkImageLayout OldLayout,
          VkImageLayout NewLayout) {
    D3D10DeviceLock lock = LockContext();

    // Get the underlying D3D11 resource
    Com resource;

    pSurface->QueryInterface(__uuidof(ID3D11Resource),
      reinterpret_cast(&resource));

    // Get the texture from that resource
    D3D11CommonTexture* texture = GetCommonTexture(resource.ptr());

    // The subresource range is captured by value, so the caller's
    // struct is not referenced after this function returns
    EmitCs([
      cImage = texture->GetImage(),
      cSubresources = *pSubresources,
      cOldLayout = OldLayout,
      cNewLayout = NewLayout
    ] (DxvkContext* ctx) {
      ctx->transformImage(
        cImage, cSubresources,
        cOldLayout, cNewLayout);
    });
  }


  // Records binding of the current input layout, or of an
  // empty input layout if no input layout object is set.
  void D3D11DeviceContext::ApplyInputLayout() {
    auto inputLayout = m_state.ia.inputLayout.prvRef();

    if (likely(inputLayout != nullptr)) {
      EmitCs([
        cInputLayout = std::move(inputLayout)
      ] (DxvkContext* ctx) {
        cInputLayout->BindToContext(ctx);
      });
    } else {
      EmitCs([] (DxvkContext* ctx) {
        ctx->setInputLayout(0, nullptr, 0, nullptr);
      });
    }
  }


  // Translates the current D3D11 primitive topology into an input
  // assembly state and records it. Topology values covered by neither
  // branch below leave the state value-initialized.
  void D3D11DeviceContext::ApplyPrimitiveTopology() {
    D3D11_PRIMITIVE_TOPOLOGY topology = m_state.ia.primitiveTopology;
    DxvkInputAssemblyState iaState = { };

    if (topology <= D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ) {
      // Lookup table indexed directly by the D3D topology value.
      // Each entry is { topology, primitive restart, patch vertex
      // count }, see InitDefaultPrimitiveTopology for the field names.
      static const std::array s_iaStates = {{
        { VK_PRIMITIVE_TOPOLOGY_MAX_ENUM, VK_FALSE, 0 },
        { VK_PRIMITIVE_TOPOLOGY_POINT_LIST, VK_FALSE, 0 },
        { VK_PRIMITIVE_TOPOLOGY_LINE_LIST, VK_FALSE, 0 },
        { VK_PRIMITIVE_TOPOLOGY_LINE_STRIP, VK_TRUE, 0 },
        { VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, VK_FALSE, 0 },
        { VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, VK_TRUE, 0 },
        { }, { }, { }, { }, // Random gap that exists for no reason
        { VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY, VK_FALSE, 0 },
        { VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY, VK_TRUE, 0 },
        { VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY, VK_FALSE, 0 },
        { VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY, VK_TRUE, 0 },
      }};

      iaState = s_iaStates[uint32_t(topology)];
    } else if (topology >= D3D11_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST
            && topology <= D3D11_PRIMITIVE_TOPOLOGY_32_CONTROL_POINT_PATCHLIST) {
      // The number of control points per patch can be inferred from the enum value in D3D11
      uint32_t vertexCount = uint32_t(topology - D3D11_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST + 1);
      iaState = { VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, VK_FALSE, vertexCount };
    }

    EmitCs([iaState] (DxvkContext* ctx) {
      ctx->setInputAssemblyState(iaState);
    });
  }


  // Records the current blend state together with the sample mask.
  // If no blend state object is bound, default blend, logic op and
  // multisample state is set up by InitDefaultBlendState instead,
  // and the blend mode is applied to every render target slot.
  void D3D11DeviceContext::ApplyBlendState() {
    if (m_state.om.cbState != nullptr) {
      EmitCs([
        cBlendState = m_state.om.cbState,
        cSampleMask = m_state.om.sampleMask
      ] (DxvkContext* ctx) {
        cBlendState->BindToContext(ctx, cSampleMask);
      });
    } else {
      EmitCs([
        cSampleMask = m_state.om.sampleMask
      ] (DxvkContext* ctx) {
        DxvkBlendMode cbState;
        DxvkLogicOpState loState;
        DxvkMultisampleState msState;
        InitDefaultBlendState(&cbState, &loState, &msState, cSampleMask);

        for (uint32_t i = 0; i < D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT; i++)
          ctx->setBlendMode(i, cbState);

        ctx->setLogicOpState(loState);
        ctx->setMultisampleState(msState);
      });
    }
} void D3D11DeviceContext::ApplyBlendFactor() { EmitCs([ cBlendConstants = DxvkBlendConstants { m_state.om.blendFactor[0], m_state.om.blendFactor[1], m_state.om.blendFactor[2], m_state.om.blendFactor[3] } ] (DxvkContext* ctx) { ctx->setBlendConstants(cBlendConstants); }); } void D3D11DeviceContext::ApplyDepthStencilState() { if (m_state.om.dsState != nullptr) { EmitCs([ cDepthStencilState = m_state.om.dsState ] (DxvkContext* ctx) { cDepthStencilState->BindToContext(ctx); }); } else { EmitCs([] (DxvkContext* ctx) { DxvkDepthStencilState dsState; InitDefaultDepthStencilState(&dsState); ctx->setDepthStencilState(dsState); }); } } void D3D11DeviceContext::ApplyStencilRef() { EmitCs([ cStencilRef = m_state.om.stencilRef ] (DxvkContext* ctx) { ctx->setStencilReference(cStencilRef); }); } void D3D11DeviceContext::ApplyRasterizerState() { if (m_state.rs.state != nullptr) { EmitCs([ cRasterizerState = m_state.rs.state ] (DxvkContext* ctx) { cRasterizerState->BindToContext(ctx); }); } else { EmitCs([] (DxvkContext* ctx) { DxvkRasterizerState rsState; InitDefaultRasterizerState(&rsState); ctx->setRasterizerState(rsState); }); } } void D3D11DeviceContext::ApplyRasterizerSampleCount() { DxbcPushConstants pc; pc.rasterizerSampleCount = m_state.om.sampleCount; if (unlikely(!m_state.om.sampleCount)) { pc.rasterizerSampleCount = m_state.rs.state->Desc()->ForcedSampleCount; if (!m_state.om.sampleCount) pc.rasterizerSampleCount = 1; } EmitCs([ cPushConstants = pc ] (DxvkContext* ctx) { ctx->pushConstants(0, sizeof(cPushConstants), &cPushConstants); }); } void D3D11DeviceContext::ApplyViewportState() { std::array viewports; std::array scissors; // The backend can't handle a viewport count of zero, // so we should at least specify one empty viewport uint32_t viewportCount = m_state.rs.numViewports; if (unlikely(!viewportCount)) { viewportCount = 1; viewports[0] = VkViewport(); scissors [0] = VkRect2D(); } // D3D11's coordinate system has its origin in the bottom left, // but the 
viewport coordinates are aligned to the top-left // corner so we can get away with flipping the viewport. for (uint32_t i = 0; i < m_state.rs.numViewports; i++) { const D3D11_VIEWPORT& vp = m_state.rs.viewports[i]; viewports[i] = VkViewport { vp.TopLeftX, vp.Height + vp.TopLeftY, vp.Width, -vp.Height, vp.MinDepth, vp.MaxDepth, }; } // Scissor rectangles. Vulkan does not provide an easy way // to disable the scissor test, so we'll have to set scissor // rects that are at least as large as the framebuffer. bool enableScissorTest = false; if (m_state.rs.state != nullptr) { D3D11_RASTERIZER_DESC rsDesc; m_state.rs.state->GetDesc(&rsDesc); enableScissorTest = rsDesc.ScissorEnable; } for (uint32_t i = 0; i < m_state.rs.numViewports; i++) { if (!enableScissorTest) { scissors[i] = VkRect2D { VkOffset2D { 0, 0 }, VkExtent2D { D3D11_VIEWPORT_BOUNDS_MAX, D3D11_VIEWPORT_BOUNDS_MAX } }; } else if (i >= m_state.rs.numScissors) { scissors[i] = VkRect2D { VkOffset2D { 0, 0 }, VkExtent2D { 0, 0 } }; } else { D3D11_RECT sr = m_state.rs.scissors[i]; VkOffset2D srPosA; srPosA.x = std::max(0, sr.left); srPosA.y = std::max(0, sr.top); VkOffset2D srPosB; srPosB.x = std::max(srPosA.x, sr.right); srPosB.y = std::max(srPosA.y, sr.bottom); VkExtent2D srSize; srSize.width = uint32_t(srPosB.x - srPosA.x); srSize.height = uint32_t(srPosB.y - srPosA.y); scissors[i] = VkRect2D { srPosA, srSize }; } } if (likely(viewportCount == 1)) { EmitCs([ cViewport = viewports[0], cScissor = scissors[0] ] (DxvkContext* ctx) { ctx->setViewports(1, &cViewport, &cScissor); }); } else { EmitCs([ cViewportCount = viewportCount, cViewports = viewports, cScissors = scissors ] (DxvkContext* ctx) { ctx->setViewports( cViewportCount, cViewports.data(), cScissors.data()); }); } } template void D3D11DeviceContext::BindShader( const D3D11CommonShader* pShaderModule) { // Bind the shader and the ICB at once EmitCs([ cSlice = pShaderModule != nullptr && pShaderModule->GetIcb() != nullptr ? 
DxvkBufferSlice(pShaderModule->GetIcb())
             : DxvkBufferSlice(),
      cShader = pShaderModule != nullptr
              ? pShaderModule->GetShader()
              : nullptr
    ] (DxvkContext* ctx) {
      VkShaderStageFlagBits stage = GetShaderStage(ShaderStage);

      // The ICB slice is bound to the slot computed for the index
      // D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT
      uint32_t slotId = computeConstantBufferBinding(ShaderStage,
        D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT);

      ctx->bindShader (stage, cShader);
      ctx->bindResourceBuffer(stage, slotId, cSlice);
    });
  }


  // Collects the currently bound render target and depth-stencil
  // views into one set of attachments and records binding them.
  // Also re-applies the rasterizer sample count if the sample
  // count of the bound views changed.
  void D3D11DeviceContext::BindFramebuffer() {
    DxvkRenderTargets attachments;
    uint32_t sampleCount = 0;

    // D3D11 doesn't have the concept of a framebuffer object,
    // so we'll just create a new one every time the render
    // target bindings are updated. Set up the attachments.
    for (UINT i = 0; i < m_state.om.renderTargetViews.size(); i++) {
      if (m_state.om.renderTargetViews[i] != nullptr) {
        attachments.color[i] = {
          m_state.om.renderTargetViews[i]->GetImageView(),
          m_state.om.renderTargetViews[i]->GetRenderLayout() };
        sampleCount = m_state.om.renderTargetViews[i]->GetSampleCount();
      }
    }

    if (m_state.om.depthStencilView != nullptr) {
      attachments.depth = {
        m_state.om.depthStencilView->GetImageView(),
        m_state.om.depthStencilView->GetRenderLayout() };
      sampleCount = m_state.om.depthStencilView->GetSampleCount();
    }

    // Create and bind the framebuffer object to the context
    EmitCs([
      cAttachments = std::move(attachments)
    ] (DxvkContext* ctx) {
      ctx->bindRenderTargets(cAttachments);
    });

    // If necessary, update push constant for the sample count
    if (m_state.om.sampleCount != sampleCount) {
      m_state.om.sampleCount = sampleCount;
      ApplyRasterizerSampleCount();
    }
  }


  // Records binding of the indirect argument and count buffers.
  // A null buffer is bound as an undefined buffer slice.
  void D3D11DeviceContext::BindDrawBuffers(
          D3D11Buffer* pBufferForArgs,
          D3D11Buffer* pBufferForCount) {
    EmitCs([
      cArgBuffer = pBufferForArgs ? pBufferForArgs->GetBufferSlice() : DxvkBufferSlice(),
      cCntBuffer = pBufferForCount ? pBufferForCount->GetBufferSlice() : DxvkBufferSlice()
    ] (DxvkContext* ctx) {
      ctx->bindDrawBuffers(cArgBuffer, cCntBuffer);
    });
  }


  // Records binding of a vertex buffer to the given slot. A null
  // buffer unbinds the slot: an undefined slice with a stride of
  // zero is bound, and Offset and Stride are ignored.
  void D3D11DeviceContext::BindVertexBuffer(
          UINT Slot,
          D3D11Buffer* pBuffer,
          UINT Offset,
          UINT Stride) {
    if (likely(pBuffer != nullptr)) {
      EmitCs([
        cSlotId = Slot,
        cBufferSlice = pBuffer->GetBufferSlice(Offset),
        cStride = Stride
      ] (DxvkContext* ctx) {
        ctx->bindVertexBuffer(cSlotId, cBufferSlice, cStride);
      });
    } else {
      EmitCs([
        cSlotId = Slot
      ] (DxvkContext* ctx) {
        ctx->bindVertexBuffer(cSlotId, DxvkBufferSlice(), 0);
      });
    }
  }


  // Records binding of the index buffer. DXGI_FORMAT_R16_UINT selects
  // 16-bit indices, every other format value selects 32-bit indices.
  void D3D11DeviceContext::BindIndexBuffer(
          D3D11Buffer* pBuffer,
          UINT Offset,
          DXGI_FORMAT Format) {
    VkIndexType indexType = Format == DXGI_FORMAT_R16_UINT
      ? VK_INDEX_TYPE_UINT16
      : VK_INDEX_TYPE_UINT32;

    EmitCs([
      cBufferSlice = pBuffer != nullptr ? pBuffer->GetBufferSlice(Offset) : DxvkBufferSlice(),
      cIndexType = indexType
    ] (DxvkContext* ctx) {
      ctx->bindIndexBuffer(cBufferSlice, cIndexType);
    });
  }


  // Records binding of a transform feedback buffer and its counter
  // to the given slot. Unless Offset is ~0u, the counter is first
  // overwritten with Offset; the buffer slice itself always covers
  // the buffer as returned by GetBufferSlice(), regardless of Offset.
  void D3D11DeviceContext::BindXfbBuffer(
          UINT Slot,
          D3D11Buffer* pBuffer,
          UINT Offset) {
    DxvkBufferSlice bufferSlice;
    DxvkBufferSlice counterSlice;

    if (pBuffer != nullptr) {
      bufferSlice = pBuffer->GetBufferSlice();
      counterSlice = pBuffer->GetSOCounter();
    }

    EmitCs([
      cSlotId = Slot,
      cOffset = Offset,
      cBufferSlice = bufferSlice,
      cCounterSlice = counterSlice
    ] (DxvkContext* ctx) {
      if (cCounterSlice.defined() && cOffset != ~0u) {
        ctx->updateBuffer(
          cCounterSlice.buffer(),
          cCounterSlice.offset(),
          sizeof(cOffset),
          &cOffset);
      }

      ctx->bindXfbBuffer(cSlotId, cBufferSlice, cCounterSlice);
    });
  }


  // Records binding of a constant buffer range for the shader stage
  // this function is instantiated for. Offset and Length are given
  // in units of 16 bytes and are converted to bytes here.
  template
  void D3D11DeviceContext::BindConstantBuffer(
          UINT Slot,
          D3D11Buffer* pBuffer,
          UINT Offset,
          UINT Length) {
    EmitCs([
      cSlotId = Slot,
      cBufferSlice = pBuffer ? pBuffer->GetBufferSlice(16 * Offset, 16 * Length) : DxvkBufferSlice()
    ] (DxvkContext* ctx) {
      VkShaderStageFlagBits stage = GetShaderStage(ShaderStage);
      ctx->bindResourceBuffer(stage, cSlotId, cBufferSlice);
    });
  }


  // Records a change of the bound range of the buffer in the given
  // slot. Offset and Length are again in units of 16 bytes.
  template
  void D3D11DeviceContext::BindConstantBufferRange(
          UINT Slot,
          UINT Offset,
          UINT Length) {
    EmitCs([
      cSlotId = Slot,
      cOffset = 16 * Offset,
      cLength = 16 * Length
    ] (DxvkContext* ctx) {
      VkShaderStageFlagBits stage = GetShaderStage(ShaderStage);
      ctx->bindResourceBufferRange(stage, cSlotId, cOffset, cLength);
    });
  }


  // Records binding of a sampler to the given slot.
  // A null sampler state binds a null sampler.
  template
  void D3D11DeviceContext::BindSampler(
          UINT Slot,
          D3D11SamplerState* pSampler) {
    EmitCs([
      cSlotId = Slot,
      cSampler = pSampler != nullptr ? pSampler->GetDXVKSampler() : nullptr
    ] (DxvkContext* ctx) {
      VkShaderStageFlagBits stage = GetShaderStage(ShaderStage);
      ctx->bindResourceSampler(stage, cSlotId, cSampler);
    });
  }


  // Records binding of a shader resource view to the given slot. Both
  // the image view and the buffer view of the resource are passed on;
  // a null view binds null for both.
  template
  void D3D11DeviceContext::BindShaderResource(
          UINT Slot,
          D3D11ShaderResourceView* pResource) {
    EmitCs([
      cSlotId = Slot,
      cImageView = pResource != nullptr ? pResource->GetImageView() : nullptr,
      cBufferView = pResource != nullptr ? pResource->GetBufferView() : nullptr
    ] (DxvkContext* ctx) {
      VkShaderStageFlagBits stage = GetShaderStage(ShaderStage);
      ctx->bindResourceView(stage, cSlotId, cImageView, cBufferView);
    });
  }


  // Records binding of an unordered access view to UavSlot and of
  // its counter buffer to CtrSlot. Unless Counter is ~0u, the counter
  // buffer is first overwritten with Counter.
  template
  void D3D11DeviceContext::BindUnorderedAccessView(
          UINT UavSlot,
          D3D11UnorderedAccessView* pUav,
          UINT CtrSlot,
          UINT Counter) {
    EmitCs([
      cUavSlotId = UavSlot,
      cCtrSlotId = CtrSlot,
      cImageView = pUav != nullptr ? pUav->GetImageView() : nullptr,
      cBufferView = pUav != nullptr ? pUav->GetBufferView() : nullptr,
      cCounterSlice = pUav != nullptr ? pUav->GetCounterSlice() : DxvkBufferSlice(),
      cCounterValue = Counter
    ] (DxvkContext* ctx) {
      // Views bound for the pixel shader stage are bound for all
      // graphics stages, everything else is bound for compute
      VkShaderStageFlags stages = ShaderStage == DxbcProgramType::PixelShader
        ?
VK_SHADER_STAGE_ALL_GRAPHICS
        : VK_SHADER_STAGE_COMPUTE_BIT;

      if (cCounterSlice.defined() && cCounterValue != ~0u) {
        ctx->updateBuffer(
          cCounterSlice.buffer(),
          cCounterSlice.offset(),
          sizeof(uint32_t),
          &cCounterValue);
      }

      ctx->bindResourceView (stages, cUavSlotId, cImageView, cBufferView);
      ctx->bindResourceBuffer (stages, cCtrSlotId, cCounterSlice);
    });
  }


  // Copies Length bytes from pSrcData into pDstBuffer at Offset.
  // The source data is copied before this function returns, either
  // into a data slice or into a staging buffer, so the caller's
  // memory is not referenced by the recorded command.
  void D3D11DeviceContext::UpdateBuffer(
          D3D11Buffer* pDstBuffer,
          UINT Offset,
          UINT Length,
    const void* pSrcData) {
    DxvkBufferSlice bufferSlice = pDstBuffer->GetBufferSlice(Offset, Length);

    if (Length <= 1024 && !(Offset & 0x3) && !(Length & 0x3)) {
      // The backend has special code paths for small buffer updates,
      // however both offset and size must be aligned to four bytes.
      DxvkDataSlice dataSlice = AllocUpdateBufferSlice(Length);
      std::memcpy(dataSlice.ptr(), pSrcData, Length);

      EmitCs([
        cDataBuffer = std::move(dataSlice),
        cBufferSlice = std::move(bufferSlice)
      ] (DxvkContext* ctx) {
        ctx->updateBuffer(
          cBufferSlice.buffer(),
          cBufferSlice.offset(),
          cBufferSlice.length(),
          cDataBuffer.ptr());
      });
    } else {
      // Otherwise, to avoid large data copies on the CS thread,
      // write directly to a staging buffer and dispatch a copy
      DxvkBufferSlice stagingSlice = AllocStagingBuffer(Length);
      std::memcpy(stagingSlice.mapPtr(0), pSrcData, Length);

      EmitCs([
        cStagingSlice = std::move(stagingSlice),
        cBufferSlice = std::move(bufferSlice)
      ] (DxvkContext* ctx) {
        ctx->copyBuffer(
          cBufferSlice.buffer(),
          cBufferSlice.offset(),
          cStagingSlice.buffer(),
          cStagingSlice.offset(),
          cBufferSlice.length());
      });
    }

    if (pDstBuffer->HasSequenceNumber())
      TrackBufferSequenceNumber(pDstBuffer);
  }


  // Uploads application data into one subresource of a texture. The
  // data is packed into a staging buffer first and then handed to
  // UpdateImage. pDstBox selects the destination region; a null box
  // selects the entire mip level.
  void D3D11DeviceContext::UpdateTexture(
          D3D11CommonTexture* pDstTexture,
          UINT DstSubresource,
    const D3D11_BOX* pDstBox,
    const void* pSrcData,
          UINT SrcRowPitch,
          UINT SrcDepthPitch) {
    // Out-of-range subresource indices are silently ignored
    if (DstSubresource >= pDstTexture->CountSubresources())
      return;

    VkFormat packedFormat = pDstTexture->GetPackedFormat();

    auto formatInfo = lookupFormatInfo(packedFormat);
    auto subresource = pDstTexture->GetSubresourceFromIndex(
        formatInfo->aspectMask, DstSubresource);

    VkExtent3D mipExtent = pDstTexture->MipLevelExtent(subresource.mipLevel);

    VkOffset3D offset = { 0, 0, 0 };
    VkExtent3D extent = mipExtent;

    if (pDstBox != nullptr) {
      if (pDstBox->left >= pDstBox->right
       || pDstBox->top >= pDstBox->bottom
       || pDstBox->front >= pDstBox->back)
        return; // no-op, but legal

      offset.x = pDstBox->left;
      offset.y = pDstBox->top;
      offset.z = pDstBox->front;

      extent.width = pDstBox->right - pDstBox->left;
      extent.height = pDstBox->bottom - pDstBox->top;
      extent.depth = pDstBox->back - pDstBox->front;
    }

    // Regions that are not aligned to the block
    // size of the format are rejected with an error
    if (!util::isBlockAligned(offset, extent, formatInfo->blockSize, mipExtent)) {
      Logger::err("D3D11: UpdateSubresource1: Unaligned region");
      return;
    }

    auto stagingSlice = AllocStagingBuffer(util::computeImageDataSize(packedFormat, extent));

    util::packImageData(stagingSlice.mapPtr(0),
      pSrcData, SrcRowPitch, SrcDepthPitch, 0, 0,
      pDstTexture->GetVkImageType(), extent, 1,
      formatInfo, formatInfo->aspectMask);

    UpdateImage(pDstTexture, &subresource,
      offset, extent, std::move(stagingSlice));
  }


  // Records a copy of packed data from StagingBuffer into the given
  // region of one texture subresource. Textures whose map mode is
  // D3D11_COMMON_TEXTURE_MAP_MODE_STAGING are written through their
  // mapped buffer, all other textures through their image.
  void D3D11DeviceContext::UpdateImage(
          D3D11CommonTexture* pDstTexture,
    const VkImageSubresource* pDstSubresource,
          VkOffset3D DstOffset,
          VkExtent3D DstExtent,
          DxvkBufferSlice StagingBuffer) {
    bool dstIsImage = pDstTexture->GetMapMode() != D3D11_COMMON_TEXTURE_MAP_MODE_STAGING;

    uint32_t dstSubresource = D3D11CalcSubresource(pDstSubresource->mipLevel,
      pDstSubresource->arrayLayer, pDstTexture->Desc()->MipLevels);

    if (dstIsImage) {
      EmitCs([
        cDstImage = pDstTexture->GetImage(),
        cDstLayers = vk::makeSubresourceLayers(*pDstSubresource),
        cDstOffset = DstOffset,
        cDstExtent = DstExtent,
        cStagingSlice = std::move(StagingBuffer),
        cPackedFormat = pDstTexture->GetPackedFormat()
      ] (DxvkContext* ctx) {
        // Combined depth-stencil data takes a dedicated copy
        // function, everything else is a regular buffer-to-image copy
        if (cDstLayers.aspectMask != (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
          ctx->copyBufferToImage(cDstImage,
            cDstLayers, cDstOffset, cDstExtent,
            cStagingSlice.buffer(),
            cStagingSlice.offset(), 0, 0);
        } else {
          ctx->copyPackedBufferToDepthStencilImage(cDstImage, cDstLayers,
            VkOffset2D { cDstOffset.x, cDstOffset.y },
            VkExtent2D { cDstExtent.width, cDstExtent.height },
            cStagingSlice.buffer(),
            cStagingSlice.offset(),
            VkOffset2D { 0, 0 },
            VkExtent2D { cDstExtent.width, cDstExtent.height },
            cPackedFormat);
        }
      });
    } else {
      // If the destination image is backed only by a buffer, we need to use
      // the packed buffer copy function which does not know about planes and
      // format metadata, so deal with it manually here.
      VkExtent3D dstMipExtent = pDstTexture->MipLevelExtent(pDstSubresource->mipLevel);

      auto dstFormat = pDstTexture->GetPackedFormat();
      auto dstFormatInfo = lookupFormatInfo(dstFormat);

      uint32_t planeCount = 1;

      if (dstFormatInfo->flags.test(DxvkFormatFlag::MultiPlane))
        planeCount = vk::getPlaneCount(dstFormatInfo->aspectMask);

      // The source data isn't stored in an image so we'll also need to
      // track the offset for that while iterating over the planes.
      VkDeviceSize srcPlaneOffset = 0;

      for (uint32_t i = 0; i < planeCount; i++) {
        VkImageAspectFlags dstAspectMask = dstFormatInfo->aspectMask;
        VkDeviceSize elementSize = dstFormatInfo->elementSize;
        VkExtent3D blockSize = dstFormatInfo->blockSize;

        // For multi-plane formats, use the aspect, element
        // size and scaled block size of the current plane
        if (dstFormatInfo->flags.test(DxvkFormatFlag::MultiPlane)) {
          dstAspectMask = vk::getPlaneAspect(i);

          auto plane = &dstFormatInfo->planes[i];
          blockSize.width *= plane->blockSize.width;
          blockSize.height *= plane->blockSize.height;
          elementSize = plane->elementSize;
        }

        VkExtent3D blockCount = util::computeBlockCount(DstExtent, blockSize);

        // All offsets and extents passed to the
        // copy are in units of blocks, not texels
        EmitCs([
          cDstBuffer = pDstTexture->GetMappedBuffer(dstSubresource),
          cDstStart = pDstTexture->GetSubresourceLayout(dstAspectMask, dstSubresource).Offset,
          cDstOffset = util::computeBlockOffset(DstOffset, blockSize),
          cDstSize = util::computeBlockCount(dstMipExtent, blockSize),
          cDstExtent = blockCount,
          cSrcBuffer = StagingBuffer.buffer(),
          cSrcStart = StagingBuffer.offset() + srcPlaneOffset,
          cPixelSize = elementSize
        ] (DxvkContext* ctx) {
          ctx->copyPackedBufferImage(
            cDstBuffer, cDstStart, cDstOffset, cDstSize,
            cSrcBuffer, cSrcStart, VkOffset3D(), cDstExtent,
            cDstExtent, cPixelSize);
        });

        // Data of the next plane directly follows this one
        srcPlaneOffset += util::flattenImageExtent(blockCount) * elementSize;
      }
    }

    if (pDstTexture->HasSequenceNumber())
      TrackTextureSequenceNumber(pDstTexture, dstSubresource);
  }


  // Updates the tracked indirect argument and count buffers and
  // records a rebind, but only if either of them actually changed.
  void D3D11DeviceContext::SetDrawBuffers(
          ID3D11Buffer* pBufferForArgs,
          ID3D11Buffer* pBufferForCount) {
    auto argBuffer = static_cast(pBufferForArgs);
    auto cntBuffer = static_cast(pBufferForCount);

    if (m_state.id.argBuffer != argBuffer
     || m_state.id.cntBuffer != cntBuffer) {
      m_state.id.argBuffer = argBuffer;
      m_state.id.cntBuffer = cntBuffer;

      BindDrawBuffers(argBuffer, cntBuffer);
    }
  }


  // Checks a set of render target views and unordered access views
  // for overlaps among each other. Returns true as soon as any pair
  // of views overlaps according to CheckViewOverlap, false otherwise.
  // The special "keep" values for either count are treated as zero.
  bool D3D11DeviceContext::TestRtvUavHazards(
          UINT NumRTVs,
          ID3D11RenderTargetView* const* ppRTVs,
          UINT NumUAVs,
          ID3D11UnorderedAccessView* const* ppUAVs) {
    if (NumRTVs == D3D11_KEEP_RENDER_TARGETS_AND_DEPTH_STENCIL) NumRTVs = 0;
    if (NumUAVs == D3D11_KEEP_UNORDERED_ACCESS_VIEWS) NumUAVs = 0;

    for (uint32_t i = 0; i < NumRTVs; i++) {
      auto rtv = static_cast(ppRTVs[i]);

      if (!rtv)
        continue;

      // Compare against all preceding render target views
      for (uint32_t j = 0; j < i; j++) {
        if (CheckViewOverlap(rtv, static_cast(ppRTVs[j])))
          return true;
      }

      // Only compare against the UAVs if the view has
      // the unordered access bind flag to begin with
      if (rtv->HasBindFlag(D3D11_BIND_UNORDERED_ACCESS)) {
        for (uint32_t j = 0; j < NumUAVs; j++) {
          if (CheckViewOverlap(rtv, static_cast(ppUAVs[j])))
            return true;
        }
      }
    }

    for (uint32_t i = 0; i < NumUAVs; i++) {
      auto uav = static_cast(ppUAVs[i]);

      if (!uav)
        continue;

      // Compare against all preceding unordered access views
      for (uint32_t j = 0; j < i; j++) {
        if (CheckViewOverlap(uav, static_cast(ppUAVs[j])))
          return true;
      }
    }

    return false;
  }


  // Checks whether a shader resource view overlaps with any view that
  // is currently bound for writing: the compute shader UAVs for the
  // compute stage, and the depth-stencil view, render target views and
  // pixel shader UAVs for all other stages.
  template
  bool D3D11DeviceContext::TestSrvHazards(
          D3D11ShaderResourceView* pView) {
    bool hazard = false;

    if (ShaderStage == DxbcProgramType::ComputeShader) {
      // Walk the set bits of the UAV mask until a hazard is found
      int32_t uav = m_state.cs.uavMask.findNext(0);

      while (uav >= 0 && !hazard) {
        hazard = CheckViewOverlap(pView, m_state.cs.unorderedAccessViews[uav].ptr());
        uav = m_state.cs.uavMask.findNext(uav + 1);
      }
    } else {
      hazard = CheckViewOverlap(pView,
m_state.om.depthStencilView.ptr()); for (uint32_t i = 0; !hazard && i < m_state.om.maxRtv; i++) hazard = CheckViewOverlap(pView, m_state.om.renderTargetViews[i].ptr()); for (uint32_t i = 0; !hazard && i < m_state.om.maxUav; i++) hazard = CheckViewOverlap(pView, m_state.ps.unorderedAccessViews[i].ptr()); } return hazard; } template void D3D11DeviceContext::ResolveSrvHazards( T* pView, D3D11ShaderResourceBindings& Bindings) { uint32_t slotId = computeSrvBinding(ShaderStage, 0); int32_t srvId = Bindings.hazardous.findNext(0); while (srvId >= 0) { auto srv = Bindings.views[srvId].ptr(); if (likely(srv && srv->TestHazards())) { bool hazard = CheckViewOverlap(pView, srv); if (unlikely(hazard)) { Bindings.views[srvId] = nullptr; Bindings.hazardous.clr(srvId); BindShaderResource(slotId + srvId, nullptr); } } else { // Avoid further redundant iterations Bindings.hazardous.clr(srvId); } srvId = Bindings.hazardous.findNext(srvId + 1); } } template void D3D11DeviceContext::ResolveCsSrvHazards( T* pView) { if (!pView) return; ResolveSrvHazards (pView, m_state.cs.shaderResources); } VkClearValue D3D11DeviceContext::ConvertColorValue( const FLOAT Color[4], const DxvkFormatInfo* pFormatInfo) { VkClearValue result; if (pFormatInfo->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) { for (uint32_t i = 0; i < 4; i++) { if (pFormatInfo->flags.test(DxvkFormatFlag::SampledUInt)) result.color.uint32[i] = uint32_t(std::max(0.0f, Color[i])); else if (pFormatInfo->flags.test(DxvkFormatFlag::SampledSInt)) result.color.int32[i] = int32_t(Color[i]); else result.color.float32[i] = Color[i]; } } else { result.depthStencil.depth = Color[0]; result.depthStencil.stencil = 0; } return result; } DxvkDataSlice D3D11DeviceContext::AllocUpdateBufferSlice(size_t Size) { constexpr size_t UpdateBufferSize = 1 * 1024 * 1024; if (Size >= UpdateBufferSize) { Rc buffer = new DxvkDataBuffer(Size); return buffer->alloc(Size); } else { if (m_updateBuffer == nullptr) m_updateBuffer = new DxvkDataBuffer(UpdateBufferSize); 
DxvkDataSlice slice = m_updateBuffer->alloc(Size); if (slice.ptr() == nullptr) { m_updateBuffer = new DxvkDataBuffer(UpdateBufferSize); slice = m_updateBuffer->alloc(Size); } return slice; } } DxvkBufferSlice D3D11DeviceContext::AllocStagingBuffer( VkDeviceSize Size) { return m_staging.alloc(256, Size); } void D3D11DeviceContext::ResetStagingBuffer() { m_staging.reset(); } DxvkCsChunkRef D3D11DeviceContext::AllocCsChunk() { return m_parent->AllocCsChunk(m_csFlags); } void D3D11DeviceContext::InitDefaultPrimitiveTopology( DxvkInputAssemblyState* pIaState) { pIaState->primitiveTopology = VK_PRIMITIVE_TOPOLOGY_MAX_ENUM; pIaState->primitiveRestart = VK_FALSE; pIaState->patchVertexCount = 0; } void D3D11DeviceContext::InitDefaultRasterizerState( DxvkRasterizerState* pRsState) { pRsState->polygonMode = VK_POLYGON_MODE_FILL; pRsState->cullMode = VK_CULL_MODE_BACK_BIT; pRsState->frontFace = VK_FRONT_FACE_CLOCKWISE; pRsState->depthClipEnable = VK_TRUE; pRsState->depthBiasEnable = VK_FALSE; pRsState->conservativeMode = VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT; pRsState->sampleCount = 0; } void D3D11DeviceContext::InitDefaultDepthStencilState( DxvkDepthStencilState* pDsState) { VkStencilOpState stencilOp; stencilOp.failOp = VK_STENCIL_OP_KEEP; stencilOp.passOp = VK_STENCIL_OP_KEEP; stencilOp.depthFailOp = VK_STENCIL_OP_KEEP; stencilOp.compareOp = VK_COMPARE_OP_ALWAYS; stencilOp.compareMask = D3D11_DEFAULT_STENCIL_READ_MASK; stencilOp.writeMask = D3D11_DEFAULT_STENCIL_WRITE_MASK; stencilOp.reference = 0; pDsState->enableDepthTest = VK_TRUE; pDsState->enableDepthWrite = VK_TRUE; pDsState->enableStencilTest = VK_FALSE; pDsState->depthCompareOp = VK_COMPARE_OP_LESS; pDsState->stencilOpFront = stencilOp; pDsState->stencilOpBack = stencilOp; } void D3D11DeviceContext::InitDefaultBlendState( DxvkBlendMode* pCbState, DxvkLogicOpState* pLoState, DxvkMultisampleState* pMsState, UINT SampleMask) { pCbState->enableBlending = VK_FALSE; pCbState->colorSrcFactor = 
VK_BLEND_FACTOR_ONE; pCbState->colorDstFactor = VK_BLEND_FACTOR_ZERO; pCbState->colorBlendOp = VK_BLEND_OP_ADD; pCbState->alphaSrcFactor = VK_BLEND_FACTOR_ONE; pCbState->alphaDstFactor = VK_BLEND_FACTOR_ZERO; pCbState->alphaBlendOp = VK_BLEND_OP_ADD; pCbState->writeMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT; pLoState->enableLogicOp = VK_FALSE; pLoState->logicOp = VK_LOGIC_OP_NO_OP; pMsState->sampleMask = SampleMask; pMsState->enableAlphaToCoverage = VK_FALSE; } void D3D11DeviceContext::TrackResourceSequenceNumber( ID3D11Resource* pResource) { if (!pResource) return; D3D11CommonTexture* texture = GetCommonTexture(pResource); if (texture) { if (texture->HasSequenceNumber()) { for (uint32_t i = 0; i < texture->CountSubresources(); i++) TrackTextureSequenceNumber(texture, i); } } else { D3D11Buffer* buffer = static_cast(pResource); if (buffer->HasSequenceNumber()) TrackBufferSequenceNumber(buffer); } } }