From 8c98bbb634dc3ab1c0e27c0f8742e5c9916fedc5 Mon Sep 17 00:00:00 2001 From: Philip Rebohle Date: Tue, 4 Mar 2025 03:15:15 +0100 Subject: [PATCH] [dxvk] Add implicit resolve when app tries to sample multisampled image --- src/dxvk/dxvk_context.cpp | 128 ++++++++++++++++---- src/dxvk/dxvk_context.h | 8 +- src/dxvk/dxvk_implicit_resolve.cpp | 182 +++++++++++++++++++++++++++++ src/dxvk/dxvk_implicit_resolve.h | 108 +++++++++++++++++ src/dxvk/dxvk_sparse.h | 10 ++ src/dxvk/meson.build | 1 + 6 files changed, 415 insertions(+), 22 deletions(-) create mode 100644 src/dxvk/dxvk_implicit_resolve.cpp create mode 100644 src/dxvk/dxvk_implicit_resolve.h diff --git a/src/dxvk/dxvk_context.cpp b/src/dxvk/dxvk_context.cpp index c9c2c0c4f..729e8c0b9 100644 --- a/src/dxvk/dxvk_context.cpp +++ b/src/dxvk/dxvk_context.cpp @@ -1,4 +1,5 @@ #include +#include #include #include @@ -15,7 +16,8 @@ namespace dxvk { m_initAcquires(DxvkCmdBuffer::InitBarriers), m_initBarriers(DxvkCmdBuffer::InitBuffer), m_execBarriers(DxvkCmdBuffer::ExecBuffer), - m_queryManager(m_common->queryPool()) { + m_queryManager(m_common->queryPool()), + m_implicitResolves(device) { // Init framebuffer info with default render pass in case // the app does not explicitly bind any render targets m_state.om.framebufferInfo = makeFramebufferInfo(m_state.om.renderTargets); @@ -81,6 +83,8 @@ namespace dxvk { this->endCurrentCommands(); this->relocateQueuedResources(); + m_implicitResolves.cleanup(m_trackingId); + if (m_descriptorPool->shouldSubmit(false)) { m_cmd->trackDescriptorPool(m_descriptorPool, m_descriptorManager); m_descriptorPool = m_descriptorManager->getDescriptorPool(); @@ -418,8 +422,16 @@ namespace dxvk { clearRect.layerCount = imageView->info().layerCount; m_cmd->cmdClearAttachments(1, &clearInfo, 1, &clearRect); - } else + } else { this->deferClear(imageView, clearAspects, clearValue); + } + + if (imageView->isMultisampled()) { + auto subresources = imageView->imageSubresources(); + subresources.aspectMask 
= clearAspects; + + m_implicitResolves.invalidate(*imageView->image(), subresources); + } } @@ -440,6 +452,13 @@ namespace dxvk { this->clearImageViewFb(imageView, offset, extent, aspect, value); else if (viewUsage & VK_IMAGE_USAGE_STORAGE_BIT) this->clearImageViewCs(imageView, offset, extent, value); + + if (imageView->isMultisampled()) { + auto subresources = imageView->imageSubresources(); + subresources.aspectMask = aspect; + + m_implicitResolves.invalidate(*imageView->image(), subresources); + } } @@ -579,6 +598,9 @@ namespace dxvk { srcImage, srcSubresource, srcOffset, extent); } + + if (dstImage->info().sampleCount > VK_SAMPLE_COUNT_1_BIT) + m_implicitResolves.invalidate(*dstImage, vk::makeSubresourceRange(dstSubresource)); } @@ -5659,8 +5681,19 @@ namespace dxvk { m_cmd->cmdClearAttachments(lateClearCount, lateClears.data(), 1, &clearRect); } - for (uint32_t i = 0; i < framebufferInfo.numAttachments(); i++) - m_cmd->track(framebufferInfo.getAttachment(i).view->image(), DxvkAccess::Write); + for (uint32_t i = 0; i < framebufferInfo.numAttachments(); i++) { + const auto& attachment = framebufferInfo.getAttachment(i); + m_cmd->track(attachment.view->image(), DxvkAccess::Write); + + if (attachment.view->isMultisampled()) { + VkImageSubresourceRange subresources = attachment.view->imageSubresources(); + + if (subresources.aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) + subresources.aspectMask = vk::getWritableAspectsForLayout(attachment.layout); + + m_implicitResolves.invalidate(*attachment.view->image(), subresources); + } + } m_cmd->addStatCtr(DxvkStatCounter::CmdRenderPassCount, 1u); } @@ -6108,14 +6141,24 @@ namespace dxvk { viewHandle = res.imageView->handle(binding.viewType); if (viewHandle) { - descriptorInfo.image.sampler = VK_NULL_HANDLE; - descriptorInfo.image.imageView = viewHandle; - descriptorInfo.image.imageLayout = res.imageView->image()->info().layout; + if (likely(!res.imageView->isMultisampled() || 
binding.isMultisampled)) { + descriptorInfo.image.sampler = VK_NULL_HANDLE; + descriptorInfo.image.imageView = viewHandle; + descriptorInfo.image.imageLayout = res.imageView->image()->info().layout; - if (BindPoint == VK_PIPELINE_BIND_POINT_COMPUTE || unlikely(res.imageView->image()->hasGfxStores())) - accessImage(DxvkCmdBuffer::ExecBuffer, *res.imageView, util::pipelineStages(binding.stage), binding.access, DxvkAccessOp::None); + if (BindPoint == VK_PIPELINE_BIND_POINT_COMPUTE || unlikely(res.imageView->image()->hasGfxStores())) + accessImage(DxvkCmdBuffer::ExecBuffer, *res.imageView, util::pipelineStages(binding.stage), binding.access, DxvkAccessOp::None); - m_cmd->track(res.imageView->image(), DxvkAccess::Read); + m_cmd->track(res.imageView->image(), DxvkAccess::Read); + } else { + auto view = m_implicitResolves.getResolveView(*res.imageView, m_trackingId); + + descriptorInfo.image.sampler = VK_NULL_HANDLE; + descriptorInfo.image.imageView = view->handle(binding.viewType); + descriptorInfo.image.imageLayout = view->image()->info().layout; + + m_cmd->track(view->image(), DxvkAccess::Read); + } } else { descriptorInfo.image.sampler = VK_NULL_HANDLE; descriptorInfo.image.imageView = VK_NULL_HANDLE; @@ -6157,15 +6200,26 @@ namespace dxvk { viewHandle = res.imageView->handle(binding.viewType); if (viewHandle) { - descriptorInfo.image.sampler = res.sampler->handle(); - descriptorInfo.image.imageView = viewHandle; - descriptorInfo.image.imageLayout = res.imageView->image()->info().layout; + if (likely(!res.imageView->isMultisampled() || binding.isMultisampled)) { + descriptorInfo.image.sampler = res.sampler->handle(); + descriptorInfo.image.imageView = viewHandle; + descriptorInfo.image.imageLayout = res.imageView->image()->info().layout; - if (BindPoint == VK_PIPELINE_BIND_POINT_COMPUTE || unlikely(res.imageView->image()->hasGfxStores())) - accessImage(DxvkCmdBuffer::ExecBuffer, *res.imageView, util::pipelineStages(binding.stage), binding.access, DxvkAccessOp::None); 
+ if (BindPoint == VK_PIPELINE_BIND_POINT_COMPUTE || unlikely(res.imageView->image()->hasGfxStores())) + accessImage(DxvkCmdBuffer::ExecBuffer, *res.imageView, util::pipelineStages(binding.stage), binding.access, DxvkAccessOp::None); - m_cmd->track(res.sampler); - m_cmd->track(res.imageView->image(), DxvkAccess::Read); + m_cmd->track(res.imageView->image(), DxvkAccess::Read); + m_cmd->track(res.sampler); + } else { + auto view = m_implicitResolves.getResolveView(*res.imageView, m_trackingId); + + descriptorInfo.image.sampler = res.sampler->handle(); + descriptorInfo.image.imageView = view->handle(binding.viewType); + descriptorInfo.image.imageLayout = view->image()->info().layout; + + m_cmd->track(view->image(), DxvkAccess::Read); + m_cmd->track(res.sampler); + } } else { descriptorInfo.image.sampler = m_common->dummyResources().samplerHandle(); descriptorInfo.image.imageView = VK_NULL_HANDLE; @@ -6821,7 +6875,8 @@ namespace dxvk { &m_state.pc.data[pushConstRange.offset]); } - + + template bool DxvkContext::commitComputeState() { this->spillRenderPass(false); @@ -6843,9 +6898,15 @@ namespace dxvk { if (unlikely(m_features.test(DxvkContextFeature::DebugUtils))) this->beginBarrierControlDebugRegion(); - if (m_descriptorState.hasDirtyComputeSets()) + if (m_descriptorState.hasDirtyComputeSets()) { this->updateComputeShaderResources(); + if (unlikely(Resolve && m_implicitResolves.hasPendingResolves())) { + this->flushImplicitResolves(); + return this->commitComputeState(); + } + } + if (m_flags.test(DxvkContextFlag::DirtyPushConstants)) this->updatePushConstants(); @@ -6853,7 +6914,7 @@ namespace dxvk { } - template + template bool DxvkContext::commitGraphicsState() { if (m_flags.test(DxvkContextFlag::GpDirtyPipeline)) { if (unlikely(!this->updateGraphicsPipeline())) @@ -6920,8 +6981,18 @@ namespace dxvk { return false; } - if (m_descriptorState.hasDirtyGraphicsSets()) + if (m_descriptorState.hasDirtyGraphicsSets()) { this->updateGraphicsShaderResources(); + + if 
(unlikely(Resolve && m_implicitResolves.hasPendingResolves())) { + // If implicit resolves are required for any of the shader bindings, we need + // to discard all the state setup that we've done so far and try again + this->spillRenderPass(true); + this->flushImplicitResolves(); + + return this->commitGraphicsState(); + } + } if (m_state.gp.flags.test(DxvkGraphicsPipelineFlag::HasTransformFeedback)) this->updateTransformFeedbackState(); @@ -7675,6 +7746,21 @@ namespace dxvk { } + void DxvkContext::flushImplicitResolves() { + spillRenderPass(true); + + DxvkImplicitResolveOp op; + + while (m_implicitResolves.extractResolve(op)) { + prepareImage(op.inputImage, vk::makeSubresourceRange(op.resolveRegion.srcSubresource)); + prepareImage(op.resolveImage, vk::makeSubresourceRange(op.resolveRegion.dstSubresource)); + + resolveImageRp(op.resolveImage, op.inputImage, op.resolveRegion, + op.resolveFormat, op.resolveMode, op.resolveMode); + } + } + + void DxvkContext::beginCurrentCommands() { beginActiveDebugRegions(); diff --git a/src/dxvk/dxvk_context.h b/src/dxvk/dxvk_context.h index c8bec80ef..86fe31bdc 100644 --- a/src/dxvk/dxvk_context.h +++ b/src/dxvk/dxvk_context.h @@ -4,6 +4,7 @@ #include "dxvk_bind_mask.h" #include "dxvk_cmdlist.h" #include "dxvk_context_state.h" +#include "dxvk_implicit_resolve.h" #include "dxvk_latency.h" #include "dxvk_objects.h" #include "dxvk_queue.h" @@ -1461,6 +1462,8 @@ namespace dxvk { uint64_t m_latencyFrameId = 0u; bool m_endLatencyTracking = false; + DxvkImplicitResolveTracker m_implicitResolves; + void blitImageFb( Rc dstView, const VkOffset3D* dstOffsets, @@ -1776,9 +1779,10 @@ namespace dxvk { template void updatePushConstants(); + template bool commitComputeState(); - template + template bool commitGraphicsState(); template @@ -1876,6 +1880,8 @@ namespace dxvk { void resizeDescriptorArrays( uint32_t bindingCount); + void flushImplicitResolves(); + void beginCurrentCommands(); void endCurrentCommands(); diff --git 
a/src/dxvk/dxvk_implicit_resolve.cpp b/src/dxvk/dxvk_implicit_resolve.cpp new file mode 100644 index 000000000..abaa92d12 --- /dev/null +++ b/src/dxvk/dxvk_implicit_resolve.cpp @@ -0,0 +1,182 @@ +#include + +#include "dxvk_device.h" +#include "dxvk_implicit_resolve.h" + +namespace dxvk { + + DxvkImplicitResolveTracker::DxvkImplicitResolveTracker(Rc device) + : m_device(std::move(device)) { + + } + + + DxvkImplicitResolveTracker::~DxvkImplicitResolveTracker() { + + } + + + Rc DxvkImplicitResolveTracker::getResolveView( + DxvkImageView& view, + uint64_t trackingId) { + // We generally only expect to have one or two views at most in games + // that hit this path at all, so iterating over the arrays is fine + for (auto& v : m_resolveViews) { + if (v.inputView == &view) { + addResolveOp(v); + return v.resolveView; + } + } + + // Create a new resolve image with only the array layers covered by the + // input view. We expect resolve images to be somewhat short-lived. + DxvkImageCreateInfo imageInfo = view.image()->info(); + + DxvkImageCreateInfo resolveInfo = { }; + resolveInfo.type = imageInfo.type; + resolveInfo.format = view.info().format; + resolveInfo.sampleCount = VK_SAMPLE_COUNT_1_BIT; + resolveInfo.extent = imageInfo.extent; + resolveInfo.numLayers = view.info().layerCount; + resolveInfo.mipLevels = 1u; + resolveInfo.usage = VK_IMAGE_USAGE_SAMPLED_BIT; + resolveInfo.stages = m_device->getShaderPipelineStages(); + resolveInfo.access = VK_ACCESS_SHADER_READ_BIT; + resolveInfo.tiling = VK_IMAGE_TILING_OPTIMAL; + resolveInfo.layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + resolveInfo.transient = VK_TRUE; + resolveInfo.debugName = "Resolve image"; + + if (view.info().aspects & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { + resolveInfo.usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; + resolveInfo.stages |= VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + resolveInfo.access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + } else { + 
resolveInfo.usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + resolveInfo.stages |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + resolveInfo.access |= VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + } + + Rc image = m_device->createImage(resolveInfo, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + + cleanup(image->getMemoryInfo().size, trackingId); + + DxvkImageViewKey viewKey = view.info(); + viewKey.usage = VK_IMAGE_USAGE_SAMPLED_BIT; + viewKey.layerIndex = 0u; + + auto& resolveView = m_resolveViews.emplace_back(); + resolveView.inputView = &view; + resolveView.resolveView = image->createView(viewKey); + + addResolveOp(resolveView); + + return resolveView.resolveView; + } + + + bool DxvkImplicitResolveTracker::extractResolve( + DxvkImplicitResolveOp& resolve) { + if (m_resolveOps.empty()) { + resolve = DxvkImplicitResolveOp(); + return false; + } + + resolve = std::move(m_resolveOps.back()); + m_resolveOps.pop_back(); + return true; + } + + + void DxvkImplicitResolveTracker::invalidate( + const DxvkImage& image, + const VkImageSubresourceRange& subresources) { + for (auto& v : m_resolveViews) { + if (v.resolveDone && v.inputView->image() == &image) { + auto viewSubresource = v.inputView->imageSubresources(); + + if ((subresources.aspectMask & viewSubresource.aspectMask) + && vk::checkSubresourceRangeOverlap(viewSubresource, subresources)) + v.resolveDone = false; + } + } + } + + + void DxvkImplicitResolveTracker::cleanup( + uint64_t trackingId) { + cleanup(0u, trackingId); + } + + + void DxvkImplicitResolveTracker::addResolveOp( + DxvkImplicitResolveView& view) { + if (view.resolveDone) + return; + + // Determine resolve parameters based on the view format rather than the + // image format, since this will more likely represent what the app is + // trying to do + auto format = view.inputView->formatInfo(); + + auto& op = m_resolveOps.emplace_back(); + op.inputImage = view.inputView->image(); + op.resolveImage = view.resolveView->image(); + op.resolveRegion.srcSubresource = 
vk::pickSubresourceLayers(view.inputView->imageSubresources(), 0u); + op.resolveRegion.srcSubresource.aspectMask = format->aspectMask; + op.resolveRegion.dstSubresource = vk::pickSubresourceLayers(view.resolveView->imageSubresources(), 0u); + op.resolveRegion.dstSubresource.aspectMask = format->aspectMask; + op.resolveRegion.dstSubresource.baseArrayLayer = 0u; + op.resolveRegion.extent = view.resolveView->mipLevelExtent(0u); + op.resolveFormat = view.inputView->info().format; + op.resolveMode = VK_RESOLVE_MODE_AVERAGE_BIT; + + if ((format->flags.any(DxvkFormatFlag::SampledSInt, DxvkFormatFlag::SampledUInt) + || (format->aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)))) + op.resolveMode = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT; + + view.resolveDone = true; + } + + + void DxvkImplicitResolveTracker::cleanup( + VkDeviceSize allocationSize, + uint64_t trackingId) { + constexpr VkDeviceSize MaxMemory = 64ull << 20u; + + constexpr uint64_t MaxLifetime = 256u; + constexpr uint64_t MinLifetime = 16u; + + // Eliminate images that haven't been used in a long time + for (auto i = m_resolveViews.begin(); i != m_resolveViews.end(); ) { + if (i->resolveView->image()->getTrackId() + MaxLifetime < trackingId) { + i = m_resolveViews.erase(i); + } else { + allocationSize += i->resolveView->image()->getMemoryInfo().size; + i++; + } + } + + // If we're using a large amount of memory for resolve images, eliminate + // the least recently used resolve images until we drop below the size + // threshold again. 
+ while (allocationSize > MaxMemory) { + auto lr = m_resolveViews.end(); + + for (auto i = m_resolveViews.begin(); i != m_resolveViews.end(); i++) { + if (i->resolveView->image()->getTrackId() + MinLifetime < trackingId) { + if (lr == m_resolveViews.end() + || lr->resolveView->image()->getTrackId() > i->resolveView->image()->getTrackId()) + lr = i; + } + } + + if (lr == m_resolveViews.end()) + break; + + allocationSize -= lr->resolveView->image()->getMemoryInfo().size; + m_resolveViews.erase(lr); + } + } + +} diff --git a/src/dxvk/dxvk_implicit_resolve.h b/src/dxvk/dxvk_implicit_resolve.h new file mode 100644 index 000000000..1f9f695b2 --- /dev/null +++ b/src/dxvk/dxvk_implicit_resolve.h @@ -0,0 +1,108 @@ +#pragma once + +#include + +#include "dxvk_image.h" + +#include "../util/util_small_vector.h" + +namespace dxvk { + + struct DxvkImplicitResolveView { + Rc inputView = nullptr; + Rc resolveView = nullptr; + bool resolveDone = false; + }; + + + struct DxvkImplicitResolveOp { + Rc inputImage = nullptr; + Rc resolveImage = nullptr; + VkImageResolve resolveRegion = { }; + VkFormat resolveFormat = VK_FORMAT_UNDEFINED; + VkResolveModeFlagBits resolveMode = VK_RESOLVE_MODE_NONE; + }; + + + class DxvkDevice; + + class DxvkImplicitResolveTracker { + + public: + + DxvkImplicitResolveTracker(Rc device); + + ~DxvkImplicitResolveTracker(); + + /** + * \brief Checks whether there are pending resolves + * + * \returns \c true if there are any resolves that must + * be executed prior to submitting the current draw. 
+ */ + bool hasPendingResolves() const { + return !m_resolveOps.empty(); + } + + /** + * \brief Retrieves resolve image view for a given input view + * + * \param [in] view Multisampled view bound to the context + * \returns Non-multisampled view to replace the bound view with + */ + Rc getResolveView( + DxvkImageView& view, + uint64_t trackingId); + + /** + * \brief Extracts a resolve operation to execute + * + * \param [out] resolve Extracted resolve parameters + * \returns \c true if a resolve was extracted, \c false + * if all resolves have already been processed. + */ + bool extractResolve( + DxvkImplicitResolveOp& resolve); + + /** + * \brief Invalidates resolve cache for a given set of image subresources + * + * Must be called any time the given set of subresources of this + * resource is written, so that the corresponding resolve image + * can get updated the next time it is read. Must not be called + * for any subresource that is only being read, since that may + * cause problems with read-only depth-stencil access. + * \param [in] image The multisampled image + * \param [in] subresources Image subresources written + */ + void invalidate( + const DxvkImage& image, + const VkImageSubresourceRange& subresources); + + /** + * \brief Cleans up resolve image cache + * + * Destroys resolve images that have not been used in a while + * in order to reduce memory wasted on unused images. 
+ * \param [in] trackingId Current context command list ID + */ + void cleanup( + uint64_t trackingId); + + private: + + Rc m_device; + + std::vector m_resolveViews; + std::vector m_resolveOps; + + void addResolveOp( + DxvkImplicitResolveView& view); + + void cleanup( + VkDeviceSize allocationSize, + uint64_t trackingId); + + }; + +} diff --git a/src/dxvk/dxvk_sparse.h b/src/dxvk/dxvk_sparse.h index e9f6f919f..6897ea9dd 100644 --- a/src/dxvk/dxvk_sparse.h +++ b/src/dxvk/dxvk_sparse.h @@ -535,6 +535,16 @@ namespace dxvk { return Rc::unsafeCreate(this); } + /** + * \brief Queries tracking ID + * + * Used to determine when a resource has last been used. + * \returns Tracking ID + */ + uint64_t getTrackId() const { + return m_trackId >> 1u; + } + /** * \brief Sets tracked command list ID * diff --git a/src/dxvk/meson.build b/src/dxvk/meson.build index 9b2b07356..31ca70406 100644 --- a/src/dxvk/meson.build +++ b/src/dxvk/meson.build @@ -89,6 +89,7 @@ dxvk_src = [ 'dxvk_gpu_query.cpp', 'dxvk_graphics.cpp', 'dxvk_image.cpp', + 'dxvk_implicit_resolve.cpp', 'dxvk_instance.cpp', 'dxvk_latency_builtin.cpp', 'dxvk_latency_reflex.cpp',