From 86148ec070628f5a89fbb0a91603bae2ce89529a Mon Sep 17 00:00:00 2001 From: Adam Moss Date: Fri, 17 Sep 2021 13:30:49 -0700 Subject: [PATCH] [d3d11,dxvk] Implement DXVK pieces required for DX11 DLSS support Notably, fairly generic functions to create/launch/destroy Cuda kernels, and methods to fetch GPU virtual addresses and handles for DX11 resources. Note: Requires some corresponding dxvk-nvapi changes for DLSS to be initialized successfully. --- src/d3d11/d3d11_context.cpp | 3 +- src/d3d11/d3d11_context_ext.cpp | 60 +++++- src/d3d11/d3d11_context_ext.h | 18 +- src/d3d11/d3d11_cuda.cpp | 51 +++++ src/d3d11/d3d11_cuda.h | 83 +++++++++ src/d3d11/d3d11_device.cpp | 317 +++++++++++++++++++++++++++++++- src/d3d11/d3d11_device.h | 60 +++++- src/d3d11/d3d11_initializer.cpp | 7 +- src/d3d11/d3d11_interfaces.h | 78 +++++++- src/d3d11/meson.build | 1 + src/dxvk/dxvk_adapter.cpp | 11 +- src/dxvk/dxvk_cmdlist.h | 3 + src/dxvk/dxvk_context.cpp | 76 +++++++- src/dxvk/dxvk_context.h | 18 ++ src/dxvk/dxvk_extensions.h | 3 + src/dxvk/dxvk_image.h | 15 ++ src/vulkan/vulkan_loader.h | 17 ++ 17 files changed, 798 insertions(+), 23 deletions(-) create mode 100644 src/d3d11/d3d11_cuda.cpp create mode 100644 src/d3d11/d3d11_cuda.h diff --git a/src/d3d11/d3d11_context.cpp b/src/d3d11/d3d11_context.cpp index ad239d71..45af2f7b 100644 --- a/src/d3d11/d3d11_context.cpp +++ b/src/d3d11/d3d11_context.cpp @@ -48,7 +48,8 @@ namespace dxvk { return S_OK; } - if (riid == __uuidof(ID3D11VkExtContext)) { + if (riid == __uuidof(ID3D11VkExtContext) + || riid == __uuidof(ID3D11VkExtContext1)) { *ppvObject = ref(&m_contextExt); return S_OK; } diff --git a/src/d3d11/d3d11_context_ext.cpp b/src/d3d11/d3d11_context_ext.cpp index 2f91c8bf..c892f7ef 100644 --- a/src/d3d11/d3d11_context_ext.cpp +++ b/src/d3d11/d3d11_context_ext.cpp @@ -1,4 +1,12 @@ +#include <vector> +#include <utility> +#include <cstring> + +#include "d3d11_device.h" #include "d3d11_context.h" +#include "d3d11_cuda.h" + +#include "../util/log/log.h" namespace dxvk { 
@@ -136,5 +144,55 @@ namespace dxvk { ctx->setBarrierControl(cFlags); }); } - + + + bool STDMETHODCALLTYPE D3D11DeviceContextExt::LaunchCubinShaderNVX(IUnknown* hShader, uint32_t GridX, uint32_t GridY, uint32_t GridZ, + const void* pParams, uint32_t ParamSize, void* const* pReadResources, uint32_t NumReadResources, void* const* pWriteResources, uint32_t NumWriteResources) { + D3D10DeviceLock lock = m_ctx->LockContext(); + + CubinShaderWrapper* cubinShader = static_cast<CubinShaderWrapper*>(hShader); + CubinShaderLaunchInfo launchInfo; + + const uint32_t maxResources = NumReadResources + NumWriteResources; + launchInfo.buffers.reserve(maxResources); + launchInfo.images.reserve(maxResources); + + for (uint32_t i = 0; i < NumReadResources; i++) + launchInfo.insertResource(static_cast<ID3D11Resource*>(pReadResources[i]), DxvkAccess::Read); + + for (uint32_t i = 0; i < NumWriteResources; i++) + launchInfo.insertResource(static_cast<ID3D11Resource*>(pWriteResources[i]), DxvkAccess::Write); + + launchInfo.paramSize = ParamSize; + launchInfo.params.resize(launchInfo.paramSize); + std::memcpy(launchInfo.params.data(), pParams, ParamSize); + + launchInfo.cuLaunchConfig[0] = reinterpret_cast<void*>(0x01); // CU_LAUNCH_PARAM_BUFFER_POINTER + launchInfo.cuLaunchConfig[1] = launchInfo.params.data(); + launchInfo.cuLaunchConfig[2] = reinterpret_cast<void*>(0x02); // CU_LAUNCH_PARAM_BUFFER_SIZE + launchInfo.cuLaunchConfig[3] = &launchInfo.paramSize; // yes, this actually requires a pointer to a size_t containing the parameter size + launchInfo.cuLaunchConfig[4] = reinterpret_cast<void*>(0x00); // CU_LAUNCH_PARAM_END + + launchInfo.nvxLaunchInfo.function = cubinShader->cuFunction(); + launchInfo.nvxLaunchInfo.gridDimX = GridX; + launchInfo.nvxLaunchInfo.gridDimY = GridY; + launchInfo.nvxLaunchInfo.gridDimZ = GridZ; + launchInfo.nvxLaunchInfo.blockDimX = cubinShader->blockDim().width; + launchInfo.nvxLaunchInfo.blockDimY = cubinShader->blockDim().height; + launchInfo.nvxLaunchInfo.blockDimZ = cubinShader->blockDim().depth; + 
launchInfo.nvxLaunchInfo.sharedMemBytes = 0; + launchInfo.nvxLaunchInfo.paramCount = 0; + launchInfo.nvxLaunchInfo.pParams = nullptr; + launchInfo.nvxLaunchInfo.extraCount = 1; + launchInfo.nvxLaunchInfo.pExtras = launchInfo.cuLaunchConfig.data(); + + launchInfo.shader = cubinShader; + + /* Need to capture by value in case this gets called from a deferred context */ + m_ctx->EmitCs([cLaunchInfo = std::move(launchInfo)] (DxvkContext* ctx) { + ctx->launchCuKernelNVX(cLaunchInfo.nvxLaunchInfo, cLaunchInfo.buffers, cLaunchInfo.images); + }); + + return true; + } } diff --git a/src/d3d11/d3d11_context_ext.h b/src/d3d11/d3d11_context_ext.h index bec7d233..2109a0de 100644 --- a/src/d3d11/d3d11_context_ext.h +++ b/src/d3d11/d3d11_context_ext.h @@ -6,7 +6,7 @@ namespace dxvk { class D3D11DeviceContext; - class D3D11DeviceContextExt : public ID3D11VkExtContext { + class D3D11DeviceContextExt : public ID3D11VkExtContext1 { public: @@ -56,11 +56,23 @@ namespace dxvk { void STDMETHODCALLTYPE SetBarrierControl( UINT ControlFlags); - + + bool STDMETHODCALLTYPE LaunchCubinShaderNVX( + IUnknown* hShader, + uint32_t GridX, + uint32_t GridY, + uint32_t GridZ, + const void* pParams, + uint32_t paramSize, + void* const* pReadResources, + uint32_t NumReadResources, + void* const* pWriteResources, + uint32_t NumWriteResources); + private: D3D11DeviceContext* m_ctx; - + }; } diff --git a/src/d3d11/d3d11_cuda.cpp b/src/d3d11/d3d11_cuda.cpp new file mode 100644 index 00000000..4a31d347 --- /dev/null +++ b/src/d3d11/d3d11_cuda.cpp @@ -0,0 +1,51 @@ +#include "d3d11_cuda.h" + +namespace dxvk { + + CubinShaderWrapper::CubinShaderWrapper(const Rc<DxvkDevice>& dxvkDevice, VkCuModuleNVX cuModule, VkCuFunctionNVX cuFunction, VkExtent3D blockDim) + : m_dxvkDevice(dxvkDevice), m_module(cuModule), m_function(cuFunction), m_blockDim(blockDim) { }; + + + CubinShaderWrapper::~CubinShaderWrapper() { + VkDevice vkDevice = m_dxvkDevice->handle(); + m_dxvkDevice->vkd()->vkDestroyCuFunctionNVX(vkDevice, m_function, 
nullptr); + m_dxvkDevice->vkd()->vkDestroyCuModuleNVX(vkDevice, m_module, nullptr); + }; + + + HRESULT STDMETHODCALLTYPE CubinShaderWrapper::QueryInterface(REFIID riid, void **ppvObject) { + if (riid == __uuidof(IUnknown)) { + *ppvObject = ref(this); + return S_OK; + } + + Logger::warn("CubinShaderWrapper::QueryInterface: Unknown interface query"); + Logger::warn(str::format(riid)); + return E_NOINTERFACE; + } + + + void CubinShaderLaunchInfo::insertResource(ID3D11Resource* pResource, DxvkAccessFlags access) { + auto img = GetCommonTexture(pResource); + auto buf = GetCommonBuffer(pResource); + + if (img) + insertUniqueResource(images, img->GetImage(), access); + if (buf) + insertUniqueResource(buffers, buf->GetBuffer(), access); + } + + + template<typename T> + void CubinShaderLaunchInfo::insertUniqueResource(std::vector<std::pair<T, DxvkAccessFlags>>& list, const T& resource, DxvkAccessFlags access) { + for (auto& entry : list) { + if (entry.first == resource) { + entry.second.set(access); + return; + } + } + + list.push_back({ resource, access }); + } + +} diff --git a/src/d3d11/d3d11_cuda.h b/src/d3d11/d3d11_cuda.h new file mode 100644 index 00000000..a9fcdf4b --- /dev/null +++ b/src/d3d11/d3d11_cuda.h @@ -0,0 +1,83 @@ +#pragma once + +#include <utility> +#include <vector> + +#include "../dxvk/dxvk_resource.h" + +#include "../util/com/com_guid.h" +#include "../util/com/com_object.h" + +#include "d3d11_buffer.h" +#include "d3d11_texture.h" + +namespace dxvk { + + class CubinShaderWrapper : public ComObject<IUnknown> { + + public: + + CubinShaderWrapper(const Rc<DxvkDevice>& dxvkDevice, VkCuModuleNVX cuModule, VkCuFunctionNVX cuFunction, VkExtent3D blockDim); + ~CubinShaderWrapper(); + + HRESULT STDMETHODCALLTYPE QueryInterface(REFIID riid, void **ppvObject); + + VkCuModuleNVX cuModule() const { + return m_module; + } + + VkCuFunctionNVX cuFunction() const { + return m_function; + } + + VkExtent3D blockDim() const { + return m_blockDim; + } + + private: + + Rc<DxvkDevice> m_dxvkDevice; + VkCuModuleNVX m_module; + VkCuFunctionNVX m_function; + VkExtent3D 
m_blockDim; + + }; + + + struct CubinShaderLaunchInfo { + + CubinShaderLaunchInfo() = default; + + CubinShaderLaunchInfo(CubinShaderLaunchInfo&& other) { + shader = std::move(other.shader); + params = std::move(other.params); + paramSize = std::move(other.paramSize); + nvxLaunchInfo = std::move(other.nvxLaunchInfo); + cuLaunchConfig = other.cuLaunchConfig; + buffers = std::move(other.buffers); + images = std::move(other.images); + other.cuLaunchConfig[1] = nullptr; + other.cuLaunchConfig[3] = nullptr; + other.nvxLaunchInfo.pExtras = nullptr; + // fix-up internally-pointing pointers + cuLaunchConfig[1] = params.data(); + cuLaunchConfig[3] = &paramSize; + nvxLaunchInfo.pExtras = cuLaunchConfig.data(); + } + + Com<CubinShaderWrapper> shader; + std::vector<uint8_t> params; + size_t paramSize; + VkCuLaunchInfoNVX nvxLaunchInfo = { VK_STRUCTURE_TYPE_CU_LAUNCH_INFO_NVX }; + std::array<void*, 5> cuLaunchConfig; + + std::vector<std::pair<Rc<DxvkBuffer>, DxvkAccessFlags>> buffers; + std::vector<std::pair<Rc<DxvkImage>, DxvkAccessFlags>> images; + + void insertResource(ID3D11Resource* pResource, DxvkAccessFlags access); + + template<typename T> + static void insertUniqueResource(std::vector<std::pair<T, DxvkAccessFlags>>& list, const T& resource, DxvkAccessFlags access); + }; + +} diff --git a/src/d3d11/d3d11_device.cpp b/src/d3d11/d3d11_device.cpp index 21f0d1b4..33ee28f2 100644 --- a/src/d3d11/d3d11_device.cpp +++ b/src/d3d11/d3d11_device.cpp @@ -2445,12 +2445,326 @@ namespace dxvk { case D3D11_VK_EXT_DEPTH_BOUNDS: return deviceFeatures.core.features.depthBounds; + case D3D11_VK_NVX_IMAGE_VIEW_HANDLE: + return deviceExtensions.nvxImageViewHandle; + + case D3D11_VK_NVX_BINARY_IMPORT: + return deviceExtensions.nvxBinaryImport + && deviceExtensions.khrBufferDeviceAddress; + default: return false; } } + bool STDMETHODCALLTYPE D3D11DeviceExt::GetCudaTextureObjectNVX(uint32_t srvDriverHandle, uint32_t samplerDriverHandle, uint32_t* pCudaTextureHandle) { + ID3D11ShaderResourceView* srv = HandleToSrvNVX(srvDriverHandle); + + if (!srv) { + Logger::warn(str::format("GetCudaTextureObjectNVX() failure - srv handle 
wasn't found: ", srvDriverHandle)); + return false; + } + + ID3D11SamplerState* samplerState = HandleToSamplerNVX(samplerDriverHandle); + + if (!samplerState) { + Logger::warn(str::format("GetCudaTextureObjectNVX() failure - sampler handle wasn't found: ", samplerDriverHandle)); + return false; + } + + D3D11SamplerState* pSS = static_cast<D3D11SamplerState*>(samplerState); + Rc<DxvkSampler> pDSS = pSS->GetDXVKSampler(); + VkSampler vkSampler = pDSS->handle(); + + D3D11ShaderResourceView* pSRV = static_cast<D3D11ShaderResourceView*>(srv); + Rc<DxvkImageView> pIV = pSRV->GetImageView(); + VkImageView vkImageView = pIV->handle(); + + VkImageViewHandleInfoNVX imageViewHandleInfo = {VK_STRUCTURE_TYPE_IMAGE_VIEW_HANDLE_INFO_NVX}; + imageViewHandleInfo.imageView = vkImageView; + imageViewHandleInfo.sampler = vkSampler; + imageViewHandleInfo.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + + // note: there's no implicit lifetime management here; it's up to the + // app to keep the sampler and SRV alive as long as it wants to use this + // derived handle. 
+ VkDevice vkDevice = m_device->GetDXVKDevice()->handle(); + *pCudaTextureHandle = m_device->GetDXVKDevice()->vkd()->vkGetImageViewHandleNVX(vkDevice, &imageViewHandleInfo); + + if (!*pCudaTextureHandle) { + Logger::warn("GetCudaTextureObjectNVX() handle==0 - failed"); + return false; + } + + return true; + } + + + bool STDMETHODCALLTYPE D3D11DeviceExt::CreateCubinComputeShaderWithNameNVX(const void* pCubin, uint32_t size, + uint32_t blockX, uint32_t blockY, uint32_t blockZ, const char* pShaderName, IUnknown** phShader) { + Rc<DxvkDevice> dxvkDevice = m_device->GetDXVKDevice(); + VkDevice vkDevice = dxvkDevice->handle(); + + VkCuModuleCreateInfoNVX moduleCreateInfo = { VK_STRUCTURE_TYPE_CU_MODULE_CREATE_INFO_NVX }; + moduleCreateInfo.pData = pCubin; + moduleCreateInfo.dataSize = size; + + VkCuModuleNVX cuModule; + VkCuFunctionNVX cuFunction; + VkResult result; + + if ((result = dxvkDevice->vkd()->vkCreateCuModuleNVX(vkDevice, &moduleCreateInfo, nullptr, &cuModule))) { + Logger::warn(str::format("CreateCubinComputeShaderWithNameNVX() - failure to create module - result=", result, " pcubindata=", pCubin, " cubinsize=", size)); + return false; // failure + } + + VkCuFunctionCreateInfoNVX functionCreateInfo = { VK_STRUCTURE_TYPE_CU_FUNCTION_CREATE_INFO_NVX }; + functionCreateInfo.module = cuModule; + functionCreateInfo.pName = pShaderName; + + if ((result = dxvkDevice->vkd()->vkCreateCuFunctionNVX(vkDevice, &functionCreateInfo, nullptr, &cuFunction))) { + dxvkDevice->vkd()->vkDestroyCuModuleNVX(vkDevice, cuModule, nullptr); + Logger::warn(str::format("CreateCubinComputeShaderWithNameNVX() - failure to create function - result=", result)); + return false; + } + + *phShader = ref(new CubinShaderWrapper(dxvkDevice, + cuModule, cuFunction, { blockX, blockY, blockZ })); + return true; + } + + + bool STDMETHODCALLTYPE D3D11DeviceExt::GetResourceHandleGPUVirtualAddressAndSizeNVX(void* hObject, uint64_t* gpuVAStart, uint64_t* gpuVASize) { + // The hObject 'opaque driver handle' is really 
just a straight cast + // of the corresponding ID3D11Resource* in dxvk/dxvknvapi + ID3D11Resource* pResource = static_cast<ID3D11Resource*>(hObject); + + D3D11_COMMON_RESOURCE_DESC resourceDesc; + if (FAILED(GetCommonResourceDesc(pResource, &resourceDesc))) { + Logger::warn("GetResourceHandleGPUVirtualAddressAndSize() - GetCommonResourceDesc() failed"); + return false; + } + + switch (resourceDesc.Dim) { + case D3D11_RESOURCE_DIMENSION_BUFFER: + case D3D11_RESOURCE_DIMENSION_TEXTURE2D: + // okay - we can deal with those two dimensions + break; + case D3D11_RESOURCE_DIMENSION_TEXTURE1D: + case D3D11_RESOURCE_DIMENSION_TEXTURE3D: + case D3D11_RESOURCE_DIMENSION_UNKNOWN: + default: + Logger::warn(str::format("GetResourceHandleGPUVirtualAddressAndSize(?) - failure - unsupported dimension: ", resourceDesc.Dim)); + return false; + } + + Rc<DxvkDevice> dxvkDevice = m_device->GetDXVKDevice(); + VkDevice vkDevice = dxvkDevice->handle(); + + if (resourceDesc.Dim == D3D11_RESOURCE_DIMENSION_TEXTURE2D) { + D3D11CommonTexture *texture = GetCommonTexture(pResource); + Rc<DxvkImage> dxvkImage = texture->GetImage(); + if (0 == (dxvkImage->info().usage & (VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT))) { + Logger::warn(str::format("GetResourceHandleGPUVirtualAddressAndSize(res=", pResource,") image info missing required usage bit(s); can't be used for vkGetImageViewHandleNVX - failure")); + return false; + } + + // The d3d11 nvapi provides us a texture but vulkan only lets us get the GPU address from an imageview. So, make a private imageview and get the address from that... + + D3D11_SHADER_RESOURCE_VIEW_DESC resourceViewDesc; + + const D3D11_COMMON_TEXTURE_DESC *texDesc = texture->Desc(); + if (texDesc->ArraySize != 1) { + Logger::debug(str::format("GetResourceHandleGPUVirtualAddressAndSize(?) 
- unexpected array size: ", texDesc->ArraySize)); + } + resourceViewDesc.Format = texDesc->Format; + resourceViewDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D; + resourceViewDesc.Texture2D.MostDetailedMip = 0; + resourceViewDesc.Texture2D.MipLevels = texDesc->MipLevels; + + Com<ID3D11ShaderResourceView> pNewSRV; + HRESULT hr = m_device->CreateShaderResourceView(pResource, &resourceViewDesc, &pNewSRV); + if (FAILED(hr)) { + Logger::warn("GetResourceHandleGPUVirtualAddressAndSize() - private CreateShaderResourceView() failed"); + return false; + } + + Rc<DxvkImageView> dxvkImageView = static_cast<D3D11ShaderResourceView*>(pNewSRV.ptr())->GetImageView(); + VkImageView vkImageView = dxvkImageView->handle(); + + VkImageViewAddressPropertiesNVX imageViewAddressProperties = {VK_STRUCTURE_TYPE_IMAGE_VIEW_ADDRESS_PROPERTIES_NVX}; + + VkResult res = dxvkDevice->vkd()->vkGetImageViewAddressNVX(vkDevice, vkImageView, &imageViewAddressProperties); + if (res != VK_SUCCESS) { + Logger::warn(str::format("GetResourceHandleGPUVirtualAddressAndSize(): vkGetImageViewAddressNVX() result is failure: ", res)); + return false; + } + + *gpuVAStart = imageViewAddressProperties.deviceAddress; + *gpuVASize = imageViewAddressProperties.size; + } + else if (resourceDesc.Dim == D3D11_RESOURCE_DIMENSION_BUFFER) { + D3D11Buffer *buffer = GetCommonBuffer(pResource); + const DxvkBufferSliceHandle bufSliceHandle = buffer->GetBuffer()->getSliceHandle(); + VkBuffer vkBuffer = bufSliceHandle.handle; + + VkBufferDeviceAddressInfoKHR bdaInfo = { VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO_KHR }; + bdaInfo.buffer = vkBuffer; + VkDeviceAddress bufAddr = dxvkDevice->vkd()->vkGetBufferDeviceAddressKHR(vkDevice, &bdaInfo); + *gpuVAStart = uint64_t(bufAddr) + bufSliceHandle.offset; + *gpuVASize = bufSliceHandle.length; + } + + if (!*gpuVAStart) + Logger::warn("GetResourceHandleGPUVirtualAddressAndSize() addr==0 - unexpected"); // ... 
but not explicitly a failure; continue + + return true; + } + + + bool STDMETHODCALLTYPE D3D11DeviceExt::CreateUnorderedAccessViewAndGetDriverHandleNVX(ID3D11Resource* pResource, const D3D11_UNORDERED_ACCESS_VIEW_DESC* pDesc, ID3D11UnorderedAccessView** ppUAV, uint32_t* pDriverHandle) { + D3D11_COMMON_RESOURCE_DESC resourceDesc; + if (!SUCCEEDED(GetCommonResourceDesc(pResource, &resourceDesc))) { + Logger::warn("CreateUnorderedAccessViewAndGetDriverHandleNVX() - GetCommonResourceDesc() failed"); + return false; + } + if (resourceDesc.Dim != D3D11_RESOURCE_DIMENSION_TEXTURE2D) { + Logger::warn(str::format("CreateUnorderedAccessViewAndGetDriverHandleNVX() - failure - unsupported dimension: ", resourceDesc.Dim)); + return false; + } + + auto texture = GetCommonTexture(pResource); + Rc<DxvkImage> dxvkImage = texture->GetImage(); + if (0 == (dxvkImage->info().usage & (VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT))) { + Logger::warn(str::format("CreateUnorderedAccessViewAndGetDriverHandleNVX(res=", pResource, ") image info missing required usage bit(s); can't be used for vkGetImageViewHandleNVX - failure")); + return false; + } + + if (!SUCCEEDED(m_device->CreateUnorderedAccessView(pResource, pDesc, ppUAV))) { + return false; + } + + D3D11UnorderedAccessView *pUAV = static_cast<D3D11UnorderedAccessView*>(*ppUAV); + Rc<DxvkDevice> dxvkDevice = m_device->GetDXVKDevice(); + VkDevice vkDevice = dxvkDevice->handle(); + + VkImageViewHandleInfoNVX imageViewHandleInfo = {VK_STRUCTURE_TYPE_IMAGE_VIEW_HANDLE_INFO_NVX}; + Rc<DxvkImageView> dxvkImageView = pUAV->GetImageView(); + VkImageView vkImageView = dxvkImageView->handle(); + + imageViewHandleInfo.imageView = vkImageView; + imageViewHandleInfo.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + + *pDriverHandle = dxvkDevice->vkd()->vkGetImageViewHandleNVX(vkDevice, &imageViewHandleInfo); + + if (!*pDriverHandle) { + Logger::warn("CreateUnorderedAccessViewAndGetDriverHandleNVX() handle==0 - failure"); + pUAV->Release(); + return false; + } + + return true; + } + + + bool 
STDMETHODCALLTYPE D3D11DeviceExt::CreateShaderResourceViewAndGetDriverHandleNVX(ID3D11Resource* pResource, const D3D11_SHADER_RESOURCE_VIEW_DESC* pDesc, ID3D11ShaderResourceView** ppSRV, uint32_t* pDriverHandle) { + D3D11_COMMON_RESOURCE_DESC resourceDesc; + if (!SUCCEEDED(GetCommonResourceDesc(pResource, &resourceDesc))) { + Logger::warn("CreateShaderResourceViewAndGetDriverHandleNVX() - GetCommonResourceDesc() failed"); + return false; + } + if (resourceDesc.Dim != D3D11_RESOURCE_DIMENSION_TEXTURE2D) { + Logger::warn(str::format("CreateShaderResourceViewAndGetDriverHandleNVX() - failure - unsupported dimension: ", resourceDesc.Dim)); + return false; + } + + auto texture = GetCommonTexture(pResource); + Rc<DxvkImage> dxvkImage = texture->GetImage(); + if (0 == (dxvkImage->info().usage & (VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT))) { + Logger::warn(str::format("CreateShaderResourceViewAndGetDriverHandleNVX(res=", pResource, ") image info missing required usage bit(s); can't be used for vkGetImageViewHandleNVX - failure")); + return false; + } + + if (!SUCCEEDED(m_device->CreateShaderResourceView(pResource, pDesc, ppSRV))) { + return false; + } + + D3D11ShaderResourceView* pSRV = static_cast<D3D11ShaderResourceView*>(*ppSRV); + Rc<DxvkDevice> dxvkDevice = m_device->GetDXVKDevice(); + VkDevice vkDevice = dxvkDevice->handle(); + + VkImageViewHandleInfoNVX imageViewHandleInfo = {VK_STRUCTURE_TYPE_IMAGE_VIEW_HANDLE_INFO_NVX}; + Rc<DxvkImageView> dxvkImageView = pSRV->GetImageView(); + VkImageView vkImageView = dxvkImageView->handle(); + + imageViewHandleInfo.imageView = vkImageView; + imageViewHandleInfo.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + + *pDriverHandle = dxvkDevice->vkd()->vkGetImageViewHandleNVX(vkDevice, &imageViewHandleInfo); + + if (!*pDriverHandle) { + Logger::warn("CreateShaderResourceViewAndGetDriverHandleNVX() handle==0 - failure"); + pSRV->Release(); + return false; + } + + // will need to look-up resource from uint32 handle later + AddSrvAndHandleNVX(*ppSRV, *pDriverHandle); + return 
true; + } + + + bool STDMETHODCALLTYPE D3D11DeviceExt::CreateSamplerStateAndGetDriverHandleNVX(const D3D11_SAMPLER_DESC* pSamplerDesc, ID3D11SamplerState** ppSamplerState, uint32_t* pDriverHandle) { + if (!SUCCEEDED(m_device->CreateSamplerState(pSamplerDesc, ppSamplerState))) { + return false; + } + + // for our purposes the actual value doesn't matter, only its uniqueness + static ULONG seqNum = 1; + *pDriverHandle = InterlockedIncrement(&seqNum); + + // will need to look-up sampler from uint32 handle later + AddSamplerAndHandleNVX(*ppSamplerState, *pDriverHandle); + return true; + } + + + void D3D11DeviceExt::AddSamplerAndHandleNVX(ID3D11SamplerState* pSampler, uint32_t Handle) { + std::lock_guard<dxvk::mutex> lock(m_mapLock); + m_samplerHandleToPtr[Handle] = pSampler; + } + + + ID3D11SamplerState* D3D11DeviceExt::HandleToSamplerNVX(uint32_t Handle) { + std::lock_guard<dxvk::mutex> lock(m_mapLock); + auto got = m_samplerHandleToPtr.find(Handle); + + if (got == m_samplerHandleToPtr.end()) + return nullptr; + + return static_cast<ID3D11SamplerState*>(got->second); + } + + + void D3D11DeviceExt::AddSrvAndHandleNVX(ID3D11ShaderResourceView* pSrv, uint32_t Handle) { + std::lock_guard<dxvk::mutex> lock(m_mapLock); + m_srvHandleToPtr[Handle] = pSrv; + } + + + ID3D11ShaderResourceView* D3D11DeviceExt::HandleToSrvNVX(uint32_t Handle) { + std::lock_guard<dxvk::mutex> lock(m_mapLock); + auto got = m_srvHandleToPtr.find(Handle); + + if (got == m_srvHandleToPtr.end()) + return nullptr; + + return static_cast<ID3D11ShaderResourceView*>(got->second); + } + + + D3D11VideoDevice::D3D11VideoDevice( @@ -2827,7 +3141,8 @@ namespace dxvk { return S_OK; } - if (riid == __uuidof(ID3D11VkExtDevice)) { + if (riid == __uuidof(ID3D11VkExtDevice) + || riid == __uuidof(ID3D11VkExtDevice1)) { *ppvObject = ref(&m_d3d11DeviceExt); return S_OK; } diff --git a/src/d3d11/d3d11_device.h b/src/d3d11/d3d11_device.h index 935b0e60..7de5cf91 100644 --- a/src/d3d11/d3d11_device.h +++ b/src/d3d11/d3d11_device.h @@ -15,6 +15,7 @@ #include "../util/com/com_private_data.h" #include "d3d11_cmdlist.h" 
+#include "d3d11_cuda.h" #include "d3d11_initializer.h" #include "d3d11_interfaces.h" #include "d3d11_interop.h" @@ -494,7 +495,7 @@ namespace dxvk { /** * \brief Extended D3D11 device */ - class D3D11DeviceExt : public ID3D11VkExtDevice { + class D3D11DeviceExt : public ID3D11VkExtDevice1 { public: @@ -513,14 +514,67 @@ namespace dxvk { BOOL STDMETHODCALLTYPE GetExtensionSupport( D3D11_VK_EXTENSION Extension); + bool STDMETHODCALLTYPE GetCudaTextureObjectNVX( + uint32_t srvDriverHandle, + uint32_t samplerDriverHandle, + uint32_t* pCudaTextureHandle); + + bool STDMETHODCALLTYPE CreateCubinComputeShaderWithNameNVX( + const void* pCubin, + uint32_t size, + uint32_t blockX, + uint32_t blockY, + uint32_t blockZ, + const char* pShaderName, + IUnknown** phShader); + + bool STDMETHODCALLTYPE GetResourceHandleGPUVirtualAddressAndSizeNVX( + void* hObject, + uint64_t* gpuVAStart, + uint64_t* gpuVASize); + + bool STDMETHODCALLTYPE CreateUnorderedAccessViewAndGetDriverHandleNVX( + ID3D11Resource* pResource, + const D3D11_UNORDERED_ACCESS_VIEW_DESC* pDesc, + ID3D11UnorderedAccessView** ppUAV, + uint32_t* pDriverHandle); + + bool STDMETHODCALLTYPE CreateShaderResourceViewAndGetDriverHandleNVX( + ID3D11Resource* pResource, + const D3D11_SHADER_RESOURCE_VIEW_DESC* pDesc, + ID3D11ShaderResourceView** ppSRV, + uint32_t* pDriverHandle); + + bool STDMETHODCALLTYPE CreateSamplerStateAndGetDriverHandleNVX( + const D3D11_SAMPLER_DESC* pSamplerDesc, + ID3D11SamplerState** ppSamplerState, + uint32_t* pDriverHandle); + private: D3D11DXGIDevice* m_container; D3D11Device* m_device; + void AddSamplerAndHandleNVX( + ID3D11SamplerState* pSampler, + uint32_t Handle); + + ID3D11SamplerState* HandleToSamplerNVX( + uint32_t Handle); + + void AddSrvAndHandleNVX( + ID3D11ShaderResourceView* pSrv, + uint32_t Handle); + + ID3D11ShaderResourceView* HandleToSrvNVX( + uint32_t Handle); + + dxvk::mutex m_mapLock; + std::unordered_map<uint32_t, ID3D11SamplerState*> m_samplerHandleToPtr; + std::unordered_map<uint32_t, ID3D11ShaderResourceView*> m_srvHandleToPtr; }; - - + + 
/** * \brief D3D11 video device */ diff --git a/src/d3d11/d3d11_initializer.cpp b/src/d3d11/d3d11_initializer.cpp index 2bace428..d1d66f19 100644 --- a/src/d3d11/d3d11_initializer.cpp +++ b/src/d3d11/d3d11_initializer.cpp @@ -264,12 +264,7 @@ namespace dxvk { // Initialize the image on the GPU std::lock_guard lock(m_mutex); - VkImageSubresourceRange subresources; - subresources.aspectMask = image->formatInfo()->aspectMask; - subresources.baseMipLevel = 0; - subresources.levelCount = image->info().mipLevels; - subresources.baseArrayLayer = 0; - subresources.layerCount = image->info().numLayers; + VkImageSubresourceRange subresources = image->getAvailableSubresources(); m_context->initImage(image, subresources, VK_IMAGE_LAYOUT_PREINITIALIZED); diff --git a/src/d3d11/d3d11_interfaces.h b/src/d3d11/d3d11_interfaces.h index 03d0ca55..3b0629bb 100644 --- a/src/d3d11/d3d11_interfaces.h +++ b/src/d3d11/d3d11_interfaces.h @@ -14,6 +14,8 @@ enum D3D11_VK_EXTENSION : uint32_t { D3D11_VK_EXT_MULTI_DRAW_INDIRECT_COUNT = 1, D3D11_VK_EXT_DEPTH_BOUNDS = 2, D3D11_VK_EXT_BARRIER_CONTROL = 3, + D3D11_VK_NVX_BINARY_IMPORT = 4, + D3D11_VK_NVX_IMAGE_VIEW_HANDLE = 5, }; @@ -45,6 +47,54 @@ ID3D11VkExtDevice : public IUnknown { }; +/** + * \brief Extended extended D3D11 device + * + * Introduces methods to get virtual addresses and driver + * handles for resources, and create and destroy objects + * for D3D11-Cuda interop. 
+ */ +MIDL_INTERFACE("cfcf64ef-9586-46d0-bca4-97cf2ca61b06") +ID3D11VkExtDevice1 : public ID3D11VkExtDevice { + + virtual bool STDMETHODCALLTYPE GetResourceHandleGPUVirtualAddressAndSizeNVX( + void* hObject, + uint64_t* gpuVAStart, + uint64_t* gpuVASize) = 0; + + virtual bool STDMETHODCALLTYPE CreateUnorderedAccessViewAndGetDriverHandleNVX( + ID3D11Resource* pResource, + const D3D11_UNORDERED_ACCESS_VIEW_DESC* pDesc, + ID3D11UnorderedAccessView** ppUAV, + uint32_t* pDriverHandle) = 0; + + virtual bool STDMETHODCALLTYPE CreateShaderResourceViewAndGetDriverHandleNVX( + ID3D11Resource* pResource, + const D3D11_SHADER_RESOURCE_VIEW_DESC* pDesc, + ID3D11ShaderResourceView** ppSRV, + uint32_t* pDriverHandle) = 0; + + virtual bool STDMETHODCALLTYPE CreateSamplerStateAndGetDriverHandleNVX( + const D3D11_SAMPLER_DESC* pSamplerDesc, + ID3D11SamplerState** ppSamplerState, + uint32_t* pDriverHandle) = 0; + + virtual bool STDMETHODCALLTYPE CreateCubinComputeShaderWithNameNVX( + const void* pCubin, + uint32_t size, + uint32_t blockX, + uint32_t blockY, + uint32_t blockZ, + const char* pShaderName, + IUnknown** phShader) = 0; + + virtual bool STDMETHODCALLTYPE GetCudaTextureObjectNVX( + uint32_t srvDriverHandle, + uint32_t samplerDriverHandle, + uint32_t* pCudaTextureHandle) = 0; +}; + + /** * \brief Extended D3D11 context * @@ -88,13 +138,39 @@ ID3D11VkExtContext : public IUnknown { virtual void STDMETHODCALLTYPE SetBarrierControl( UINT ControlFlags) = 0; - }; + +/** + * \brief Extended extended D3D11 context + * + * Provides functionality to launch a Cuda kernel + */ +MIDL_INTERFACE("874b09b2-ae0b-41d8-8476-5f3b7a0e879d") +ID3D11VkExtContext1 : public ID3D11VkExtContext { + + virtual bool STDMETHODCALLTYPE LaunchCubinShaderNVX( + IUnknown* hShader, + uint32_t gridX, + uint32_t gridY, + uint32_t gridZ, + const void* pParams, + uint32_t paramSize, + void* const* pReadResources, + uint32_t numReadResources, + void* const* pWriteResources, + uint32_t numWriteResources) = 0; +}; + + 
#ifdef _MSC_VER struct __declspec(uuid("8a6e3c42-f74c-45b7-8265-a231b677ca17")) ID3D11VkExtDevice; +struct __declspec(uuid("cfcf64ef-9586-46d0-bca4-97cf2ca61b06")) ID3D11VkExtDevice1; struct __declspec(uuid("fd0bca13-5cb6-4c3a-987e-4750de2ca791")) ID3D11VkExtContext; +struct __declspec(uuid("874b09b2-ae0b-41d8-8476-5f3b7a0e879d")) ID3D11VkExtContext1; #else __CRT_UUID_DECL(ID3D11VkExtDevice, 0x8a6e3c42,0xf74c,0x45b7,0x82,0x65,0xa2,0x31,0xb6,0x77,0xca,0x17); +__CRT_UUID_DECL(ID3D11VkExtDevice1, 0xcfcf64ef,0x9586,0x46d0,0xbc,0xa4,0x97,0xcf,0x2c,0xa6,0x1b,0x06); __CRT_UUID_DECL(ID3D11VkExtContext, 0xfd0bca13,0x5cb6,0x4c3a,0x98,0x7e,0x47,0x50,0xde,0x2c,0xa7,0x91); +__CRT_UUID_DECL(ID3D11VkExtContext1, 0x874b09b2,0xae0b,0x41d8,0x84,0x76,0x5f,0x3b,0x7a,0x0e,0x87,0x9d); #endif diff --git a/src/d3d11/meson.build b/src/d3d11/meson.build index a0156c92..332c433c 100644 --- a/src/d3d11/meson.build +++ b/src/d3d11/meson.build @@ -33,6 +33,7 @@ d3d11_src = [ 'd3d11_context_def.cpp', 'd3d11_context_ext.cpp', 'd3d11_context_imm.cpp', + 'd3d11_cuda.cpp', 'd3d11_depth_stencil.cpp', 'd3d11_device.cpp', 'd3d11_enums.cpp', diff --git a/src/dxvk/dxvk_adapter.cpp b/src/dxvk/dxvk_adapter.cpp index d0d09c98..c088fe8b 100644 --- a/src/dxvk/dxvk_adapter.cpp +++ b/src/dxvk/dxvk_adapter.cpp @@ -263,7 +263,7 @@ namespace dxvk { DxvkDeviceFeatures enabledFeatures) { DxvkDeviceExtensions devExtensions; - std::array devExtensionList = {{ + std::array devExtensionList = {{ &devExtensions.amdMemoryOverallocationBehaviour, &devExtensions.amdShaderFragmentMask, &devExtensions.ext4444Formats, @@ -289,8 +289,17 @@ namespace dxvk { &devExtensions.khrSamplerMirrorClampToEdge, &devExtensions.khrShaderFloatControls, &devExtensions.khrSwapchain, + &devExtensions.nvxBinaryImport, + &devExtensions.nvxImageViewHandle, + &devExtensions.khrBufferDeviceAddress, }}; + // VK_KHR_buffer_device_address can be expensive to enable on + // some drivers; only enable selectively for Cuda interop + if 
(m_deviceExtensions.supports(devExtensions.nvxBinaryImport.name()) && + m_deviceExtensions.supports(devExtensions.nvxImageViewHandle.name())) + devExtensions.khrBufferDeviceAddress.setMode(DxvkExtMode::Optional); + DxvkNameSet extensionsEnabled; if (!m_deviceExtensions.enableExtensions( diff --git a/src/dxvk/dxvk_cmdlist.h b/src/dxvk/dxvk_cmdlist.h index 29761af2..5dda8dbf 100644 --- a/src/dxvk/dxvk_cmdlist.h +++ b/src/dxvk/dxvk_cmdlist.h @@ -340,6 +340,9 @@ namespace dxvk { pSizes, pStrides); } + void cmdLaunchCuKernel(VkCuLaunchInfoNVX launchInfo) { + m_vkd->vkCmdCuLaunchKernelNVX(m_execBuffer, &launchInfo); + } void cmdBlitImage( VkImage srcImage, diff --git a/src/dxvk/dxvk_context.cpp b/src/dxvk/dxvk_context.cpp index e21bc17c..ff1ad0e5 100644 --- a/src/dxvk/dxvk_context.cpp +++ b/src/dxvk/dxvk_context.cpp @@ -1,4 +1,6 @@ #include +#include +#include #include "dxvk_device.h" #include "dxvk_context.h" @@ -302,12 +304,7 @@ namespace dxvk { if (image->info().layout != layout) { this->spillRenderPass(true); - VkImageSubresourceRange subresources; - subresources.aspectMask = image->formatInfo()->aspectMask; - subresources.baseArrayLayer = 0; - subresources.baseMipLevel = 0; - subresources.layerCount = image->info().numLayers; - subresources.levelCount = image->info().mipLevels; + VkImageSubresourceRange subresources = image->getAvailableSubresources(); this->prepareImage(m_execBarriers, image, subresources); @@ -2609,6 +2606,73 @@ namespace dxvk { m_cmd->trackResource(event); } + + void DxvkContext::launchCuKernelNVX( + const VkCuLaunchInfoNVX& nvxLaunchInfo, + const std::vector, DxvkAccessFlags>>& buffers, + const std::vector, DxvkAccessFlags>>& images) { + // The resources in the std::vectors above are called-out + // explicitly in the API for barrier and tracking purposes + // since they're being used bindlessly. 
+ this->spillRenderPass(true); + + VkPipelineStageFlags srcStages = 0; + VkAccessFlags srcAccess = 0; + + for (auto& r : buffers) { + srcStages |= r.first->info().stages; + srcAccess |= r.first->info().access; + } + + for (auto& r : images) { + srcStages |= r.first->info().stages; + srcAccess |= r.first->info().access; + + this->prepareImage(m_execBarriers, r.first, r.first->getAvailableSubresources()); + } + + m_execBarriers.accessMemory(srcStages, srcAccess, + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT); + m_execBarriers.recordCommands(m_cmd); + + m_cmd->cmdLaunchCuKernel(nvxLaunchInfo); + + for (auto& r : buffers) { + VkAccessFlags accessFlags = (r.second.test(DxvkAccess::Read) * VK_ACCESS_SHADER_READ_BIT) + | (r.second.test(DxvkAccess::Write) * VK_ACCESS_SHADER_WRITE_BIT); + DxvkBufferSliceHandle bufferSlice = r.first->getSliceHandle(); + m_execBarriers.accessBuffer(bufferSlice, + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + accessFlags, + r.first->info().stages, + r.first->info().access); + } + + for (auto& r : images) { + VkAccessFlags accessFlags = (r.second.test(DxvkAccess::Read) * VK_ACCESS_SHADER_READ_BIT) + | (r.second.test(DxvkAccess::Write) * VK_ACCESS_SHADER_WRITE_BIT); + m_execBarriers.accessImage(r.first, + r.first->getAvailableSubresources(), + r.first->info().layout, + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + accessFlags, + r.first->info().layout, + r.first->info().stages, + r.first->info().access); + } + + for (auto& r : images) { + if (r.second.test(DxvkAccess::Read)) m_cmd->trackResource(r.first); + if (r.second.test(DxvkAccess::Write)) m_cmd->trackResource(r.first); + } + + for (auto& r : buffers) { + if (r.second.test(DxvkAccess::Read)) m_cmd->trackResource(r.first); + if (r.second.test(DxvkAccess::Write)) m_cmd->trackResource(r.first); + } + } + void DxvkContext::writeTimestamp(const Rc& query) { m_queryManager.writeTimestamp(m_cmd, query); diff --git a/src/dxvk/dxvk_context.h b/src/dxvk/dxvk_context.h 
index 1a57f979..46c90375 100644 --- a/src/dxvk/dxvk_context.h +++ b/src/dxvk/dxvk_context.h @@ -6,6 +6,7 @@ #include "dxvk_context_state.h" #include "dxvk_data.h" #include "dxvk_objects.h" +#include "dxvk_resource.h" #include "dxvk_util.h" namespace dxvk { @@ -988,6 +989,23 @@ namespace dxvk { void setBarrierControl( DxvkBarrierControlFlags control); + /** + * \brief Launches a Cuda kernel + * + * Since the kernel is launched with an opaque set of + * kernel-specific parameters which may reference + * resources bindlessly, such resources must be listed by + * the caller in the 'buffers' and 'images' parameters so + * that their access may be tracked appropriately. + * \param [in] nvxLaunchInfo Kernel launch parameter struct + * \param [in] buffers List of {buffer,read,write} used by kernel + * \param [in] images List of {image,read,write} used by kernel + */ + void launchCuKernelNVX( + const VkCuLaunchInfoNVX& nvxLaunchInfo, + const std::vector, DxvkAccessFlags>>& buffers, + const std::vector, DxvkAccessFlags>>& images); + /** * \brief Signals a GPU event * \param [in] event The event diff --git a/src/dxvk/dxvk_extensions.h b/src/dxvk/dxvk_extensions.h index a9de1966..11073619 100644 --- a/src/dxvk/dxvk_extensions.h +++ b/src/dxvk/dxvk_extensions.h @@ -283,6 +283,9 @@ namespace dxvk { DxvkExt khrSamplerMirrorClampToEdge = { VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME, DxvkExtMode::Optional }; DxvkExt khrShaderFloatControls = { VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME, DxvkExtMode::Optional }; DxvkExt khrSwapchain = { VK_KHR_SWAPCHAIN_EXTENSION_NAME, DxvkExtMode::Required }; + DxvkExt nvxBinaryImport = { VK_NVX_BINARY_IMPORT_EXTENSION_NAME, DxvkExtMode::Optional }; + DxvkExt nvxImageViewHandle = { VK_NVX_IMAGE_VIEW_HANDLE_EXTENSION_NAME, DxvkExtMode::Optional }; + DxvkExt khrBufferDeviceAddress = { VK_KHR_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME, DxvkExtMode::Disabled }; }; /** diff --git a/src/dxvk/dxvk_image.h b/src/dxvk/dxvk_image.h index 43f5552e..b3b721e0 
100644 --- a/src/dxvk/dxvk_image.h +++ b/src/dxvk/dxvk_image.h @@ -298,6 +298,21 @@ namespace dxvk { VkDeviceSize memSize() const { return m_image.memory.length(); } + + /** + * \brief Get full subresource range of the image + * + * \returns Resource range of the whole image + */ + VkImageSubresourceRange getAvailableSubresources() const { + VkImageSubresourceRange result; + result.aspectMask = formatInfo()->aspectMask; + result.baseMipLevel = 0; + result.levelCount = info().mipLevels; + result.baseArrayLayer = 0; + result.layerCount = info().numLayers; + return result; + } private: diff --git a/src/vulkan/vulkan_loader.h b/src/vulkan/vulkan_loader.h index bf1deeb6..52c67a49 100644 --- a/src/vulkan/vulkan_loader.h +++ b/src/vulkan/vulkan_loader.h @@ -347,6 +347,23 @@ namespace dxvk::vk { VULKAN_FN(vkCmdBeginQueryIndexedEXT); VULKAN_FN(vkCmdEndQueryIndexedEXT); #endif + + #ifdef VK_NVX_image_view_handle + VULKAN_FN(vkGetImageViewHandleNVX); + VULKAN_FN(vkGetImageViewAddressNVX); + #endif + + #ifdef VK_NVX_binary_import + VULKAN_FN(vkCreateCuModuleNVX); + VULKAN_FN(vkCreateCuFunctionNVX); + VULKAN_FN(vkDestroyCuModuleNVX); + VULKAN_FN(vkDestroyCuFunctionNVX); + VULKAN_FN(vkCmdCuLaunchKernelNVX); + #endif + + #ifdef VK_KHR_buffer_device_address + VULKAN_FN(vkGetBufferDeviceAddressKHR); + #endif }; }