1
0
mirror of https://github.com/doitsujin/dxvk.git synced 2025-01-18 02:52:10 +01:00

[d3d11,dxvk] Implement DXVK pieces required for DX11 DLSS support

Notably, this adds fairly generic functions to create, launch and destroy CUDA kernels,
as well as methods to fetch GPU virtual addresses and handles for D3D11 resources.

Note: Requires some corresponding dxvk-nvapi changes for DLSS to
be initialized successfully.
This commit is contained in:
Adam Moss 2021-09-17 13:30:49 -07:00 committed by Philip Rebohle
parent 3a712b766e
commit 86148ec070
No known key found for this signature in database
GPG Key ID: C8CC613427A31C99
17 changed files with 798 additions and 23 deletions

View File

@ -48,7 +48,8 @@ namespace dxvk {
return S_OK;
}
if (riid == __uuidof(ID3D11VkExtContext)) {
if (riid == __uuidof(ID3D11VkExtContext)
|| riid == __uuidof(ID3D11VkExtContext1)) {
*ppvObject = ref(&m_contextExt);
return S_OK;
}

View File

@ -1,4 +1,12 @@
#include <vector>
#include <utility>
#include <cstring>
#include "d3d11_device.h"
#include "d3d11_context.h"
#include "d3d11_cuda.h"
#include "../util/log/log.h"
namespace dxvk {
@ -136,5 +144,55 @@ namespace dxvk {
ctx->setBarrierControl(cFlags);
});
}
/**
 * \brief Queues the launch of a cubin kernel on this context
 *
 * Collects the resources listed by the caller, copies the opaque
 * parameter blob, fills in the Vulkan launch structure and emits
 * a CS command that performs the actual launch.
 * \param [in] hShader Shader object. This is cast directly to
 *    \c CubinShaderWrapper, so it must be one of those.
 * \param [in] GridX Grid size in X dimension
 * \param [in] GridY Grid size in Y dimension
 * \param [in] GridZ Grid size in Z dimension
 * \param [in] pParams Opaque kernel parameter blob. May be
 *    \c nullptr only if \c ParamSize is zero.
 * \param [in] ParamSize Size of the parameter blob, in bytes
 * \param [in] pReadResources Resources read by the kernel. Each
 *    entry is cast directly to \c ID3D11Resource.
 * \param [in] NumReadResources Number of entries in \c pReadResources
 * \param [in] pWriteResources Resources written by the kernel
 * \param [in] NumWriteResources Number of entries in \c pWriteResources
 * \returns \c true if the launch was queued, \c false if
 *    the arguments were rejected
 */
bool STDMETHODCALLTYPE D3D11DeviceContextExt::LaunchCubinShaderNVX(IUnknown* hShader, uint32_t GridX, uint32_t GridY, uint32_t GridZ,
    const void* pParams, uint32_t ParamSize, void* const* pReadResources, uint32_t NumReadResources, void* const* pWriteResources, uint32_t NumWriteResources) {
  // Reject arguments that would otherwise be dereferenced blindly below
  if (!hShader) {
    Logger::warn("LaunchCubinShaderNVX() - failure - null shader");
    return false;
  }

  if ((ParamSize && !pParams)
   || (NumReadResources && !pReadResources)
   || (NumWriteResources && !pWriteResources)) {
    Logger::warn("LaunchCubinShaderNVX() - failure - null array with non-zero size");
    return false;
  }

  D3D10DeviceLock lock = m_ctx->LockContext();

  CubinShaderWrapper* cubinShader = static_cast<CubinShaderWrapper*>(hShader);
  CubinShaderLaunchInfo launchInfo;

  const uint32_t maxResources = NumReadResources + NumWriteResources;
  launchInfo.buffers.reserve(maxResources);
  launchInfo.images.reserve(maxResources);

  for (uint32_t i = 0; i < NumReadResources; i++)
    launchInfo.insertResource(static_cast<ID3D11Resource*>(pReadResources[i]), DxvkAccess::Read);

  for (uint32_t i = 0; i < NumWriteResources; i++)
    launchInfo.insertResource(static_cast<ID3D11Resource*>(pWriteResources[i]), DxvkAccess::Write);

  launchInfo.paramSize = ParamSize;
  launchInfo.params.resize(launchInfo.paramSize);

  // memcpy with a null pointer is undefined even for a
  // size of zero, so skip the copy for empty blobs
  if (ParamSize)
    std::memcpy(launchInfo.params.data(), pParams, ParamSize);

  launchInfo.cuLaunchConfig[0] = reinterpret_cast<void*>(0x01); // CU_LAUNCH_PARAM_BUFFER_POINTER
  launchInfo.cuLaunchConfig[1] = launchInfo.params.data();
  launchInfo.cuLaunchConfig[2] = reinterpret_cast<void*>(0x02); // CU_LAUNCH_PARAM_BUFFER_SIZE
  launchInfo.cuLaunchConfig[3] = &launchInfo.paramSize; // yes, this actually requires a pointer to a size_t containing the parameter size
  launchInfo.cuLaunchConfig[4] = reinterpret_cast<void*>(0x00); // CU_LAUNCH_PARAM_END

  launchInfo.nvxLaunchInfo.function       = cubinShader->cuFunction();
  launchInfo.nvxLaunchInfo.gridDimX       = GridX;
  launchInfo.nvxLaunchInfo.gridDimY       = GridY;
  launchInfo.nvxLaunchInfo.gridDimZ       = GridZ;
  launchInfo.nvxLaunchInfo.blockDimX      = cubinShader->blockDim().width;
  launchInfo.nvxLaunchInfo.blockDimY      = cubinShader->blockDim().height;
  launchInfo.nvxLaunchInfo.blockDimZ      = cubinShader->blockDim().depth;
  launchInfo.nvxLaunchInfo.sharedMemBytes = 0;

  // Parameters are passed through the 'extras' array
  // rather than as individual kernel parameters
  launchInfo.nvxLaunchInfo.paramCount     = 0;
  launchInfo.nvxLaunchInfo.pParams        = nullptr;
  launchInfo.nvxLaunchInfo.extraCount     = 1;
  launchInfo.nvxLaunchInfo.pExtras        = launchInfo.cuLaunchConfig.data();

  // Stored in the launch info so that it travels with the queued command
  launchInfo.shader = cubinShader;

  /* Need to capture by value in case this gets called from a deferred context */
  m_ctx->EmitCs([cLaunchInfo = std::move(launchInfo)] (DxvkContext* ctx) {
    ctx->launchCuKernelNVX(cLaunchInfo.nvxLaunchInfo, cLaunchInfo.buffers, cLaunchInfo.images);
  });

  return true;
}
}

View File

@ -6,7 +6,7 @@ namespace dxvk {
class D3D11DeviceContext;
class D3D11DeviceContextExt : public ID3D11VkExtContext {
class D3D11DeviceContextExt : public ID3D11VkExtContext1 {
public:
@ -56,11 +56,23 @@ namespace dxvk {
void STDMETHODCALLTYPE SetBarrierControl(
UINT ControlFlags);
bool STDMETHODCALLTYPE LaunchCubinShaderNVX(
IUnknown* hShader,
uint32_t GridX,
uint32_t GridY,
uint32_t GridZ,
const void* pParams,
uint32_t paramSize,
void* const* pReadResources,
uint32_t NumReadResources,
void* const* pWriteResources,
uint32_t NumWriteResources);
private:
D3D11DeviceContext* m_ctx;
};
}

51
src/d3d11/d3d11_cuda.cpp Normal file
View File

@ -0,0 +1,51 @@
#include "d3d11_cuda.h"
namespace dxvk {
/**
 * \brief Wraps an existing Cuda module/function pair
 *
 * Only stores the given handles; the destructor
 * is what destroys them later.
 */
CubinShaderWrapper::CubinShaderWrapper(const Rc<dxvk::DxvkDevice>& dxvkDevice, VkCuModuleNVX cuModule, VkCuFunctionNVX cuFunction, VkExtent3D blockDim)
: m_dxvkDevice  (dxvkDevice),
  m_module      (cuModule),
  m_function    (cuFunction),
  m_blockDim    (blockDim) {

}
/**
 * \brief Destroys the wrapped Cuda objects
 *
 * The function is destroyed before the module it was created from.
 */
CubinShaderWrapper::~CubinShaderWrapper() {
  auto vk = m_dxvkDevice->vkd();
  VkDevice deviceHandle = m_dxvkDevice->handle();

  vk->vkDestroyCuFunctionNVX(deviceHandle, m_function, nullptr);
  vk->vkDestroyCuModuleNVX(deviceHandle, m_module, nullptr);
}
/**
 * \brief Queries a COM interface of the shader wrapper
 *
 * Only \c IUnknown is supported.
 * \param [in] riid Requested interface ID
 * \param [out] ppvObject Receives a new reference on success
 *    and is set to \c nullptr on failure
 * \returns \c S_OK on success, \c E_POINTER if \c ppvObject is
 *    null, or \c E_NOINTERFACE for any other interface
 */
HRESULT STDMETHODCALLTYPE CubinShaderWrapper::QueryInterface(REFIID riid, void **ppvObject) {
  if (ppvObject == nullptr)
    return E_POINTER;

  // COM requires the output to be null if the query fails
  *ppvObject = nullptr;

  if (riid == __uuidof(IUnknown)) {
    *ppvObject = ref(this);
    return S_OK;
  }

  Logger::warn("CubinShaderWrapper::QueryInterface: Unknown interface query");
  Logger::warn(str::format(riid));
  return E_NOINTERFACE;
}
/**
 * \brief Registers a D3D11 resource for a kernel launch
 *
 * Looks the resource up both as a texture and as a buffer
 * and adds whichever lookup succeeds to the matching list.
 * \param [in] pResource Resource used by the kernel
 * \param [in] access How the kernel accesses the resource
 */
void CubinShaderLaunchInfo::insertResource(ID3D11Resource* pResource, DxvkAccessFlags access) {
  auto texture = GetCommonTexture(pResource);
  auto buffer  = GetCommonBuffer(pResource);

  if (texture != nullptr) {
    insertUniqueResource(images, texture->GetImage(), access);
  }

  if (buffer != nullptr) {
    insertUniqueResource(buffers, buffer->GetBuffer(), access);
  }
}
template<typename T>
void CubinShaderLaunchInfo::insertUniqueResource(std::vector<std::pair<T, DxvkAccessFlags>>& list, const T& resource, DxvkAccessFlags access) {
for (auto& entry : list) {
if (entry.first == resource) {
entry.second.set(access);
return;
}
}
list.push_back({ resource, access });
}
}

83
src/d3d11/d3d11_cuda.h Normal file
View File

@ -0,0 +1,83 @@
#pragma once
#include <utility>
#include <vector>
#include "../dxvk/dxvk_resource.h"
#include "../util/com/com_guid.h"
#include "../util/com/com_object.h"
#include "d3d11_buffer.h"
#include "d3d11_texture.h"
namespace dxvk {
/**
 * \brief Reference-counted wrapper around a Cuda kernel
 *
 * Bundles a Vulkan Cuda module, the function created from
 * it and the block dimensions to launch that function with.
 * Both Vulkan objects are destroyed in the destructor.
 */
class CubinShaderWrapper : public ComObject<IUnknown> {
public:
CubinShaderWrapper(const Rc<dxvk::DxvkDevice>& dxvkDevice, VkCuModuleNVX cuModule, VkCuFunctionNVX cuFunction, VkExtent3D blockDim);
~CubinShaderWrapper();
HRESULT STDMETHODCALLTYPE QueryInterface(REFIID riid, void **ppvObject);
// Vulkan handle of the Cuda module
VkCuModuleNVX cuModule() const {
return m_module;
}
// Vulkan handle of the Cuda function (the kernel entry point)
VkCuFunctionNVX cuFunction() const {
return m_function;
}
// Thread block dimensions that were passed in at construction time
VkExtent3D blockDim() const {
return m_blockDim;
}
private:
// Device used to destroy the module and function again
Rc<DxvkDevice> m_dxvkDevice;
VkCuModuleNVX m_module;
VkCuFunctionNVX m_function;
VkExtent3D m_blockDim;
};
struct CubinShaderLaunchInfo {
CubinShaderLaunchInfo() = default;
CubinShaderLaunchInfo(CubinShaderLaunchInfo&& other) {
shader = std::move(other.shader);
params = std::move(other.params);
paramSize = std::move(other.paramSize);
nvxLaunchInfo = std::move(other.nvxLaunchInfo);
cuLaunchConfig = other.cuLaunchConfig;
buffers = std::move(other.buffers);
images = std::move(other.images);
other.cuLaunchConfig[1] = nullptr;
other.cuLaunchConfig[3] = nullptr;
other.nvxLaunchInfo.pExtras = nullptr;
// fix-up internally-pointing pointers
cuLaunchConfig[1] = params.data();
cuLaunchConfig[3] = &paramSize;
nvxLaunchInfo.pExtras = cuLaunchConfig.data();
}
Com<CubinShaderWrapper> shader;
std::vector<uint8_t> params;
size_t paramSize;
VkCuLaunchInfoNVX nvxLaunchInfo = { VK_STRUCTURE_TYPE_CU_LAUNCH_INFO_NVX };
std::array<void*, 5> cuLaunchConfig;
std::vector<std::pair<Rc<DxvkBuffer>, DxvkAccessFlags>> buffers;
std::vector<std::pair<Rc<DxvkImage>, DxvkAccessFlags>> images;
void insertResource(ID3D11Resource* pResource, DxvkAccessFlags access);
template<typename T>
static void insertUniqueResource(std::vector<std::pair<T, DxvkAccessFlags>>& list, const T& resource, DxvkAccessFlags access);
};
}

View File

@ -2445,12 +2445,326 @@ namespace dxvk {
case D3D11_VK_EXT_DEPTH_BOUNDS:
return deviceFeatures.core.features.depthBounds;
case D3D11_VK_NVX_IMAGE_VIEW_HANDLE:
return deviceExtensions.nvxImageViewHandle;
case D3D11_VK_NVX_BINARY_IMPORT:
return deviceExtensions.nvxBinaryImport
&& deviceExtensions.khrBufferDeviceAddress;
default:
return false;
}
}
/**
 * \brief Queries a combined image/sampler handle for Cuda interop
 *
 * Both driver handles must have been registered by the matching
 * 'CreateXAndGetDriverHandleNVX' method of this object.
 * \param [in] srvDriverHandle Handle of a shader resource view
 * \param [in] samplerDriverHandle Handle of a sampler state
 * \param [out] pCudaTextureHandle Receives the resulting handle
 * \returns \c true on success, \c false if either handle is
 *    unknown or the resulting handle is zero
 */
bool STDMETHODCALLTYPE D3D11DeviceExt::GetCudaTextureObjectNVX(uint32_t srvDriverHandle, uint32_t samplerDriverHandle, uint32_t* pCudaTextureHandle) {
  ID3D11ShaderResourceView* view = HandleToSrvNVX(srvDriverHandle);

  if (view == nullptr) {
    Logger::warn(str::format("GetCudaTextureObjectNVX() failure - srv handle wasn't found: ", srvDriverHandle));
    return false;
  }

  ID3D11SamplerState* sampler = HandleToSamplerNVX(samplerDriverHandle);

  if (sampler == nullptr) {
    Logger::warn(str::format("GetCudaTextureObjectNVX() failure - sampler handle wasn't found: ", samplerDriverHandle));
    return false;
  }

  Rc<DxvkSampler>   dxvkSampler = static_cast<D3D11SamplerState*>(sampler)->GetDXVKSampler();
  Rc<DxvkImageView> dxvkView    = static_cast<D3D11ShaderResourceView*>(view)->GetImageView();

  VkImageViewHandleInfoNVX handleInfo = { VK_STRUCTURE_TYPE_IMAGE_VIEW_HANDLE_INFO_NVX };
  handleInfo.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
  handleInfo.sampler        = dxvkSampler->handle();
  handleInfo.imageView      = dxvkView->handle();

  // note: there's no implicit lifetime management here; it's up to the
  // app to keep the sampler and SRV alive as long as it wants to use this
  // derived handle.
  Rc<DxvkDevice> device = m_device->GetDXVKDevice();
  const uint32_t textureHandle = device->vkd()->vkGetImageViewHandleNVX(device->handle(), &handleInfo);
  *pCudaTextureHandle = textureHandle;

  if (textureHandle == 0) {
    Logger::warn("GetCudaTextureObjectNVX() handle==0 - failed");
    return false;
  }

  return true;
}
/**
 * \brief Creates a Cuda kernel object from a cubin binary
 *
 * Creates a Cuda module from the binary, looks up the named
 * function in it and wraps both in a \c CubinShaderWrapper.
 * \param [in] pCubin Cubin binary data
 * \param [in] size Size of the binary, in bytes
 * \param [in] blockX Block size in X dimension
 * \param [in] blockY Block size in Y dimension
 * \param [in] blockZ Block size in Z dimension
 * \param [in] pShaderName Name of the function within the module
 * \param [out] phShader Receives a reference to the new shader object
 * \returns \c true on success, \c false if module
 *    or function creation failed
 */
bool STDMETHODCALLTYPE D3D11DeviceExt::CreateCubinComputeShaderWithNameNVX(const void* pCubin, uint32_t size,
    uint32_t blockX, uint32_t blockY, uint32_t blockZ, const char* pShaderName, IUnknown** phShader) {
  Rc<DxvkDevice> device = m_device->GetDXVKDevice();
  VkDevice deviceHandle = device->handle();

  VkCuModuleCreateInfoNVX moduleInfo = { VK_STRUCTURE_TYPE_CU_MODULE_CREATE_INFO_NVX };
  moduleInfo.dataSize = size;
  moduleInfo.pData    = pCubin;

  VkCuModuleNVX cubinModule;
  VkResult result = device->vkd()->vkCreateCuModuleNVX(deviceHandle, &moduleInfo, nullptr, &cubinModule);

  if (result != VK_SUCCESS) {
    Logger::warn(str::format("CreateCubinComputeShaderWithNameNVX() - failure to create module - result=", result, " pcubindata=", pCubin, " cubinsize=", size));
    return false; // failure
  }

  VkCuFunctionCreateInfoNVX functionInfo = { VK_STRUCTURE_TYPE_CU_FUNCTION_CREATE_INFO_NVX };
  functionInfo.pName  = pShaderName;
  functionInfo.module = cubinModule;

  VkCuFunctionNVX cubinFunction;
  result = device->vkd()->vkCreateCuFunctionNVX(deviceHandle, &functionInfo, nullptr, &cubinFunction);

  if (result != VK_SUCCESS) {
    // Don't leak the module if the function can't be created
    device->vkd()->vkDestroyCuModuleNVX(deviceHandle, cubinModule, nullptr);
    Logger::warn(str::format("CreateCubinComputeShaderWithNameNVX() - failure to create function - result=", result));
    return false;
  }

  // The wrapper takes ownership of both Vulkan objects
  *phShader = ref(new CubinShaderWrapper(device,
    cubinModule, cubinFunction, { blockX, blockY, blockZ }));
  return true;
}
/**
 * \brief Queries the GPU virtual address range of a resource
 *
 * Supports buffers and 2D textures only. For textures, the
 * address is queried through a temporary shader resource view.
 * \param [in] hObject Resource handle, see comment in function body
 * \param [out] gpuVAStart Receives the start address
 * \param [out] gpuVASize Receives the size of the address range
 * \returns \c true on success. An address of zero is logged
 *    but still counts as success.
 */
bool STDMETHODCALLTYPE D3D11DeviceExt::GetResourceHandleGPUVirtualAddressAndSizeNVX(void* hObject, uint64_t* gpuVAStart, uint64_t* gpuVASize) {
// The hObject 'opaque driver handle' is really just a straight cast
// of the corresponding ID3D11Resource* in dxvk/dxvknvapi
ID3D11Resource* pResource = static_cast<ID3D11Resource*>(hObject);
D3D11_COMMON_RESOURCE_DESC resourceDesc;
if (FAILED(GetCommonResourceDesc(pResource, &resourceDesc))) {
Logger::warn("GetResourceHandleGPUVirtualAddressAndSize() - GetCommonResourceDesc() failed");
return false;
}
// Bail out early for any resource type not handled below
switch (resourceDesc.Dim) {
case D3D11_RESOURCE_DIMENSION_BUFFER:
case D3D11_RESOURCE_DIMENSION_TEXTURE2D:
// okay - we can deal with those two dimensions
break;
case D3D11_RESOURCE_DIMENSION_TEXTURE1D:
case D3D11_RESOURCE_DIMENSION_TEXTURE3D:
case D3D11_RESOURCE_DIMENSION_UNKNOWN:
default:
Logger::warn(str::format("GetResourceHandleGPUVirtualAddressAndSize(?) - failure - unsupported dimension: ", resourceDesc.Dim));
return false;
}
Rc<DxvkDevice> dxvkDevice = m_device->GetDXVKDevice();
VkDevice vkDevice = dxvkDevice->handle();
if (resourceDesc.Dim == D3D11_RESOURCE_DIMENSION_TEXTURE2D) {
D3D11CommonTexture *texture = GetCommonTexture(pResource);
Rc<DxvkImage> dxvkImage = texture->GetImage();
// Fails only if the image has neither storage nor sampled usage
if (0 == (dxvkImage->info().usage & (VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT))) {
Logger::warn(str::format("GetResourceHandleGPUVirtualAddressAndSize(res=", pResource,") image info missing required usage bit(s); can't be used for vkGetImageViewHandleNVX - failure"));
return false;
}
// The d3d11 nvapi provides us a texture but vulkan only lets us get the GPU address from an imageview. So, make a private imageview and get the address from that...
D3D11_SHADER_RESOURCE_VIEW_DESC resourceViewDesc;
const D3D11_COMMON_TEXTURE_DESC *texDesc = texture->Desc();
// Array textures are only reported here, not rejected
if (texDesc->ArraySize != 1) {
Logger::debug(str::format("GetResourceHandleGPUVirtualAddressAndSize(?) - unexpected array size: ", texDesc->ArraySize));
}
// The view uses the texture's own format and covers all mip levels
resourceViewDesc.Format = texDesc->Format;
resourceViewDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D;
resourceViewDesc.Texture2D.MostDetailedMip = 0;
resourceViewDesc.Texture2D.MipLevels = texDesc->MipLevels;
// Local view object; it is never handed out to the caller
Com<ID3D11ShaderResourceView> pNewSRV;
HRESULT hr = m_device->CreateShaderResourceView(pResource, &resourceViewDesc, &pNewSRV);
if (FAILED(hr)) {
Logger::warn("GetResourceHandleGPUVirtualAddressAndSize() - private CreateShaderResourceView() failed");
return false;
}
Rc<DxvkImageView> dxvkImageView = static_cast<D3D11ShaderResourceView*>(pNewSRV.ptr())->GetImageView();
VkImageView vkImageView = dxvkImageView->handle();
VkImageViewAddressPropertiesNVX imageViewAddressProperties = {VK_STRUCTURE_TYPE_IMAGE_VIEW_ADDRESS_PROPERTIES_NVX};
VkResult res = dxvkDevice->vkd()->vkGetImageViewAddressNVX(vkDevice, vkImageView, &imageViewAddressProperties);
if (res != VK_SUCCESS) {
Logger::warn(str::format("GetResourceHandleGPUVirtualAddressAndSize(): vkGetImageViewAddressNVX() result is failure: ", res));
return false;
}
*gpuVAStart = imageViewAddressProperties.deviceAddress;
*gpuVASize = imageViewAddressProperties.size;
}
else if (resourceDesc.Dim == D3D11_RESOURCE_DIMENSION_BUFFER) {
D3D11Buffer *buffer = GetCommonBuffer(pResource);
const DxvkBufferSliceHandle bufSliceHandle = buffer->GetBuffer()->getSliceHandle();
VkBuffer vkBuffer = bufSliceHandle.handle;
VkBufferDeviceAddressInfoKHR bdaInfo = { VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO_KHR };
bdaInfo.buffer = vkBuffer;
VkDeviceAddress bufAddr = dxvkDevice->vkd()->vkGetBufferDeviceAddressKHR(vkDevice, &bdaInfo);
// The queried address belongs to the VkBuffer as a whole, so
// add the slice offset to get the address of the slice itself
*gpuVAStart = uint64_t(bufAddr) + bufSliceHandle.offset;
*gpuVASize = bufSliceHandle.length;
}
if (!*gpuVAStart)
Logger::warn("GetResourceHandleGPUVirtualAddressAndSize() addr==0 - unexpected"); // ... but not explicitly a failure; continue
return true;
}
/**
 * \brief Creates a UAV and queries its driver handle
 *
 * Only 2D textures are supported.
 * \param [in] pResource Resource to create the view for
 * \param [in] pDesc View description
 * \param [out] ppUAV Receives the new view. Reset to \c nullptr
 *    if the view was created but no handle could be queried.
 * \param [out] pDriverHandle Receives the driver handle of the view
 * \returns \c true on success, \c false on any failure
 */
bool STDMETHODCALLTYPE D3D11DeviceExt::CreateUnorderedAccessViewAndGetDriverHandleNVX(ID3D11Resource* pResource, const D3D11_UNORDERED_ACCESS_VIEW_DESC* pDesc, ID3D11UnorderedAccessView** ppUAV, uint32_t* pDriverHandle) {
  // Both outputs are dereferenced unconditionally further down
  if (!ppUAV || !pDriverHandle) {
    Logger::warn("CreateUnorderedAccessViewAndGetDriverHandleNVX() - failure - null output pointer");
    return false;
  }

  D3D11_COMMON_RESOURCE_DESC resourceDesc;
  if (!SUCCEEDED(GetCommonResourceDesc(pResource, &resourceDesc))) {
    Logger::warn("CreateUnorderedAccessViewAndGetDriverHandleNVX() - GetCommonResourceDesc() failed");
    return false;
  }

  if (resourceDesc.Dim != D3D11_RESOURCE_DIMENSION_TEXTURE2D) {
    Logger::warn(str::format("CreateUnorderedAccessViewAndGetDriverHandleNVX() - failure - unsupported dimension: ", resourceDesc.Dim));
    return false;
  }

  auto texture = GetCommonTexture(pResource);
  Rc<DxvkImage> dxvkImage = texture->GetImage();

  // Fails only if the image has neither storage nor sampled usage
  if (0 == (dxvkImage->info().usage & (VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT))) {
    Logger::warn(str::format("CreateUnorderedAccessViewAndGetDriverHandleNVX(res=", pResource, ") image info missing required usage bit(s); can't be used for vkGetImageViewHandleNVX - failure"));
    return false;
  }

  if (!SUCCEEDED(m_device->CreateUnorderedAccessView(pResource, pDesc, ppUAV))) {
    return false;
  }

  D3D11UnorderedAccessView *pUAV = static_cast<D3D11UnorderedAccessView *>(*ppUAV);
  Rc<DxvkDevice> dxvkDevice = m_device->GetDXVKDevice();
  VkDevice vkDevice = dxvkDevice->handle();

  VkImageViewHandleInfoNVX imageViewHandleInfo = {VK_STRUCTURE_TYPE_IMAGE_VIEW_HANDLE_INFO_NVX};
  Rc<DxvkImageView> dxvkImageView = pUAV->GetImageView();
  VkImageView vkImageView = dxvkImageView->handle();

  imageViewHandleInfo.imageView = vkImageView;
  imageViewHandleInfo.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;

  *pDriverHandle = dxvkDevice->vkd()->vkGetImageViewHandleNVX(vkDevice, &imageViewHandleInfo);

  if (!*pDriverHandle) {
    Logger::warn("CreateUnorderedAccessViewAndGetDriverHandleNVX() handle==0 - failure");
    // Drop our reference and make sure the caller is
    // not left with a pointer to the released view
    pUAV->Release();
    *ppUAV = nullptr;
    return false;
  }

  return true;
}
/**
 * \brief Creates an SRV and queries its driver handle
 *
 * Only 2D textures are supported. On success, the view is
 * remembered under its handle so that it can be looked up
 * again by \c GetCudaTextureObjectNVX.
 * \param [in] pResource Resource to create the view for
 * \param [in] pDesc View description
 * \param [out] ppSRV Receives the new view. Reset to \c nullptr
 *    if the view was created but no handle could be queried.
 * \param [out] pDriverHandle Receives the driver handle of the view
 * \returns \c true on success, \c false on any failure
 */
bool STDMETHODCALLTYPE D3D11DeviceExt::CreateShaderResourceViewAndGetDriverHandleNVX(ID3D11Resource* pResource, const D3D11_SHADER_RESOURCE_VIEW_DESC* pDesc, ID3D11ShaderResourceView** ppSRV, uint32_t* pDriverHandle) {
  // Both outputs are dereferenced unconditionally further down
  if (!ppSRV || !pDriverHandle) {
    Logger::warn("CreateShaderResourceViewAndGetDriverHandleNVX() - failure - null output pointer");
    return false;
  }

  D3D11_COMMON_RESOURCE_DESC resourceDesc;
  if (!SUCCEEDED(GetCommonResourceDesc(pResource, &resourceDesc))) {
    Logger::warn("CreateShaderResourceViewAndGetDriverHandleNVX() - GetCommonResourceDesc() failed");
    return false;
  }

  if (resourceDesc.Dim != D3D11_RESOURCE_DIMENSION_TEXTURE2D) {
    Logger::warn(str::format("CreateShaderResourceViewAndGetDriverHandleNVX() - failure - unsupported dimension: ", resourceDesc.Dim));
    return false;
  }

  auto texture = GetCommonTexture(pResource);
  Rc<DxvkImage> dxvkImage = texture->GetImage();

  // Fails only if the image has neither storage nor sampled usage
  if (0 == (dxvkImage->info().usage & (VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT))) {
    Logger::warn(str::format("CreateShaderResourceViewAndGetDriverHandleNVX(res=", pResource, ") image info missing required usage bit(s); can't be used for vkGetImageViewHandleNVX - failure"));
    return false;
  }

  if (!SUCCEEDED(m_device->CreateShaderResourceView(pResource, pDesc, ppSRV))) {
    return false;
  }

  D3D11ShaderResourceView* pSRV = static_cast<D3D11ShaderResourceView*>(*ppSRV);
  Rc<DxvkDevice> dxvkDevice = m_device->GetDXVKDevice();
  VkDevice vkDevice = dxvkDevice->handle();

  VkImageViewHandleInfoNVX imageViewHandleInfo = {VK_STRUCTURE_TYPE_IMAGE_VIEW_HANDLE_INFO_NVX};
  Rc<DxvkImageView> dxvkImageView = pSRV->GetImageView();
  VkImageView vkImageView = dxvkImageView->handle();

  imageViewHandleInfo.imageView = vkImageView;
  imageViewHandleInfo.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;

  *pDriverHandle = dxvkDevice->vkd()->vkGetImageViewHandleNVX(vkDevice, &imageViewHandleInfo);

  if (!*pDriverHandle) {
    Logger::warn("CreateShaderResourceViewAndGetDriverHandleNVX() handle==0 - failure");
    // Drop our reference and make sure the caller is
    // not left with a pointer to the released view
    pSRV->Release();
    *ppSRV = nullptr;
    return false;
  }

  // will need to look-up resource from uint32 handle later
  AddSrvAndHandleNVX(*ppSRV, *pDriverHandle);
  return true;
}
/**
 * \brief Creates a sampler state and assigns it a driver handle
 *
 * The handle is taken from a process-wide counter and
 * is remembered together with the sampler for later look-up.
 * \param [in] pSamplerDesc Sampler description
 * \param [out] ppSamplerState Receives the new sampler state
 * \param [out] pDriverHandle Receives the handle of the sampler
 * \returns \c true on success, \c false if sampler creation failed
 */
bool STDMETHODCALLTYPE D3D11DeviceExt::CreateSamplerStateAndGetDriverHandleNVX(const D3D11_SAMPLER_DESC* pSamplerDesc, ID3D11SamplerState** ppSamplerState, uint32_t* pDriverHandle) {
  HRESULT hr = m_device->CreateSamplerState(pSamplerDesc, ppSamplerState);

  if (FAILED(hr))
    return false;

  // for our purposes the actual value doesn't matter, only its uniqueness
  static ULONG handleCounter = 1;
  const uint32_t handle = InterlockedIncrement(&handleCounter);
  *pDriverHandle = handle;

  // will need to look-up sampler from uint32 handle later
  AddSamplerAndHandleNVX(*ppSamplerState, handle);
  return true;
}
/**
 * \brief Remembers a sampler under the given handle
 *
 * Replaces any sampler previously stored for that handle.
 * Only the raw pointer is stored.
 */
void D3D11DeviceExt::AddSamplerAndHandleNVX(ID3D11SamplerState* pSampler, uint32_t Handle) {
  std::lock_guard guard(m_mapLock);
  m_samplerHandleToPtr.insert_or_assign(Handle, pSampler);
}
/**
 * \brief Looks up the sampler stored for a handle
 * \returns The sampler, or \c nullptr if the handle is unknown
 */
ID3D11SamplerState* D3D11DeviceExt::HandleToSamplerNVX(uint32_t Handle) {
  std::lock_guard guard(m_mapLock);

  auto entry = m_samplerHandleToPtr.find(Handle);
  return entry != m_samplerHandleToPtr.end()
    ? entry->second
    : nullptr;
}
/**
 * \brief Remembers a shader resource view under the given handle
 *
 * Replaces any view previously stored for that handle.
 * Only the raw pointer is stored.
 */
void D3D11DeviceExt::AddSrvAndHandleNVX(ID3D11ShaderResourceView* pSrv, uint32_t Handle) {
  std::lock_guard guard(m_mapLock);
  m_srvHandleToPtr.insert_or_assign(Handle, pSrv);
}
/**
 * \brief Looks up the shader resource view stored for a handle
 * \returns The view, or \c nullptr if the handle is unknown
 */
ID3D11ShaderResourceView* D3D11DeviceExt::HandleToSrvNVX(uint32_t Handle) {
  std::lock_guard guard(m_mapLock);

  auto entry = m_srvHandleToPtr.find(Handle);
  return entry != m_srvHandleToPtr.end()
    ? entry->second
    : nullptr;
}
D3D11VideoDevice::D3D11VideoDevice(
@ -2827,7 +3141,8 @@ namespace dxvk {
return S_OK;
}
if (riid == __uuidof(ID3D11VkExtDevice)) {
if (riid == __uuidof(ID3D11VkExtDevice)
|| riid == __uuidof(ID3D11VkExtDevice1)) {
*ppvObject = ref(&m_d3d11DeviceExt);
return S_OK;
}

View File

@ -15,6 +15,7 @@
#include "../util/com/com_private_data.h"
#include "d3d11_cmdlist.h"
#include "d3d11_cuda.h"
#include "d3d11_initializer.h"
#include "d3d11_interfaces.h"
#include "d3d11_interop.h"
@ -494,7 +495,7 @@ namespace dxvk {
/**
* \brief Extended D3D11 device
*/
class D3D11DeviceExt : public ID3D11VkExtDevice {
class D3D11DeviceExt : public ID3D11VkExtDevice1 {
public:
@ -513,14 +514,67 @@ namespace dxvk {
BOOL STDMETHODCALLTYPE GetExtensionSupport(
D3D11_VK_EXTENSION Extension);
bool STDMETHODCALLTYPE GetCudaTextureObjectNVX(
uint32_t srvDriverHandle,
uint32_t samplerDriverHandle,
uint32_t* pCudaTextureHandle);
bool STDMETHODCALLTYPE CreateCubinComputeShaderWithNameNVX(
const void* pCubin,
uint32_t size,
uint32_t blockX,
uint32_t blockY,
uint32_t blockZ,
const char* pShaderName,
IUnknown** phShader);
bool STDMETHODCALLTYPE GetResourceHandleGPUVirtualAddressAndSizeNVX(
void* hObject,
uint64_t* gpuVAStart,
uint64_t* gpuVASize);
bool STDMETHODCALLTYPE CreateUnorderedAccessViewAndGetDriverHandleNVX(
ID3D11Resource* pResource,
const D3D11_UNORDERED_ACCESS_VIEW_DESC* pDesc,
ID3D11UnorderedAccessView** ppUAV,
uint32_t* pDriverHandle);
bool STDMETHODCALLTYPE CreateShaderResourceViewAndGetDriverHandleNVX(
ID3D11Resource* pResource,
const D3D11_SHADER_RESOURCE_VIEW_DESC* pDesc,
ID3D11ShaderResourceView** ppSRV,
uint32_t* pDriverHandle);
bool STDMETHODCALLTYPE CreateSamplerStateAndGetDriverHandleNVX(
const D3D11_SAMPLER_DESC* pSamplerDesc,
ID3D11SamplerState** ppSamplerState,
uint32_t* pDriverHandle);
private:
D3D11DXGIDevice* m_container;
D3D11Device* m_device;
void AddSamplerAndHandleNVX(
ID3D11SamplerState* pSampler,
uint32_t Handle);
ID3D11SamplerState* HandleToSamplerNVX(
uint32_t Handle);
void AddSrvAndHandleNVX(
ID3D11ShaderResourceView* pSrv,
uint32_t Handle);
ID3D11ShaderResourceView* HandleToSrvNVX(
uint32_t Handle);
dxvk::mutex m_mapLock;
std::unordered_map<uint32_t, ID3D11SamplerState*> m_samplerHandleToPtr;
std::unordered_map<uint32_t, ID3D11ShaderResourceView*> m_srvHandleToPtr;
};
/**
* \brief D3D11 video device
*/

View File

@ -264,12 +264,7 @@ namespace dxvk {
// Initialize the image on the GPU
std::lock_guard<dxvk::mutex> lock(m_mutex);
VkImageSubresourceRange subresources;
subresources.aspectMask = image->formatInfo()->aspectMask;
subresources.baseMipLevel = 0;
subresources.levelCount = image->info().mipLevels;
subresources.baseArrayLayer = 0;
subresources.layerCount = image->info().numLayers;
VkImageSubresourceRange subresources = image->getAvailableSubresources();
m_context->initImage(image, subresources, VK_IMAGE_LAYOUT_PREINITIALIZED);

View File

@ -14,6 +14,8 @@ enum D3D11_VK_EXTENSION : uint32_t {
D3D11_VK_EXT_MULTI_DRAW_INDIRECT_COUNT = 1,
D3D11_VK_EXT_DEPTH_BOUNDS = 2,
D3D11_VK_EXT_BARRIER_CONTROL = 3,
D3D11_VK_NVX_BINARY_IMPORT = 4,
D3D11_VK_NVX_IMAGE_VIEW_HANDLE = 5,
};
@ -45,6 +47,54 @@ ID3D11VkExtDevice : public IUnknown {
};
/**
* \brief Extension of the extended D3D11 device
*
* Introduces methods to get virtual addresses and driver
* handles for resources, and to create objects for
* D3D11-Cuda interop. Every method returns \c true on
* success and \c false on failure.
*/
MIDL_INTERFACE("cfcf64ef-9586-46d0-bca4-97cf2ca61b06")
ID3D11VkExtDevice1 : public ID3D11VkExtDevice {
// Queries the GPU virtual address and size of a resource.
// hObject is interpreted as an ID3D11Resource pointer.
virtual bool STDMETHODCALLTYPE GetResourceHandleGPUVirtualAddressAndSizeNVX(
void* hObject,
uint64_t* gpuVAStart,
uint64_t* gpuVASize) = 0;
// Creates an unordered access view and returns
// a 32-bit driver handle for it
virtual bool STDMETHODCALLTYPE CreateUnorderedAccessViewAndGetDriverHandleNVX(
ID3D11Resource* pResource,
const D3D11_UNORDERED_ACCESS_VIEW_DESC* pDesc,
ID3D11UnorderedAccessView** ppUAV,
uint32_t* pDriverHandle) = 0;
// Creates a shader resource view and returns
// a 32-bit driver handle for it
virtual bool STDMETHODCALLTYPE CreateShaderResourceViewAndGetDriverHandleNVX(
ID3D11Resource* pResource,
const D3D11_SHADER_RESOURCE_VIEW_DESC* pDesc,
ID3D11ShaderResourceView** ppSRV,
uint32_t* pDriverHandle) = 0;
// Creates a sampler state and returns
// a 32-bit driver handle for it
virtual bool STDMETHODCALLTYPE CreateSamplerStateAndGetDriverHandleNVX(
const D3D11_SAMPLER_DESC* pSamplerDesc,
ID3D11SamplerState** ppSamplerState,
uint32_t* pDriverHandle) = 0;
// Creates a launchable kernel object from a cubin binary
// and the name of a function contained in it
virtual bool STDMETHODCALLTYPE CreateCubinComputeShaderWithNameNVX(
const void* pCubin,
uint32_t size,
uint32_t blockX,
uint32_t blockY,
uint32_t blockZ,
const char* pShaderName,
IUnknown** phShader) = 0;
// Combines the driver handles of a shader resource view
// and a sampler into a single texture handle
virtual bool STDMETHODCALLTYPE GetCudaTextureObjectNVX(
uint32_t srvDriverHandle,
uint32_t samplerDriverHandle,
uint32_t* pCudaTextureHandle) = 0;
};
/**
* \brief Extended D3D11 context
*
@ -88,13 +138,39 @@ ID3D11VkExtContext : public IUnknown {
virtual void STDMETHODCALLTYPE SetBarrierControl(
UINT ControlFlags) = 0;
};
/**
* \brief Extension of the extended D3D11 context
*
* Provides functionality to launch a Cuda kernel
*/
MIDL_INTERFACE("874b09b2-ae0b-41d8-8476-5f3b7a0e879d")
ID3D11VkExtContext1 : public ID3D11VkExtContext {
// Launches the given kernel with the given grid size.
// pParams points to paramSize bytes of opaque kernel
// parameters. The read and write resource arrays list
// the resources that the kernel accesses.
virtual bool STDMETHODCALLTYPE LaunchCubinShaderNVX(
IUnknown* hShader,
uint32_t gridX,
uint32_t gridY,
uint32_t gridZ,
const void* pParams,
uint32_t paramSize,
void* const* pReadResources,
uint32_t numReadResources,
void* const* pWriteResources,
uint32_t numWriteResources) = 0;
};
#ifdef _MSC_VER
struct __declspec(uuid("8a6e3c42-f74c-45b7-8265-a231b677ca17")) ID3D11VkExtDevice;
struct __declspec(uuid("cfcf64ef-9586-46d0-bca4-97cf2ca61b06")) ID3D11VkExtDevice1;
struct __declspec(uuid("fd0bca13-5cb6-4c3a-987e-4750de2ca791")) ID3D11VkExtContext;
struct __declspec(uuid("874b09b2-ae0b-41d8-8476-5f3b7a0e879d")) ID3D11VkExtContext1;
#else
__CRT_UUID_DECL(ID3D11VkExtDevice, 0x8a6e3c42,0xf74c,0x45b7,0x82,0x65,0xa2,0x31,0xb6,0x77,0xca,0x17);
__CRT_UUID_DECL(ID3D11VkExtDevice1, 0xcfcf64ef,0x9586,0x46d0,0xbc,0xa4,0x97,0xcf,0x2c,0xa6,0x1b,0x06);
__CRT_UUID_DECL(ID3D11VkExtContext, 0xfd0bca13,0x5cb6,0x4c3a,0x98,0x7e,0x47,0x50,0xde,0x2c,0xa7,0x91);
__CRT_UUID_DECL(ID3D11VkExtContext1, 0x874b09b2,0xae0b,0x41d8,0x84,0x76,0x5f,0x3b,0x7a,0x0e,0x87,0x9d);
#endif

View File

@ -33,6 +33,7 @@ d3d11_src = [
'd3d11_context_def.cpp',
'd3d11_context_ext.cpp',
'd3d11_context_imm.cpp',
'd3d11_cuda.cpp',
'd3d11_depth_stencil.cpp',
'd3d11_device.cpp',
'd3d11_enums.cpp',

View File

@ -263,7 +263,7 @@ namespace dxvk {
DxvkDeviceFeatures enabledFeatures) {
DxvkDeviceExtensions devExtensions;
std::array<DxvkExt*, 25> devExtensionList = {{
std::array<DxvkExt*, 28> devExtensionList = {{
&devExtensions.amdMemoryOverallocationBehaviour,
&devExtensions.amdShaderFragmentMask,
&devExtensions.ext4444Formats,
@ -289,8 +289,17 @@ namespace dxvk {
&devExtensions.khrSamplerMirrorClampToEdge,
&devExtensions.khrShaderFloatControls,
&devExtensions.khrSwapchain,
&devExtensions.nvxBinaryImport,
&devExtensions.nvxImageViewHandle,
&devExtensions.khrBufferDeviceAddress,
}};
// VK_KHR_buffer_device_address can be expensive to enable on
// some drivers; only enable selectively for Cuda interop
if (m_deviceExtensions.supports(devExtensions.nvxBinaryImport.name()) &&
m_deviceExtensions.supports(devExtensions.nvxImageViewHandle.name()))
devExtensions.khrBufferDeviceAddress.setMode(DxvkExtMode::Optional);
DxvkNameSet extensionsEnabled;
if (!m_deviceExtensions.enableExtensions(

View File

@ -340,6 +340,9 @@ namespace dxvk {
pSizes, pStrides);
}
/**
 * \brief Records a Cuda kernel launch
 *
 * The command goes into the execution command buffer.
 * \param [in] launchInfo Kernel launch info
 */
void cmdLaunchCuKernel(VkCuLaunchInfoNVX launchInfo) {
  const VkCuLaunchInfoNVX* pLaunchInfo = &launchInfo;
  m_vkd->vkCmdCuLaunchKernelNVX(m_execBuffer, pLaunchInfo);
}
void cmdBlitImage(
VkImage srcImage,

View File

@ -1,4 +1,6 @@
#include <cstring>
#include <vector>
#include <utility>
#include "dxvk_device.h"
#include "dxvk_context.h"
@ -302,12 +304,7 @@ namespace dxvk {
if (image->info().layout != layout) {
this->spillRenderPass(true);
VkImageSubresourceRange subresources;
subresources.aspectMask = image->formatInfo()->aspectMask;
subresources.baseArrayLayer = 0;
subresources.baseMipLevel = 0;
subresources.layerCount = image->info().numLayers;
subresources.levelCount = image->info().mipLevels;
VkImageSubresourceRange subresources = image->getAvailableSubresources();
this->prepareImage(m_execBarriers, image, subresources);
@ -2609,6 +2606,73 @@ namespace dxvk {
m_cmd->trackResource<DxvkAccess::None>(event);
}
void DxvkContext::launchCuKernelNVX(
const VkCuLaunchInfoNVX& nvxLaunchInfo,
const std::vector<std::pair<Rc<DxvkBuffer>, DxvkAccessFlags>>& buffers,
const std::vector<std::pair<Rc<DxvkImage>, DxvkAccessFlags>>& images) {
// The resources in the std::vectors above are called-out
// explicitly in the API for barrier and tracking purposes
// since they're being used bindlessly.
this->spillRenderPass(true);
VkPipelineStageFlags srcStages = 0;
VkAccessFlags srcAccess = 0;
for (auto& r : buffers) {
srcStages |= r.first->info().stages;
srcAccess |= r.first->info().access;
}
for (auto& r : images) {
srcStages |= r.first->info().stages;
srcAccess |= r.first->info().access;
this->prepareImage(m_execBarriers, r.first, r.first->getAvailableSubresources());
}
m_execBarriers.accessMemory(srcStages, srcAccess,
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT);
m_execBarriers.recordCommands(m_cmd);
m_cmd->cmdLaunchCuKernel(nvxLaunchInfo);
for (auto& r : buffers) {
VkAccessFlags accessFlags = (r.second.test(DxvkAccess::Read) * VK_ACCESS_SHADER_READ_BIT)
| (r.second.test(DxvkAccess::Write) * VK_ACCESS_SHADER_WRITE_BIT);
DxvkBufferSliceHandle bufferSlice = r.first->getSliceHandle();
m_execBarriers.accessBuffer(bufferSlice,
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
accessFlags,
r.first->info().stages,
r.first->info().access);
}
for (auto& r : images) {
VkAccessFlags accessFlags = (r.second.test(DxvkAccess::Read) * VK_ACCESS_SHADER_READ_BIT)
| (r.second.test(DxvkAccess::Write) * VK_ACCESS_SHADER_WRITE_BIT);
m_execBarriers.accessImage(r.first,
r.first->getAvailableSubresources(),
r.first->info().layout,
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
accessFlags,
r.first->info().layout,
r.first->info().stages,
r.first->info().access);
}
for (auto& r : images) {
if (r.second.test(DxvkAccess::Read)) m_cmd->trackResource<DxvkAccess::Read>(r.first);
if (r.second.test(DxvkAccess::Write)) m_cmd->trackResource<DxvkAccess::Write>(r.first);
}
for (auto& r : buffers) {
if (r.second.test(DxvkAccess::Read)) m_cmd->trackResource<DxvkAccess::Read>(r.first);
if (r.second.test(DxvkAccess::Write)) m_cmd->trackResource<DxvkAccess::Write>(r.first);
}
}
void DxvkContext::writeTimestamp(const Rc<DxvkGpuQuery>& query) {
m_queryManager.writeTimestamp(m_cmd, query);

View File

@ -6,6 +6,7 @@
#include "dxvk_context_state.h"
#include "dxvk_data.h"
#include "dxvk_objects.h"
#include "dxvk_resource.h"
#include "dxvk_util.h"
namespace dxvk {
@ -988,6 +989,23 @@ namespace dxvk {
void setBarrierControl(
DxvkBarrierControlFlags control);
/**
* \brief Launches a Cuda kernel
*
* Since the kernel is launched with an opaque set of
* kernel-specific parameters which may reference
* resources bindlessly, such resources must be listed by
* the caller in the 'buffers' and 'images' parameters so
* that their access may be tracked appropriately.
* \param [in] nvxLaunchInfo Kernel launch parameter struct
* \param [in] buffers List of {buffer,read,write} used by kernel
* \param [in] images List of {image,read,write} used by kernel
*/
void launchCuKernelNVX(
const VkCuLaunchInfoNVX& nvxLaunchInfo,
const std::vector<std::pair<Rc<DxvkBuffer>, DxvkAccessFlags>>& buffers,
const std::vector<std::pair<Rc<DxvkImage>, DxvkAccessFlags>>& images);
/**
* \brief Signals a GPU event
* \param [in] event The event

View File

@ -283,6 +283,9 @@ namespace dxvk {
DxvkExt khrSamplerMirrorClampToEdge = { VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME, DxvkExtMode::Optional };
DxvkExt khrShaderFloatControls = { VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME, DxvkExtMode::Optional };
DxvkExt khrSwapchain = { VK_KHR_SWAPCHAIN_EXTENSION_NAME, DxvkExtMode::Required };
DxvkExt nvxBinaryImport = { VK_NVX_BINARY_IMPORT_EXTENSION_NAME, DxvkExtMode::Optional };
DxvkExt nvxImageViewHandle = { VK_NVX_IMAGE_VIEW_HANDLE_EXTENSION_NAME, DxvkExtMode::Optional };
DxvkExt khrBufferDeviceAddress = { VK_KHR_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME, DxvkExtMode::Disabled };
};
/**

View File

@ -298,6 +298,21 @@ namespace dxvk {
VkDeviceSize memSize() const {
return m_image.memory.length();
}
/**
 * \brief Queries the subresource range covering the entire image
 *
 * The range includes all aspects of the image format
 * as well as all mip levels and array layers.
 * \returns Subresource range of the whole image
 */
VkImageSubresourceRange getAvailableSubresources() const {
  VkImageSubresourceRange range = { };
  range.aspectMask      = formatInfo()->aspectMask;
  range.baseMipLevel    = 0;
  range.baseArrayLayer  = 0;
  range.levelCount      = info().mipLevels;
  range.layerCount      = info().numLayers;
  return range;
}
private:

View File

@ -347,6 +347,23 @@ namespace dxvk::vk {
VULKAN_FN(vkCmdBeginQueryIndexedEXT);
VULKAN_FN(vkCmdEndQueryIndexedEXT);
#endif
#ifdef VK_NVX_image_view_handle
VULKAN_FN(vkGetImageViewHandleNVX);
VULKAN_FN(vkGetImageViewAddressNVX);
#endif
#ifdef VK_NVX_binary_import
VULKAN_FN(vkCreateCuModuleNVX);
VULKAN_FN(vkCreateCuFunctionNVX);
VULKAN_FN(vkDestroyCuModuleNVX);
VULKAN_FN(vkDestroyCuFunctionNVX);
VULKAN_FN(vkCmdCuLaunchKernelNVX);
#endif
#ifdef VK_KHR_buffer_device_address
VULKAN_FN(vkGetBufferDeviceAddressKHR);
#endif
};
}