From c1d6c20066e1422f51a470f805e0d9496d80f6f4 Mon Sep 17 00:00:00 2001 From: Philip Rebohle Date: Thu, 11 Jan 2018 12:23:55 +0100 Subject: [PATCH] [d3d11] Implemented D3D11 parts of append/consume buffers --- src/d3d11/d3d11_context.cpp | 62 +++++++++++++++++++------ src/d3d11/d3d11_context.h | 5 +++ src/d3d11/d3d11_context_state.h | 1 + src/d3d11/d3d11_device.cpp | 62 ++++++++++++++++++++++++- src/d3d11/d3d11_device.h | 13 ++++++ src/d3d11/d3d11_include.h | 5 +++ src/d3d11/d3d11_uav.cpp | 59 ++++++++++++++++++++++++ src/d3d11/d3d11_uav.h | 80 +++++++++++++++++++++++++++++++++ src/d3d11/d3d11_view.h | 11 ++--- src/d3d11/meson.build | 1 + src/dxbc/dxbc_util.cpp | 8 ++-- src/dxbc/dxbc_util.h | 1 + src/dxvk/dxvk_limits.h | 2 +- 13 files changed, 284 insertions(+), 26 deletions(-) create mode 100644 src/d3d11/d3d11_uav.cpp create mode 100644 src/d3d11/d3d11_uav.h diff --git a/src/d3d11/d3d11_context.cpp b/src/d3d11/d3d11_context.cpp index ec455f937..980db1394 100644 --- a/src/d3d11/d3d11_context.cpp +++ b/src/d3d11/d3d11_context.cpp @@ -496,7 +496,18 @@ namespace dxvk { ID3D11Buffer* pDstBuffer, UINT DstAlignedByteOffset, ID3D11UnorderedAccessView* pSrcView) { - Logger::err("D3D11DeviceContext::CopyStructureCount: Not implemented"); + auto buf = static_cast(pDstBuffer); + auto uav = static_cast(pSrcView); + + const DxvkBufferSlice dstSlice = buf->GetBufferSlice(DstAlignedByteOffset); + const DxvkBufferSlice srcSlice = uav->GetCounterSlice(); + + m_context->copyBuffer( + dstSlice.buffer(), + dstSlice.offset(), + srcSlice.buffer(), + srcSlice.offset(), + sizeof(uint32_t)); } @@ -504,7 +515,7 @@ namespace dxvk { ID3D11RenderTargetView* pRenderTargetView, const FLOAT ColorRGBA[4]) { auto rtv = static_cast(pRenderTargetView); - const Rc dxvkView = rtv->GetDXVKImageView(); + const Rc dxvkView = rtv->GetImageView(); // Find out whether the given attachment is currently bound // or not, and if it is, which attachment index it has. @@ -573,7 +584,7 @@ namespace dxvk { FLOAT Depth, UINT8 Stencil) { auto dsv = static_cast(pDepthStencilView); - const Rc dxvkView = dsv->GetDXVKImageView(); + const Rc dxvkView = dsv->GetImageView(); VkClearDepthStencilValue clearValue; clearValue.depth = Depth; @@ -1471,15 +1482,17 @@ namespace dxvk { UINT NumUAVs, ID3D11UnorderedAccessView* const* ppUnorderedAccessViews, const UINT* pUAVInitialCounts) { - // TODO implement append-consume buffers -// if (pUAVInitialCounts != nullptr) -// Logger::err("D3D11DeviceContext: pUAVInitialCounts not supported"); - this->BindUnorderedAccessViews( DxbcProgramType::ComputeShader, m_state.cs.unorderedAccessViews, StartSlot, NumUAVs, ppUnorderedAccessViews); + + if (pUAVInitialCounts != nullptr) { + this->InitUnorderedAccessViewCounters( + NumUAVs, ppUnorderedAccessViews, + pUAVInitialCounts); + } } @@ -1566,11 +1579,11 @@ namespace dxvk { for (UINT i = 0; i < m_state.om.renderTargetViews.size(); i++) { if (m_state.om.renderTargetViews.at(i) != nullptr) - attachments.setColorTarget(i, m_state.om.renderTargetViews.at(i)->GetDXVKImageView()); + attachments.setColorTarget(i, m_state.om.renderTargetViews.at(i)->GetImageView()); } if (m_state.om.depthStencilView != nullptr) - attachments.setDepthTarget(m_state.om.depthStencilView->GetDXVKImageView()); + attachments.setDepthTarget(m_state.om.depthStencilView->GetImageView()); if (attachments.hasAttachments()) framebuffer = m_device->createFramebuffer(attachments); @@ -1884,10 +1897,10 @@ namespace dxvk { // Figure out what we have to bind based on the resource type if (resView->GetResourceType() == D3D11_RESOURCE_DIMENSION_BUFFER) { m_context->bindResourceTexelBuffer( - slotId + i, resView->GetDXVKBufferView()); + slotId + i, resView->GetBufferView()); } else { m_context->bindResourceImage( - slotId + i, resView->GetDXVKImageView()); + slotId + i, resView->GetImageView()); } } else { // When unbinding a resource, it doesn't really matter if @@ -1920,10 +1933,10 @@ namespace dxvk { // Figure out what we have to bind based on the resource type if (uav->GetResourceType() == D3D11_RESOURCE_DIMENSION_BUFFER) { m_context->bindResourceTexelBuffer( - slotId + i, uav->GetDXVKBufferView()); + slotId + i, uav->GetBufferView()); } else { m_context->bindResourceImage( - slotId + i, uav->GetDXVKImageView()); + slotId + i, uav->GetImageView()); } } else { // When unbinding a resource, it doesn't really matter if @@ -1936,6 +1949,29 @@ namespace dxvk { } + void D3D11DeviceContext::InitUnorderedAccessViewCounters( + UINT NumUAVs, + ID3D11UnorderedAccessView* const* ppUnorderedAccessViews, + const UINT* pUAVInitialCounts) { + for (uint32_t i = 0; i < NumUAVs; i++) { + auto uav = static_cast(ppUnorderedAccessViews[i]); + + if (uav != nullptr) { + const DxvkBufferSlice counterSlice = uav->GetCounterSlice(); + const D3D11UavCounter counterValue = { pUAVInitialCounts[i] }; + + if (counterSlice.handle() != VK_NULL_HANDLE) { + m_context->updateBuffer( + counterSlice.buffer(), + counterSlice.offset(), + counterSlice.length(), + &counterValue); + } + } + } + } + + void D3D11DeviceContext::ApplyViewportState() { // We cannot set less than one viewport in Vulkan, and // rendering with no active viewport is illegal anyway. diff --git a/src/d3d11/d3d11_context.h b/src/d3d11/d3d11_context.h index 30bab91bc..1ec86c008 100644 --- a/src/d3d11/d3d11_context.h +++ b/src/d3d11/d3d11_context.h @@ -594,6 +594,11 @@ namespace dxvk { UINT NumUAVs, ID3D11UnorderedAccessView* const* ppUnorderedAccessViews); + void InitUnorderedAccessViewCounters( + UINT NumUAVs, + ID3D11UnorderedAccessView* const* ppUnorderedAccessViews, + const UINT* pUAVInitialCounts); + void ApplyViewportState(); Rc CreateDefaultSampler(); diff --git a/src/d3d11/d3d11_context_state.h b/src/d3d11/d3d11_context_state.h index 1ed1241e9..66ad097c1 100644 --- a/src/d3d11/d3d11_context_state.h +++ b/src/d3d11/d3d11_context_state.h @@ -7,6 +7,7 @@ #include "d3d11_sampler.h" #include "d3d11_shader.h" #include "d3d11_state.h" +#include "d3d11_uav.h" #include "d3d11_view.h" namespace dxvk { diff --git a/src/d3d11/d3d11_device.cpp b/src/d3d11/d3d11_device.cpp index 4f5e0b415..098ff1d71 100644 --- a/src/d3d11/d3d11_device.cpp +++ b/src/d3d11/d3d11_device.cpp @@ -37,6 +37,8 @@ namespace dxvk { m_context = new D3D11DeviceContext(this, m_dxvkDevice); m_resourceInitContext = m_dxvkDevice->createContext(); + + CreateCounterBuffer(); } @@ -378,10 +380,18 @@ namespace dxvk { return S_FALSE; try { + // Fetch a buffer slice for atomic + // append/consume functionality. + DxvkBufferSlice counterSlice; + + if (desc.Buffer.Flags & (D3D11_BUFFER_UAV_FLAG_APPEND | D3D11_BUFFER_UAV_FLAG_COUNTER)) + counterSlice = AllocateCounterSlice(); + *ppUAView = ref(new D3D11UnorderedAccessView( this, pResource, desc, m_dxvkDevice->createBufferView( - resource->GetBufferSlice().buffer(), viewInfo))); + resource->GetBufferSlice().buffer(), viewInfo), + counterSlice)); return S_OK; } catch (const DxvkError& e) { Logger::err(e.message()); @@ -458,7 +468,8 @@ namespace dxvk { *ppUAView = ref(new D3D11UnorderedAccessView( this, pResource, desc, m_dxvkDevice->createImageView( - textureInfo->image, viewInfo))); + textureInfo->image, viewInfo), + DxvkBufferSlice())); return S_OK; } catch (const DxvkError& e) { Logger::err(e.message()); @@ -1273,6 +1284,27 @@ namespace dxvk { } + DxvkBufferSlice D3D11Device::AllocateCounterSlice() { + std::lock_guard lock(m_counterMutex); + + if (m_counterSlices.size() == 0) + throw DxvkError("D3D11Device: Failed to allocate counter slice"); + + uint32_t sliceId = m_counterSlices.back(); + m_counterSlices.pop_back(); + + return DxvkBufferSlice(m_counterBuffer, + sizeof(D3D11UavCounter) * sliceId, + sizeof(D3D11UavCounter)); + } + + + void D3D11Device::FreeCounterSlice(const DxvkBufferSlice& Slice) { + std::lock_guard lock(m_counterMutex); + m_counterSlices.push_back(Slice.offset() / sizeof(D3D11UavCounter)); + } + + VkPipelineStageFlags D3D11Device::GetEnabledShaderStages() const { VkPipelineStageFlags enabledShaderPipelineStages = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT @@ -1751,4 +1783,30 @@ namespace dxvk { return S_OK; } + + void D3D11Device::CreateCounterBuffer() { + const uint32_t MaxCounterStructs = 1 << 16; + + // The counter buffer is used as a storage buffer + DxvkBufferCreateInfo info; + info.size = MaxCounterStructs * sizeof(D3D11UavCounter); + info.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT + | VK_BUFFER_USAGE_TRANSFER_DST_BIT + | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; + info.stages = VK_PIPELINE_STAGE_TRANSFER_BIT + | GetEnabledShaderStages(); + info.access = VK_ACCESS_TRANSFER_READ_BIT + | VK_ACCESS_TRANSFER_WRITE_BIT + | VK_ACCESS_SHADER_READ_BIT + | VK_ACCESS_SHADER_WRITE_BIT; + m_counterBuffer = m_dxvkDevice->createBuffer( + info, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + + // Init the counter struct allocator as well + m_counterSlices.resize(MaxCounterStructs); + + for (uint32_t i = 0; i < MaxCounterStructs; i++) + m_counterSlices[i] = MaxCounterStructs - i - 1; + } + } diff --git a/src/d3d11/d3d11_device.h b/src/d3d11/d3d11_device.h index 668dea5b3..ec1069fed 100644 --- a/src/d3d11/d3d11_device.h +++ b/src/d3d11/d3d11_device.h @@ -1,5 +1,8 @@ #pragma once +#include +#include + #include "../dxbc/dxbc_options.h" #include "../dxgi/dxgi_object.h" @@ -231,6 +234,10 @@ namespace dxvk { return m_dxvkDevice; } + DxvkBufferSlice AllocateCounterSlice(); + + void FreeCounterSlice(const DxvkBufferSlice& Slice); + VkPipelineStageFlags GetEnabledShaderStages() const; DxgiFormatInfo STDMETHODCALLTYPE LookupFormat( @@ -261,6 +268,10 @@ namespace dxvk { D3D11DeviceContext* m_context = nullptr; + std::mutex m_counterMutex; + std::vector m_counterSlices; + Rc m_counterBuffer; + std::mutex m_resourceInitMutex; Rc m_resourceInitContext; @@ -303,6 +314,8 @@ namespace dxvk { HRESULT GetFormatSupportFlags(DXGI_FORMAT Format, UINT* pFlags) const; + void CreateCounterBuffer(); + }; } diff --git a/src/d3d11/d3d11_include.h b/src/d3d11/d3d11_include.h index 57fa8acc0..0aefc5260 100644 --- a/src/d3d11/d3d11_include.h +++ b/src/d3d11/d3d11_include.h @@ -30,6 +30,11 @@ typedef struct D3D11_FEATURE_DATA_D3D10_X_HARDWARE_OPTIONS { typedef enum D3D11_BUFFEREX_SRV_FLAG { D3D11_BUFFEREX_SRV_FLAG_RAW = 1 } D3D11_BUFFEREX_SRV_FLAG; +typedef enum D3D11_UAV_FLAG { + D3D11_BUFFER_UAV_FLAG_RAW = 0x1, + D3D11_BUFFER_UAV_FLAG_APPEND = 0x2, + D3D11_BUFFER_UAV_FLAG_COUNTER = 0x4 +} D3D11_UAV_FLAG; typedef struct D3D11_QUERY_DATA_PIPELINE_STATISTICS { UINT64 IAVertices; UINT64 IAPrimitives; diff --git a/src/d3d11/d3d11_uav.cpp b/src/d3d11/d3d11_uav.cpp new file mode 100644 index 000000000..cf10c959a --- /dev/null +++ b/src/d3d11/d3d11_uav.cpp @@ -0,0 +1,59 @@ +#include "d3d11_device.h" +#include "d3d11_uav.h" + +namespace dxvk { + + D3D11UnorderedAccessView::D3D11UnorderedAccessView( + D3D11Device* device, + ID3D11Resource* resource, + const D3D11_UNORDERED_ACCESS_VIEW_DESC& desc, + const Rc& bufferView, + const DxvkBufferSlice& counterSlice) + : m_device(device), m_resource(resource), + m_desc(desc), m_bufferView(bufferView), + m_counterSlice(counterSlice) { } + + + D3D11UnorderedAccessView::D3D11UnorderedAccessView( + D3D11Device* device, + ID3D11Resource* resource, + const D3D11_UNORDERED_ACCESS_VIEW_DESC& desc, + const Rc& imageView, + const DxvkBufferSlice& counterSlice) + : m_device(device), m_resource(resource), + m_desc(desc), m_imageView(imageView), + m_counterSlice(counterSlice) { } + + + D3D11UnorderedAccessView::~D3D11UnorderedAccessView() { + if (m_counterSlice.handle() != VK_NULL_HANDLE) + m_device->FreeCounterSlice(m_counterSlice); + } + + + HRESULT STDMETHODCALLTYPE D3D11UnorderedAccessView::QueryInterface(REFIID riid, void** ppvObject) { + COM_QUERY_IFACE(riid, ppvObject, IUnknown); + COM_QUERY_IFACE(riid, ppvObject, ID3D11DeviceChild); + COM_QUERY_IFACE(riid, ppvObject, ID3D11View); + COM_QUERY_IFACE(riid, ppvObject, ID3D11UnorderedAccessView); + + Logger::warn("ID3D11UnorderedAccessView::QueryInterface: Unknown interface query"); + return E_NOINTERFACE; + } + + + void STDMETHODCALLTYPE D3D11UnorderedAccessView::GetDevice(ID3D11Device** ppDevice) { + *ppDevice = m_device.ref(); + } + + + void STDMETHODCALLTYPE D3D11UnorderedAccessView::GetResource(ID3D11Resource** ppResource) { + *ppResource = m_resource.ref(); + } + + + void STDMETHODCALLTYPE D3D11UnorderedAccessView::GetDesc(D3D11_UNORDERED_ACCESS_VIEW_DESC* pDesc) { + *pDesc = m_desc; + } + +} diff --git a/src/d3d11/d3d11_uav.h b/src/d3d11/d3d11_uav.h new file mode 100644 index 000000000..105490880 --- /dev/null +++ b/src/d3d11/d3d11_uav.h @@ -0,0 +1,80 @@ +#pragma once + +#include "../dxvk/dxvk_device.h" + +#include "d3d11_device_child.h" + +namespace dxvk { + + class D3D11Device; + + struct D3D11UavCounter { + uint32_t atomicCtr; + }; + + + /** + * \brief Unordered access view + * + * Unordered access views are special in that they can + * have counters, which can be used inside shaders to + * atomically append or consume structures. + */ + class D3D11UnorderedAccessView : public D3D11DeviceChild { + + public: + + D3D11UnorderedAccessView( + D3D11Device* device, + ID3D11Resource* resource, + const D3D11_UNORDERED_ACCESS_VIEW_DESC& desc, + const Rc& bufferView, + const DxvkBufferSlice& counterSlice); + + D3D11UnorderedAccessView( + D3D11Device* device, + ID3D11Resource* resource, + const D3D11_UNORDERED_ACCESS_VIEW_DESC& desc, + const Rc& imageView, + const DxvkBufferSlice& counterSlice); + + ~D3D11UnorderedAccessView(); + + HRESULT STDMETHODCALLTYPE QueryInterface(REFIID riid, void** ppvObject) final; + + void STDMETHODCALLTYPE GetDevice(ID3D11Device** ppDevice) final; + + void STDMETHODCALLTYPE GetResource(ID3D11Resource** ppResource) final; + + void STDMETHODCALLTYPE GetDesc(D3D11_UNORDERED_ACCESS_VIEW_DESC* pDesc) final; + + D3D11_RESOURCE_DIMENSION GetResourceType() const { + D3D11_RESOURCE_DIMENSION type; + m_resource->GetType(&type); + return type; + } + + Rc GetBufferView() const { + return m_bufferView; + } + + Rc GetImageView() const { + return m_imageView; + } + + DxvkBufferSlice GetCounterSlice() const { + return m_counterSlice; + } + + private: + + Com m_device; + Com m_resource; + D3D11_UNORDERED_ACCESS_VIEW_DESC m_desc; + Rc m_bufferView; + Rc m_imageView; + DxvkBufferSlice m_counterSlice; + + }; + +} diff --git a/src/d3d11/d3d11_view.h b/src/d3d11/d3d11_view.h index da0ff062f..ad5020558 100644 --- a/src/d3d11/d3d11_view.h +++ b/src/d3d11/d3d11_view.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "../dxvk/dxvk_device.h" #include "d3d11_device_child.h" @@ -60,17 +60,17 @@ namespace dxvk { *pDesc = m_desc; } - D3D11_RESOURCE_DIMENSION GetResourceType() { + D3D11_RESOURCE_DIMENSION GetResourceType() const { D3D11_RESOURCE_DIMENSION type; m_resource->GetType(&type); return type; } - Rc GetDXVKBufferView() { + Rc GetBufferView() const { return m_bufferView; } - Rc GetDXVKImageView() { + Rc GetImageView() const { return m_imageView; } @@ -94,7 +94,4 @@ namespace dxvk { using D3D11DepthStencilView = D3D11ResourceView< ID3D11DepthStencilView, D3D11_DEPTH_STENCIL_VIEW_DESC>; - using D3D11UnorderedAccessView = D3D11ResourceView< - ID3D11UnorderedAccessView, D3D11_UNORDERED_ACCESS_VIEW_DESC>; - } diff --git a/src/d3d11/meson.build b/src/d3d11/meson.build index 51de23ece..e720d90f3 100644 --- a/src/d3d11/meson.build +++ b/src/d3d11/meson.build @@ -15,6 +15,7 @@ d3d11_src = [ 'd3d11_shader.cpp', 'd3d11_state.cpp', 'd3d11_texture.cpp', + 'd3d11_uav.cpp', 'd3d11_util.cpp', ] diff --git a/src/dxbc/dxbc_util.cpp b/src/dxbc/dxbc_util.cpp index 3064b7534..5bb25d1a1 100644 --- a/src/dxbc/dxbc_util.cpp +++ b/src/dxbc/dxbc_util.cpp @@ -11,13 +11,14 @@ namespace dxvk { // 14 - 29: Samplers // 30 - 157: Shader resources // 158 - 221: Uniform access views - const uint32_t stageOffset = 12 + 158 * 5; + const uint32_t stageOffset = 20 + 158 * 5; switch (bindingType) { case DxbcBindingType::ConstantBuffer: return bindingIndex + stageOffset + 0; case DxbcBindingType::ImageSampler: return bindingIndex + stageOffset + 14; case DxbcBindingType::ShaderResource: return bindingIndex + stageOffset + 30; case DxbcBindingType::UnorderedAccessView:return bindingIndex + stageOffset + 158; + case DxbcBindingType::UavCounter: return bindingIndex + stageOffset + 222; default: Logger::err("computeResourceSlotId: Invalid resource type"); } } else { @@ -28,11 +29,12 @@ namespace dxvk { // 0 - 13: Constant buffers // 14 - 29: Samplers // 30 - 157: Shader resources - const uint32_t stageOffset = 12 + 158 * static_cast(shaderStage); + const uint32_t stageOffset = 20 + 158 * static_cast(shaderStage); switch (bindingType) { case DxbcBindingType::UnorderedAccessView:return bindingIndex + 0; - case DxbcBindingType::StreamOutputBuffer: return bindingIndex + 8; + case DxbcBindingType::UavCounter: return bindingIndex + 8; + case DxbcBindingType::StreamOutputBuffer: return bindingIndex + 16; case DxbcBindingType::ConstantBuffer: return bindingIndex + stageOffset + 0; case DxbcBindingType::ImageSampler: return bindingIndex + stageOffset + 14; case DxbcBindingType::ShaderResource: return bindingIndex + stageOffset + 30; diff --git a/src/dxbc/dxbc_util.h b/src/dxbc/dxbc_util.h index f37502677..f830b96ba 100644 --- a/src/dxbc/dxbc_util.h +++ b/src/dxbc/dxbc_util.h @@ -17,6 +17,7 @@ namespace dxvk { ImageSampler = 2, UnorderedAccessView = 3, StreamOutputBuffer = 4, + UavCounter = 5, }; diff --git a/src/dxvk/dxvk_limits.h b/src/dxvk/dxvk_limits.h index 06364f691..617501dd1 100644 --- a/src/dxvk/dxvk_limits.h +++ b/src/dxvk/dxvk_limits.h @@ -10,7 +10,7 @@ namespace dxvk { MaxNumVertexBindings = 32, MaxNumOutputStreams = 4, MaxNumViewports = 16, - MaxNumResourceSlots = 1024, + MaxNumResourceSlots = 1096, MaxNumActiveBindings = 128, };