1
0
mirror of https://github.com/doitsujin/dxvk.git synced 2025-02-20 19:54:19 +01:00

Merge branch 'csmt'

This commit is contained in:
Philip Rebohle 2018-01-21 18:06:13 +01:00
commit 847d50d812
No known key found for this signature in database
GPG Key ID: C8CC613427A31C99
16 changed files with 746 additions and 351 deletions

View File

@ -11,7 +11,8 @@ namespace dxvk {
const D3D11_BUFFER_DESC* pDesc)
: m_device (pDevice),
m_desc (*pDesc),
m_buffer (CreateBuffer(pDesc)) {
m_buffer (CreateBuffer(pDesc)),
m_bufferInfo{ m_buffer->slice() } {
}

View File

@ -11,6 +11,17 @@ namespace dxvk {
class D3D11DeviceContext;
/**
 * \brief Common buffer info
 *
 * Stores where the buffer was last
 * mapped on the immediate context.
 * An instance is owned by each \c D3D11Buffer and is
 * handed out through \c D3D11Buffer::GetBufferInfo.
 */
struct D3D11BufferInfo {
// Physical slice whose map pointer and length are reported to the
// application when the buffer is mapped. It starts out as the slice
// returned by the buffer's slice() call and is replaced when the
// buffer is mapped with D3D11_MAP_WRITE_DISCARD.
DxvkPhysicalBufferSlice mappedSlice;
};
class D3D11Buffer : public D3D11DeviceChild<ID3D11Buffer> {
static constexpr VkDeviceSize BufferSliceAlignment = 64;
public:
@ -49,12 +60,17 @@ namespace dxvk {
return DxvkBufferSlice(m_buffer, offset, m_buffer->info().size - offset);
}
/**
 * \brief Retrieves the common buffer info
 *
 * The returned pointer refers to a member of this object.
 * The immediate context reads and updates its
 * \c mappedSlice when mapping the buffer.
 * \returns Pointer to the buffer info structure
 */
D3D11BufferInfo* GetBufferInfo() {
return &m_bufferInfo;
}
private:
const Com<D3D11Device> m_device;
const D3D11_BUFFER_DESC m_desc;
Rc<DxvkBuffer> m_buffer;
D3D11BufferInfo m_bufferInfo;
Rc<DxvkBuffer> CreateBuffer(
const D3D11_BUFFER_DESC* pDesc) const;

View File

@ -12,11 +12,9 @@ namespace dxvk {
D3D11DeviceContext::D3D11DeviceContext(
D3D11Device* parent,
Rc<DxvkDevice> device)
: m_parent(parent),
m_device(device) {
m_context = m_device->createContext();
m_context->beginRecording(
m_device->createCommandList());
: m_parent (parent),
m_device (device),
m_csChunk (new DxvkCsChunk()) {
// Create default state objects. We won't ever return them
// to the application, but we'll use them to apply state.
Com<ID3D11BlendState> defaultBlendState;
@ -30,21 +28,33 @@ namespace dxvk {
// Apply default state to the context. This is required
// in order to initialize the DXVK context properly.
m_defaultBlendState = static_cast<D3D11BlendState*>(defaultBlendState.ptr());
m_defaultBlendState->BindToContext(m_context, 0xFFFFFFFF);
m_defaultBlendState = static_cast<D3D11BlendState*> (defaultBlendState.ptr());
m_defaultDepthStencilState = static_cast<D3D11DepthStencilState*>(defaultDepthStencilState.ptr());
m_defaultDepthStencilState->BindToContext(m_context);
m_defaultRasterizerState = static_cast<D3D11RasterizerState*>(defaultRasterizerState.ptr());
m_defaultRasterizerState->BindToContext(m_context);
m_context->setBlendConstants(m_state.om.blendFactor);
m_context->setStencilReference(m_state.om.stencilRef);
m_defaultRasterizerState = static_cast<D3D11RasterizerState*> (defaultRasterizerState.ptr());
// Create a default sampler that we're going to bind
// when the application binds null to a sampler slot.
m_defaultSampler = CreateDefaultSampler();
EmitCs([
dev = m_device,
bsState = m_defaultBlendState,
dsState = m_defaultDepthStencilState,
rsState = m_defaultRasterizerState,
blendConst = DxvkBlendConstants {
m_state.om.blendFactor[0], m_state.om.blendFactor[1],
m_state.om.blendFactor[2], m_state.om.blendFactor[3] },
stencilRef = m_state.om.stencilRef
] (DxvkContext* ctx) {
ctx->beginRecording(dev->createCommandList());
bsState->BindToContext(ctx, 0xFFFFFFFF);
dsState->BindToContext(ctx);
rsState->BindToContext(ctx);
ctx->setBlendConstants (blendConst);
ctx->setStencilReference(stencilRef);
});
}
@ -262,10 +272,20 @@ namespace dxvk {
srcSubresource.mipLevel,
srcSubresource.arrayLayer, 1 };
m_context->copyImage(
dstTextureInfo->image, dstLayers, dstOffset,
srcTextureInfo->image, srcLayers, srcOffset,
extent);
EmitCs([
cDstImage = dstTextureInfo->image,
cSrcImage = srcTextureInfo->image,
cDstLayers = dstLayers,
cSrcLayers = srcLayers,
cDstOffset = dstOffset,
cSrcOffset = srcOffset,
cExtent = extent
] (DxvkContext* ctx) {
ctx->copyImage(
cDstImage, cDstLayers, cDstOffset,
cSrcImage, cSrcLayers, cSrcOffset,
cExtent);
});
}
}
@ -293,12 +313,17 @@ namespace dxvk {
return;
}
m_context->copyBuffer(
dstBuffer.buffer(),
dstBuffer.offset(),
srcBuffer.buffer(),
srcBuffer.offset(),
srcBuffer.length());
EmitCs([
cDstBuffer = std::move(dstBuffer),
cSrcBuffer = std::move(srcBuffer)
] (DxvkContext* ctx) {
ctx->copyBuffer(
cDstBuffer.buffer(),
cDstBuffer.offset(),
cSrcBuffer.buffer(),
cSrcBuffer.offset(),
cSrcBuffer.length());
});
} else {
const D3D11TextureInfo* dstTextureInfo = GetCommonTextureInfo(pDstResource);
const D3D11TextureInfo* srcTextureInfo = GetCommonTextureInfo(pSrcResource);
@ -316,11 +341,19 @@ namespace dxvk {
const VkImageSubresourceLayers srcLayers = {
dstFormatInfo->aspectMask & srcFormatInfo->aspectMask,
i, 0, srcTextureInfo->image->info().numLayers };
m_context->copyImage(
dstTextureInfo->image, dstLayers, VkOffset3D { 0, 0, 0 },
srcTextureInfo->image, srcLayers, VkOffset3D { 0, 0, 0 },
extent);
EmitCs([
cDstImage = dstTextureInfo->image,
cSrcImage = srcTextureInfo->image,
cDstLayers = dstLayers,
cSrcLayers = srcLayers,
cExtent = extent
] (DxvkContext* ctx) {
ctx->copyImage(
cDstImage, cDstLayers, VkOffset3D { 0, 0, 0 },
cSrcImage, cSrcLayers, VkOffset3D { 0, 0, 0 },
cExtent);
});
}
}
}
@ -333,15 +366,17 @@ namespace dxvk {
auto buf = static_cast<D3D11Buffer*>(pDstBuffer);
auto uav = static_cast<D3D11UnorderedAccessView*>(pSrcView);
const DxvkBufferSlice dstSlice = buf->GetBufferSlice(DstAlignedByteOffset);
const DxvkBufferSlice srcSlice = uav->GetCounterSlice();
m_context->copyBuffer(
dstSlice.buffer(),
dstSlice.offset(),
srcSlice.buffer(),
srcSlice.offset(),
sizeof(uint32_t));
EmitCs([
cDstSlice = buf->GetBufferSlice(DstAlignedByteOffset),
cSrcSlice = uav->GetCounterSlice()
] (DxvkContext* ctx) {
ctx->copyBuffer(
cDstSlice.buffer(),
cDstSlice.offset(),
cSrcSlice.buffer(),
cSrcSlice.offset(),
sizeof(uint32_t));
});
}
@ -388,12 +423,22 @@ namespace dxvk {
if (m_parent->GetFeatureLevel() < D3D_FEATURE_LEVEL_10_0)
clearRect.layerCount = 1;
m_context->clearRenderTarget(clearInfo, clearRect);
EmitCs([
cClearInfo = clearInfo,
cClearRect = clearRect
] (DxvkContext* ctx) {
ctx->clearRenderTarget(cClearInfo, cClearRect);
});
} else {
// Image is not bound to the pipeline. We can still clear
// it, but we'll have to use a generic clear function.
m_context->clearColorImage(dxvkView->image(),
clearValue, dxvkView->subresources());
EmitCs([
cClearValue = clearValue,
cDstView = dxvkView
] (DxvkContext* ctx) {
ctx->clearColorImage(cDstView->image(),
cClearValue, cDstView->subresources());
});
}
}
@ -451,10 +496,20 @@ namespace dxvk {
if (m_parent->GetFeatureLevel() < D3D_FEATURE_LEVEL_10_0)
clearRect.layerCount = 1;
m_context->clearRenderTarget(clearInfo, clearRect);
EmitCs([
cClearInfo = clearInfo,
cClearRect = clearRect
] (DxvkContext* ctx) {
ctx->clearRenderTarget(cClearInfo, cClearRect);
});
} else {
m_context->clearDepthStencilImage(dxvkView->image(),
clearValue, dxvkView->subresources());
EmitCs([
cClearValue = clearValue,
cDstView = dxvkView
] (DxvkContext* ctx) {
ctx->clearDepthStencilImage(cDstView->image(),
cClearValue, cDstView->subresources());
});
}
}
@ -463,9 +518,12 @@ namespace dxvk {
auto view = static_cast<D3D11ShaderResourceView*>(pShaderResourceView);
if (view->GetResourceType() != D3D11_RESOURCE_DIMENSION_BUFFER) {
m_context->generateMipmaps(
view->GetImageView()->image(),
view->GetImageView()->subresources());
EmitCs([cDstImageView = view->GetImageView()]
(DxvkContext* ctx) {
ctx->generateMipmaps(
cDstImageView->image(),
cDstImageView->subresources());
});
} else {
Logger::err("D3D11DeviceContext: GenerateMips called on a buffer");
}
@ -488,8 +546,8 @@ namespace dxvk {
const auto bufferResource = static_cast<D3D11Buffer*>(pDstResource);
const auto bufferSlice = bufferResource->GetBufferSlice();
VkDeviceSize offset = 0;
VkDeviceSize size = bufferSlice.length();
VkDeviceSize offset = bufferSlice.offset();
VkDeviceSize size = bufferSlice.length();
if (pDstBox != nullptr) {
offset = pDstBox->left;
@ -506,13 +564,29 @@ namespace dxvk {
if (((size == bufferSlice.length())
&& (bufferSlice.buffer()->memFlags() & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT))) {
m_context->invalidateBuffer(bufferSlice.buffer());
std::memcpy(bufferSlice.mapPtr(0), pSrcData, size);
auto physicalSlice = bufferSlice.buffer()->allocPhysicalSlice();
physicalSlice.resource()->acquire();
std::memcpy(physicalSlice.mapPtr(0), pSrcData, size);
EmitCs([
cDstBuffer = bufferSlice.buffer(),
cPhysicalSlice = std::move(physicalSlice)
] (DxvkContext* ctx) {
ctx->invalidateBuffer(cDstBuffer, cPhysicalSlice);
cPhysicalSlice.resource()->release();
});
} else {
m_context->updateBuffer(
bufferSlice.buffer(),
bufferSlice.offset() + offset,
size, pSrcData);
EmitCs([
cDataBuffer = Rc<DxvkDataBuffer>(new DxvkDataBuffer(pSrcData, size)),
cBufferSlice = bufferSlice.subSlice(offset, size)
] (DxvkContext* ctx) {
ctx->updateBuffer(
cBufferSlice.buffer(),
cBufferSlice.offset(),
cBufferSlice.length(),
cDataBuffer->data());
});
}
} else {
const D3D11TextureInfo* textureInfo
@ -545,10 +619,36 @@ namespace dxvk {
subresource.mipLevel,
subresource.arrayLayer, 1 };
m_context->updateImage(
textureInfo->image, layers,
offset, extent, pSrcData,
auto formatInfo = imageFormatInfo(
textureInfo->image->info().format);
const VkExtent3D regionExtent = util::computeBlockCount(extent, formatInfo->blockSize);
const VkDeviceSize bytesPerRow = regionExtent.width * formatInfo->elementSize;
const VkDeviceSize bytesPerLayer = regionExtent.height * bytesPerRow;
const VkDeviceSize bytesTotal = regionExtent.depth * bytesPerLayer;
Rc<DxvkDataBuffer> imageDataBuffer = new DxvkDataBuffer(bytesTotal);
util::packImageData(
reinterpret_cast<char*>(imageDataBuffer->data()),
reinterpret_cast<const char*>(pSrcData),
regionExtent, formatInfo->elementSize,
SrcRowPitch, SrcDepthPitch);
EmitCs([
cDstImage = textureInfo->image,
cDstLayers = layers,
cDstOffset = offset,
cDstExtent = extent,
cSrcData = std::move(imageDataBuffer),
cSrcBytesPerRow = bytesPerRow,
cSrcBytesPerLayer = bytesPerLayer
] (DxvkContext* ctx) {
ctx->updateImage(cDstImage, cDstLayers,
cDstOffset, cDstExtent, cSrcData->data(),
cSrcBytesPerRow, cSrcBytesPerLayer);
});
}
}
@ -584,9 +684,12 @@ namespace dxvk {
void STDMETHODCALLTYPE D3D11DeviceContext::Draw(
UINT VertexCount,
UINT StartVertexLocation) {
m_context->draw(
VertexCount, 1,
StartVertexLocation, 0);
EmitCs([=] (DxvkContext* ctx) {
ctx->draw(
VertexCount, 1,
StartVertexLocation, 0);
});
m_drawCount += 1;
}
@ -595,10 +698,13 @@ namespace dxvk {
UINT IndexCount,
UINT StartIndexLocation,
INT BaseVertexLocation) {
m_context->drawIndexed(
IndexCount, 1,
StartIndexLocation,
BaseVertexLocation, 0);
EmitCs([=] (DxvkContext* ctx) {
ctx->drawIndexed(
IndexCount, 1,
StartIndexLocation,
BaseVertexLocation, 0);
});
m_drawCount += 1;
}
@ -608,11 +714,14 @@ namespace dxvk {
UINT InstanceCount,
UINT StartVertexLocation,
UINT StartInstanceLocation) {
m_context->draw(
VertexCountPerInstance,
InstanceCount,
StartVertexLocation,
StartInstanceLocation);
EmitCs([=] (DxvkContext* ctx) {
ctx->draw(
VertexCountPerInstance,
InstanceCount,
StartVertexLocation,
StartInstanceLocation);
});
m_drawCount += 1;
}
@ -623,12 +732,15 @@ namespace dxvk {
UINT StartIndexLocation,
INT BaseVertexLocation,
UINT StartInstanceLocation) {
m_context->drawIndexed(
IndexCountPerInstance,
InstanceCount,
StartIndexLocation,
BaseVertexLocation,
StartInstanceLocation);
EmitCs([=] (DxvkContext* ctx) {
ctx->drawIndexed(
IndexCountPerInstance,
InstanceCount,
StartIndexLocation,
BaseVertexLocation,
StartInstanceLocation);
});
m_drawCount += 1;
}
@ -637,9 +749,13 @@ namespace dxvk {
ID3D11Buffer* pBufferForArgs,
UINT AlignedByteOffsetForArgs) {
D3D11Buffer* buffer = static_cast<D3D11Buffer*>(pBufferForArgs);
DxvkBufferSlice bufferSlice = buffer->GetBufferSlice(AlignedByteOffsetForArgs);
m_context->drawIndexedIndirect(bufferSlice, 1, 0);
EmitCs([bufferSlice = buffer->GetBufferSlice(AlignedByteOffsetForArgs)]
(DxvkContext* ctx) {
ctx->drawIndexedIndirect(
bufferSlice, 1, 0);
});
m_drawCount += 1;
}
@ -648,9 +764,12 @@ namespace dxvk {
ID3D11Buffer* pBufferForArgs,
UINT AlignedByteOffsetForArgs) {
D3D11Buffer* buffer = static_cast<D3D11Buffer*>(pBufferForArgs);
DxvkBufferSlice bufferSlice = buffer->GetBufferSlice(AlignedByteOffsetForArgs);
m_context->drawIndirect(bufferSlice, 1, 0);
EmitCs([bufferSlice = buffer->GetBufferSlice(AlignedByteOffsetForArgs)]
(DxvkContext* ctx) {
ctx->drawIndirect(bufferSlice, 1, 0);
});
m_drawCount += 1;
}
@ -659,10 +778,13 @@ namespace dxvk {
UINT ThreadGroupCountX,
UINT ThreadGroupCountY,
UINT ThreadGroupCountZ) {
m_context->dispatch(
ThreadGroupCountX,
ThreadGroupCountY,
ThreadGroupCountZ);
EmitCs([=] (DxvkContext* ctx) {
ctx->dispatch(
ThreadGroupCountX,
ThreadGroupCountY,
ThreadGroupCountZ);
});
m_drawCount += 1;
}
@ -671,23 +793,32 @@ namespace dxvk {
ID3D11Buffer* pBufferForArgs,
UINT AlignedByteOffsetForArgs) {
D3D11Buffer* buffer = static_cast<D3D11Buffer*>(pBufferForArgs);
DxvkBufferSlice bufferSlice = buffer->GetBufferSlice(AlignedByteOffsetForArgs);
m_context->dispatchIndirect(bufferSlice);
EmitCs([bufferSlice = buffer->GetBufferSlice(AlignedByteOffsetForArgs)]
(DxvkContext* ctx) {
ctx->dispatchIndirect(bufferSlice);
});
m_drawCount += 1;
}
void STDMETHODCALLTYPE D3D11DeviceContext::IASetInputLayout(ID3D11InputLayout* pInputLayout) {
auto inputLayout = static_cast<D3D11InputLayout*>(pInputLayout);
Com<D3D11InputLayout> inputLayout =
static_cast<D3D11InputLayout*>(pInputLayout);
if (m_state.ia.inputLayout != inputLayout) {
m_state.ia.inputLayout = inputLayout;
if (inputLayout != nullptr)
inputLayout->BindToContext(m_context);
else
m_context->setInputLayout(0, nullptr, 0, nullptr);
if (inputLayout != nullptr) {
EmitCs([inputLayout] (DxvkContext* ctx) {
inputLayout->BindToContext(ctx);
});
} else {
EmitCs([inputLayout] (DxvkContext* ctx) {
ctx->setInputLayout(0, nullptr, 0, nullptr);
});
}
}
}
@ -727,7 +858,9 @@ namespace dxvk {
}();
m_context->setInputAssemblyState(iaState);
EmitCs([iaState] (DxvkContext* ctx) {
ctx->setInputAssemblyState(iaState);
});
}
}
@ -748,12 +881,19 @@ namespace dxvk {
m_state.ia.vertexBuffers[i].stride = pStrides[i];
if (newBuffer != nullptr) {
m_context->bindVertexBuffer(StartSlot + i,
newBuffer->GetBufferSlice(pOffsets[i]),
pStrides[i]);
EmitCs([
slotId = StartSlot + i,
offset = pOffsets[i],
stride = pStrides[i],
slice = newBuffer->GetBufferSlice(pOffsets[i])
] (DxvkContext* ctx) {
ctx->bindVertexBuffer(
slotId, slice, stride);
});
} else {
m_context->bindVertexBuffer(StartSlot + i,
DxvkBufferSlice(), 0);
EmitCs([cSlotId = StartSlot + i] (DxvkContext* ctx) {
ctx->bindVertexBuffer(cSlotId, DxvkBufferSlice(), 0);
});
}
}
}
@ -781,9 +921,11 @@ namespace dxvk {
default: Logger::err(str::format("D3D11: Invalid index format: ", Format));
}
m_context->bindIndexBuffer(
newBuffer->GetBufferSlice(Offset),
indexType);
EmitCs([indexType,
slice = newBuffer->GetBufferSlice(Offset)
] (DxvkContext* ctx) {
ctx->bindIndexBuffer(slice, indexType);
});
}
}
@ -828,8 +970,10 @@ namespace dxvk {
if (m_state.vs.shader != shader) {
m_state.vs.shader = shader;
m_context->bindShader(VK_SHADER_STAGE_VERTEX_BIT,
shader != nullptr ? shader->GetShader() : nullptr);
EmitCs([cShader = shader != nullptr ? shader->GetShader() : nullptr]
(DxvkContext* ctx) {
ctx->bindShader(VK_SHADER_STAGE_VERTEX_BIT, cShader);
});
}
}
@ -1089,8 +1233,10 @@ namespace dxvk {
if (m_state.gs.shader != shader) {
m_state.gs.shader = shader;
m_context->bindShader(VK_SHADER_STAGE_GEOMETRY_BIT,
shader != nullptr ? shader->GetShader() : nullptr);
EmitCs([cShader = shader != nullptr ? shader->GetShader() : nullptr]
(DxvkContext* ctx) {
ctx->bindShader(VK_SHADER_STAGE_GEOMETRY_BIT, cShader);
});
}
}
@ -1182,8 +1328,10 @@ namespace dxvk {
if (m_state.ps.shader != shader) {
m_state.ps.shader = shader;
m_context->bindShader(VK_SHADER_STAGE_FRAGMENT_BIT,
shader != nullptr ? shader->GetShader() : nullptr);
EmitCs([cShader = shader != nullptr ? shader->GetShader() : nullptr]
(DxvkContext* ctx) {
ctx->bindShader(VK_SHADER_STAGE_FRAGMENT_BIT, cShader);
});
}
}
@ -1275,8 +1423,10 @@ namespace dxvk {
if (m_state.cs.shader != shader) {
m_state.cs.shader = shader;
m_context->bindShader(VK_SHADER_STAGE_COMPUTE_BIT,
shader != nullptr ? shader->GetShader() : nullptr);
EmitCs([cShader = shader != nullptr ? shader->GetShader() : nullptr]
(DxvkContext* ctx) {
ctx->bindShader(VK_SHADER_STAGE_COMPUTE_BIT, cShader);
});
}
}
@ -1409,14 +1559,12 @@ namespace dxvk {
// unbind overlapping shader resource views. Since this comes
// with a large performance penalty we'll ignore this until an
// application actually relies on this behaviour.
Rc<DxvkFramebuffer> framebuffer = nullptr;
DxvkRenderTargets attachments;
// D3D11 doesn't have the concept of a framebuffer object,
// so we'll just create a new one every time the render
// target bindings are updated. Set up the attachments.
if (ppRenderTargetViews != nullptr || pDepthStencilView != nullptr) {
// D3D11 doesn't have the concept of a framebuffer object,
// so we'll just create a new one every time the render
// target bindings are updated. Set up the attachments.
DxvkRenderTargets attachments;
for (UINT i = 0; i < m_state.om.renderTargetViews.size(); i++) {
if (m_state.om.renderTargetViews.at(i) != nullptr)
attachments.setColorTarget(i, m_state.om.renderTargetViews.at(i)->GetImageView());
@ -1424,13 +1572,15 @@ namespace dxvk {
if (m_state.om.depthStencilView != nullptr)
attachments.setDepthTarget(m_state.om.depthStencilView->GetImageView());
if (attachments.hasAttachments())
framebuffer = m_device->createFramebuffer(attachments);
}
// Bind the framebuffer object to the context
m_context->bindFramebuffer(framebuffer);
// Create and bind the framebuffer object to the context
EmitCs([attachments, dev = m_device] (DxvkContext* ctx) {
Rc<DxvkFramebuffer> framebuffer = nullptr;
if (attachments.hasAttachments())
framebuffer = dev->createFramebuffer(attachments);
ctx->bindFramebuffer(framebuffer);
});
}
@ -1468,7 +1618,8 @@ namespace dxvk {
ID3D11BlendState* pBlendState,
const FLOAT BlendFactor[4],
UINT SampleMask) {
auto blendState = static_cast<D3D11BlendState*>(pBlendState);
Com<D3D11BlendState> blendState =
static_cast<D3D11BlendState*>(pBlendState);
if (m_state.om.cbState != blendState
|| m_state.om.sampleMask != SampleMask) {
@ -1478,12 +1629,31 @@ namespace dxvk {
if (blendState == nullptr)
blendState = m_defaultBlendState.ptr();
blendState->BindToContext(m_context, SampleMask);
EmitCs([
cBlendState = std::move(blendState),
cSampleMask = SampleMask
] (DxvkContext* ctx) {
cBlendState->BindToContext(ctx, cSampleMask);
});
}
if ((BlendFactor != nullptr) && (!std::memcmp(m_state.om.blendFactor, BlendFactor, 4 * sizeof(FLOAT)))) {
std::memcpy(m_state.om.blendFactor, BlendFactor, 4 * sizeof(FLOAT));
m_context->setBlendConstants(BlendFactor);
if (BlendFactor != nullptr) {
bool updateBlendFactor = false;
for (uint32_t i = 0; i < 4; i++) {
updateBlendFactor |= m_state.om.blendFactor[i] != BlendFactor[i];
m_state.om.blendFactor[i] = BlendFactor[i];
}
if (updateBlendFactor) {
EmitCs([
cBlendConstants = DxvkBlendConstants {
BlendFactor[0], BlendFactor[1],
BlendFactor[2], BlendFactor[3] }
] (DxvkContext* ctx) {
ctx->setBlendConstants(cBlendConstants);
});
}
}
}
@ -1491,7 +1661,8 @@ namespace dxvk {
void STDMETHODCALLTYPE D3D11DeviceContext::OMSetDepthStencilState(
ID3D11DepthStencilState* pDepthStencilState,
UINT StencilRef) {
auto depthStencilState = static_cast<D3D11DepthStencilState*>(pDepthStencilState);
Com<D3D11DepthStencilState> depthStencilState =
static_cast<D3D11DepthStencilState*>(pDepthStencilState);
if (m_state.om.dsState != depthStencilState) {
m_state.om.dsState = depthStencilState;
@ -1499,12 +1670,18 @@ namespace dxvk {
if (depthStencilState == nullptr)
depthStencilState = m_defaultDepthStencilState.ptr();
depthStencilState->BindToContext(m_context);
EmitCs([cDepthStencilState = std::move(depthStencilState)]
(DxvkContext* ctx) {
cDepthStencilState->BindToContext(ctx);
});
}
if (m_state.om.stencilRef != StencilRef) {
m_state.om.stencilRef = StencilRef;
m_context->setStencilReference(StencilRef);
EmitCs([cStencilRef = StencilRef] (DxvkContext* ctx) {
ctx->setStencilReference(cStencilRef);
});
}
}
@ -1563,7 +1740,8 @@ namespace dxvk {
void STDMETHODCALLTYPE D3D11DeviceContext::RSSetState(ID3D11RasterizerState* pRasterizerState) {
auto rasterizerState = static_cast<D3D11RasterizerState*>(pRasterizerState);
Com<D3D11RasterizerState> rasterizerState =
static_cast<D3D11RasterizerState*>(pRasterizerState);
if (m_state.rs.state != rasterizerState) {
m_state.rs.state = rasterizerState;
@ -1571,7 +1749,10 @@ namespace dxvk {
if (rasterizerState == nullptr)
rasterizerState = m_defaultRasterizerState.ptr();
rasterizerState->BindToContext(m_context);
EmitCs([cRasterizerState = std::move(rasterizerState)]
(DxvkContext* ctx) {
cRasterizerState->BindToContext(ctx);
});
// In D3D11, the rasterizer state defines
// whether the scissor test is enabled, so
@ -1690,13 +1871,14 @@ namespace dxvk {
if (Bindings[StartSlot + i] != newBuffer) {
Bindings[StartSlot + i] = newBuffer;
if (newBuffer != nullptr) {
m_context->bindResourceBuffer(
slotId + i, newBuffer->GetBufferSlice(0));
} else {
m_context->bindResourceBuffer(
slotId + i, DxvkBufferSlice());
}
EmitCs([
cSlotId = slotId + i,
cSlice = newBuffer != nullptr
? newBuffer->GetBufferSlice()
: DxvkBufferSlice()
] (DxvkContext* ctx) {
ctx->bindResourceBuffer(cSlotId, cSlice);
});
}
}
}
@ -1718,13 +1900,14 @@ namespace dxvk {
if (Bindings[StartSlot + i] != sampler) {
Bindings[StartSlot + i] = sampler;
if (sampler != nullptr) {
m_context->bindResourceSampler(
slotId + i, sampler->GetDXVKSampler());
} else {
m_context->bindResourceSampler(
slotId + i, m_defaultSampler);
}
EmitCs([
cSlotId = slotId + i,
cSampler = sampler != nullptr
? sampler->GetDXVKSampler()
: m_defaultSampler
] (DxvkContext* ctx) {
ctx->bindResourceSampler(cSlotId, cSampler);
});
}
}
}
@ -1749,17 +1932,23 @@ namespace dxvk {
if (resView != nullptr) {
// Figure out what we have to bind based on the resource type
if (resView->GetResourceType() == D3D11_RESOURCE_DIMENSION_BUFFER) {
m_context->bindResourceTexelBuffer(
slotId + i, resView->GetBufferView());
EmitCs([cSlotId = slotId + i, cView = resView->GetBufferView()]
(DxvkContext* ctx) {
ctx->bindResourceTexelBuffer(cSlotId, cView);
});
} else {
m_context->bindResourceImage(
slotId + i, resView->GetImageView());
EmitCs([cSlotId = slotId + i, cView = resView->GetImageView()]
(DxvkContext* ctx) {
ctx->bindResourceImage(cSlotId, cView);
});
}
} else {
// When unbinding a resource, it doesn't really matter if
// the resource type is correct, so we'll just bind a null
// image to the given resource slot
m_context->bindResourceImage(slotId + i, nullptr);
EmitCs([cSlotId = slotId + i] (DxvkContext* ctx) {
ctx->bindResourceImage(cSlotId, nullptr);
});
}
}
}
@ -1789,20 +1978,30 @@ namespace dxvk {
if (uav != nullptr) {
// Figure out what we have to bind based on the resource type
if (uav->GetResourceType() == D3D11_RESOURCE_DIMENSION_BUFFER) {
m_context->bindResourceTexelBuffer(
uavSlotId + i, uav->GetBufferView());
m_context->bindResourceBuffer(
ctrSlotId + i, uav->GetCounterSlice());
EmitCs([
cUavSlotId = uavSlotId + i,
cCtrSlotId = ctrSlotId + i,
cUavBuffer = uav->GetBufferView(),
cCtrBuffer = uav->GetCounterSlice()
] (DxvkContext* ctx) {
ctx->bindResourceTexelBuffer(cUavSlotId, cUavBuffer);
ctx->bindResourceBuffer (cCtrSlotId, cCtrBuffer);
});
} else {
m_context->bindResourceImage(
uavSlotId + i, uav->GetImageView());
EmitCs([cUavSlotId = uavSlotId + i, cUavImage = uav->GetImageView()]
(DxvkContext* ctx) {
ctx->bindResourceImage(cUavSlotId, cUavImage);
});
}
} else {
// When unbinding a resource, it doesn't really matter if
// the resource type is correct, so we'll just bind a null
// image to the given resource slot
m_context->bindResourceTexelBuffer(uavSlotId + i, nullptr);
m_context->bindResourceBuffer (ctrSlotId + i, DxvkBufferSlice());
EmitCs([cUavSlotId = uavSlotId + i, cCtrSlotId = ctrSlotId + i]
(DxvkContext* ctx) {
ctx->bindResourceTexelBuffer(cUavSlotId, nullptr);
ctx->bindResourceBuffer (cCtrSlotId, DxvkBufferSlice());
});
}
}
}
@ -1822,11 +2021,13 @@ namespace dxvk {
if (counterSlice.defined()
&& counterValue.atomicCtr != 0xFFFFFFFFu) {
m_context->updateBuffer(
counterSlice.buffer(),
counterSlice.offset(),
counterSlice.length(),
&counterValue);
EmitCs([counterSlice, counterValue] (DxvkContext* ctx) {
ctx->updateBuffer(
counterSlice.buffer(),
counterSlice.offset(),
counterSlice.length(),
&counterValue);
});
}
}
}
@ -1887,10 +2088,16 @@ namespace dxvk {
}
}
m_context->setViewports(
m_state.rs.numViewports,
viewports.data(),
scissors.data());
EmitCs([
cViewportCount = m_state.rs.numViewports,
cViewports = viewports,
cScissors = scissors
] (DxvkContext* ctx) {
ctx->setViewports(
cViewportCount,
cViewports.data(),
cScissors.data());
});
}

View File

@ -1,6 +1,7 @@
#pragma once
#include "../dxvk/dxvk_adapter.h"
#include "../dxvk/dxvk_cs.h"
#include "../dxvk/dxvk_device.h"
#include "d3d11_context_state.h"
@ -517,7 +518,7 @@ namespace dxvk {
D3D11Device* const m_parent;
Rc<DxvkDevice> m_device;
Rc<DxvkContext> m_context;
Rc<DxvkCsChunk> m_csChunk;
Rc<DxvkSampler> m_defaultSampler;
Com<D3D11BlendState> m_defaultBlendState;
@ -564,6 +565,16 @@ namespace dxvk {
Rc<DxvkSampler> CreateDefaultSampler();
/**
 * \brief Records a command into the current CS chunk
 *
 * Tries to append \c command to \c m_csChunk. If the chunk
 * does not accept it, the chunk is handed to \c EmitCsChunk
 * and the push is attempted one more time.
 * \param [in] command Command object to record. Call sites
 *    pass lambdas taking a \c DxvkContext* argument.
 */
template<typename Cmd>
void EmitCs(Cmd&& command) {
  // Common case: the current chunk accepted the command
  if (m_csChunk->push(command))
    return;

  // The chunk rejected the command (presumably because it is
  // full - TODO confirm), so emit it and try once more.
  // NOTE(review): the result of the second push is not checked.
  EmitCsChunk();
  m_csChunk->push(command);
}
virtual void EmitCsChunk() = 0;
};
}

View File

@ -7,13 +7,16 @@ namespace dxvk {
D3D11ImmediateContext::D3D11ImmediateContext(
D3D11Device* parent,
Rc<DxvkDevice> device)
: D3D11DeviceContext(parent, device) {
: D3D11DeviceContext(parent, device),
m_csThread(device->createContext()) {
}
D3D11ImmediateContext::~D3D11ImmediateContext() {
Flush();
SynchronizeCsThread();
SynchronizeDevice();
}
@ -41,12 +44,18 @@ namespace dxvk {
m_parent->FlushInitContext();
m_drawCount = 0;
m_device->submitCommandList(
m_context->endRecording(),
nullptr, nullptr);
// Add commands to flush the threaded
// context, then flush the command list
EmitCs([dev = m_device] (DxvkContext* ctx) {
dev->submitCommandList(
ctx->endRecording(),
nullptr, nullptr);
ctx->beginRecording(
dev->createCommandList());
});
m_context->beginRecording(
m_device->createCommandList());
EmitCsChunk();
}
@ -75,8 +84,8 @@ namespace dxvk {
pResource->GetType(&resourceDim);
if (resourceDim == D3D11_RESOURCE_DIMENSION_BUFFER) {
const D3D11Buffer* resource = static_cast<D3D11Buffer*>(pResource);
const Rc<DxvkBuffer> buffer = resource->GetBufferSlice().buffer();
D3D11Buffer* resource = static_cast<D3D11Buffer*>(pResource);
Rc<DxvkBuffer> buffer = resource->GetBufferSlice().buffer();
if (!(buffer->memFlags() & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) {
Logger::err("D3D11: Cannot map a device-local buffer");
@ -84,28 +93,47 @@ namespace dxvk {
}
if (pMappedResource == nullptr)
return S_OK;
return S_FALSE;
if (buffer->isInUse()) {
// Don't wait if the application tells us not to
if (MapFlags & D3D11_MAP_FLAG_DO_NOT_WAIT)
return DXGI_ERROR_WAS_STILL_DRAWING;
if (MapType == D3D11_MAP_WRITE_DISCARD) {
// Allocate a new backing slice for the buffer and set
// it as the 'new' mapped slice. This assumes that the
// only way to invalidate a buffer is by mapping it.
auto physicalSlice = buffer->allocPhysicalSlice();
physicalSlice.resource()->acquire();
// Invalidate the buffer in order to avoid synchronization
// if the application does not need the buffer contents to
// be preserved. The No Overwrite mode does not require any
// sort of synchronization, but should be used with care.
if (MapType == D3D11_MAP_WRITE_DISCARD) {
m_context->invalidateBuffer(buffer);
} else if (MapType != D3D11_MAP_WRITE_NO_OVERWRITE) {
this->Flush();
this->Synchronize();
resource->GetBufferInfo()->mappedSlice = physicalSlice;
EmitCs([
cBuffer = buffer,
cPhysicalSlice = physicalSlice
] (DxvkContext* ctx) {
ctx->invalidateBuffer(cBuffer, cPhysicalSlice);
cPhysicalSlice.resource()->release();
});
} else if (MapType != D3D11_MAP_WRITE_NO_OVERWRITE) {
// Synchronize with CS thread so that we know whether
// the buffer is currently in use by the GPU or not
SynchronizeCsThread();
if (buffer->isInUse()) {
if (MapFlags & D3D11_MAP_FLAG_DO_NOT_WAIT)
return DXGI_ERROR_WAS_STILL_DRAWING;
Flush();
SynchronizeDevice();
}
}
pMappedResource->pData = buffer->mapPtr(0);
pMappedResource->RowPitch = buffer->info().size;
pMappedResource->DepthPitch = buffer->info().size;
// Use map pointer from previous map operation. This
// way we don't have to synchronize with the CS thread
// if the map mode is D3D11_MAP_WRITE_NO_OVERWRITE.
const DxvkPhysicalBufferSlice physicalSlice
= resource->GetBufferInfo()->mappedSlice;
pMappedResource->pData = physicalSlice.mapPtr(0);
pMappedResource->RowPitch = physicalSlice.length();
pMappedResource->DepthPitch = physicalSlice.length();
return S_OK;
} else {
// Mapping an image is sadly not as simple as mapping a buffer
@ -134,36 +162,51 @@ namespace dxvk {
const VkExtent3D levelExtent = textureInfo->image
->mipLevelExtent(textureInfo->mappedSubresource.mipLevel);
const VkExtent3D blockCount = {
levelExtent.width / formatInfo->blockSize.width,
levelExtent.height / formatInfo->blockSize.height,
levelExtent.depth / formatInfo->blockSize.depth };
const VkExtent3D blockCount = util::computeBlockCount(
levelExtent, formatInfo->blockSize);
DxvkPhysicalBufferSlice physicalSlice;
// When using any map mode which requires the image contents
// to be preserved, copy image contents into the buffer.
if (MapType != D3D11_MAP_WRITE_DISCARD) {
// to be preserved, copy the image's contents into the buffer.
if (MapType == D3D11_MAP_WRITE_DISCARD) {
physicalSlice = textureInfo->imageBuffer->allocPhysicalSlice();
physicalSlice.resource()->acquire();
EmitCs([
cImageBuffer = textureInfo->imageBuffer,
cPhysicalSlice = physicalSlice
] (DxvkContext* ctx) {
ctx->invalidateBuffer(cImageBuffer, cPhysicalSlice);
cPhysicalSlice.resource()->release();
});
} else {
const VkImageSubresourceLayers subresourceLayers = {
textureInfo->mappedSubresource.aspectMask,
textureInfo->mappedSubresource.mipLevel,
textureInfo->mappedSubresource.arrayLayer, 1 };
m_context->copyImageToBuffer(
textureInfo->imageBuffer, 0, { 0u, 0u },
textureInfo->image, subresourceLayers,
VkOffset3D { 0, 0, 0 }, levelExtent);
}
if (textureInfo->imageBuffer->isInUse()) {
if (MapType == D3D11_MAP_WRITE_DISCARD) {
m_context->invalidateBuffer(textureInfo->imageBuffer);
} else {
this->Flush();
this->Synchronize();
}
EmitCs([
cImageBuffer = textureInfo->imageBuffer,
cImage = textureInfo->image,
cSubresources = subresourceLayers,
cLevelExtent = levelExtent
] (DxvkContext* ctx) {
ctx->copyImageToBuffer(
cImageBuffer, 0, VkExtent2D { 0u, 0u },
cImage, cSubresources, VkOffset3D { 0, 0, 0 },
cLevelExtent);
});
Flush();
SynchronizeCsThread();
SynchronizeDevice();
physicalSlice = textureInfo->imageBuffer->slice();
}
// Set up map pointer. Data is tightly packed within the mapped buffer.
pMappedResource->pData = textureInfo->imageBuffer->mapPtr(0);
pMappedResource->pData = physicalSlice.mapPtr(0);
pMappedResource->RowPitch = formatInfo->elementSize * blockCount.width;
pMappedResource->DepthPitch = formatInfo->elementSize * blockCount.width * blockCount.height;
return S_OK;
@ -191,16 +234,41 @@ namespace dxvk {
textureInfo->mappedSubresource.mipLevel,
textureInfo->mappedSubresource.arrayLayer, 1 };
m_context->copyBufferToImage(
textureInfo->image, subresourceLayers,
VkOffset3D { 0, 0, 0 }, levelExtent,
textureInfo->imageBuffer, 0, { 0u, 0u });
EmitCs([
cSrcBuffer = textureInfo->imageBuffer,
cDstImage = textureInfo->image,
cDstLayers = subresourceLayers,
cDstLevelExtent = levelExtent
] (DxvkContext* ctx) {
ctx->copyBufferToImage(cDstImage, cDstLayers,
VkOffset3D { 0, 0, 0 }, cDstLevelExtent,
cSrcBuffer, 0, { 0u, 0u });
});
}
}
void D3D11ImmediateContext::Synchronize() {
/**
 * \brief Synchronizes with the command stream thread
 *
 * Emits the chunk that is currently being recorded and then
 * calls \c synchronize on the CS thread object.
 * NOTE(review): presumably this blocks until the CS thread has
 * executed everything dispatched so far - confirm against
 * the \c DxvkCsThread implementation.
 */
void D3D11ImmediateContext::SynchronizeCsThread() {
// Dispatch current chunk so that all commands
// recorded prior to this function will be run
EmitCsChunk();
// Must come after the dispatch above so that the
// commands of the current chunk are included
m_csThread.synchronize();
}
/**
 * \brief Waits for the device to become idle
 *
 * Only calls \c waitForIdle on the DXVK device. This function
 * does not emit or flush any recorded commands by itself.
 */
void D3D11ImmediateContext::SynchronizeDevice() {
// FIXME: waiting until the device has finished executing
// *all* pending commands is too pessimistic. Instead, we
// should wait for individual command submissions to complete.
// This will require changes in the DxvkDevice class.
m_device->waitForIdle();
}
void D3D11ImmediateContext::EmitCsChunk() {
  // An empty chunk is kept for further recording
  // instead of being handed over to the CS thread.
  if (m_csChunk->commandCount() == 0)
    return;

  // Pass the recorded chunk on and continue recording
  // into whatever chunk the CS thread returns to us.
  m_csChunk = m_csThread.dispatchChunk(std::move(m_csChunk));
}
}

View File

@ -42,10 +42,16 @@ namespace dxvk {
ID3D11Resource* pResource,
UINT Subresource) final;
void Synchronize();
void SynchronizeCsThread();
private:
DxvkCsThread m_csThread;
void SynchronizeDevice();
void EmitCsChunk();
};
}

View File

@ -1,4 +1,5 @@
#include "d3d11_device.h"
#include "d3d11_context_imm.h"
#include "d3d11_present.h"
namespace dxvk {
@ -57,7 +58,11 @@ namespace dxvk {
Com<ID3D11DeviceContext> deviceContext = nullptr;
m_device->GetImmediateContext(&deviceContext);
deviceContext->Flush();
// The presentation code is run from the main rendering thread
// rather than the command stream thread, so we synchronize.
auto immediateContext = static_cast<D3D11ImmediateContext*>(deviceContext.ptr());
immediateContext->Flush();
immediateContext->SynchronizeCsThread();
return S_OK;
}

View File

@ -262,6 +262,18 @@ namespace dxvk {
return m_buffer->info();
}
/**
* \brief Buffer sub slice
*
* Takes a sub slice from this slice.
*
* NOTE(review): \c offset is forwarded to the slice constructor
* unchanged and \c length is not range-checked here. If this
* slice does not start at the beginning of the buffer, confirm
* whether the offset is meant to be relative to this slice or
* to the underlying buffer.
* \param [in] offset Sub slice offset
* \param [in] length Sub slice length
* \returns The sub slice object
*/
DxvkBufferSlice subSlice(VkDeviceSize offset, VkDeviceSize length) const {
return DxvkBufferSlice(m_buffer, offset, length);
}
/**
* \brief Checks whether the slice is valid
*

View File

@ -8,6 +8,17 @@
namespace dxvk {
/**
 * \brief Blend constants
 *
 * Holds the four components of a blend factor,
 * declared in red, green, blue, alpha order.
 */
struct DxvkBlendConstants {
  float r;
  float g;
  float b;
  float a;
};
/**
* \brief Input assembly state
*

View File

@ -732,9 +732,11 @@ namespace dxvk {
}
void DxvkContext::invalidateBuffer(const Rc<DxvkBuffer>& buffer) {
void DxvkContext::invalidateBuffer(
const Rc<DxvkBuffer>& buffer,
const DxvkPhysicalBufferSlice& slice) {
// Allocate new backing resource
buffer->rename(buffer->allocPhysicalSlice());
buffer->rename(slice);
// We also need to update all bindings that the buffer
// may be bound to either directly or through views.
@ -890,53 +892,23 @@ namespace dxvk {
const DxvkFormatInfo* formatInfo
= imageFormatInfo(image->info().format);
VkExtent3D elementCount = imageExtent;
elementCount.depth *= subresources.layerCount;
// Align image extent to a full block. This is necessary in
// case the image size is not a multiple of the block size.
elementCount.width += formatInfo->blockSize.width - 1;
elementCount.height += formatInfo->blockSize.height - 1;
elementCount.depth += formatInfo->blockSize.depth - 1;
elementCount.width /= formatInfo->blockSize.width;
elementCount.height /= formatInfo->blockSize.height;
elementCount.depth /= formatInfo->blockSize.depth;
VkDeviceSize bytesPerRow = elementCount.width * formatInfo->elementSize;
VkDeviceSize bytesPerLayer = elementCount.height * bytesPerRow;
VkDeviceSize bytesTotal = elementCount.depth * bytesPerLayer;
VkExtent3D elementCount = util::computeBlockCount(
imageExtent, formatInfo->blockSize);
elementCount.depth *= subresources.layerCount;
// Allocate staging buffer memory for the image data. The
// pixels or blocks will be tightly packed within the buffer.
DxvkStagingBufferSlice slice = m_cmd->stagedAlloc(bytesTotal);
const DxvkStagingBufferSlice slice = m_cmd->stagedAlloc(
formatInfo->elementSize * util::flattenImageExtent(elementCount));
auto dstData = reinterpret_cast<char*>(slice.mapPtr);
auto srcData = reinterpret_cast<const char*>(data);
// If the application provides tightly packed data as well,
// we can minimize the number of memcpy calls in order to
// improve performance.
bool useDirectCopy = true;
useDirectCopy &= (pitchPerLayer == bytesPerLayer) || (elementCount.depth == 1);
useDirectCopy &= (pitchPerRow == bytesPerRow) || (elementCount.height == 1);
if (useDirectCopy) {
std::memcpy(dstData, srcData, bytesTotal);
} else {
for (uint32_t i = 0; i < elementCount.depth; i++) {
for (uint32_t j = 0; j < elementCount.height; j++) {
std::memcpy(
dstData + j * bytesPerRow,
srcData + j * pitchPerRow,
bytesPerRow);
}
srcData += pitchPerLayer;
dstData += bytesPerLayer;
}
}
util::packImageData(dstData, srcData,
elementCount, formatInfo->elementSize,
pitchPerRow, pitchPerLayer);
// Prepare the image layout. If the given extent covers
// the entire image, we may discard its previous contents.
@ -1008,9 +980,9 @@ namespace dxvk {
void DxvkContext::setBlendConstants(
const float blendConstants[4]) {
const DxvkBlendConstants& blendConstants) {
for (uint32_t i = 0; i < 4; i++)
m_state.om.blendConstants[i] = blendConstants[i];
m_state.om.blendConstants = blendConstants;
this->updateBlendConstants();
}
@ -1406,7 +1378,7 @@ namespace dxvk {
void DxvkContext::updateBlendConstants() {
m_cmd->cmdSetBlendConstants(m_state.om.blendConstants);
m_cmd->cmdSetBlendConstants(&m_state.om.blendConstants.r);
}

View File

@ -352,7 +352,7 @@ namespace dxvk {
/**
* \brief Invalidates a buffer's contents
*
* Discards a buffer's contents by allocating a new
* Discards a buffer's contents by replacing the
* backing resource. This allows the host to access
* the buffer while the GPU is still accessing the
* original backing resource.
@ -360,9 +360,11 @@ namespace dxvk {
* \warning If the buffer is used by another context,
* invalidating it will result in undefined behaviour.
* \param [in] buffer The buffer to invalidate
* \param [in] slice New physical buffer slice
*/
void invalidateBuffer(
const Rc<DxvkBuffer>& buffer);
const Rc<DxvkBuffer>& buffer,
const DxvkPhysicalBufferSlice& slice);
/**
* \brief Resolves a multisampled image resource
@ -438,7 +440,7 @@ namespace dxvk {
* \param [in] blendConstants Blend constants
*/
void setBlendConstants(
const float blendConstants[4]);
const DxvkBlendConstants& blendConstants);
/**
* \brief Sets stencil reference

View File

@ -57,8 +57,8 @@ namespace dxvk {
struct DxvkOutputMergerState {
Rc<DxvkFramebuffer> framebuffer;
float blendConstants[4];
uint32_t stencilReference;
DxvkBlendConstants blendConstants;
uint32_t stencilReference;
};

View File

@ -25,9 +25,7 @@ namespace dxvk {
DxvkCsThread::DxvkCsThread(const Rc<DxvkContext>& context)
: m_context(context),
m_curChunk(new DxvkCsChunk()),
m_thread([this] { threadFunc(); }) {
: m_context(context), m_thread([this] { threadFunc(); }) {
}
@ -42,18 +40,37 @@ namespace dxvk {
}
void DxvkCsThread::dispatchChunk(Rc<DxvkCsChunk>&& chunk) {
Rc<DxvkCsChunk> DxvkCsThread::dispatchChunk(Rc<DxvkCsChunk>&& chunk) {
Rc<DxvkCsChunk> nextChunk = nullptr;
{ std::unique_lock<std::mutex> lock(m_mutex);
m_chunks.push(std::move(m_curChunk));
m_chunksQueued.push(std::move(chunk));
m_chunksPending += 1;
// If a large number of chunks are queued up, wait for
// some of them to be processed in order to avoid memory
// leaks, stuttering, input lag and similar issues.
if (m_chunksPending >= MaxChunksInFlight) {
m_condOnSync.wait(lock, [this] {
return (m_chunksPending < MaxChunksInFlight / 2)
|| (m_stopped.load());
});
}
if (m_chunksUnused.size() != 0) {
nextChunk = std::move(m_chunksUnused.front());
m_chunksUnused.pop();
}
}
// Wake CS thread
m_condOnAdd.notify_one();
}
void DxvkCsThread::flush() {
dispatchChunk(std::move(m_curChunk));
m_curChunk = new DxvkCsChunk();
// Allocate new chunk if needed
if (nextChunk == nullptr)
nextChunk = new DxvkCsChunk();
return nextChunk;
}
@ -61,27 +78,32 @@ namespace dxvk {
std::unique_lock<std::mutex> lock(m_mutex);
m_condOnSync.wait(lock, [this] {
return m_chunks.size() == 0;
return m_chunksPending == 0;
});
}
void DxvkCsThread::threadFunc() {
Rc<DxvkCsChunk> chunk;
while (!m_stopped.load()) {
Rc<DxvkCsChunk> chunk;
{ std::unique_lock<std::mutex> lock(m_mutex);
if (chunk != nullptr) {
m_chunksPending -= 1;
m_chunksUnused.push(std::move(chunk));
m_condOnSync.notify_one();
}
m_condOnAdd.wait(lock, [this] {
return m_stopped.load() || (m_chunks.size() != 0);
return m_stopped.load() || (m_chunksQueued.size() != 0);
});
if (m_chunks.size() != 0) {
chunk = std::move(m_chunks.front());
m_chunks.pop();
if (m_chunks.size() == 0)
m_condOnSync.notify_one();
if (m_chunksQueued.size() != 0) {
chunk = std::move(m_chunksQueued.front());
m_chunksQueued.pop();
} else {
chunk = nullptr;
}
}

View File

@ -65,13 +65,23 @@ namespace dxvk {
* Stores a list of commands.
*/
class DxvkCsChunk : public RcObject {
constexpr static size_t MaxCommands = 64;
constexpr static size_t MaxCommands = 1024;
constexpr static size_t MaxBlockSize = 64 * MaxCommands;
public:
DxvkCsChunk();
~DxvkCsChunk();
/**
* \brief Number of commands recorded to the chunk
*
* Can be used to check whether the chunk needs to
* be dispatched or just to keep track of statistics.
* \returns Current value of the chunk's command counter
*/
size_t commandCount() const {
return m_commandCount;
}
/**
* \brief Tries to add a command to the chunk
*
@ -128,42 +138,23 @@ namespace dxvk {
* commands on a DXVK context.
*/
class DxvkCsThread {
// Limit the number of chunks in the queue
// to prevent memory leaks, stuttering etc.
constexpr static uint32_t MaxChunksInFlight = 16;
public:
DxvkCsThread(const Rc<DxvkContext>& context);
~DxvkCsThread();
/**
* \brief Dispatches a new command
*
* Adds the command to the current chunk and
* dispatches the chunk in case it is full.
* \param [in] command The command
*/
template<typename T>
void dispatch(T&& command) {
while (!m_curChunk->push(command))
this->flush();
}
/**
* \brief Dispatches an entire chunk
*
* Can be used to efficiently play back large
* command lists recorded on another thread.
* \param [in] chunk The chunk to dispatch
* \returns New chunk for the next submissions
*/
void dispatchChunk(Rc<DxvkCsChunk>&& chunk);
/**
* \brief Dispatches current chunk
*
* Adds the current chunk to the dispatch
* queue and makes an empty chunk current.
* Call this before \ref synchronize.
*/
void flush();
Rc<DxvkCsChunk> dispatchChunk(Rc<DxvkCsChunk>&& chunk);
/**
* \brief Synchronizes with the thread
@ -179,17 +170,15 @@ namespace dxvk {
const Rc<DxvkContext> m_context;
// Chunk that is being recorded
Rc<DxvkCsChunk> m_curChunk;
// Chunks that are executing
std::atomic<bool> m_stopped = { false };
std::mutex m_mutex;
std::condition_variable m_condOnAdd;
std::condition_variable m_condOnSync;
std::queue<Rc<DxvkCsChunk>> m_chunks;
std::queue<Rc<DxvkCsChunk>> m_chunksQueued;
std::queue<Rc<DxvkCsChunk>> m_chunksUnused;
std::thread m_thread;
std::thread m_thread;
uint32_t m_chunksPending = 0;
void threadFunc();

View File

@ -1,3 +1,5 @@
#include <cstring>
#include "dxvk_util.h"
namespace dxvk::util {
@ -34,30 +36,36 @@ namespace dxvk::util {
return mipCnt;
}
}
bool operator == (VkExtent3D a, VkExtent3D b) {
return a.width == b.width
&& a.height == b.height
&& a.depth == b.depth;
}
bool operator != (VkExtent3D a, VkExtent3D b) {
return a.width != b.width
|| a.height != b.height
|| a.depth != b.depth;
}
bool operator == (VkExtent2D a, VkExtent2D b) {
return a.width == b.width
&& a.height == b.height;
}
bool operator != (VkExtent2D a, VkExtent2D b) {
return a.width != b.width
|| a.height != b.height;
void packImageData(
char* dstData,
const char* srcData,
VkExtent3D blockCount,
VkDeviceSize blockSize,
VkDeviceSize pitchPerRow,
VkDeviceSize pitchPerLayer) {
const VkDeviceSize bytesPerRow = blockCount.width * blockSize;
const VkDeviceSize bytesPerLayer = blockCount.height * bytesPerRow;
const VkDeviceSize bytesTotal = blockCount.depth * bytesPerLayer;
const bool directCopy = ((bytesPerRow == pitchPerRow ) || (blockCount.height == 1))
&& ((bytesPerLayer == pitchPerLayer) || (blockCount.depth == 1));
if (directCopy) {
std::memcpy(dstData, srcData, bytesTotal);
} else {
for (uint32_t i = 0; i < blockCount.depth; i++) {
for (uint32_t j = 0; j < blockCount.height; j++) {
std::memcpy(
dstData + j * bytesPerRow,
srcData + j * pitchPerRow,
bytesPerRow);
}
srcData += pitchPerLayer;
dstData += bytesPerLayer;
}
}
}
}

View File

@ -21,10 +21,75 @@ namespace dxvk::util {
*/
uint32_t computeMipLevelCount(VkExtent3D imageSize);
/**
* \brief Writes tightly packed image data to a buffer
*
* \param [in] dstData Destination buffer pointer
* \param [in] srcData Pointer to source data
* \param [in] blockCount Number of blocks to copy
* \param [in] blockSize Number of bytes per block
* \param [in] pitchPerRow Number of bytes between rows
* \param [in] pitchPerLayer Number of bytes between layers
*/
void packImageData(
char* dstData,
const char* srcData,
VkExtent3D blockCount,
VkDeviceSize blockSize,
VkDeviceSize pitchPerRow,
VkDeviceSize pitchPerLayer);
/**
 * \brief Computes the size of an image in blocks
 *
 * Divides each dimension of the image by the matching
 * block dimension and rounds up, so that a block which
 * is only partially covered still counts as one block.
 * \param [in] imageSize The image size
 * \param [in] blockSize Size per pixel block
 * \returns Number of blocks in the image
 */
inline VkExtent3D computeBlockCount(VkExtent3D imageSize, VkExtent3D blockSize) {
  VkExtent3D blockCount = imageSize;

  // Pad each dimension up to the next block boundary
  blockCount.width  += blockSize.width  - 1;
  blockCount.height += blockSize.height - 1;
  blockCount.depth  += blockSize.depth  - 1;

  blockCount.width  /= blockSize.width;
  blockCount.height /= blockSize.height;
  blockCount.depth  /= blockSize.depth;
  return blockCount;
}
/**
 * \brief Computes number of pixels or blocks of an image
 *
 * Multiplies width, height and depth of the given
 * extent. The product is computed in 32 bits.
 * \param [in] extent Image extent, in pixels or blocks
 * \returns Flattened number of pixels or blocks
 */
inline uint32_t flattenImageExtent(VkExtent3D extent) {
  uint32_t count = extent.width;
  count *= extent.height;
  count *= extent.depth;
  return count;
}
}
bool operator == (VkExtent3D a, VkExtent3D b);
bool operator != (VkExtent3D a, VkExtent3D b);
bool operator == (VkExtent2D a, VkExtent2D b);
bool operator != (VkExtent2D a, VkExtent2D b);
// Two 3D extents are equal if all three dimensions match
inline bool operator == (VkExtent3D a, VkExtent3D b) {
  if (a.width != b.width)
    return false;

  if (a.height != b.height)
    return false;

  return a.depth == b.depth;
}
// Two 3D extents differ if at least one dimension differs
inline bool operator != (VkExtent3D a, VkExtent3D b) {
  const bool allEqual = a.width  == b.width
                     && a.height == b.height
                     && a.depth  == b.depth;
  return !allEqual;
}
// Two 2D extents are equal if both dimensions match
inline bool operator == (VkExtent2D a, VkExtent2D b) {
  if (a.width != b.width)
    return false;

  return a.height == b.height;
}
// Two 2D extents differ if at least one dimension differs
inline bool operator != (VkExtent2D a, VkExtent2D b) {
  const bool allEqual = a.width  == b.width
                     && a.height == b.height;
  return !allEqual;
}