diff --git a/src/d3d9/d3d9_device.cpp b/src/d3d9/d3d9_device.cpp
index 8696ddab8..0aeb09518 100644
--- a/src/d3d9/d3d9_device.cpp
+++ b/src/d3d9/d3d9_device.cpp
@@ -80,6 +80,7 @@ namespace dxvk {
     ] (DxvkContext* ctx) {
       ctx->beginRecording(cDevice->createCommandList());
 
+      // Disable logic op once and for all.
       DxvkLogicOpState loState;
       loState.enableLogicOp = VK_FALSE;
       loState.logicOp = VK_LOGIC_OP_CLEAR;
@@ -93,6 +94,8 @@ namespace dxvk {
 
     m_dxsoOptions = DxsoOptions(this, m_d3d9Options);
 
+    // Check if VK_EXT_robustness2 is supported, so we can optimize the number of constants we need to copy.
+    // Also check the required alignments.
     const bool supportsRobustness2 = m_dxvkDevice->features().extRobustness2.robustBufferAccess2;
     bool useRobustConstantAccess = supportsRobustness2;
     if (useRobustConstantAccess) {
@@ -108,8 +111,9 @@ namespace dxvk {
       }
       useRobustConstantAccess &= m_psLayout.totalSize() % m_robustUBOAlignment == 0;
     }
-    
+
     if (!useRobustConstantAccess) {
+      // Disable optimized constant copies, we always have to copy all constants.
       m_vsFloatConstsCount = m_vsLayout.floatCount;
       m_vsIntConstsCount = m_vsLayout.intCount;
       m_vsBoolConstsCount = m_vsLayout.boolCount;
@@ -120,13 +124,15 @@ namespace dxvk {
       }
     }
 
+    // Check for VK_EXT_graphics_pipeline_library
     m_usingGraphicsPipelines = dxvkDevice->features().extGraphicsPipelineLibrary.graphicsPipelineLibrary;
 
+    // Check for VK_EXT_depth_bias_control and set up initial state
     m_depthBiasRepresentation = { VK_DEPTH_BIAS_REPRESENTATION_LEAST_REPRESENTABLE_VALUE_FORMAT_EXT, false };
     if (dxvkDevice->features().extDepthBiasControl.depthBiasControl) {
       if (dxvkDevice->features().extDepthBiasControl.depthBiasExact)
         m_depthBiasRepresentation.depthBiasExact = true;
-      
+
       if (dxvkDevice->features().extDepthBiasControl.floatRepresentation) {
         m_depthBiasRepresentation.depthBiasRepresentation = VK_DEPTH_BIAS_REPRESENTATION_FLOAT_EXT;
         m_depthBiasScale = 1.0f;
@@ -222,7 +228,7 @@ namespace dxvk {
       *ppvObject = ref(this);
       return S_OK;
     }
-    
+
     if (riid == __uuidof(IDxvkD3D8Bridge)) {
       *ppvObject = ref(&m_d3d8Bridge);
       return S_OK;
@@ -490,7 +496,7 @@ namespace dxvk {
      * an application should release any explicit render targets,
      * depth stencil surfaces, additional swap chains, state blocks,
      * and D3DPOOL_DEFAULT resources associated with the device.
-     * 
+     *
      * We have to check after ResetState clears the references held by SetTexture, etc.
      * This matches what Windows D3D9 does.
      */
@@ -628,11 +634,14 @@ namespace dxvk {
     try {
       void* initialData = nullptr;
 
+      // On Windows Vista (so most likely D3D9Ex), pSharedHandle can be used to pass initial data for a texture,
+      // but only for a very specific type of texture.
       if (Pool == D3DPOOL_SYSTEMMEM && Levels == 1 && pSharedHandle != nullptr) {
         initialData = *(reinterpret_cast<void**>(pSharedHandle));
         pSharedHandle = nullptr;
       }
 
+      // Shared textures have to be in POOL_DEFAULT
       if (pSharedHandle != nullptr && Pool != D3DPOOL_DEFAULT)
         return D3DERR_INVALIDCALL;
 
@@ -699,7 +708,8 @@
       const Com<D3D9Texture3D> texture = new D3D9Texture3D(this, &desc);
       m_initializer->InitTexture(texture->GetCommonTexture());
       *ppVolumeTexture = texture.ref();
-      
+
+      // The device cannot be reset if there are any remaining default resources
       if (desc.Pool == D3DPOOL_DEFAULT)
         m_losableResourceCounter++;
 
@@ -756,7 +766,8 @@
       const Com<D3D9TextureCube> texture = new D3D9TextureCube(this, &desc);
       m_initializer->InitTexture(texture->GetCommonTexture());
       *ppCubeTexture = texture.ref();
-      
+
+      // The device cannot be reset if there are any remaining default resources
       if (desc.Pool == D3DPOOL_DEFAULT)
         m_losableResourceCounter++;
 
@@ -799,6 +810,8 @@
       const Com<D3D9VertexBuffer> buffer = new D3D9VertexBuffer(this, &desc);
       m_initializer->InitBuffer(buffer->GetCommonBuffer());
       *ppVertexBuffer = buffer.ref();
+
+      // The device cannot be reset if there are any remaining default resources
       if (desc.Pool == D3DPOOL_DEFAULT)
         m_losableResourceCounter++;
 
@@ -840,6 +853,8 @@
       const Com<D3D9IndexBuffer> buffer = new D3D9IndexBuffer(this, &desc);
       m_initializer->InitBuffer(buffer->GetCommonBuffer());
       *ppIndexBuffer = buffer.ref();
+
+      // The device cannot be reset if there are any remaining default resources
       if (desc.Pool == D3DPOOL_DEFAULT)
         m_losableResourceCounter++;
 
@@ -963,8 +978,10 @@
         0u
       };
     }
 
+    // The source surface must be in D3DPOOL_SYSTEMMEM, so we treat it as just another texture upload with a different source.
     UpdateTextureFromBuffer(dstTextureInfo, srcTextureInfo, dst->GetSubresource(), src->GetSubresource(), srcOffset, extent, dstOffset);
 
+    // The contents of the mapping no longer match the image.
     dstTextureInfo->SetNeedsReadback(dst->GetSubresource(), true);
 
     if (dstTextureInfo->IsAutomaticMip())
@@ -1008,6 +1025,8 @@
     if (srcFirstMipExtent != dstFirstMipExtent) {
       // UpdateTexture can be used with textures that have different mip lengths.
       // It will either match the top mips or the bottom ones.
+      // If the largest mip maps don't match in size, we try to take the smallest ones
+      // of the source.
       srcMipOffset = srcTexInfo->Desc()->MipLevels - mipLevels;
       srcFirstMipExtent = util::computeMipLevelExtent(srcTexInfo->GetExtent(), srcMipOffset);
 
@@ -1018,10 +1037,12 @@
       return D3DERR_INVALIDCALL;
 
     for (uint32_t a = 0; a < arraySlices; a++) {
+      // The docs claim that the dirty box is just a performance optimization, but in practice games rely on it.
       const D3DBOX& box = srcTexInfo->GetDirtyBox(a);
       if (box.Left >= box.Right || box.Top >= box.Bottom || box.Front >= box.Back)
         continue;
 
+      // The dirty box is only tracked for mip level 0
       VkExtent3D mip0Extent = {
         uint32_t(box.Right - box.Left),
         uint32_t(box.Bottom - box.Top),
@@ -1030,13 +1051,17 @@
       VkOffset3D mip0Offset = { int32_t(box.Left), int32_t(box.Top), int32_t(box.Front) };
 
       for (uint32_t dstMip = 0; dstMip < mipLevels; dstMip++) {
+        // Scale the dirty box for the respective mip level
         uint32_t srcMip = dstMip + srcMipOffset;
         uint32_t srcSubresource = srcTexInfo->CalcSubresource(a, srcMip);
         uint32_t dstSubresource = dstTexInfo->CalcSubresource(a, dstMip);
         VkExtent3D extent = util::computeMipLevelExtent(mip0Extent, srcMip);
         VkOffset3D offset = util::computeMipLevelOffset(mip0Offset, srcMip);
 
+        // The source surface must be in D3DPOOL_SYSTEMMEM, so we treat it as just another texture upload with a different source.
         UpdateTextureFromBuffer(dstTexInfo, srcTexInfo, dstSubresource, srcSubresource, offset, extent, offset);
+
+        // The contents of the mapping no longer match the image.
         dstTexInfo->SetNeedsReadback(dstSubresource, true);
       }
     }
@@ -1529,7 +1554,7 @@
     RECT scissorRect;
     scissorRect.left = 0;
     scissorRect.top = 0;
-    
+
     if (likely(rt != nullptr)) {
       auto rtSize = rt->GetSurfaceExtent();
       viewport.Width = rtSize.width;
@@ -1560,15 +1585,18 @@
       return D3D_OK;
 
     // Do a strong flush if the first render target is changed.
-    ConsiderFlush(RenderTargetIndex == 0 
+    ConsiderFlush(RenderTargetIndex == 0
       ? GpuFlushType::ImplicitStrongHint
       : GpuFlushType::ImplicitWeakHint);
 
     m_flags.set(D3D9DeviceFlag::DirtyFramebuffer);
 
     m_state.renderTargets[RenderTargetIndex] = rt;
 
+    // Update feedback loop tracking bitmasks
     UpdateActiveRTs(RenderTargetIndex);
 
+    // Update render target alpha swizzle bitmask if we need to fix up the alpha channel
+    // for XRGB formats
     uint32_t originalAlphaSwizzleRTs = m_alphaSwizzleRTs;
 
     m_alphaSwizzleRTs &= ~(1 << RenderTargetIndex);
@@ -1643,6 +1671,7 @@
     ConsiderFlush(GpuFlushType::ImplicitWeakHint);
 
     m_flags.set(D3D9DeviceFlag::DirtyFramebuffer);
 
+    // Update depth bias if necessary
     if (ds != nullptr && m_depthBiasRepresentation.depthBiasRepresentation != VK_DEPTH_BIAS_REPRESENTATION_FLOAT_EXT) {
       const int32_t vendorId = m_dxvkDevice->adapter()->deviceProperties().vendorID;
       const bool exact = m_depthBiasRepresentation.depthBiasExact;
@@ -1789,7 +1818,7 @@
     const Rc<DxvkImageView>& imageView,
           VkImageAspectFlags aspectMask,
           VkClearValue clearValue) {
-    
+
     VkExtent3D imageExtent = imageView->mipLevelExtent(0);
     extent.width = std::min(imageExtent.width, extent.width);
     extent.height = std::min(imageExtent.height, extent.height);
@@ -3519,6 +3548,9 @@
 
     m_state.indices = buffer;
 
+    // Don't unbind the buffer if the game sets a nullptr here.
+    // Operation Flashpoint Red River breaks if we do that.
+    // EndScene will clean it up if necessary.
     if (buffer != nullptr)
       BindIndices();
 
@@ -4212,17 +4244,24 @@
     // We need to check our ops and disable respective stages.
     // Given we have transition from a null resource to
     // a valid resource or vice versa.
-    if (StateSampler < caps::MaxTexturesPS) {
-      const uint32_t offset = StateSampler * 2;
+    const bool isPSSampler = StateSampler < caps::MaxTexturesPS;
+    if (isPSSampler) {
       const uint32_t textureType = newTexture != nullptr
         ? uint32_t(newTexture->GetType() - D3DRTYPE_TEXTURE)
         : 0;
+      // There are 4 texture types, so we need 2 bits.
+      const uint32_t offset = StateSampler * 2;
       const uint32_t textureBitMask = 0b11u << offset;
       const uint32_t textureBits = textureType << offset;
 
+      // In fixed function shaders and SM < 3, we put the type mask
+      // into a spec constant to select the used sampler type.
       m_textureTypes &= ~textureBitMask;
       m_textureTypes |= textureBits;
 
+      // If we either bind a new texture or unbind the old one,
+      // we need to update the fixed function shader
+      // because we generate a different shader based on whether each texture is bound.
       if (newTexture == nullptr || oldTexture == nullptr)
         m_flags.set(D3D9DeviceFlag::DirtyFFPixelShader);
     }
@@ -4512,7 +4551,7 @@
   }
 
 
-  bool D3D9DeviceEx::ShouldRecord() {
+  inline bool D3D9DeviceEx::ShouldRecord() {
     return m_recorder != nullptr && !m_recorder->IsApplying();
   }
 
@@ -4651,7 +4690,6 @@
     }
 
     auto& formatMapping = pResource->GetFormatMapping();
-
     const DxvkFormatInfo* formatInfo = formatMapping.IsValid()
       ? lookupFormatInfo(formatMapping.FormatColor)
       : UnsupportedFormatInfo(pResource->Desc()->Format);
@@ -4663,6 +4701,7 @@
     bool fullResource = pBox == nullptr;
     if (unlikely(!fullResource)) {
+      // Check whether the box passed as an argument matches or exceeds the entire texture.
       VkOffset3D lockOffset;
       VkExtent3D lockExtent;
 
@@ -4677,7 +4716,7 @@
     // If we are not locking the entire image
     // a partial discard is meant to occur.
     // We can't really implement that, so just ignore discard
-    // if we are not locking the full resource
+    // if we are not locking the full resource.
 
     // DISCARD is also ignored for MANAGED and SYSTEMMEM.
     // DISCARD is not ignored for non-DYNAMIC unlike what the docs say.
@@ -4700,7 +4739,6 @@
     needsReadback &= pResource->GetImage() != nullptr || !(Flags & D3DLOCK_DISCARD);
     pResource->SetNeedsReadback(Subresource, false);
-
     if (unlikely(pResource->GetMapMode() == D3D9_COMMON_TEXTURE_MAP_MODE_BACKED || needsReadback)) {
       // Create mapping buffer if it doesn't exist yet. (POOL_DEFAULT)
       pResource->CreateBuffer(!needsReadback);
@@ -4710,6 +4748,10 @@
     void* mapPtr = pResource->GetData(Subresource);
 
     if (unlikely(needsReadback)) {
+      // The texture was written to on the GPU.
+      // This can be either the image (for D3DPOOL_DEFAULT)
+      // or the buffer directly (for D3DPOOL_SYSTEMMEM).
+
       DxvkBufferSlice mappedBufferSlice = pResource->GetBufferSlice(Subresource);
       const Rc<DxvkBuffer> mappedBuffer = pResource->GetBuffer();
 
@@ -4736,6 +4778,9 @@
       // lock MSAA render targets even though
       // that's entirely illegal and they explicitly
       // tell us that they do NOT want to lock them...
+      //
+      // resourceImage is null because the image reference was moved to mappedImage
+      // for images that need to be resolved.
       if (resourceImage != nullptr) {
         EmitCs([
           cMainImage = resourceImage,
@@ -4763,6 +4808,8 @@
         });
       }
 
+      // If packedFormat is VK_FORMAT_UNDEFINED,
+      // DxvkContext::copyImageToBuffer will automatically take the format from the image.
       VkFormat packedFormat = GetPackedDepthStencilFormat(desc.Format);
 
       EmitCs([
@@ -4780,6 +4827,7 @@
       TrackTextureMappingBufferSequenceNumber(pResource, Subresource);
     }
 
+    // Wait until the buffer is idle, which may include waiting for the copy (and resolve) we just issued.
     if (!WaitForResource(*mappedBuffer, pResource->GetMappingBufferSequenceNumber(Subresource), Flags))
       return D3DERR_WASSTILLDRAWING;
   }
@@ -4787,7 +4835,8 @@
     const bool atiHack = desc.Format == D3D9Format::ATI1 || desc.Format == D3D9Format::ATI2;
     // Set up map pointer.
     if (atiHack) {
-      // We need to lie here. The game is expected to use this info and do a workaround.
+      // The API doesn't treat this as a block compressed format here.
+      // So we need to lie here. The game is expected to use this info and do a workaround.
       // It's stupid. I know.
       pLockedBox->RowPitch = align(std::max(desc.Width >> MipLevel, 1u), 4);
       pLockedBox->SlicePitch = pLockedBox->RowPitch * std::max(desc.Height >> MipLevel, 1u);
@@ -4806,6 +4855,7 @@
     pResource->SetLocked(Subresource, true);
 
+    // Make sure the amount of mapped texture memory stays below the threshold.
     UnmapTextures();
 
     const bool readOnly = Flags & D3DLOCK_READONLY;
@@ -4826,6 +4876,7 @@
     }
 
     if (IsPoolManaged(desc.Pool) && !readOnly) {
+      // Managed textures are uploaded at draw time.
       pResource->SetNeedsUpload(Subresource, true);
 
       for (uint32_t i : bit::BitMask(m_activeTextures)) {
@@ -4908,6 +4959,7 @@
     const D3DBOX& box = pResource->GetDirtyBox(subresource.arrayLayer);
 
+    // The dirty box is only tracked for mip 0. Scale it for the mip level we're going to upload.
     VkExtent3D mip0Extent = { box.Right - box.Left, box.Bottom - box.Top, box.Back - box.Front };
     VkExtent3D extent = util::computeMipLevelExtent(mip0Extent, subresource.mipLevel);
     VkOffset3D mip0Offset = { int32_t(box.Left), int32_t(box.Top), int32_t(box.Front) };
@@ -4929,6 +4981,8 @@
           VkOffset3D SrcOffset,
           VkExtent3D SrcExtent,
           VkOffset3D DestOffset) {
+    // Wait until the amount of used staging memory is under a certain threshold to avoid using
+    // too much memory and even more so to avoid using too much address space.
     WaitStagingBuffer();
 
     const Rc<DxvkImage> image = pDestTexture->GetImage();
@@ -4959,6 +5013,9 @@
     }
 
     if (likely(convertFormat.FormatType == D3D9ConversionFormat_None)) {
+      // The texture does not use a format that needs to be converted in a compute shader,
+      // so we just need to make sure the passed size and offset are not out of range and properly aligned,
+      // copy the data to a staging buffer, and then copy that on the GPU to the actual image.
       VkOffset3D alignedDestOffset = {
         int32_t(alignDown(DestOffset.x, formatInfo->blockSize.width)),
         int32_t(alignDown(DestOffset.y, formatInfo->blockSize.height)),
@@ -4985,6 +5042,8 @@
         + srcOffsetBlockCount.y * pitch
         + srcOffsetBlockCount.x * formatInfo->elementSize;
 
+      // Get the mapping pointer from MapTexture, which also keeps track of the texture
+      // in case it is unmappable.
       const void* mapPtr = MapTexture(pSrcTexture, SrcSubresource);
       VkDeviceSize dirtySize = extentBlockCount.width * extentBlockCount.height * extentBlockCount.depth * formatInfo->elementSize;
       D3D9BufferSlice slice = AllocStagingBuffer(dirtySize);
@@ -5013,8 +5072,10 @@
       TrackTextureMappingBufferSequenceNumber(pSrcTexture, SrcSubresource);
     } else {
+      // The texture uses a format which gets converted by a compute shader.
       const void* mapPtr = MapTexture(pSrcTexture, SrcSubresource);
 
+      // The compute shader does not support converting only a subrect of the texture
       if (unlikely(SrcOffset.x != 0 || SrcOffset.y != 0 || SrcOffset.z != 0
         || DestOffset.x != 0 || DestOffset.y != 0 || DestOffset.z != 0
         || SrcExtent != srcTexLevelExtent)) {
@@ -5039,7 +5100,7 @@
       D3D9BufferSlice slice = AllocStagingBuffer(pSrcTexture->GetMipSize(SrcSubresource));
       VkDeviceSize pitch = align(srcBlockCount.width * formatElementSize, 4);
 
-      const DxvkFormatInfo* convertedFormatInfo = lookupFormatInfo(convertFormat.FormatColor); 
+      const DxvkFormatInfo* convertedFormatInfo = lookupFormatInfo(convertFormat.FormatColor);
       VkImageSubresourceLayers convertedDstLayers = { convertedFormatInfo->aspectMask, dstSubresource.mipLevel, dstSubresource.arrayLayer, 1 };
 
       util::packImageData(
@@ -5141,14 +5202,17 @@
     const bool directMapping = pResource->GetMapMode() == D3D9_COMMON_BUFFER_MAP_MODE_DIRECT;
     const bool needsReadback = pResource->NeedsReadback();
 
-    Rc<DxvkBuffer> mappingBuffer = pResource->GetBuffer();
-
     uint8_t* data = nullptr;
 
     if ((Flags & D3DLOCK_DISCARD) && (directMapping || needsReadback)) {
+      // If we're not directly mapped and don't need readback,
+      // the buffer is not currently getting used anyway,
+      // so there's no reason to waste memory by discarding.
+      //
       // Allocate a new backing slice for the buffer and set
       // it as the 'new' mapped slice. This assumes that the
       // only way to invalidate a buffer is by mapping it.
+      Rc<DxvkBuffer> mappingBuffer = pResource->GetBuffer();
       auto bufferSlice = pResource->DiscardMapSlice();
       data = reinterpret_cast<uint8_t*>(bufferSlice->mapPtr());
 
@@ -5162,17 +5226,21 @@
       pResource->SetNeedsReadback(false);
     } else {
+      // The application either didn't specify DISCARD or the buffer is guaranteed to be idle anyway.
+
       // Use map pointer from previous map operation. This
       // way we don't have to synchronize with the CS thread
       // if the map mode is D3DLOCK_NOOVERWRITE.
       data = reinterpret_cast<uint8_t*>(pResource->GetMappedSlice()->mapPtr());
 
-      const bool needsReadback = pResource->NeedsReadback();
       const bool readOnly = Flags & D3DLOCK_READONLY;
       // NOOVERWRITE promises that they will not write in a currently used area.
       const bool noOverwrite = Flags & D3DLOCK_NOOVERWRITE;
       const bool directMapping = pResource->GetMapMode() == D3D9_COMMON_BUFFER_MAP_MODE_DIRECT;
+
+      // If we're not directly mapped, we can rely on needsReadback to tell us if a sync is required.
       const bool skipWait = (!needsReadback && (readOnly || !directMapping)) || noOverwrite;
+
       if (!skipWait) {
         const Rc<DxvkBuffer> mappingBuffer = pResource->GetBuffer();
         if (!WaitForResource(*mappingBuffer, pResource->GetMappingBufferSequenceNumber(), Flags))
@@ -5184,7 +5252,6 @@
     // The offset/size is not clamped to or affected by the desc size.
     data += OffsetToLock;
-
     *ppbData = reinterpret_cast<void*>(data);
 
     DWORD oldFlags = pResource->GetMapFlags();
@@ -5197,13 +5264,18 @@
     pResource->SetMapFlags(Flags | oldFlags);
 
     pResource->IncrementLockCount();
 
+    // We just mapped a buffer, which may have come with an address space cost.
+    // Unmap textures if the amount of mapped texture memory exceeds the threshold.
     UnmapTextures();
+
     return D3D_OK;
   }
 
 
   HRESULT D3D9DeviceEx::FlushBuffer(
         D3D9CommonBuffer* pResource) {
+    // Wait until the amount of used staging memory is under a certain threshold to avoid using
+    // too much memory and even more so to avoid using too much address space.
     WaitStagingBuffer();
 
     auto dstBuffer = pResource->GetBufferSlice();
@@ -5245,14 +5317,19 @@
     if (pResource->DecrementLockCount() != 0)
       return D3D_OK;
 
+    // Nothing else to do for directly mapped buffers. Those were already written.
     if (pResource->GetMapMode() != D3D9_COMMON_BUFFER_MAP_MODE_BUFFER)
       return D3D_OK;
 
+    // There is no part of the buffer that hasn't been uploaded yet.
+    // This shouldn't happen.
     if (pResource->DirtyRange().IsDegenerate())
       return D3D_OK;
 
     pResource->SetMapFlags(0);
 
+    // Only D3DPOOL_DEFAULT buffers get uploaded in UnlockBuffer.
+    // D3DPOOL_SYSTEMMEM and D3DPOOL_MANAGED get uploaded at draw time.
     if (pResource->Desc()->Pool != D3DPOOL_DEFAULT)
       return D3D_OK;
 
@@ -5613,6 +5690,7 @@
       ? sizeof(D3D9FixedFunctionVertexBlendDataSW)
       : sizeof(D3D9FixedFunctionVertexBlendDataHW));
 
+    // Allocate a constant buffer for values that would otherwise get passed as spec constants, so that fast-linked pipelines can use them.
     if (m_usingGraphicsPipelines) {
       m_specBuffer = D3D9ConstantBuffer(this,
         VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
@@ -5640,11 +5718,15 @@
     uint32_t floatCount = m_vsFloatConstsCount;
     if (constSet.meta.needsConstantCopies) {
+      // If the shader requires us to preserve shader defined constants,
+      // we copy those over. We need to adjust the number of used floats accordingly.
       auto shader = GetCommonShader(m_state.vertexShader);
       floatCount = std::max(floatCount, shader->GetMaxDefinedConstant() + 1);
     }
+    // If we statically know the last float constant accessed by the shader, we don't need to copy the rest.
     floatCount = std::min(floatCount, constSet.meta.maxConstIndexF);
 
+    // Calculate data sizes for each constant type.
     const uint32_t floatDataSize = floatCount * sizeof(Vector4);
     const uint32_t intDataSize = std::min(constSet.meta.maxConstIndexI, m_vsIntConstsCount) * sizeof(Vector4i);
     const uint32_t boolDataSize = divCeil(std::min(constSet.meta.maxConstIndexB, m_vsBoolConstsCount), 32u) * uint32_t(sizeof(uint32_t));
@@ -5655,6 +5737,8 @@
     auto mapPtr = CopySoftwareConstants(constSet.buffer, Src.fConsts, floatDataSize);
 
     if (constSet.meta.needsConstantCopies) {
+      // Copy shader defined constants over so they can be accessed
+      // with relative addressing.
       Vector4* data = reinterpret_cast<Vector4*>(mapPtr);
 
       auto& shaderConsts = GetCommonShader(m_state.vertexShader)->GetConstants();
@@ -5702,14 +5786,19 @@
     uint32_t floatCount = ShaderStage == DxsoProgramType::VertexShader ? m_vsFloatConstsCount : m_psFloatConstsCount;
     if (constSet.meta.needsConstantCopies) {
+      // If the shader requires us to preserve shader defined constants,
+      // we copy those over. We need to adjust the number of used floats accordingly.
       auto shader = GetCommonShader(Shader);
       floatCount = std::max(floatCount, shader->GetMaxDefinedConstant() + 1);
     }
+    // If we statically know the last float constant accessed by the shader, we don't need to copy the rest.
     floatCount = std::min(constSet.meta.maxConstIndexF, floatCount);
 
+    // There are very few int constants, so we put those into the same buffer at the start.
+    // We always allocate memory for all possible int constants to make sure alignment works out.
     const uint32_t intRange = caps::MaxOtherConstants * sizeof(Vector4i);
-    const uint32_t intDataSize = constSet.meta.maxConstIndexI * sizeof(Vector4i);
     uint32_t floatDataSize = floatCount * sizeof(Vector4);
 
+    // Determine the number of floats and the buffer size based on the highest used float constant and the alignment
     const uint32_t alignment = constSet.buffer.GetAlignment();
     const uint32_t bufferSize = align(std::max(floatDataSize + intRange, alignment), alignment);
     floatDataSize = bufferSize - intRange;
@@ -5717,12 +5806,15 @@
     void* mapPtr = constSet.buffer.Alloc(bufferSize);
     auto* dst = reinterpret_cast(mapPtr);
 
+    const uint32_t intDataSize = constSet.meta.maxConstIndexI * sizeof(Vector4i);
     if (constSet.meta.maxConstIndexI != 0)
       std::memcpy(dst->iConsts, Src.iConsts, intDataSize);
     if (constSet.meta.maxConstIndexF != 0)
      std::memcpy(dst->fConsts, Src.fConsts, floatDataSize);
 
     if (constSet.meta.needsConstantCopies) {
+      // Copy shader defined constants over so they can be accessed
+      // with relative addressing.
       Vector4* data = reinterpret_cast<Vector4*>(dst->fConsts);
 
       auto& shaderConsts = GetCommonShader(Shader)->GetConstants();
@@ -6549,6 +6641,9 @@
         if (i != 0)
           mode.writeMask = cWriteMasks[i - 1];
 
+        // Adjust the blend factor based on the render target alpha swizzle bit mask.
+        // Specific formats such as the XRGB ones require a ONE swizzle for alpha,
+        // which cannot be directly applied with the image view of the attachment.
         const bool alphaSwizzle = cAlphaMasks & (1 << i);
 
         auto NormalizeFactor = [alphaSwizzle](VkBlendFactor Factor) {
@@ -6870,7 +6965,7 @@
 
     if (inactiveMask)
       UnbindTextures(inactiveMask);
-    
+
     m_dirtyTextures &= ~usedMask;
   }
 
@@ -8405,6 +8500,7 @@
         ctx->setSpecConstant(VK_PIPELINE_BIND_POINT_GRAPHICS, i, cSpecInfo.data[i]);
     });
 
+    // Write spec constants into a buffer for fast-linked pipelines to use.
     if (m_usingGraphicsPipelines) {
       // TODO: Make uploading specialization information less naive.
       auto mapPtr = m_specBuffer.AllocSlice();
diff --git a/src/d3d9/d3d9_device.h b/src/d3d9/d3d9_device.h
index abb6f343c..2abe35336 100644
--- a/src/d3d9/d3d9_device.h
+++ b/src/d3d9/d3d9_device.h
@@ -673,6 +673,9 @@ namespace dxvk {
 
     static DxvkDeviceFeatures GetDeviceFeatures(const Rc<DxvkAdapter>& adapter);
 
+    /**
+     * \brief Returns whether the Vulkan device supports the required features for ProcessVertices
+     */
     bool SupportsSWVP();
 
     bool IsExtended();
@@ -719,7 +722,7 @@
     /**
      * \brief Unlocks a subresource of an image
-     * 
+     *
      * Passthrough to device unlock.
      * \param [in] Subresource The subresource of the image to unlock
      * \returns \c D3D_OK if the parameters are valid or D3DERR_INVALIDCALL if it fails.
      */
     HRESULT UnlockImage(
             UINT Face,
             UINT MipLevel);
 
+    /**
+     * \brief Uploads the given texture subresource from its local system memory copy.
+     */
     HRESULT FlushImage(
             D3D9CommonTexture* pResource,
             UINT Subresource);
 
+    /**
+     * \brief Copies the given part of a texture from the local system memory copy of the source texture
+     * to the image of the destination texture.
+     */
     void UpdateTextureFromBuffer(
             D3D9CommonTexture* pDestTexture,
             D3D9CommonTexture* pSrcTexture,
@@ -752,6 +762,9 @@
             void** ppbData,
             DWORD Flags);
 
+    /**
+     * \brief Uploads the given buffer from its local system memory copy.
+     */
     HRESULT FlushBuffer(
             D3D9CommonBuffer* pResource);
 
@@ -760,7 +773,7 @@
 
     /**
      * @brief Uploads data from D3DPOOL_SYSMEM + D3DUSAGE_DYNAMIC buffers and binds the temporary buffers.
-     * 
+     *
      * @param FirstVertexIndex The first vertex
      * @param NumVertices The number of vertices that are accessed. If this is 0, the vertex buffer binding will not be modified.
      * @param FirstIndex The first index
@@ -876,10 +889,22 @@
     void UploadConstants();
 
     void UpdateClipPlanes();
-    
+
+    /**
+     * \brief Updates the push constant data at the given offset with data from the specified pointer.
+     *
+     * \param Offset Offset at which the push constant data gets written.
+     * \param Length Length of the push constant data to write.
+     * \param pData Push constant data
+     */
     template <uint32_t Offset, uint32_t Length>
     void UpdatePushConstant(const void* pData);
 
+    /**
+     * \brief Updates the specified push constant based on the device state.
+     *
+     * \param Item Render state push constant to update
+     */
     template <D3D9RenderStateItem Item>
     void UpdatePushConstant();
 
@@ -971,12 +996,28 @@
 
     HRESULT InitialReset(D3DPRESENT_PARAMETERS* pPresentationParameters, D3DDISPLAYMODEEX* pFullscreenDisplayMode);
 
+    /**
+     * \brief Returns the allocator used for unmappable system memory texture data
+     */
     D3D9MemoryAllocator* GetAllocator() {
       return &m_memoryAllocator;
     }
 
+    /**
+     * \brief Gets the pointer of the system memory copy of the texture
+     *
+     * Also tracks the texture if it is unmappable.
+     */
     void* MapTexture(D3D9CommonTexture* pTexture, UINT Subresource);
+
+    /**
+     * \brief Moves the texture to the front of the LRU list of mapped textures
+     */
     void TouchMappedTexture(D3D9CommonTexture* pTexture);
+
+    /**
+     * \brief Removes the texture from the LRU list of mapped textures
+     */
     void RemoveMappedTexture(D3D9CommonTexture* pTexture);
 
     bool IsD3D8Compatible() const {
@@ -998,34 +1039,59 @@
     void NotifyFullscreen(HWND window, bool fullscreen);
     void NotifyWindowActivated(HWND window, bool activated);
 
+    /**
+     * \brief Increases the amount of D3DPOOL_DEFAULT resources that block a device reset
+     */
     void IncrementLosableCounter() {
       m_losableResourceCounter++;
     }
 
+    /**
+     * \brief Decreases the amount of D3DPOOL_DEFAULT resources that block a device reset
+     */
    void DecrementLosableCounter() {
      m_losableResourceCounter--;
    }
 
+    /**
+     * \brief Returns whether the device is configured to only support software vertex processing.
+     */
    bool CanOnlySWVP() const {
      return m_behaviorFlags & D3DCREATE_SOFTWARE_VERTEXPROCESSING;
    }
 
+    /**
+     * \brief Returns whether the device can be set to do software vertex processing.
+     * It may also be set up to only support software vertex processing.
+     */
    bool CanSWVP() const {
      return m_behaviorFlags & (D3DCREATE_MIXED_VERTEXPROCESSING | D3DCREATE_SOFTWARE_VERTEXPROCESSING);
    }
 
+    /**
+     * \brief Returns whether or not the device is currently set to do software vertex processing.
+     */
    bool IsSWVP() const {
      return m_isSWVP;
    }
 
+    /**
+     * \brief Returns the number of vertex shader modules generated for fixed function state.
+     */
    UINT GetFixedFunctionVSCount() const {
      return m_ffModules.GetVSCount();
    }
 
+    /**
+     * \brief Returns the number of fragment shader modules generated for fixed function state.
+     */
    UINT GetFixedFunctionFSCount() const {
      return m_ffModules.GetFSCount();
    }
 
+    /**
+     * \brief Returns the number of shader modules generated for ProcessVertices.
+     */
    UINT GetSWVPShaderCount() const {
      return m_swvpEmulator.GetShaderCount();
    }
@@ -1072,7 +1138,11 @@
      }
    }
 
-    // Device Reset detection for D3D9SwapChainEx::Present
+    /**
+     * \brief Returns whether the device has been reset and resets the tracking flag.
+     * Used for the deferred surface creation workaround.
+     * (Device Reset detection for D3D9SwapChainEx::Present)
+     */
     bool IsDeviceReset() {
       return std::exchange(m_deviceHasBeenReset, false);
     }
@@ -1082,13 +1152,25 @@
 
     void DetermineConstantLayouts(bool canSWVP);
 
+    /**
+     * \brief Allocates buffer memory for DrawPrimitiveUp draws
+     */
     D3D9BufferSlice AllocUPBuffer(VkDeviceSize size);
 
+    /**
+     * \brief Allocates buffer memory for resource uploads
+     */
     D3D9BufferSlice AllocStagingBuffer(VkDeviceSize size);
 
+    /**
+     * \brief Waits until the amount of used staging memory is below a certain threshold.
+     */
     void WaitStagingBuffer();
 
-    bool ShouldRecord();
+    /**
+     * \brief Returns whether the device is currently recording a StateBlock
+     */
+    inline bool ShouldRecord();
 
     HRESULT CreateShaderModule(
             D3D9CommonShader* pShaderModule,
@@ -1105,6 +1187,9 @@
       return (vertexCount - 1) * stride + std::max(m_state.vertexDecl->GetSize(0), stride);
     }
 
+    /**
+     * \brief Writes data to the given pointer and zeroes any excess buffer space
+     */
     inline void FillUPVertexBuffer(void* buffer, const void* userData, uint32_t dataSize, uint32_t bufferSize) {
       uint8_t* data = reinterpret_cast<uint8_t*>(buffer);
       // Don't copy excess data if we don't end up needing it.
@@ -1256,25 +1341,24 @@
             D3D9CommonTexture* pResource,
             UINT Subresource);
 
-    void UnmapTextures();
-
     uint64_t GetCurrentSequenceNumber();
 
     /**
-     * @brief Get the swapchain that was used the most recently for presenting
+     * \brief Unmaps the least recently used textures if the amount of mapped texture memory exceeds a threshold.
+     */
+    void UnmapTextures();
+
+    /**
+     * \brief Get the swapchain that was used the most recently for presenting
      * Has to be externally synchronized.
-     *
-     * @return D3D9SwapChainEx* Swapchain
      */
     D3D9SwapChainEx* GetMostRecentlyUsedSwapchain() {
       return m_mostRecentlyUsedSwapchain;
     }
 
     /**
-     * @brief Set the swapchain that was used the most recently for presenting
+     * \brief Set the swapchain that was used the most recently for presenting
      * Has to be externally synchronized.
-     *
-     * @param swapchain Swapchain
      */
     void SetMostRecentlyUsedSwapchain(D3D9SwapChainEx* swapchain) {
       m_mostRecentlyUsedSwapchain = swapchain;
diff --git a/src/dxso/dxso_compiler.cpp b/src/dxso/dxso_compiler.cpp
index 769b21fa8..36c09a750 100644
--- a/src/dxso/dxso_compiler.cpp
+++ b/src/dxso/dxso_compiler.cpp
@@ -861,14 +861,17 @@ namespace dxvk {
   DxsoRegisterValue DxsoCompiler::emitLoadConstant(
     const DxsoBaseRegister& reg,
     const DxsoBaseRegister* relative) {
-    // struct cBuffer_t {
+
+    // SWVP cbuffers:   Member                            Binding index
+    //                  float    f[8192];                 0
+    //                  int32_t  i[2048];                 1
+    //                  bool (uint32_t bitmask) b[256];   2
+
+    // HWVP cbuffer:    Member                            Member index
+    //                  int32_t  i[16];                   0
+    //                  float    f[256 or 224];           1
     //
-    //   Type     Member        Index
-    //
-    //   float    f[256 or 224];   0
-    //   int32_t  i[16];           1
-    //   uint32_t boolBitmask;     2
-    // }
+    // bools as spec constant bitmasks
     DxsoRegisterValue result = { };
 
     switch (reg.id.type) {
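Editor's note, not part of the patch above: the following is a minimal C++ sketch of the two constant-buffer layouts described by the new comment in emitLoadConstant. The struct and member names are hypothetical and chosen purely for illustration; DXVK's real declarations differ, and the bool-constant count (256) is taken from the comment as written.

#include <cstdint>

// SWVP path: three separate bindings, so each constant type gets its own, much larger array.
struct SwvpFloatConstants { float    f[8192][4]; };            // binding 0
struct SwvpIntConstants   { int32_t  i[2048][4]; };            // binding 1
struct SwvpBoolConstants  { uint32_t boolBitmask[256 / 32]; }; // binding 2, one bit per bool constant

// HWVP path: a single cbuffer with the int constants placed first,
// followed by the float constants; bools are passed as a spec constant bitmask instead.
struct HwvpConstants {
  int32_t i[16][4];  // member 0
  float   f[256][4]; // member 1 (256 or 224 registers, per the comment above)
};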